Glenn Jocher 2019-08-07 01:54:41 +02:00
parent b53d6d6ecf
commit 056976b4fc
1 changed file with 32 additions and 26 deletions


@@ -217,15 +217,22 @@ class LoadImagesAndLabels(Dataset):  # for training/testing
         if augment or image_weights:  # cache labels for faster training
             self.labels = [np.zeros((0, 5))] * n
             extract_bounding_boxes = False
-            for i, file in enumerate(tqdm(self.label_files, desc='Caching labels')):
+            pbar = tqdm(self.label_files, desc='Reading labels')
+            nm, nf, ne = 0, 0, 0  # number missing, number found, number empty
+            for i, file in enumerate(pbar):
                 try:
                     with open(file, 'r') as f:
                         l = np.array([x.split() for x in f.read().splitlines()], dtype=np.float32)
+                except:
+                    nm += 1  # print('missing labels for image %s' % self.img_files[i])  # file missing
+                    continue
+
                 if l.shape[0]:
                     assert l.shape[1] == 5, '> 5 label columns: %s' % file
                     assert (l >= 0).all(), 'negative labels: %s' % file
                     assert (l[:, 1:] <= 1).all(), 'non-normalized or out of bounds coordinate labels: %s' % file
                     self.labels[i] = l
+                    nf += 1  # file found
 
                     # Extract object detection boxes for a second stage classifier
                     if extract_bounding_boxes:
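
Note: the hunk above narrows the try/except to the file read and keeps running counts of found, missing and empty label files, updating the tqdm bar text as it goes. A minimal standalone sketch of that counting pattern (using hypothetical label paths rather than the dataset's real self.label_files) could look like:

import numpy as np
from tqdm import tqdm

label_files = ['labels/img1.txt', 'labels/img2.txt']  # hypothetical paths
labels = [np.zeros((0, 5))] * len(label_files)
nm, nf, ne = 0, 0, 0  # number missing, found, empty

pbar = tqdm(label_files, desc='Reading labels')
for i, file in enumerate(pbar):
    try:
        with open(file, 'r') as f:
            l = np.array([x.split() for x in f.read().splitlines()], dtype=np.float32)
    except Exception:
        nm += 1  # label file missing or unreadable
        continue
    if l.shape[0]:
        labels[i] = l
        nf += 1  # non-empty label file
    else:
        ne += 1  # file exists but contains no boxes
    pbar.desc = 'Reading labels (%g found, %g missing, %g empty)' % (nf, nm, ne)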
@@ -233,24 +240,23 @@ class LoadImagesAndLabels(Dataset):  # for training/testing
                         img = cv2.imread(str(p))
                         h, w, _ = img.shape
                         for j, x in enumerate(l):
-                            f = '%s%sclassification%s%g_%g_%s' % (
-                                p.parent.parent, os.sep, os.sep, x[0], j, p.name)
+                            f = '%s%sclassifier%s%g_%g_%s' % (p.parent.parent, os.sep, os.sep, x[0], j, p.name)
                             if not os.path.exists(Path(f).parent):
                                 os.makedirs(Path(f).parent)  # make new output folder
                             box = xywh2xyxy(x[1:].reshape(-1, 4)).ravel()
-                            box = np.clip(box, 0, 1)  # clip boxes outside of image
-                            result = cv2.imwrite(f, img[int(box[1] * h):int(box[3] * h),
-                                                        int(box[0] * w):int(box[2] * w)])
-                            if not result:
-                                print('stop')
-                except:
-                    pass  # print('Warning: missing labels for %s' % self.img_files[i])  # missing label file
-            assert len(np.concatenate(self.labels, 0)) > 0, 'No labels found. Incorrect label paths provided.'
+                            b = np.clip(box, 0, 1)  # clip boxes outside of image
+                            ret_val = cv2.imwrite(f, img[int(b[1] * h):int(b[3] * h), int(b[0] * w):int(b[2] * w)])
+                            assert ret_val, 'Failure extracting classifier boxes'
+                else:
+                    ne += 1  # file empty
+
+                pbar.desc = 'Reading labels (%g found, %g missing, %g empty for %g images)' % (nf, nm, ne, n)
+            assert nf > 0, 'No labels found. Recommend correcting image and label paths.'
 
         # Cache images into memory for faster training (~5GB)
         cache_images = False
         if cache_images and augment:  # if training
-            for i in tqdm(range(min(len(self.img_files), 10000)), desc='Caching images'):  # max 10k images
+            for i in tqdm(range(min(len(self.img_files), 10000)), desc='Reading images'):  # max 10k images
                 img_path = self.img_files[i]
                 img = cv2.imread(img_path)  # BGR
                 assert img is not None, 'Image Not Found ' + img_path
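
Note: in the classifier-extraction path above, each label row is [class, x_center, y_center, width, height] with coordinates normalized to 0-1; xywh2xyxy converts it to corner form, the box is clipped to the image, and the corners are scaled by the pixel width/height to crop the patch. A rough sketch of that conversion and crop follows; the local xywh2xyxy below is a stand-in assumed to behave like the repo's utils function of the same name, and the file paths are hypothetical.

import cv2
import numpy as np

def xywh2xyxy(x):
    # normalized [xc, yc, w, h] -> normalized [x1, y1, x2, y2]
    y = np.zeros_like(x)
    y[:, 0] = x[:, 0] - x[:, 2] / 2  # top-left x
    y[:, 1] = x[:, 1] - x[:, 3] / 2  # top-left y
    y[:, 2] = x[:, 0] + x[:, 2] / 2  # bottom-right x
    y[:, 3] = x[:, 1] + x[:, 3] / 2  # bottom-right y
    return y

img = cv2.imread('example.jpg')  # hypothetical image path
assert img is not None, 'Image Not Found example.jpg'
h, w, _ = img.shape
x = np.array([0, 0.5, 0.5, 0.2, 0.3], dtype=np.float32)  # class, xc, yc, w, h

box = xywh2xyxy(x[1:].reshape(-1, 4)).ravel()
b = np.clip(box, 0, 1)  # keep the crop inside the image
crop = img[int(b[1] * h):int(b[3] * h), int(b[0] * w):int(b[2] * w)]
assert cv2.imwrite('classifier_crop.jpg', crop), 'Failure extracting classifier boxes'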