From 9a18166382d07985b6c2b671fdb4b4d5bfac1ef5 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Fri, 20 Sep 2019 14:58:57 +0200 Subject: [PATCH] updates --- utils/datasets.py | 56 +++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 54 insertions(+), 2 deletions(-) diff --git a/utils/datasets.py b/utils/datasets.py index e3804711..e5052f03 100755 --- a/utils/datasets.py +++ b/utils/datasets.py @@ -447,6 +447,10 @@ class LoadImagesAndLabels(Dataset): # for training/testing scale=hyp['scale'], shear=hyp['shear']) + # Cutout + if random.random() < 0.9: + labels = cutout(img, labels) + nL = len(labels) # number of labels if nL: # convert xyxy to xywh @@ -459,14 +463,14 @@ class LoadImagesAndLabels(Dataset): # for training/testing if self.augment: # random left-right flip lr_flip = True - if lr_flip and random.random() > 0.5: + if lr_flip and random.random() < 0.5: img = np.fliplr(img) if nL: labels[:, 1] = 1 - labels[:, 1] # random up-down flip ud_flip = False - if ud_flip and random.random() > 0.5: + if ud_flip and random.random() < 0.5: img = np.flipud(img) if nL: labels[:, 2] = 1 - labels[:, 2] @@ -596,6 +600,54 @@ def random_affine(img, targets=(), degrees=10, translate=.1, scale=.1, shear=10) return imw, targets +def cutout(image, labels): + # https://arxiv.org/abs/1708.04552 + # https://github.com/hysts/pytorch_cutout/blob/master/dataloader.py + # https://towardsdatascience.com/when-conventional-wisdom-fails-revisiting-data-augmentation-for-self-driving-cars-4831998c5509 + h, w = image.shape[:2] + + def bbox_ioa(box1, box2, x1y1x2y2=True): + # Returns the intersection over box2 area given box1, box2. box1 is 4, box2 is nx4. boxes are x1y1x2y2 + box2 = box2.transpose() + + # Get the coordinates of bounding boxes + # x1, y1, x2, y2 = box1 + b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3] + b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3] + + # Intersection area + inter_area = (np.minimum(b1_x2, b2_x2) - np.maximum(b1_x1, b2_x1)).clip(0) * \ + (np.minimum(b1_y2, b2_y2) - np.maximum(b1_y1, b2_y1)).clip(0) + + # box2 area + box2_area = (b2_x2 - b2_x1) * (b2_y2 - b2_y1) + 1e-16 + + # Intersection over box2 area + return inter_area / box2_area + + # random mask_size up to 50% image size + mask_h = random.randint(1, int(h * 0.5)) + mask_w = random.randint(1, int(w * 0.5)) + + # box center + cx = random.randint(0, h) + cy = random.randint(0, w) + + xmin = max(0, cx - mask_w // 2) + ymin = max(0, cy - mask_h // 2) + xmax = min(w, xmin + mask_w) + ymax = min(h, ymin + mask_h) + + # apply random color mask + mask_color = [random.randint(0, 255) for _ in range(3)] + image[ymin:ymax, xmin:xmax] = mask_color + + # return unobscured labels + box = np.array([xmin, ymin, xmax, ymax], dtype=np.float32) + ioa = bbox_ioa(box, labels[:, 1:5]) # intersection over area + return labels[ioa < 0.8] # > 80% obscured labels removed + + def convert_images2bmp(): # cv2.imread() jpg at 230 img/s, *.bmp at 400 img/s for path in ['../coco/images/val2014/', '../coco/images/train2014/']: