From 9a18166382d07985b6c2b671fdb4b4d5bfac1ef5 Mon Sep 17 00:00:00 2001
From: Glenn Jocher <glenn.jocher@ultralytics.com>
Date: Fri, 20 Sep 2019 14:58:57 +0200
Subject: [PATCH] updates

---
 utils/datasets.py | 56 +++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 54 insertions(+), 2 deletions(-)

diff --git a/utils/datasets.py b/utils/datasets.py
index e380471..e5052f0 100755
--- a/utils/datasets.py
+++ b/utils/datasets.py
@@ -447,6 +447,10 @@ class LoadImagesAndLabels(Dataset):  # for training/testing
                                         scale=hyp['scale'],
                                         shear=hyp['shear'])
 
+            # Cutout
+            if random.random() < 0.9:
+                labels = cutout(img, labels)
+
         nL = len(labels)  # number of labels
         if nL:
             # convert xyxy to xywh
@@ -459,14 +463,14 @@ class LoadImagesAndLabels(Dataset):  # for training/testing
         if self.augment:
             # random left-right flip
             lr_flip = True
-            if lr_flip and random.random() > 0.5:
+            if lr_flip and random.random() < 0.5:
                 img = np.fliplr(img)
                 if nL:
                     labels[:, 1] = 1 - labels[:, 1]
 
             # random up-down flip
             ud_flip = False
-            if ud_flip and random.random() > 0.5:
+            if ud_flip and random.random() < 0.5:
                 img = np.flipud(img)
                 if nL:
                     labels[:, 2] = 1 - labels[:, 2]
@@ -596,6 +600,54 @@ def random_affine(img, targets=(), degrees=10, translate=.1, scale=.1, shear=10)
     return imw, targets
 
 
+def cutout(image, labels):
+    # https://arxiv.org/abs/1708.04552
+    # https://github.com/hysts/pytorch_cutout/blob/master/dataloader.py
+    # https://towardsdatascience.com/when-conventional-wisdom-fails-revisiting-data-augmentation-for-self-driving-cars-4831998c5509
+    h, w = image.shape[:2]
+
+    def bbox_ioa(box1, box2, x1y1x2y2=True):
+        # Returns the intersection over box2 area given box1, box2. box1 is 4, box2 is nx4. boxes are x1y1x2y2
+        box2 = box2.transpose()
+
+        # Get the coordinates of bounding boxes
+        # x1, y1, x2, y2 = box1
+        b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3]
+        b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3]
+
+        # Intersection area
+        inter_area = (np.minimum(b1_x2, b2_x2) - np.maximum(b1_x1, b2_x1)).clip(0) * \
+                     (np.minimum(b1_y2, b2_y2) - np.maximum(b1_y1, b2_y1)).clip(0)
+
+        # box2 area
+        box2_area = (b2_x2 - b2_x1) * (b2_y2 - b2_y1) + 1e-16
+
+        # Intersection over box2 area
+        return inter_area / box2_area
+
+    # random mask_size up to 50% image size
+    mask_h = random.randint(1, int(h * 0.5))
+    mask_w = random.randint(1, int(w * 0.5))
+
+    # box center
+    cx = random.randint(0, h)
+    cy = random.randint(0, w)
+
+    xmin = max(0, cx - mask_w // 2)
+    ymin = max(0, cy - mask_h // 2)
+    xmax = min(w, xmin + mask_w)
+    ymax = min(h, ymin + mask_h)
+
+    # apply random color mask
+    mask_color = [random.randint(0, 255) for _ in range(3)]
+    image[ymin:ymax, xmin:xmax] = mask_color
+
+    # return unobscured labels
+    box = np.array([xmin, ymin, xmax, ymax], dtype=np.float32)
+    ioa = bbox_ioa(box, labels[:, 1:5])  # intersection over area
+    return labels[ioa < 0.8]  # > 80% obscured labels removed
+
+
 def convert_images2bmp():
     # cv2.imread() jpg at 230 img/s, *.bmp at 400 img/s
     for path in ['../coco/images/val2014/', '../coco/images/train2014/']: