From a18ad6025fb939f5be275aeae525a991e6ad5a46 Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Tue, 8 Oct 2019 13:25:50 +0200
Subject: [PATCH] updates

---
 utils/datasets.py | 208 ++++++++++++++++++++++++++++++++--------------
 1 file changed, 144 insertions(+), 64 deletions(-)

diff --git a/utils/datasets.py b/utils/datasets.py
index 6f891365..4a5152dc 100755
--- a/utils/datasets.py
+++ b/utils/datasets.py
@@ -99,7 +99,7 @@ class LoadImages:  # for inference
         print('image %g/%g %s: ' % (self.count, self.nF, path), end='')
 
         # Padded resize
-        img, *_ = letterbox(img0, new_shape=self.img_size)
+        img = letterbox(img0, new_shape=self.img_size)[0]
 
         # Normalize RGB
         img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB
@@ -172,7 +172,7 @@ class LoadWebcam:  # for inference
         print('webcam %g: ' % self.count, end='')
 
         # Padded resize
-        img, *_ = letterbox(img0, new_shape=self.img_size)
+        img = letterbox(img0, new_shape=self.img_size)[0]
 
         # Normalize RGB
         img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB
@@ -406,69 +406,54 @@ class LoadImagesAndLabels(Dataset):  # for training/testing
         label_path = self.label_files[index]
         hyp = self.hyp
 
-        # Load image
-        img = self.imgs[index]
-        if img is None:
-            img = cv2.imread(img_path)  # BGR
-            assert img is not None, 'Image Not Found ' + img_path
-            r = self.img_size / max(img.shape)  # size ratio
-            if self.augment and r < 1:  # if training (NOT testing), downsize to inference shape
-                h, w, _ = img.shape
-                img = cv2.resize(img, (int(w * r), int(h * r)), interpolation=cv2.INTER_LINEAR)  # INTER_LINEAR fastest
+        mosaic = True  # load 4 images at a time into a mosaic
+        if mosaic:
+            # Load mosaic
+            img, labels = load_mosaic(self, index)
+            h, w, _ = img.shape
 
-        # Augment colorspace
-        augment_hsv = True
-        if self.augment and augment_hsv:
-            # SV augmentation by 50%
-            img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)  # hue, sat, val
-            S = img_hsv[:, :, 1].astype(np.float32)  # saturation
-            V = img_hsv[:, :, 2].astype(np.float32)  # value
-
-            a = random.uniform(-1, 1) * hyp['hsv_s'] + 1
-            b = random.uniform(-1, 1) * hyp['hsv_v'] + 1
-            S *= a
-            V *= b
-
-            img_hsv[:, :, 1] = S if a < 1 else S.clip(None, 255)
-            img_hsv[:, :, 2] = V if b < 1 else V.clip(None, 255)
-            cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img)
-
-        # Letterbox
-        h, w, _ = img.shape
-        if self.rect:
-            shape = self.batch_shapes[self.batch[index]]
-            img, ratiow, ratioh, padw, padh = letterbox(img, new_shape=shape, mode='rect')
         else:
-            shape = self.img_size
-            img, ratiow, ratioh, padw, padh = letterbox(img, new_shape=shape, mode='square')
+            # Load image
+            img = load_image(self, index)
 
-        # Load labels
-        labels = []
-        if os.path.isfile(label_path):
-            x = self.labels[index]
-            if x is None:  # labels not preloaded
-                with open(label_path, 'r') as f:
-                    x = np.array([x.split() for x in f.read().splitlines()], dtype=np.float32)
+            # Letterbox
+            h, w, _ = img.shape
+            if self.rect:
+                img, ratio, padw, padh = letterbox(img, self.batch_shapes[self.batch[index]], mode='rect')
+            else:
+                img, ratio, padw, padh = letterbox(img, self.img_size, mode='square')
 
-            if x.size > 0:
-                # Normalized xywh to pixel xyxy format
-                labels = x.copy()
-                labels[:, 1] = ratiow * w * (x[:, 1] - x[:, 3] / 2) + padw
-                labels[:, 2] = ratioh * h * (x[:, 2] - x[:, 4] / 2) + padh
-                labels[:, 3] = ratiow * w * (x[:, 1] + x[:, 3] / 2) + padw
-                labels[:, 4] = ratioh * h * (x[:, 2] + x[:, 4] / 2) + padh
+            # Load labels
+            labels = []
+            if os.path.isfile(label_path):
+                x = self.labels[index]
+                if x is None:  # labels not preloaded
+                    with open(label_path, 'r') as f:
+                        x = np.array([x.split() for x in f.read().splitlines()], dtype=np.float32)
+
+                if x.size > 0:
+                    # Normalized xywh to pixel xyxy format
+                    labels = x.copy()
+                    labels[:, 1] = ratio[0] * w * (x[:, 1] - x[:, 3] / 2) + padw
+                    labels[:, 2] = ratio[1] * h * (x[:, 2] - x[:, 4] / 2) + padh
+                    labels[:, 3] = ratio[0] * w * (x[:, 1] + x[:, 3] / 2) + padw
+                    labels[:, 4] = ratio[1] * h * (x[:, 2] + x[:, 4] / 2) + padh
 
-        # Augment image and labels
         if self.augment:
-            img, labels = random_affine(img, labels,
-                                        degrees=hyp['degrees'],
-                                        translate=hyp['translate'],
-                                        scale=hyp['scale'],
-                                        shear=hyp['shear'])
+            # Augment colorspace
+            augment_hsv(img, hgain=hyp['hsv_h'], sgain=hyp['hsv_s'], vgain=0.0)
 
-            # Cutout
-            if random.random() < 0.9:
-                labels = cutout(img, labels)
+            # Augment imagespace
+            g = 0.0 if mosaic else 1.0  # do not augment mosaics
+            img, labels = random_affine(img, labels,
+                                        degrees=hyp['degrees'] * g,
+                                        translate=hyp['translate'] * g,
+                                        scale=hyp['scale'] * g,
+                                        shear=hyp['shear'] * g)
+
+            # Apply cutouts
+            # if random.random() < 0.9:
+            #     labels = cutout(img, labels)
 
         nL = len(labels)  # number of labels
         if nL:
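NOTE: in the hunk above, letterbox() now returns a single (width, height) ratio tuple instead of separate ratiow/ratioh values, and the label conversion indexes into that tuple. A minimal standalone sketch of the same normalized-xywh to pixel-xyxy mapping; the helper name xywh2xyxy_scaled and the sample values are illustrative only, not part of this file:

    import numpy as np

    def xywh2xyxy_scaled(x, w, h, ratio=(1.0, 1.0), padw=0.0, padh=0.0):
        # x: Nx5 array of [class, x_center, y_center, box_w, box_h], normalized to 0-1
        y = x.copy()
        y[:, 1] = ratio[0] * w * (x[:, 1] - x[:, 3] / 2) + padw  # top-left x
        y[:, 2] = ratio[1] * h * (x[:, 2] - x[:, 4] / 2) + padh  # top-left y
        y[:, 3] = ratio[0] * w * (x[:, 1] + x[:, 3] / 2) + padw  # bottom-right x
        y[:, 4] = ratio[1] * h * (x[:, 2] + x[:, 4] / 2) + padh  # bottom-right y
        return y

    labels = np.array([[0, 0.5, 0.5, 0.25, 0.25]], dtype=np.float32)  # one centered box
    print(xywh2xyxy_scaled(labels, w=640, h=480, ratio=(0.65, 0.65), padw=0.0, padh=52.0))
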
@@ -513,17 +498,112 @@ class LoadImagesAndLabels(Dataset):  # for training/testing
         return torch.stack(img, 0), torch.cat(label, 0), path, hw
 
 
+def load_image(self, index):
+    # loads 1 image from dataset
+    img = self.imgs[index]
+    if img is None:
+        img_path = self.img_files[index]
+        img = cv2.imread(img_path)  # BGR
+        assert img is not None, 'Image Not Found ' + img_path
+        r = self.img_size / max(img.shape)  # size ratio
+        if self.augment and r < 1:  # if training (NOT testing), downsize to inference shape
+            h, w, _ = img.shape
+            img = cv2.resize(img, (int(w * r), int(h * r)), interpolation=cv2.INTER_LINEAR)  # _LINEAR fastest
+    return img
+
+
+def augment_hsv(img, hgain=0.5, sgain=0.5, vgain=0.5):
+    # SV augmentation by 50%
+    img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)  # hue, sat, val
+    S = img_hsv[:, :, 1].astype(np.float32)  # saturation
+    V = img_hsv[:, :, 2].astype(np.float32)  # value
+
+    a = random.uniform(-1, 1) * sgain + 1
+    b = random.uniform(-1, 1) * vgain + 1
+    S *= a
+    V *= b
+
+    img_hsv[:, :, 1] = S if a < 1 else S.clip(None, 255)
+    img_hsv[:, :, 2] = V if b < 1 else V.clip(None, 255)
+    cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img)  # no return needed
+
+
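NOTE: augment_hsv() above edits the BGR uint8 image in place, so callers ignore any return value; in this version the hgain argument is accepted but not yet applied (only S and V are scaled). A usage sketch, assuming this repository's utils/datasets.py is importable and using arbitrary gain values:

    import numpy as np
    from utils.datasets import augment_hsv

    img = np.full((416, 416, 3), 128, dtype=np.uint8)  # stand-in for a loaded BGR image
    augment_hsv(img, hgain=0.0, sgain=0.5, vgain=0.5)   # randomly scales saturation and value in place
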
+def load_mosaic(self, index):
+    # loads up images in a mosaic
+
+    labels4 = []
+    s = self.img_size
+    xc, yc = [int(random.uniform(s * 0.5, s * 1.5)) for _ in range(2)]  # mosaic center x, y
+    img4 = np.zeros((s * 2, s * 2, 3), dtype=np.uint8) + 128  # base image with 4 tiles
+    indices = [index] + [random.randint(0, len(self.labels) - 1) for _ in range(3)]  # 3 additional image indices
+    for i, index in enumerate(indices):
+        # Load image
+        img = load_image(self, index)
+        h, w, _ = img.shape
+
+        # merge img into img4
+        if i == 0:  # top left
+            x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc  # xmin, ymin, xmax, ymax (large image)
+            x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h  # xmin, ymin, xmax, ymax (small image)
+        elif i == 1:  # top right
+            x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc
+            x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
+        elif i == 2:  # bottom left
+            x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)
+            x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, max(xc, w), min(y2a - y1a, h)
+        elif i == 3:  # bottom right
+            x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)
+            x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)
+
+        img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b]  # img4[ymin:ymax, xmin:xmax]
+        padw = x1a - x1b
+        padh = y1a - y1b
+
+        # Load labels
+        label_path = self.label_files[index]
+        if os.path.isfile(label_path):
+            x = self.labels[index]
+            if x is None:  # labels not preloaded
+                with open(label_path, 'r') as f:
+                    x = np.array([x.split() for x in f.read().splitlines()], dtype=np.float32)
+
+            if x.size > 0:
+                # Normalized xywh to pixel xyxy format
+                labels = x.copy()
+                labels[:, 1] = w * (x[:, 1] - x[:, 3] / 2) + padw
+                labels[:, 2] = h * (x[:, 2] - x[:, 4] / 2) + padh
+                labels[:, 3] = w * (x[:, 1] + x[:, 3] / 2) + padw
+                labels[:, 4] = h * (x[:, 2] + x[:, 4] / 2) + padh
+
+            labels4.append(labels)
+    labels4 = np.concatenate(labels4, 0)
+
+    # hyp = self.hyp
+    # img4, labels4 = random_affine(img4, labels4,
+    #                               degrees=hyp['degrees'],
+    #                               translate=hyp['translate'],
+    #                               scale=hyp['scale'],
+    #                               shear=hyp['shear'])
+
+    # Center crop
+    a = s // 2
+    img4 = img4[a:a + s, a:a + s]
+    labels4[:, 1:] -= a
+
+    return img4, labels4
+
+
 def letterbox(img, new_shape=416, color=(128, 128, 128), mode='auto', interp=cv2.INTER_AREA):
     # Resize a rectangular image to a 32 pixel multiple rectangle
     # https://github.com/ultralytics/yolov3/issues/232
     shape = img.shape[:2]  # current shape [height, width]
 
     if isinstance(new_shape, int):
-        ratio = float(new_shape) / max(shape)
+        r = float(new_shape) / max(shape)  # ratio = new / old
     else:
-        ratio = max(new_shape) / max(shape)  # ratio = new / old
-    ratiow, ratioh = ratio, ratio
-    new_unpad = (int(round(shape[1] * ratio)), int(round(shape[0] * ratio)))
+        r = max(new_shape) / max(shape)
+    ratio = r, r  # width, height ratios
+    new_unpad = (int(round(shape[1] * r)), int(round(shape[0] * r)))
 
     # Compute padding https://github.com/ultralytics/yolov3/issues/232
     if mode is 'auto':  # minimum rectangle
@@ -538,14 +618,14 @@ def letterbox(img, new_shape=416, color=(128, 128, 128), mode='auto', interp=cv2
     elif mode is 'scaleFill':
         dw, dh = 0.0, 0.0
         new_unpad = (new_shape, new_shape)
-        ratiow, ratioh = new_shape / shape[1], new_shape / shape[0]
+        ratio = new_shape / shape[1], new_shape / shape[0]  # width, height ratios
 
     if shape[::-1] != new_unpad:  # resize
         img = cv2.resize(img, new_unpad, interpolation=interp)  # INTER_AREA is better, INTER_LINEAR is faster
     top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
     left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
     img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
-    return img, ratiow, ratioh, dw, dh
+    return img, ratio, dw, dh
 
 
 def random_affine(img, targets=(), degrees=10, translate=.1, scale=.1, shear=10):
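
NOTE: letterbox() now returns (img, ratio, dw, dh), with ratio as a (width, height) tuple, which is why the inference loaders above take only the padded image via [0]. A usage sketch with arbitrary shapes, assuming this repository's utils/datasets.py is importable:

    import numpy as np
    from utils.datasets import letterbox

    img0 = np.zeros((480, 640, 3), dtype=np.uint8)  # stand-in BGR frame (height, width, channels)
    img, ratio, dw, dh = letterbox(img0, new_shape=416, mode='square')
    print(img.shape, ratio, dw, dh)  # (416, 416, 3) (0.65, 0.65) 0.0 52.0

    img = letterbox(img0, new_shape=416)[0]  # inference paths that only need the image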