updates

2019-10-08 13:25:50 +02:00 · 2019-10-08 13:25:50 +02:00 · a18ad6025f
parent cfc562c2c8
commit a18ad6025f
1 changed files with 144 additions and 64 deletions
--- a/utils/datasets.py
+++ b/utils/datasets.py
@ -99,7 +99,7 @@ class LoadImages:  # for inference
            print('image %g/%g %s: ' % (self.count, self.nF, path), end='')
        # Padded resize
-        img, *_ = letterbox(img0, new_shape=self.img_size)
+        img = letterbox(img0, new_shape=self.img_size)[0]
        # Normalize RGB
        img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB
@ -172,7 +172,7 @@ class LoadWebcam:  # for inference
        print('webcam %g: ' % self.count, end='')
        # Padded resize
-        img, *_ = letterbox(img0, new_shape=self.img_size)
+        img = letterbox(img0, new_shape=self.img_size)[0]
        # Normalize RGB
        img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB
@ -406,69 +406,54 @@ class LoadImagesAndLabels(Dataset):  # for training/testing
        label_path = self.label_files[index]
        hyp = self.hyp
-        # Load image
+        mosaic = True  # load 4 images at a time into a mosaic
-        img = self.imgs[index]
+        if mosaic:
-        if img is None:
+            # Load mosaic
-            img = cv2.imread(img_path)  # BGR
+            img, labels = load_mosaic(self, index)
-            assert img is not None, 'Image Not Found ' + img_path
+            h, w, _ = img.shape
            r = self.img_size / max(img.shape)  # size ratio
            if self.augment and r < 1:  # if training (NOT testing), downsize to inference shape
                h, w, _ = img.shape
                img = cv2.resize(img, (int(w * r), int(h * r)), interpolation=cv2.INTER_LINEAR)  # INTER_LINEAR fastest
        # Augment colorspace
        augment_hsv = True
        if self.augment and augment_hsv:
            # SV augmentation by 50%
            img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)  # hue, sat, val
            S = img_hsv[:, :, 1].astype(np.float32)  # saturation
            V = img_hsv[:, :, 2].astype(np.float32)  # value
            a = random.uniform(-1, 1) * hyp['hsv_s'] + 1
            b = random.uniform(-1, 1) * hyp['hsv_v'] + 1
            S *= a
            V *= b
            img_hsv[:, :, 1] = S if a < 1 else S.clip(None, 255)
            img_hsv[:, :, 2] = V if b < 1 else V.clip(None, 255)
            cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img)
        # Letterbox
        h, w, _ = img.shape
        if self.rect:
            shape = self.batch_shapes[self.batch[index]]
            img, ratiow, ratioh, padw, padh = letterbox(img, new_shape=shape, mode='rect')
        else:
-            shape = self.img_size
+            # Load image
-            img, ratiow, ratioh, padw, padh = letterbox(img, new_shape=shape, mode='square')
+            img = load_image(self, index)
-        # Load labels
+            # Letterbox
-        labels = []
+            h, w, _ = img.shape
-        if os.path.isfile(label_path):
+            if self.rect:
-            x = self.labels[index]
+                img, ratio, padw, padh = letterbox(img, self.batch_shapes[self.batch[index]], mode='rect')
-            if x is None:  # labels not preloaded
+            else:
-                with open(label_path, 'r') as f:
+                img, ratio, padw, padh = letterbox(img, self.img_size, mode='square')
                    x = np.array([x.split() for x in f.read().splitlines()], dtype=np.float32)
-            if x.size > 0:
+            # Load labels
-                # Normalized xywh to pixel xyxy format
+            labels = []
-                labels = x.copy()
+            if os.path.isfile(label_path):
-                labels[:, 1] = ratiow * w * (x[:, 1] - x[:, 3] / 2) + padw
+                x = self.labels[index]
-                labels[:, 2] = ratioh * h * (x[:, 2] - x[:, 4] / 2) + padh
+                if x is None:  # labels not preloaded
-                labels[:, 3] = ratiow * w * (x[:, 1] + x[:, 3] / 2) + padw
+                    with open(label_path, 'r') as f:
-                labels[:, 4] = ratioh * h * (x[:, 2] + x[:, 4] / 2) + padh
+                        x = np.array([x.split() for x in f.read().splitlines()], dtype=np.float32)
                if x.size > 0:
                    # Normalized xywh to pixel xyxy format
                    labels = x.copy()
                    labels[:, 1] = ratio[0] * w * (x[:, 1] - x[:, 3] / 2) + padw
                    labels[:, 2] = ratio[1] * h * (x[:, 2] - x[:, 4] / 2) + padh
                    labels[:, 3] = ratio[0] * w * (x[:, 1] + x[:, 3] / 2) + padw
                    labels[:, 4] = ratio[1] * h * (x[:, 2] + x[:, 4] / 2) + padh
        # Augment image and labels
        if self.augment:
-            img, labels = random_affine(img, labels,
+            # Augment colorspace
-                                        degrees=hyp['degrees'],
+            augment_hsv(img, hgain=hyp['hsv_h'], sgain=hyp['hsv_s'], vgain=0.0)
                                        translate=hyp['translate'],
                                        scale=hyp['scale'],
                                        shear=hyp['shear'])
-            # Cutout
+            # Augment imagespace
-            if random.random() < 0.9:
+            g = 0.0 if mosaic else 1.0  # do not augment mosaics
-                labels = cutout(img, labels)
+            img, labels = random_affine(img, labels,
                                        degrees=hyp['degrees'] * g,
                                        translate=hyp['translate'] * g,
                                        scale=hyp['scale'] * g,
                                        shear=hyp['shear'] * g)
            # Apply cutouts
            # if random.random() < 0.9:
            #     labels = cutout(img, labels)
        nL = len(labels)  # number of labels
        if nL:
@ -513,17 +498,112 @@ class LoadImagesAndLabels(Dataset):  # for training/testing
        return torch.stack(img, 0), torch.cat(label, 0), path, hw
 def load_image(self, index):
    """Return image number `index` of the dataset as loaded by OpenCV (BGR).

    A non-None entry in `self.imgs[index]` is returned as-is. Otherwise the file
    `self.img_files[index]` is read from disk and, when `self.augment` is set and
    the image is larger than `self.img_size`, scaled down so that its longest side
    matches `self.img_size`.

    Raises:
        AssertionError: if OpenCV could not read the image file.
    """
    cached = self.imgs[index]
    if cached is not None:
        return cached

    path = self.img_files[index]
    image = cv2.imread(path)  # BGR
    assert image is not None, 'Image Not Found ' + path

    scale = self.img_size / max(image.shape)  # target size / longest side
    if self.augment and scale < 1:  # training only: shrink to the inference size
        height, width, _ = image.shape
        new_size = (int(width * scale), int(height * scale))
        image = cv2.resize(image, new_size, interpolation=cv2.INTER_LINEAR)  # INTER_LINEAR is the fastest
    return image
 def augment_hsv(img, hgain=0.5, sgain=0.5, vgain=0.5):
    """Randomly rescale the saturation and value channels of a BGR image in place.

    Each of the two channels is multiplied by its own random factor drawn uniformly
    from [1 - gain, 1 + gain]. The converted result is written back into `img`
    through `dst=img`, so nothing is returned.

    Args:
        img: BGR image, modified in place (presumably uint8 - confirm with callers).
        hgain: accepted for interface compatibility; the hue channel is not touched.
        sgain: half-width of the random saturation factor around 1.
        vgain: half-width of the random value factor around 1.
    """
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)  # hue, sat, val

    # Draw the saturation factor first, then the value factor (keeps the RNG sequence stable).
    sat_factor = random.uniform(-1, 1) * sgain + 1
    val_factor = random.uniform(-1, 1) * vgain + 1

    for channel, factor in ((1, sat_factor), (2, val_factor)):
        scaled = hsv[:, :, channel].astype(np.float32) * factor
        # Only an amplifying factor can push values above 255, so clip just in that case.
        hsv[:, :, channel] = scaled if factor < 1 else scaled.clip(None, 255)

    cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR, dst=img)  # writes into img, no return needed
 def load_mosaic(self, index):
    """Combine image `index` with three randomly chosen images into one mosaic.

    The four images are pasted around a random centre point on a grey
    (2 * img_size) x (2 * img_size) canvas: top-left, top-right, bottom-left,
    bottom-right. The canvas is then centre-cropped to img_size x img_size.

    Args:
        index: dataset index of the first (top-left) tile.

    Returns:
        (img4, labels4): the cropped uint8 mosaic of shape (img_size, img_size, 3)
        and a float32 array with one row per box. Column 0 is copied unchanged from
        the label file (presumably the class id) and columns 1-4 are pixel
        x1, y1, x2, y2 in the cropped mosaic. Boxes are not clipped to the crop.
        When no tile has any labels an empty (0, 5) array is returned.
    """
    s = self.img_size
    xc, yc = [int(random.uniform(s * 0.5, s * 1.5)) for _ in range(2)]  # mosaic center x, y
    img4 = np.full((s * 2, s * 2, 3), 128, dtype=np.uint8)  # grey base image holding the 4 tiles
    indices = [index] + [random.randint(0, len(self.labels) - 1) for _ in range(3)]  # 3 additional image indices

    labels4 = []
    for i, tile_index in enumerate(indices):
        # Load image
        img = load_image(self, tile_index)
        h, w, _ = img.shape

        # Destination window on the canvas (a) and matching source window in the tile (b),
        # both as xmin, ymin, xmax, ymax.
        if i == 0:  # top left
            x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc
            x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h
        elif i == 1:  # top right
            x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc
            x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
        elif i == 2:  # bottom left
            x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)
            # The source window ends at the tile's right edge (was max(xc, w), which numpy clipped to w).
            x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)
        else:  # i == 3, bottom right
            x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)
            x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)

        img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b]  # img4[ymin:ymax, xmin:xmax]
        padw = x1a - x1b  # offset of the tile's origin on the canvas
        padh = y1a - y1b

        # Load labels
        label_path = self.label_files[tile_index]
        if os.path.isfile(label_path):
            x = self.labels[tile_index]
            if x is None:  # labels not preloaded
                with open(label_path, 'r') as f:
                    x = np.array([row.split() for row in f.read().splitlines()], dtype=np.float32)

            if x.size > 0:
                # Normalized xywh to pixel xyxy format
                labels = x.copy()
                labels[:, 1] = w * (x[:, 1] - x[:, 3] / 2) + padw
                labels[:, 2] = h * (x[:, 2] - x[:, 4] / 2) + padh
                labels[:, 3] = w * (x[:, 1] + x[:, 3] / 2) + padw
                labels[:, 4] = h * (x[:, 2] + x[:, 4] / 2) + padh
                # Append only when this tile has boxes; otherwise `labels` would be
                # undefined (first tile) or still hold the previous tile's boxes.
                labels4.append(labels)

    # np.concatenate raises on an empty list, so fall back to an empty label array.
    if labels4:
        labels4 = np.concatenate(labels4, 0)
    else:
        labels4 = np.zeros((0, 5), dtype=np.float32)

    # NOTE: no affine augmentation is applied to the mosaic here (the call was left disabled).

    # Center crop
    a = s // 2
    img4 = img4[a:a + s, a:a + s]
    labels4[:, 1:] -= a  # shift boxes into the cropped frame

    return img4, labels4
 def letterbox(img, new_shape=416, color=(128, 128, 128), mode='auto', interp=cv2.INTER_AREA):
    # Resize a rectangular image to a 32 pixel multiple rectangle
    # https://github.com/ultralytics/yolov3/issues/232
    shape = img.shape[:2]  # current shape [height, width]
    if isinstance(new_shape, int):
-        ratio = float(new_shape) / max(shape)
+        r = float(new_shape) / max(shape)  # ratio  = new / old
    else:
-        ratio = max(new_shape) / max(shape)  # ratio  = new / old
+        r = max(new_shape) / max(shape)
-    ratiow, ratioh = ratio, ratio
+    ratio = r, r  # width, height ratios
-    new_unpad = (int(round(shape[1] * ratio)), int(round(shape[0] * ratio)))
+    new_unpad = (int(round(shape[1] * r)), int(round(shape[0] * r)))
    # Compute padding https://github.com/ultralytics/yolov3/issues/232
    if mode is 'auto':  # minimum rectangle
@ -538,14 +618,14 @@ def letterbox(img, new_shape=416, color=(128, 128, 128), mode='auto', interp=cv2
    elif mode is 'scaleFill':
        dw, dh = 0.0, 0.0
        new_unpad = (new_shape, new_shape)
-        ratiow, ratioh = new_shape / shape[1], new_shape / shape[0]
+        ratio = new_shape / shape[1], new_shape / shape[0]  # width, height ratios
    if shape[::-1] != new_unpad:  # resize
        img = cv2.resize(img, new_unpad, interpolation=interp)  # INTER_AREA is better, INTER_LINEAR is faster
    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
    img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
-    return img, ratiow, ratioh, dw, dh
+    return img, ratio, dw, dh
 def random_affine(img, targets=(), degrees=10, translate=.1, scale=.1, shear=10):