updates

2019-03-31 19:57:44 +02:00 · 2019-03-31 19:57:44 +02:00 · 09b02d2029
parent 8901e96a38
commit 09b02d2029
2 changed files with 19 additions and 17 deletions
--- a/utils/datasets.py
+++ b/utils/datasets.py
@@ -100,8 +100,9 @@ class LoadImagesAndLabels(Dataset):  # for training/testing
        assert len(self.img_files) > 0, 'No images found in %s' % path
        self.img_size = img_size
        self.augment = augment
-        self.label_files = [x.replace('images', 'labels').replace('.bmp', '.txt').replace('.jpg', '.txt').replace('.png', '.txt')
-                            for x in self.img_files]
+        self.label_files = [
+            x.replace('images', 'labels').replace('.bmp', '.txt').replace('.jpg', '.txt').replace('.png', '.txt')
+            for x in self.img_files]

    def __len__(self):
        return len(self.img_files)
@@ -116,7 +117,7 @@ class LoadImagesAndLabels(Dataset):  # for training/testing
        augment_hsv = True
        if self.augment and augment_hsv:
            # SV augmentation by 50%
-            fraction = 0.50
+            fraction = 0.50  # must be < 1.0
            img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
            S = img_hsv[:, :, 1].astype(np.float32)
            V = img_hsv[:, :, 2].astype(np.float32)
@@ -124,15 +125,15 @@ class LoadImagesAndLabels(Dataset):  # for training/testing
            a = (random.random() * 2 - 1) * fraction + 1
            S *= a
            if a > 1:
-                np.clip(S, a_min=0, a_max=255, out=S)
+                np.clip(S, None, 255, out=S)

            a = (random.random() * 2 - 1) * fraction + 1
            V *= a
            if a > 1:
-                np.clip(V, a_min=0, a_max=255, out=V)
+                np.clip(V, None, 255, out=V)

-            img_hsv[:, :, 1] = S.astype(np.uint8)
-            img_hsv[:, :, 2] = V.astype(np.uint8)
+            img_hsv[:, :, 1] = S  # .astype(np.uint8)
+            img_hsv[:, :, 2] = V  # .astype(np.uint8)
            cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img)

        h, w, _ = img.shape
@@ -196,7 +197,8 @@ class LoadImagesAndLabels(Dataset):  # for training/testing
        return torch.stack(img, 0), torch.cat(label, 0), path, hw


-def letterbox(img, height=416, color=(127.5, 127.5, 127.5)):  # resize a rectangular image to a padded square
+def letterbox(img, height=416, color=(127.5, 127.5, 127.5)):
+    # Resize a rectangular image to a padded square
    shape = img.shape[:2]  # shape = [height, width]
    ratio = float(height) / max(shape)  # ratio  = old / new
    new_shape = (round(shape[1] * ratio), round(shape[0] * ratio))
@@ -256,7 +258,7 @@ def random_affine(img, targets=(), degrees=(-10, 10), translate=(.1, .1), scale=
        y = xy[:, [1, 3, 5, 7]]
        xy = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T

-        # apply angle-based reduction
+        # apply angle-based reduction of bounding boxes
        radians = a * math.pi / 180
        reduction = max(abs(math.sin(radians)), abs(math.cos(radians))) ** 0.5
        x = (xy[:, 2] + xy[:, 0]) / 2
--- a/utils/utils.py
+++ b/utils/utils.py
@@ -249,7 +249,7 @@ def wh_iou(box1, box2):

 def compute_loss(p, targets):  # predictions, targets
    FT = torch.cuda.FloatTensor if p[0].is_cuda else torch.FloatTensor
-    loss, lxy, lwh, lcls, lconf = FT([0]), FT([0]), FT([0]), FT([0]), FT([0])
+    lxy, lwh, lcls, lconf = FT([0]), FT([0]), FT([0]), FT([0])
    txy, twh, tcls, indices = targets
    MSE = nn.MSELoss()
    CE = nn.CrossEntropyLoss()
@@ -267,13 +267,13 @@ def compute_loss(p, targets):  # predictions, targets
            pi = pi0[b, a, gj, gi]  # predictions closest to anchors
            tconf[b, a, gj, gi] = 1  # conf

-            lxy += k * MSE(torch.sigmoid(pi[..., 0:2]), txy[i])  # xy
-            lwh += k * MSE(pi[..., 2:4], twh[i])  # wh
-            lcls += (k / 4) * CE(pi[..., 5:], tcls[i])
+            lxy += k * MSE(torch.sigmoid(pi[..., 0:2]), txy[i])  # xy loss
+            lwh += k * MSE(pi[..., 2:4], twh[i])  # wh loss
+            lcls += (k / 4) * CE(pi[..., 5:], tcls[i])  # class_conf loss

        # pos_weight = FT([gp[i] / min(gp) * 4.])
        # BCE = nn.BCEWithLogitsLoss(pos_weight=pos_weight)
-        lconf += (k * 64) * BCE(pi0[..., 4], tconf)
+        lconf += (k * 64) * BCE(pi0[..., 4], tconf)  # obj_conf loss
    loss = lxy + lwh + lconf + lcls

    # Add to dictionary
@@ -300,7 +300,7 @@ def build_targets(model, targets):
        iou = [wh_iou(x, gwh) for x in anchor_vec]
        iou, a = torch.stack(iou, 0).max(0)  # best iou and anchor

-        # reject below threshold ious (OPTIONAL)
+        # reject below threshold ious (OPTIONAL, increases P, lowers R)
        reject = True
        if reject:
            j = iou > 0.01
@@ -309,7 +309,7 @@ def build_targets(model, targets):
            t = targets

        # Indices
-        b, c = t[:, 0:2].long().t()  # target image, class
+        b, c = t[:, :2].long().t()  # target image, class
        gxy = t[:, 2:4] * nG
        gi, gj = gxy.long().t()  # grid_i, grid_j
        indices.append((b, a, gj, gi))
@@ -370,7 +370,7 @@ def non_max_suppression(prediction, conf_thres=0.5, nms_thres=0.4):

        # Box (center x, center y, width, height) to (x1, y1, x2, y2)
        pred[:, :4] = xywh2xyxy(pred[:, :4])
-        pred[:, 4] *= class_conf   # improves mAP from 0.549 to 0.551
+        pred[:, 4] *= class_conf  # improves mAP from 0.549 to 0.551

        # Detections ordered as (x1y1x2y2, obj_conf, class_conf, class_pred)
        pred = torch.cat((pred[:, :5], class_conf.unsqueeze(1), class_pred), 1)