From 09b02d2029ffa57880793c2e593e2b54175307ad Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Sun, 31 Mar 2019 19:57:44 +0200
Subject: [PATCH] updates

---
 utils/datasets.py | 20 +++++++++++---------
 utils/utils.py    | 16 ++++++++--------
 2 files changed, 19 insertions(+), 17 deletions(-)

diff --git a/utils/datasets.py b/utils/datasets.py
index 5128bfe8..b6397fbd 100755
--- a/utils/datasets.py
+++ b/utils/datasets.py
@@ -100,8 +100,9 @@ class LoadImagesAndLabels(Dataset):  # for training/testing
         assert len(self.img_files) > 0, 'No images found in %s' % path
         self.img_size = img_size
         self.augment = augment
-        self.label_files = [x.replace('images', 'labels').replace('.bmp', '.txt').replace('.jpg', '.txt').replace('.png', '.txt')
-                            for x in self.img_files]
+        self.label_files = [
+            x.replace('images', 'labels').replace('.bmp', '.txt').replace('.jpg', '.txt').replace('.png', '.txt')
+            for x in self.img_files]
 
     def __len__(self):
         return len(self.img_files)
@@ -116,7 +117,7 @@ class LoadImagesAndLabels(Dataset):  # for training/testing
         augment_hsv = True
         if self.augment and augment_hsv:
             # SV augmentation by 50%
-            fraction = 0.50
+            fraction = 0.50  # must be < 1.0
             img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
             S = img_hsv[:, :, 1].astype(np.float32)
             V = img_hsv[:, :, 2].astype(np.float32)
@@ -124,15 +125,15 @@ class LoadImagesAndLabels(Dataset):  # for training/testing
             a = (random.random() * 2 - 1) * fraction + 1
             S *= a
             if a > 1:
-                np.clip(S, a_min=0, a_max=255, out=S)
+                np.clip(S, None, 255, out=S)
 
             a = (random.random() * 2 - 1) * fraction + 1
             V *= a
             if a > 1:
-                np.clip(V, a_min=0, a_max=255, out=V)
+                np.clip(V, None, 255, out=V)
 
-            img_hsv[:, :, 1] = S.astype(np.uint8)
-            img_hsv[:, :, 2] = V.astype(np.uint8)
+            img_hsv[:, :, 1] = S  # .astype(np.uint8)
+            img_hsv[:, :, 2] = V  # .astype(np.uint8)
             cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img)
 
         h, w, _ = img.shape
@@ -196,7 +197,8 @@ class LoadImagesAndLabels(Dataset):  # for training/testing
         return torch.stack(img, 0), torch.cat(label, 0), path, hw
 
 
-def letterbox(img, height=416, color=(127.5, 127.5, 127.5)):  # resize a rectangular image to a padded square
+def letterbox(img, height=416, color=(127.5, 127.5, 127.5)):
+    # Resize a rectangular image to a padded square
     shape = img.shape[:2]  # shape = [height, width]
     ratio = float(height) / max(shape)  # ratio = old / new
     new_shape = (round(shape[1] * ratio), round(shape[0] * ratio))
@@ -256,7 +258,7 @@ def random_affine(img, targets=(), degrees=(-10, 10), translate=(.1, .1), scale=
         y = xy[:, [1, 3, 5, 7]]
         xy = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T
 
-        # apply angle-based reduction
+        # apply angle-based reduction of bounding boxes
         radians = a * math.pi / 180
         reduction = max(abs(math.sin(radians)), abs(math.cos(radians))) ** 0.5
         x = (xy[:, 2] + xy[:, 0]) / 2
diff --git a/utils/utils.py b/utils/utils.py
index dbc9e82a..6e70331c 100755
--- a/utils/utils.py
+++ b/utils/utils.py
@@ -249,7 +249,7 @@ def wh_iou(box1, box2):
 
 def compute_loss(p, targets):  # predictions, targets
     FT = torch.cuda.FloatTensor if p[0].is_cuda else torch.FloatTensor
-    loss, lxy, lwh, lcls, lconf = FT([0]), FT([0]), FT([0]), FT([0]), FT([0])
+    lxy, lwh, lcls, lconf = FT([0]), FT([0]), FT([0]), FT([0])
     txy, twh, tcls, indices = targets
     MSE = nn.MSELoss()
     CE = nn.CrossEntropyLoss()
@@ -267,13 +267,13 @@ def compute_loss(p, targets):  # predictions, targets
             pi = pi0[b, a, gj, gi]  # predictions closest to anchors
             tconf[b, a, gj, gi] = 1  # conf
 
-            lxy += k * MSE(torch.sigmoid(pi[..., 0:2]), txy[i])  # xy
-            lwh += k * MSE(pi[..., 2:4], twh[i])  # wh
-            lcls += (k / 4) * CE(pi[..., 5:], tcls[i])
+            lxy += k * MSE(torch.sigmoid(pi[..., 0:2]), txy[i])  # xy loss
+            lwh += k * MSE(pi[..., 2:4], twh[i])  # wh loss
+            lcls += (k / 4) * CE(pi[..., 5:], tcls[i])  # class_conf loss
 
         # pos_weight = FT([gp[i] / min(gp) * 4.])
         # BCE = nn.BCEWithLogitsLoss(pos_weight=pos_weight)
-        lconf += (k * 64) * BCE(pi0[..., 4], tconf)
+        lconf += (k * 64) * BCE(pi0[..., 4], tconf)  # obj_conf loss
     loss = lxy + lwh + lconf + lcls
 
     # Add to dictionary
@@ -300,7 +300,7 @@ def build_targets(model, targets):
         iou = [wh_iou(x, gwh) for x in anchor_vec]
         iou, a = torch.stack(iou, 0).max(0)  # best iou and anchor
 
-        # reject below threshold ious (OPTIONAL)
+        # reject below threshold ious (OPTIONAL, increases P, lowers R)
         reject = True
         if reject:
             j = iou > 0.01
@@ -309,7 +309,7 @@ def build_targets(model, targets):
             t = targets
 
         # Indices
-        b, c = t[:, 0:2].long().t()  # target image, class
+        b, c = t[:, :2].long().t()  # target image, class
         gxy = t[:, 2:4] * nG
         gi, gj = gxy.long().t()  # grid_i, grid_j
         indices.append((b, a, gj, gi))
@@ -370,7 +370,7 @@ def non_max_suppression(prediction, conf_thres=0.5, nms_thres=0.4):
 
         # Box (center x, center y, width, height) to (x1, y1, x2, y2)
         pred[:, :4] = xywh2xyxy(pred[:, :4])
-        pred[:, 4] *= class_conf  # improves mAP from 0.549 to 0.551
+        pred[:, 4] *= class_conf  # improves mAP from 0.549 to 0.551
 
         # Detections ordered as (x1y1x2y2, obj_conf, class_conf, class_pred)
         pred = torch.cat((pred[:, :5], class_conf.unsqueeze(1), class_pred), 1)
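
For reference, below is a minimal standalone sketch of the S/V augmentation modified in the utils/datasets.py hunk above. The function name augment_hsv_sv and the in-place write-back into img are illustrative assumptions for this sketch only; they are not part of the patch.

import random

import cv2
import numpy as np


def augment_hsv_sv(img, fraction=0.50):
    # Randomly scale the S and V channels of a uint8 BGR image by up to +/- fraction,
    # mirroring the augmentation in LoadImagesAndLabels.__getitem__ in the patch above.
    # fraction must be < 1.0 so the scale factor stays positive.
    img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    S = img_hsv[:, :, 1].astype(np.float32)
    V = img_hsv[:, :, 2].astype(np.float32)

    a = (random.random() * 2 - 1) * fraction + 1  # random scale in [1 - fraction, 1 + fraction]
    S *= a
    if a > 1:
        np.clip(S, None, 255, out=S)  # only the upper bound can be exceeded when a > 1

    a = (random.random() * 2 - 1) * fraction + 1
    V *= a
    if a > 1:
        np.clip(V, None, 255, out=V)

    img_hsv[:, :, 1] = S  # float values are cast back to uint8 on assignment
    img_hsv[:, :, 2] = V
    cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img)  # convert back into the original buffer
    return img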