diff --git a/models.py b/models.py index 2b82ce69..b6538fc4 100755 --- a/models.py +++ b/models.py @@ -141,7 +141,7 @@ class YOLOLayer(nn.Module): self.grid_xy = torch.cat((self.grid_x, self.grid_y), 2) self.anchor_wh = torch.cat((self.anchor_w, self.anchor_h), 2) / nG - def forward(self, p, targets=None, batch_report=False, var=None): + def forward(self, p, targets=None, var=None): FT = torch.cuda.FloatTensor if p.is_cuda else torch.FloatTensor bs = 1 if ONNX_EXPORT else p.shape[0] # batch size nG = self.nG # number of grid points @@ -178,18 +178,7 @@ class YOLOLayer(nn.Module): # width = ((w.data * 2) ** 2) * self.anchor_w # height = ((h.data * 2) ** 2) * self.anchor_h - p_boxes = None - if batch_report: - # Predicted boxes: add offset and scale with anchors (in grid space, i.e. 0-13) - gx = x.data + self.grid_x[:, :, :nG, :nG] - gy = y.data + self.grid_y[:, :, :nG, :nG] - p_boxes = torch.stack((gx - width / 2, - gy - height / 2, - gx + width / 2, - gy + height / 2), 4) # x1y1x2y2 - - tx, ty, tw, th, mask, tcls, TP, FP, FN, TC = \ - build_targets(p_boxes, p_conf, p_cls, targets, self.anchor_wh, self.nA, self.nC, nG, batch_report) + tx, ty, tw, th, mask, tcls = build_targets(targets, self.anchor_wh, self.nA, self.nC, nG) tcls = tcls[mask] if x.is_cuda: @@ -214,26 +203,9 @@ class YOLOLayer(nn.Module): lconf = (k * 64) * BCEWithLogitsLoss(p_conf, mask.float()) # Sum loss components - balance_losses_flag = False - if balance_losses_flag: - k = 1 / self.loss_means.clone() - loss = (lx * k[0] + ly * k[1] + lw * k[2] + lh * k[3] + lconf * k[4] + lcls * k[5]) / k.mean() + loss = lx + ly + lw + lh + lconf + lcls - self.loss_means = self.loss_means * 0.99 + \ - FT([lx.data, ly.data, lw.data, lh.data, lconf.data, lcls.data]) * 0.01 - else: - loss = lx + ly + lw + lh + lconf + lcls - - # Sum False Positives from unassigned anchors - FPe = torch.zeros(self.nC) - if batch_report: - i = torch.sigmoid(p_conf[~mask]) > 0.5 - if i.sum() > 0: - FP_classes = torch.argmax(p_cls[~mask][i], 1) - FPe = torch.bincount(FP_classes, minlength=self.nC).float().cpu() # extra FPs - - return loss, loss.item(), lx.item(), ly.item(), lw.item(), lh.item(), lconf.item(), lcls.item(), \ - nT, TP, FP, FPe, FN, TC + return loss, loss.item(), lx.item(), ly.item(), lw.item(), lh.item(), lconf.item(), lcls.item(), nT else: if ONNX_EXPORT: @@ -273,9 +245,9 @@ class Darknet(nn.Module): self.module_defs[0]['height'] = img_size self.hyperparams, self.module_list = create_modules(self.module_defs) self.img_size = img_size - self.loss_names = ['loss', 'x', 'y', 'w', 'h', 'conf', 'cls', 'nT', 'TP', 'FP', 'FPe', 'FN', 'TC'] + self.loss_names = ['loss', 'x', 'y', 'w', 'h', 'conf', 'cls', 'nT'] - def forward(self, x, targets=None, batch_report=False, var=0): + def forward(self, x, targets=None, var=0): self.losses = defaultdict(float) is_training = targets is not None layer_outputs = [] @@ -296,7 +268,7 @@ class Darknet(nn.Module): elif module_def['type'] == 'yolo': # Train phase: get loss if is_training: - x, *losses = module[0](x, targets, batch_report, var) + x, *losses = module[0](x, targets, var) for name, loss in zip(self.loss_names, losses): self.losses[name] += loss # Test phase: Get detections @@ -306,29 +278,7 @@ class Darknet(nn.Module): layer_outputs.append(x) if is_training: - if batch_report: - self.losses['TC'] /= 3 # target category - metrics = torch.zeros(3, len(self.losses['FPe'])) # TP, FP, FN - - ui = np.unique(self.losses['TC'])[1:] - for i in ui: - j = self.losses['TC'] == float(i) - metrics[0, i] = (self.losses['TP'][j] > 0).sum().float() # TP - metrics[1, i] = (self.losses['FP'][j] > 0).sum().float() # FP - metrics[2, i] = (self.losses['FN'][j] == 3).sum().float() # FN - metrics[1] += self.losses['FPe'] - - self.losses['TP'] = metrics[0].sum() - self.losses['FP'] = metrics[1].sum() - self.losses['FN'] = metrics[2].sum() - self.losses['metrics'] = metrics - else: - self.losses['TP'] = 0 - self.losses['FP'] = 0 - self.losses['FN'] = 0 - self.losses['nT'] /= 3 - self.losses['TC'] = 0 if ONNX_EXPORT: output = torch.cat(output, 1) # merge the 3 layers 85 x (507, 2028, 8112) to 85 x 10647 diff --git a/train.py b/train.py index 293ceee1..4364712d 100644 --- a/train.py +++ b/train.py @@ -20,7 +20,6 @@ def train( batch_size=16, accumulated_batches=1, weights='weights', - report=False, multi_scale=False, freeze_backbone=True, var=0, @@ -30,7 +29,7 @@ def train( if multi_scale: # pass maximum multi_scale size img_size = 608 else: - torch.backends.cudnn.benchmark = True + torch.backends.cudnn.benchmark = True # unsuitable for multiscale latest = os.path.join(weights, 'latest.pt') best = os.path.join(weights, 'best.pt') @@ -93,12 +92,11 @@ def train( model_info(model) t0 = time.time() - mean_recall, mean_precision = 0, 0 for epoch in range(epochs): epoch += start_epoch - print(('%8s%12s' + '%10s' * 14) % ('Epoch', 'Batch', 'x', 'y', 'w', 'h', 'conf', 'cls', 'total', 'P', 'R', - 'nTargets', 'TP', 'FP', 'FN', 'time')) + print(('%8s%12s' + '%10s' * 9) % ( + 'Epoch', 'Batch', 'x', 'y', 'w', 'h', 'conf', 'cls', 'total', 'nTargets', 'time')) # Update scheduler (automatic) # scheduler.step() @@ -124,7 +122,6 @@ def train( ui = -1 rloss = defaultdict(float) # running loss - metrics = torch.zeros(3, num_classes) optimizer.zero_grad() for i, (imgs, targets) in enumerate(dataloader): if sum([len(x) for x in targets]) < 1: # if no targets continue @@ -137,7 +134,7 @@ def train( g['lr'] = lr # Compute loss, compute gradient, update parameters - loss = model(imgs.to(device), targets, batch_report=report, var=var) + loss = model(imgs.to(device), targets, var=var) loss.backward() # accumulate gradient for x batches before optimizing @@ -150,27 +147,10 @@ def train( for key, val in model.losses.items(): rloss[key] = (rloss[key] * ui + val) / (ui + 1) - if report: - TP, FP, FN = metrics - metrics += model.losses['metrics'] - - # Precision - precision = TP / (TP + FP) - k = (TP + FP) > 0 - if k.sum() > 0: - mean_precision = precision[k].mean() - - # Recall - recall = TP / (TP + FN) - k = (TP + FN) > 0 - if k.sum() > 0: - mean_recall = recall[k].mean() - - s = ('%8s%12s' + '%10.3g' * 14) % ( + s = ('%8s%12s' + '%10.3g' * 9) % ( '%g/%g' % (epoch, epochs - 1), '%g/%g' % (i, len(dataloader) - 1), rloss['x'], rloss['y'], rloss['w'], rloss['h'], rloss['conf'], rloss['cls'], - rloss['loss'], mean_precision, mean_recall, model.losses['nT'], model.losses['TP'], - model.losses['FP'], model.losses['FN'], time.time() - t0) + rloss['loss'], model.losses['nT'], time.time() - t0) t0 = time.time() print(s) @@ -214,9 +194,8 @@ if __name__ == '__main__': parser.add_argument('--img-size', type=int, default=32 * 13, help='pixels') parser.add_argument('--weights', type=str, default='weights', help='path to store weights') parser.add_argument('--resume', action='store_true', help='resume training flag') - parser.add_argument('--report', action='store_true', help='report TP, FP, FN, P and R per batch (slower)') parser.add_argument('--freeze', action='store_true', help='freeze darknet53.conv.74 layers for first epoch') - parser.add_argument('--var', type=float, default=0, help='optional test variable') + parser.add_argument('--var', type=float, default=0, help='test variable') opt = parser.parse_args() print(opt, end='\n\n') @@ -231,7 +210,6 @@ if __name__ == '__main__': batch_size=opt.batch_size, accumulated_batches=opt.accumulated_batches, weights=opt.weights, - report=opt.report, multi_scale=opt.multi_scale, freeze_backbone=opt.freeze, var=opt.var, diff --git a/utils/utils.py b/utils/utils.py index 7209c9bf..3dd2dcd1 100755 --- a/utils/utils.py +++ b/utils/utils.py @@ -214,7 +214,7 @@ def bbox_iou(box1, box2, x1y1x2y2=True): return inter_area / (b1_area + b2_area - inter_area + 1e-16) -def build_targets(pred_boxes, pred_conf, pred_cls, target, anchor_wh, nA, nC, nG, batch_report): +def build_targets(target, anchor_wh, nA, nC, nG): """ returns nT, nCorrect, tx, ty, tw, th, tconf, tcls """ @@ -226,9 +226,6 @@ def build_targets(pred_boxes, pred_conf, pred_cls, target, anchor_wh, nA, nC, nG th = torch.zeros(nB, nA, nG, nG) tconf = torch.ByteTensor(nB, nA, nG, nG).fill_(0) tcls = torch.ByteTensor(nB, nA, nG, nG, nC).fill_(0) # nC = number of classes - TP = torch.ByteTensor(nB, max(nT)).fill_(0) - FP = torch.ByteTensor(nB, max(nT)).fill_(0) - FN = torch.ByteTensor(nB, max(nT)).fill_(0) TC = torch.ShortTensor(nB, max(nT)).fill_(-1) # target category for b in range(nB): @@ -293,18 +290,7 @@ def build_targets(pred_boxes, pred_conf, pred_cls, target, anchor_wh, nA, nC, nG tcls[b, a, gj, gi, tc] = 1 tconf[b, a, gj, gi] = 1 - if batch_report: - # predicted classes and confidence - tb = torch.cat((gx - gw / 2, gy - gh / 2, gx + gw / 2, gy + gh / 2)).view(4, -1).t() # target boxes - pcls = torch.argmax(pred_cls[b, a, gj, gi], 1).cpu() - pconf = torch.sigmoid(pred_conf[b, a, gj, gi]).cpu() - iou_pred = bbox_iou(tb, pred_boxes[b, a, gj, gi].cpu()) - - TP[b, i] = (pconf > 0.5) & (iou_pred > 0.5) & (pcls == tc) - FP[b, i] = (pconf > 0.5) & (TP[b, i] == 0) # coordinates or class are wrong - FN[b, i] = pconf <= 0.5 # confidence score is too low (set to zero) - - return tx, ty, tw, th, tconf, tcls, TP, FP, FN, TC + return tx, ty, tw, th, tconf, tcls def non_max_suppression(prediction, conf_thres=0.5, nms_thres=0.4):