From a722601ef61149cc9e5135f58c762310627c970a Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Thu, 20 Sep 2018 18:03:19 +0200 Subject: [PATCH] Adam to SGD with burn-in --- detect.py | 3 +-- models.py | 25 +++++++++++++++++-------- test.py | 3 +-- train.py | 40 ++++++++++++++++++++++------------------ utils/utils.py | 12 +++++++----- 5 files changed, 48 insertions(+), 35 deletions(-) diff --git a/detect.py b/detect.py index 5b6c3ea3..1abfda65 100755 --- a/detect.py +++ b/detect.py @@ -18,7 +18,7 @@ parser.add_argument('-txt_out', type=bool, default=False) parser.add_argument('-cfg', type=str, default='cfg/yolov3.cfg', help='cfg file path') parser.add_argument('-class_path', type=str, default='data/coco.names', help='path to class label file') -parser.add_argument('-conf_thres', type=float, default=0.98, help='object confidence threshold') +parser.add_argument('-conf_thres', type=float, default=0.80, help='object confidence threshold') parser.add_argument('-nms_thres', type=float, default=0.45, help='iou threshold for non-maximum suppression') parser.add_argument('-batch_size', type=int, default=1, help='size of the batches') parser.add_argument('-img_size', type=int, default=32 * 13, help='size of each image dimension') @@ -33,7 +33,6 @@ def detect(opt): # Load model model = Darknet(opt.cfg, opt.img_size) - #weights_path = 'checkpoints/yolov3.weights' weights_path = 'checkpoints/yolov3.pt' if weights_path.endswith('.weights'): # saved in darknet format load_weights(model, weights_path) diff --git a/models.py b/models.py index d9ed6689..3da8fa55 100755 --- a/models.py +++ b/models.py @@ -100,7 +100,7 @@ class YOLOLayer(nn.Module): self.anchor_w = self.scaled_anchors[:, 0:1].view((1, nA, 1, 1)) self.anchor_h = self.scaled_anchors[:, 1:2].view((1, nA, 1, 1)) - def forward(self, p, targets=None, requestPrecision=False, epoch=None): + def forward(self, p, targets=None, requestPrecision=False): FT = torch.cuda.FloatTensor if p.is_cuda else torch.FloatTensor bs = p.shape[0] # batch size @@ -117,10 +117,18 @@ class YOLOLayer(nn.Module): # Get outputs x = torch.sigmoid(p[..., 0]) # Center x y = torch.sigmoid(p[..., 1]) # Center y - w = p[..., 2] # Width - h = p[..., 3] # Height - width = torch.exp(w.data) * self.anchor_w - height = torch.exp(h.data) * self.anchor_h + + # Width and height (yolo method) + # w = p[..., 2] # Width + # h = p[..., 3] # Height + # width = torch.exp(w.data) * self.anchor_w + # height = torch.exp(h.data) * self.anchor_h + + # Width and height (power method) + w = torch.sigmoid(p[..., 2]) # Width + h = torch.sigmoid(p[..., 3]) # Height + width = ((w.data * 2) ** 2) * self.anchor_w + height = ((h.data * 2) ** 2) * self.anchor_h # Add offset and scale with anchors (in grid space, i.e. 0-13) pred_boxes = FT(bs, self.nA, nG, nG, 4) @@ -151,6 +159,7 @@ class YOLOLayer(nn.Module): # Mask outputs to ignore non-existing objects (but keep confidence predictions) nM = mask.sum().float() + batch_size = len(targets) nT = sum([len(x) for x in targets]) if nM > 0: lx = 5 * MSELoss(x[mask], tx[mask]) @@ -166,7 +175,7 @@ class YOLOLayer(nn.Module): lconf += 0.5 * nM * BCEWithLogitsLoss2(pred_conf[~mask], mask[~mask].float()) - loss = lx + ly + lw + lh + lconf + lcls + loss = (lx + ly + lw + lh + lconf + lcls) / batch_size # Sum False Positives from unnasigned anchors i = torch.sigmoid(pred_conf[~mask]) > 0.99 @@ -202,7 +211,7 @@ class Darknet(nn.Module): self.img_size = img_size self.loss_names = ['loss', 'x', 'y', 'w', 'h', 'conf', 'cls', 'nT', 'TP', 'FP', 'FPe', 'FN', 'TC'] - def forward(self, x, targets=None, requestPrecision=False, epoch=None): + def forward(self, x, targets=None, requestPrecision=False): is_training = targets is not None output = [] self.losses = defaultdict(float) @@ -220,7 +229,7 @@ class Darknet(nn.Module): elif module_def['type'] == 'yolo': # Train phase: get loss if is_training: - x, *losses = module[0](x, targets, requestPrecision, epoch) + x, *losses = module[0](x, targets, requestPrecision) for name, loss in zip(self.loss_names, losses): self.losses[name] += loss # Test phase: Get detections diff --git a/test.py b/test.py index 5ff81729..64415126 100644 --- a/test.py +++ b/test.py @@ -7,7 +7,7 @@ parser = argparse.ArgumentParser() parser.add_argument('-batch_size', type=int, default=32, help='size of each image batch') parser.add_argument('-cfg', type=str, default='cfg/yolov3.cfg', help='path to model config file') parser.add_argument('-data_config_path', type=str, default='cfg/coco.data', help='path to data config file') -parser.add_argument('-weights_path', type=str, default='checkpoints/yolov3.weights', help='path to weights file') +parser.add_argument('-weights_path', type=str, default='checkpoints/yolov3.pt', help='path to weights file') parser.add_argument('-class_path', type=str, default='data/coco.names', help='path to class label file') parser.add_argument('-iou_thres', type=float, default=0.5, help='iou threshold required to qualify as detected') parser.add_argument('-conf_thres', type=float, default=0.5, help='object confidence threshold') @@ -106,7 +106,6 @@ for batch_i, (imgs, targets) in enumerate(dataloader): correct.append(0) # Compute Average Precision (AP) per class - # target_cls = annotations[:, 0] if annotations.size(0) > 1 else annotations[0] AP = ap_per_class(tp=correct, conf=detections[:, 4], pred_cls=detections[:, 6], target_cls=target_cls) # Compute mean AP for this image diff --git a/train.py b/train.py index 25580ab8..7ab90867 100644 --- a/train.py +++ b/train.py @@ -65,9 +65,8 @@ def main(opt): # p.requires_grad = False # Set optimizer - # optimizer = torch.optim.SGD(model.parameters(), lr=.001, momentum=.9, weight_decay=5e-4, nesterov=True) # optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters())) - optimizer = torch.optim.Adam(model.parameters()) + optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad, model.parameters())) optimizer.load_state_dict(checkpoint['optimizer']) start_epoch = checkpoint['epoch'] + 1 @@ -79,12 +78,12 @@ def main(opt): print('Using ', torch.cuda.device_count(), ' GPUs') model = nn.DataParallel(model) model.to(device).train() - # optimizer = torch.optim.SGD(model.parameters(), lr=1e-4, momentum=.9, weight_decay=5e-4) - optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=1e-4, weight_decay=5e-4) + + # Set optimizer + # optimizer = torch.optim.Adam(model.parameters(), lr=1e-4, weight_decay=5e-4) + optimizer = torch.optim.SGD(model.parameters(), lr=1e-3, momentum=.9, weight_decay=5e-4, nesterov=True) # Set scheduler - # scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, 24, eta_min=0.00001, last_epoch=-1) - # y = 0.001 * exp(-0.00921 * x) # 1e-4 @ 250, 1e-5 @ 500 # scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.99082, last_epoch=start_epoch - 1) modelinfo(model) @@ -94,35 +93,40 @@ def main(opt): for epoch in range(opt.epochs): epoch += start_epoch - # Multi-Scale Training - # img_size = random.choice(range(10, 20)) * 32 + # Multi-Scale YOLO Training + # img_size = random.choice(range(10, 20)) * 32 # 320 - 608 pixels # dataloader = load_images_and_labels(train_path, batch_size=opt.batch_size, img_size=img_size, augment=True) # print('Running this epoch with image size %g' % img_size) - # Update scheduler - # if epoch % 25 == 0: - # scheduler.last_epoch = -1 # for cosine annealing, restart every 25 epochs + # Update scheduler (automatic) # scheduler.step() - # if epoch <= 100: + + # Update scheduler (manual) # for g in optimizer.param_groups: - # g['lr'] = 0.0005 * (0.992 ** epoch) # 1/10 th every 250 epochs - # g['lr'] = 0.001 * (0.9773 ** epoch) # 1/10 th every 100 epochs - # g['lr'] = 0.0005 * (0.955 ** epoch) # 1/10 th every 50 epochs - # g['lr'] = 0.0005 * (0.926 ** epoch) # 1/10 th every 30 epochs + # g['lr'] = 1e-3 * (g ** epoch) # 1/10th every [30, 50, 100, 250] epochs using g = [.926, .955, .977, .992] ui = -1 rloss = defaultdict(float) # running loss metrics = torch.zeros(4, num_classes) for i, (imgs, targets) in enumerate(dataloader): - if sum([len(x) for x in targets]) < 1: # if no targets continue continue - loss = model(imgs.to(device), targets, requestPrecision=True, epoch=epoch) + # SGD burn-in + if (epoch == 0) & (i <= 1000): + power = 4 + lr = 1e-3 * (i / 1000) ** power + for g in optimizer.param_groups: + g['lr'] = lr + # print('SGD Burn-In LR = %9.5g' % lr, end='') + + # Compute loss, compute gradient, update parameters + loss = model(imgs.to(device), targets, requestPrecision=True) optimizer.zero_grad() loss.backward() optimizer.step() + # Compute running epoch-means of tracked metrics ui += 1 metrics += model.losses['metrics'] for key, val in model.losses.items(): diff --git a/utils/utils.py b/utils/utils.py index 13b6e031..61da640c 100755 --- a/utils/utils.py +++ b/utils/utils.py @@ -262,12 +262,14 @@ def build_targets(pred_boxes, pred_conf, pred_cls, target, anchor_wh, nA, nC, nG # Coordinates tx[b, a, gj, gi] = gx - gi.float() ty[b, a, gj, gi] = gy - gj.float() - # Width and height (sqrt method) - # tw[b, a, gj, gi] = torch.sqrt(gw / anchor_wh[a, 0]) / 2 - # th[b, a, gj, gi] = torch.sqrt(gh / anchor_wh[a, 1]) / 2 + + # Width and height (power method) + tw[b, a, gj, gi] = torch.sqrt(gw / anchor_wh[a, 0]) / 2 + th[b, a, gj, gi] = torch.sqrt(gh / anchor_wh[a, 1]) / 2 + # Width and height (yolov3 method) - tw[b, a, gj, gi] = torch.log(gw / anchor_wh[a, 0] + 1e-16) - th[b, a, gj, gi] = torch.log(gh / anchor_wh[a, 1] + 1e-16) + # tw[b, a, gj, gi] = torch.log(gw / anchor_wh[a, 0] + 1e-16) + # th[b, a, gj, gi] = torch.log(gh / anchor_wh[a, 1] + 1e-16) # One-hot encoding of label tcls[b, a, gj, gi, tc] = 1