From a722601ef61149cc9e5135f58c762310627c970a Mon Sep 17 00:00:00 2001
From: Glenn Jocher <glenn.jocher@ultralytics.com>
Date: Thu, 20 Sep 2018 18:03:19 +0200
Subject: [PATCH] Adam to SGD with burn-in

---
 detect.py      |  3 +--
 models.py      | 25 +++++++++++++++++--------
 test.py        |  3 +--
 train.py       | 40 ++++++++++++++++++++++------------------
 utils/utils.py | 12 +++++++-----
 5 files changed, 48 insertions(+), 35 deletions(-)

diff --git a/detect.py b/detect.py
index 5b6c3ea3..1abfda65 100755
--- a/detect.py
+++ b/detect.py
@@ -18,7 +18,7 @@ parser.add_argument('-txt_out', type=bool, default=False)
 
 parser.add_argument('-cfg', type=str, default='cfg/yolov3.cfg', help='cfg file path')
 parser.add_argument('-class_path', type=str, default='data/coco.names', help='path to class label file')
-parser.add_argument('-conf_thres', type=float, default=0.98, help='object confidence threshold')
+parser.add_argument('-conf_thres', type=float, default=0.80, help='object confidence threshold')
 parser.add_argument('-nms_thres', type=float, default=0.45, help='iou threshold for non-maximum suppression')
 parser.add_argument('-batch_size', type=int, default=1, help='size of the batches')
 parser.add_argument('-img_size', type=int, default=32 * 13, help='size of each image dimension')
@@ -33,7 +33,6 @@ def detect(opt):
     # Load model
     model = Darknet(opt.cfg, opt.img_size)
 
-    #weights_path = 'checkpoints/yolov3.weights'
     weights_path = 'checkpoints/yolov3.pt'
     if weights_path.endswith('.weights'):  # saved in darknet format
         load_weights(model, weights_path)
diff --git a/models.py b/models.py
index d9ed6689..3da8fa55 100755
--- a/models.py
+++ b/models.py
@@ -100,7 +100,7 @@ class YOLOLayer(nn.Module):
         self.anchor_w = self.scaled_anchors[:, 0:1].view((1, nA, 1, 1))
         self.anchor_h = self.scaled_anchors[:, 1:2].view((1, nA, 1, 1))
 
-    def forward(self, p, targets=None, requestPrecision=False, epoch=None):
+    def forward(self, p, targets=None, requestPrecision=False):
         FT = torch.cuda.FloatTensor if p.is_cuda else torch.FloatTensor
 
         bs = p.shape[0]  # batch size
@@ -117,10 +117,18 @@ class YOLOLayer(nn.Module):
         # Get outputs
         x = torch.sigmoid(p[..., 0])  # Center x
         y = torch.sigmoid(p[..., 1])  # Center y
-        w = p[..., 2]  # Width
-        h = p[..., 3]  # Height
-        width = torch.exp(w.data) * self.anchor_w
-        height = torch.exp(h.data) * self.anchor_h
+
+        # Width and height (yolo method)
+        # w = p[..., 2]  # Width
+        # h = p[..., 3]  # Height
+        # width = torch.exp(w.data) * self.anchor_w
+        # height = torch.exp(h.data) * self.anchor_h
+
+        # Width and height (power method)
+        w = torch.sigmoid(p[..., 2])  # Width
+        h = torch.sigmoid(p[..., 3])  # Height
+        width = ((w.data * 2) ** 2) * self.anchor_w
+        height = ((h.data * 2) ** 2) * self.anchor_h
 
         # Add offset and scale with anchors (in grid space, i.e. 0-13)
         pred_boxes = FT(bs, self.nA, nG, nG, 4)
@@ -151,6 +159,7 @@ class YOLOLayer(nn.Module):
 
             # Mask outputs to ignore non-existing objects (but keep confidence predictions)
             nM = mask.sum().float()
+            batch_size = len(targets)
             nT = sum([len(x) for x in targets])
             if nM > 0:
                 lx = 5 * MSELoss(x[mask], tx[mask])
@@ -166,7 +175,7 @@ class YOLOLayer(nn.Module):
 
             lconf += 0.5 * nM * BCEWithLogitsLoss2(pred_conf[~mask], mask[~mask].float())
 
-            loss = lx + ly + lw + lh + lconf + lcls
+            loss = (lx + ly + lw + lh + lconf + lcls) / batch_size
 
             # Sum False Positives from unnasigned anchors
             i = torch.sigmoid(pred_conf[~mask]) > 0.99
@@ -202,7 +211,7 @@ class Darknet(nn.Module):
         self.img_size = img_size
         self.loss_names = ['loss', 'x', 'y', 'w', 'h', 'conf', 'cls', 'nT', 'TP', 'FP', 'FPe', 'FN', 'TC']
 
-    def forward(self, x, targets=None, requestPrecision=False, epoch=None):
+    def forward(self, x, targets=None, requestPrecision=False):
         is_training = targets is not None
         output = []
         self.losses = defaultdict(float)
@@ -220,7 +229,7 @@ class Darknet(nn.Module):
             elif module_def['type'] == 'yolo':
                 # Train phase: get loss
                 if is_training:
-                    x, *losses = module[0](x, targets, requestPrecision, epoch)
+                    x, *losses = module[0](x, targets, requestPrecision)
                     for name, loss in zip(self.loss_names, losses):
                         self.losses[name] += loss
                 # Test phase: Get detections
diff --git a/test.py b/test.py
index 5ff81729..64415126 100644
--- a/test.py
+++ b/test.py
@@ -7,7 +7,7 @@ parser = argparse.ArgumentParser()
 parser.add_argument('-batch_size', type=int, default=32, help='size of each image batch')
 parser.add_argument('-cfg', type=str, default='cfg/yolov3.cfg', help='path to model config file')
 parser.add_argument('-data_config_path', type=str, default='cfg/coco.data', help='path to data config file')
-parser.add_argument('-weights_path', type=str, default='checkpoints/yolov3.weights', help='path to weights file')
+parser.add_argument('-weights_path', type=str, default='checkpoints/yolov3.pt', help='path to weights file')
 parser.add_argument('-class_path', type=str, default='data/coco.names', help='path to class label file')
 parser.add_argument('-iou_thres', type=float, default=0.5, help='iou threshold required to qualify as detected')
 parser.add_argument('-conf_thres', type=float, default=0.5, help='object confidence threshold')
@@ -106,7 +106,6 @@ for batch_i, (imgs, targets) in enumerate(dataloader):
                     correct.append(0)
 
         # Compute Average Precision (AP) per class
-        # target_cls = annotations[:, 0] if annotations.size(0) > 1 else annotations[0]
         AP = ap_per_class(tp=correct, conf=detections[:, 4], pred_cls=detections[:, 6], target_cls=target_cls)
 
         # Compute mean AP for this image
diff --git a/train.py b/train.py
index 25580ab8..7ab90867 100644
--- a/train.py
+++ b/train.py
@@ -65,9 +65,8 @@ def main(opt):
         #         p.requires_grad = False
 
         # Set optimizer
-        # optimizer = torch.optim.SGD(model.parameters(), lr=.001, momentum=.9, weight_decay=5e-4, nesterov=True)
         # optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()))
-        optimizer = torch.optim.Adam(model.parameters())
+        optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad, model.parameters()))
         optimizer.load_state_dict(checkpoint['optimizer'])
 
         start_epoch = checkpoint['epoch'] + 1
@@ -79,12 +78,12 @@ def main(opt):
             print('Using ', torch.cuda.device_count(), ' GPUs')
             model = nn.DataParallel(model)
         model.to(device).train()
-        # optimizer = torch.optim.SGD(model.parameters(), lr=1e-4, momentum=.9, weight_decay=5e-4)
-        optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=1e-4, weight_decay=5e-4)
+
+        # Set optimizer
+        # optimizer = torch.optim.Adam(model.parameters(), lr=1e-4, weight_decay=5e-4)
+        optimizer = torch.optim.SGD(model.parameters(), lr=1e-3, momentum=.9, weight_decay=5e-4, nesterov=True)
 
     # Set scheduler
-    # scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, 24, eta_min=0.00001, last_epoch=-1)
-    # y = 0.001 * exp(-0.00921 * x)  # 1e-4 @ 250, 1e-5 @ 500
     # scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.99082, last_epoch=start_epoch - 1)
 
     modelinfo(model)
@@ -94,35 +93,40 @@ def main(opt):
     for epoch in range(opt.epochs):
         epoch += start_epoch
 
-        # Multi-Scale Training
-        # img_size = random.choice(range(10, 20)) * 32
+        # Multi-Scale YOLO Training
+        # img_size = random.choice(range(10, 20)) * 32  # 320 - 608 pixels
         # dataloader = load_images_and_labels(train_path, batch_size=opt.batch_size, img_size=img_size, augment=True)
         # print('Running this epoch with image size %g' % img_size)
 
-        # Update scheduler
-        # if epoch % 25 == 0:
-        #     scheduler.last_epoch = -1  # for cosine annealing, restart every 25 epochs
+        # Update scheduler (automatic)
         # scheduler.step()
-        # if epoch <= 100:
+
+        # Update scheduler (manual)
         # for g in optimizer.param_groups:
-        # g['lr'] = 0.0005 * (0.992 ** epoch)  # 1/10 th every 250 epochs
-        # g['lr'] = 0.001 * (0.9773 ** epoch)  # 1/10 th every 100 epochs
-        # g['lr'] = 0.0005 * (0.955 ** epoch)  # 1/10 th every 50 epochs
-        # g['lr'] = 0.0005 * (0.926 ** epoch)  # 1/10 th every 30 epochs
+        #     g['lr'] = 1e-3 * (gamma ** epoch)  # 1/10th every [30, 50, 100, 250] epochs using gamma = [.926, .955, .977, .992]
 
         ui = -1
         rloss = defaultdict(float)  # running loss
         metrics = torch.zeros(4, num_classes)
         for i, (imgs, targets) in enumerate(dataloader):
-
             if sum([len(x) for x in targets]) < 1:  # if no targets continue
                 continue
 
-            loss = model(imgs.to(device), targets, requestPrecision=True, epoch=epoch)
+            # SGD burn-in: ramp lr from 0 up to 1e-3 as 1e-3 * (i / 1000) ** power over batches 0-1000 of epoch 0
+            if (epoch == 0) & (i <= 1000):
+                power = 4
+                lr = 1e-3 * (i / 1000) ** power
+                for g in optimizer.param_groups:
+                    g['lr'] = lr
+                # print('SGD Burn-In LR = %9.5g' % lr, end='')
+
+            # Compute loss, compute gradient, update parameters
+            loss = model(imgs.to(device), targets, requestPrecision=True)
             optimizer.zero_grad()
             loss.backward()
             optimizer.step()
 
+            # Compute running epoch-means of tracked metrics
             ui += 1
             metrics += model.losses['metrics']
             for key, val in model.losses.items():
diff --git a/utils/utils.py b/utils/utils.py
index 13b6e031..61da640c 100755
--- a/utils/utils.py
+++ b/utils/utils.py
@@ -262,12 +262,14 @@ def build_targets(pred_boxes, pred_conf, pred_cls, target, anchor_wh, nA, nC, nG
         # Coordinates
         tx[b, a, gj, gi] = gx - gi.float()
         ty[b, a, gj, gi] = gy - gj.float()
-        # Width and height (sqrt method)
-        # tw[b, a, gj, gi] = torch.sqrt(gw / anchor_wh[a, 0]) / 2
-        # th[b, a, gj, gi] = torch.sqrt(gh / anchor_wh[a, 1]) / 2
+
+        # Width and height (power method)
+        tw[b, a, gj, gi] = torch.sqrt(gw / anchor_wh[a, 0]) / 2
+        th[b, a, gj, gi] = torch.sqrt(gh / anchor_wh[a, 1]) / 2
+
         # Width and height (yolov3 method)
-        tw[b, a, gj, gi] = torch.log(gw / anchor_wh[a, 0] + 1e-16)
-        th[b, a, gj, gi] = torch.log(gh / anchor_wh[a, 1] + 1e-16)
+        # tw[b, a, gj, gi] = torch.log(gw / anchor_wh[a, 0] + 1e-16)
+        # th[b, a, gj, gi] = torch.log(gh / anchor_wh[a, 1] + 1e-16)
 
         # One-hot encoding of label
         tcls[b, a, gj, gi, tc] = 1