From 790e25592fd08073a46fce1cad83544bdb0b4f3b Mon Sep 17 00:00:00 2001
From: Glenn Jocher <glenn.jocher@ultralytics.com>
Date: Sat, 24 Aug 2019 20:55:01 +0200
Subject: [PATCH] removed xy/wh loss reporting

---
 test.py        | 18 +++++++++---------
 train.py       | 20 ++++++++++++++++----
 utils/utils.py | 23 +++++++++++++++--------
 3 files changed, 40 insertions(+), 21 deletions(-)

diff --git a/test.py b/test.py
index bf063102..0ea8c0b3 100644
--- a/test.py
+++ b/test.py
@@ -205,12 +205,12 @@ if __name__ == '__main__':
     print(opt)
 
     with torch.no_grad():
-        results = test(opt.cfg,
-                       opt.data,
-                       opt.weights,
-                       opt.batch_size,
-                       opt.img_size,
-                       opt.iou_thres,
-                       opt.conf_thres,
-                       opt.nms_thres,
-                       opt.save_json)
+        test(opt.cfg,
+             opt.data,
+             opt.weights,
+             opt.batch_size,
+             opt.img_size,
+             opt.iou_thres,
+             opt.conf_thres,
+             opt.nms_thres,
+             opt.save_json)
diff --git a/train.py b/train.py
index 3eed7edd..9f27c853 100644
--- a/train.py
+++ b/train.py
@@ -39,7 +39,7 @@ def train():
     cfg = opt.cfg
     data = opt.data
     img_size = opt.img_size
-    epochs = opt.epochs  # 500200 batches at bs 16, 117263 images = 273 epochs
+    epochs = 1 if opt.prebias else opt.epochs  # 500200 batches at bs 16, 117263 images = 273 epochs
     batch_size = opt.batch_size
     accumulate = opt.accumulate  # effective bs = batch_size * accumulate = 16 * 4 = 64
     weights = opt.weights  # initial training weights
@@ -105,7 +105,7 @@ def train():
         # possible weights are 'yolov3.weights', 'yolov3-tiny.conv.15',  'darknet53.conv.74' etc.
         cutoff = load_darknet_weights(model, weights)
 
-    if opt.transfer:  # transfer learning edge (yolo) layers
+    if opt.transfer or opt.prebias:  # transfer learning edge (yolo) layers
         nf = int(model.module_defs[model.yolo_layers[0] - 1]['filters'])  # yolo layer size (i.e. 255)
 
         for x in optimizer.param_groups:
@@ -114,7 +114,12 @@ def train():
             x['momentum'] *= 0.9
 
         for p in model.parameters():
-            p.requires_grad = True if p.shape[0] == nf else False
+            if opt.prebias and p.numel() == nf:  # train yolo biases only
+                p.requires_grad = True
+            elif opt.transfer and p.shape[0] == nf:  # train yolo biases+weights only
+                p.requires_grad = True
+            else:
+                p.requires_grad = False
 
     # Scheduler https://github.com/ultralytics/yolov3/issues/238
     # lf = lambda x: 1 - x / epochs  # linear ramp to zero
@@ -349,6 +354,7 @@ if __name__ == '__main__':
     parser.add_argument('--cache-images', action='store_true', help='cache images for faster training')
     parser.add_argument('--weights', type=str, default='', help='initial weights')  # i.e. weights/darknet.53.conv.74
     parser.add_argument('--arc', type=str, default='default', help='yolo architecture')  # default, uCE, uBCE
+    parser.add_argument('--prebias', action='store_true', help='transfer-learn yolo biases prior to training')
     opt = parser.parse_args()
     opt.weights = 'weights/last.pt' if opt.resume else opt.weights
     print(opt)
@@ -363,7 +369,13 @@ if __name__ == '__main__':
         except:
             pass
 
-        results = train()
+        if opt.prebias:
+            train()  # transfer-learn yolo biases for 1 epoch
+            create_backbone('weights/last.pt')  # saved results as backbone.pt
+            opt.weights = 'weights/backbone.pt'  # assign backbone
+            opt.prebias = False  # disable prebias and train normally
+
+        train()
 
     else:  # Evolve hyperparameters (optional)
         opt.notest = True  # only test final epoch
diff --git a/utils/utils.py b/utils/utils.py
index b6977ee2..f13ccea1 100755
--- a/utils/utils.py
+++ b/utils/utils.py
@@ -562,20 +562,27 @@ def print_model_biases(model):
               'classification: %.2f+/-%.2f' % (b[:, 5:].mean(), b[:, 5:].std()))
 
 
-def strip_optimizer(f='weights/best.pt'):  # from utils.utils import *; strip_optimizer()
+def strip_optimizer(f='weights/last.pt'):  # from utils.utils import *; strip_optimizer()
     # Strip optimizer from *.pt files for lighter files (reduced by 2/3 size)
     x = torch.load(f)
     x['optimizer'] = None
-    # x['training_results'] = None
-    # x['epoch'] = -1
-    # for p in x['model'].values():
-    #     try:
-    #         p.requires_grad = True
-    #     except:
-    #         pass
     torch.save(x, f)
 
 
+def create_backbone(f='weights/last.pt'):  # from utils.utils import *; create_backbone()
+    # Write 'weights/backbone.pt' from checkpoint f: optimizer and training results cleared, epoch reset to -1
+    x = torch.load(f)
+    x['optimizer'] = None
+    x['training_results'] = None
+    x['epoch'] = -1
+    for p in x['model'].values():
+        try:
+            p.requires_grad = True
+        except Exception:  # best-effort: skip entries that reject requires_grad, but do not swallow Ctrl-C/SystemExit
+            pass
+    torch.save(x, 'weights/backbone.pt')
+
+
 def coco_class_count(path='../coco/labels/train2014/'):
     # Histogram of occurrences per class
     nc = 80  # number classes