From b07ee41867e87aff9847d5f4a4e1de9ac464b529 Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Tue, 27 Nov 2018 18:14:48 +0100
Subject: [PATCH] updates

---
 README.md |  2 +-
 models.py | 18 +++++++++++++++---
 train.py  | 40 ++++++++++++++++++++++++----------------
 3 files changed, 40 insertions(+), 20 deletions(-)

diff --git a/README.md b/README.md
index 481915c2..0c5cd987 100755
--- a/README.md
+++ b/README.md
@@ -11,7 +11,7 @@ The https://github.com/ultralytics/yolov3 repo contains inference and training c
 
 # Requirements
 
-Python 3.6 or later with the following `pip3 install -U -r requirements.txt` packages:
+Python 3.7 or later with the following `pip3 install -U -r requirements.txt` packages:
 
 - `numpy`
 - `torch`
diff --git a/models.py b/models.py
index fd404375..7f746442 100755
--- a/models.py
+++ b/models.py
@@ -102,8 +102,9 @@ class YOLOLayer(nn.Module):
 
         self.weights = class_weights()
 
         self.loss_means = torch.ones(6)
+        self.tx, self.ty, self.tw, self.th = [], [], [], []
 
-    def forward(self, p, targets=None, batch_report=False):
+    def forward(self, p, targets=None, batch_report=False, var=None):
         FT = torch.cuda.FloatTensor if p.is_cuda else torch.FloatTensor
         bs = p.shape[0]  # batch size
@@ -171,6 +172,17 @@ class YOLOLayer(nn.Module):
             lw = k * MSELoss(w[mask], tw[mask])
             lh = k * MSELoss(h[mask], th[mask])
 
+            # self.tx.extend(tx[mask].data.numpy())
+            # self.ty.extend(ty[mask].data.numpy())
+            # self.tw.extend(tw[mask].data.numpy())
+            # self.th.extend(th[mask].data.numpy())
+            # print([np.mean(self.tx), np.std(self.tx)],[np.mean(self.ty), np.std(self.ty)],[np.mean(self.tw), np.std(self.tw)],[np.mean(self.th), np.std(self.th)])
+            # [0.5040668, 0.2885492] [0.51384246, 0.28328574] [-0.4754091, 0.57951087] [-0.25998235, 0.44858757]
+            # [0.50184494, 0.2858976] [0.51747805, 0.2896323] [0.12962963, 0.6263085] [-0.2722081, 0.61574113]
+            # [0.5032071, 0.28825334] [0.5063132, 0.2808862] [0.21124361, 0.44760725] [0.35445485, 0.6427766]
+            # import matplotlib.pyplot as plt
+            # plt.hist(self.x)
+
             # lconf = k * BCEWithLogitsLoss(pred_conf[mask], mask[mask].float())
             lconf = (k * 10) * BCEWithLogitsLoss(pred_conf, mask.float())
 
@@ -227,7 +239,7 @@ class Darknet(nn.Module):
         self.img_size = img_size
         self.loss_names = ['loss', 'x', 'y', 'w', 'h', 'conf', 'cls', 'nT', 'TP', 'FP', 'FPe', 'FN', 'TC']
 
-    def forward(self, x, targets=None, batch_report=False):
+    def forward(self, x, targets=None, batch_report=False, var=0):
         is_training = targets is not None
         output = []
         self.losses = defaultdict(float)
@@ -245,7 +257,7 @@ class Darknet(nn.Module):
             elif module_def['type'] == 'yolo':
                 # Train phase: get loss
                 if is_training:
-                    x, *losses = module[0](x, targets, batch_report)
+                    x, *losses = module[0](x, targets, batch_report, var)
                     for name, loss in zip(self.loss_names, losses):
                         self.losses[name] += loss
                 # Test phase: Get detections
diff --git a/train.py b/train.py
index 2f200ac8..60fa867d 100644
--- a/train.py
+++ b/train.py
@@ -14,7 +14,9 @@ parser.add_argument('-cfg', type=str, default='cfg/yolov3.cfg', help='cfg file p
 parser.add_argument('-img_size', type=int, default=32 * 13, help='size of each image dimension')
 parser.add_argument('-resume', default=False, help='resume training flag')
 parser.add_argument('-batch_report', default=False, help='report TP, FP, FN, P and R per batch (slower)')
-parser.add_argument('-optimizer', default='SGD', help='Optimizer')
+parser.add_argument('-optimizer', default='SGD', help='optimizer')
+parser.add_argument('-freeze_darknet53', default=False, help='freeze darknet53.conv.74 layers for first epoch')
+parser.add_argument('-var', type=float, default=0, help='optional test variable')
 opt = parser.parse_args()
 print(opt)
 
@@ -51,9 +53,7 @@ def main(opt):
     # Get dataloader
     dataloader = load_images_and_labels(train_path, batch_size=opt.batch_size, img_size=opt.img_size, augment=True)
 
-    # Reload saved optimizer state
-    start_epoch = 0
-    best_loss = float('inf')
+    lr0 = 0.001
 
     if opt.resume:
         checkpoint = torch.load('weights/latest.pt', map_location='cpu')
@@ -69,10 +69,7 @@
         # p.requires_grad = False
 
         # Set optimizer
-        if opt.optimizer is 'Adam':
-            optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=1e-4, weight_decay=5e-4)
-        else:
-            optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad, model.parameters()), lr=1e-3, momentum=.9, weight_decay=5e-4)
+        optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad, model.parameters()), lr=lr0, momentum=.9)
 
         start_epoch = checkpoint['epoch'] + 1
         if checkpoint['optimizer'] is not None:
@@ -82,6 +79,9 @@
         del checkpoint  # current, saved
 
     else:
+        start_epoch = 0
+        best_loss = float('inf')
+
         # Initialize model with darknet53 weights (optional)
         if not os.path.isfile('weights/darknet53.conv.74'):
             os.system('wget https://pjreddie.com/media/files/darknet53.conv.74 -P weights')
@@ -93,10 +93,7 @@
         model.to(device).train()
 
         # Set optimizer
-        if opt.optimizer is 'Adam':
-            optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=1e-4, weight_decay=5e-4)
-        else:
-            optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad, model.parameters()), lr=1e-3, momentum=.9, weight_decay=5e-4)
+        optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad, model.parameters()), lr=lr0, momentum=.9)
 
     # Set scheduler
     # scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[54, 61], gamma=0.1)
@@ -114,12 +111,23 @@
 
         # Update scheduler (manual) at 0, 54, 61 epochs to 1e-3, 1e-4, 1e-5
         if epoch > 50:
-            lr = 1e-4
+            lr = lr0 / 10
         else:
-            lr = 1e-3
+            lr = lr0
         for g in optimizer.param_groups:
             g['lr'] = lr
 
+        # Freeze darknet53.conv.74 layers for first epoch
+        if opt.freeze_darknet53:
+            if epoch == 0:
+                for i, (name, p) in enumerate(model.named_parameters()):
+                    if int(name.split('.')[1]) < 75:  # if layer < 75
+                        p.requires_grad = False
+            elif epoch == 1:
+                for i, (name, p) in enumerate(model.named_parameters()):
+                    if int(name.split('.')[1]) < 75:  # if layer < 75
+                        p.requires_grad = True
+
         ui = -1
         rloss = defaultdict(float)  # running loss
         metrics = torch.zeros(3, num_classes)
@@ -130,12 +138,12 @@
 
             # SGD burn-in
             if (epoch == 0) & (i <= 1000):
-                lr = 1e-3 * (i / 1000) ** 4
+                lr = lr0 * (i / 1000) ** 4
                 for g in optimizer.param_groups:
                     g['lr'] = lr
 
             # Compute loss, compute gradient, update parameters
-            loss = model(imgs.to(device), targets, batch_report=opt.batch_report)
+            loss = model(imgs.to(device), targets, batch_report=opt.batch_report, var=opt.var)
             loss.backward()
 
             # accumulated_batches = 1  # accumulate gradient for 4 batches before stepping optimizer