diff --git a/models.py b/models.py
index 12cb50d7..bcee5c67 100755
--- a/models.py
+++ b/models.py
@@ -31,7 +31,7 @@ def create_modules(module_defs):
                                                padding=pad,
                                                bias=not bn))
         if bn:
-            modules.add_module('BatchNorm2d', nn.BatchNorm2d(filters))
+            modules.add_module('BatchNorm2d', nn.BatchNorm2d(filters, momentum=0.1))
         if mdef['activation'] == 'leaky':
             # modules.add_module('activation', nn.PReLU(num_parameters=filters, init=0.1))
             modules.add_module('activation', nn.LeakyReLU(0.1, inplace=True))
diff --git a/train.py b/train.py
index c58f85f9..cfc10e9f 100644
--- a/train.py
+++ b/train.py
@@ -186,7 +186,6 @@ def train(cfg,
     nb = len(dataloader)
     maps = np.zeros(nc)  # mAP per class
     results = (0, 0, 0, 0, 0, 0, 0)  # P, R, mAP, F1, test_loss
-    # n_burnin = min(round(nb / 5 + 1), 1000)  # burn-in batches
     t0 = time.time()
     for epoch in range(start_epoch, epochs):
         model.train()
@@ -215,22 +214,26 @@ def train(cfg,
             imgs = imgs.to(device)
             targets = targets.to(device)

-            # Multi-Scale training TODO: short-side to 32-multiple https://github.com/ultralytics/yolov3/issues/358
+            # Multi-Scale training
             if multi_scale:
                 if (i + nb * epoch) / accumulate % 10 == 0:  #  adjust (67% - 150%) every 10 batches
                     img_size = random.randrange(img_sz_min, img_sz_max + 1) * 32
                 sf = img_size / max(imgs.shape[2:])  # scale factor
                 if sf != 1:
-                    ns = [math.ceil(x * sf / 32.) * 32 for x in imgs.shape[2:]]  # new shape
+                    ns = [math.ceil(x * sf / 32.) * 32 for x in imgs.shape[2:]]  # new shape (stretched to 32-multiple)
                     imgs = F.interpolate(imgs, size=ns, mode='bilinear', align_corners=False)

             # Plot images with bounding boxes
             if epoch == 0 and i == 0:
                 plot_images(imgs=imgs, targets=targets, paths=paths, fname='train_batch%g.jpg' % i)

-            # SGD burn-in
+            # Hyperparameter burn-in
+            # n_burnin = min(round(nb / 5 + 1), 1000)  # burn-in batches
             # if epoch == 0 and i <= n_burnin:
-            #     g = (i / n_burnin) ** 4  # gain
+            #     for m in model.named_modules():
+            #         if m[0].endswith('BatchNorm2d'):
+            #             m[1].momentum = 1 - i / n_burnin * 0.99  # BatchNorm2d momentum falls from 1 - 0.01
+            #     g = (i / n_burnin) ** 4  # gain rises from 0 - 1
             #     for x in optimizer.param_groups:
             #         x['lr'] = hyp['lr0'] * g
             #         x['weight_decay'] = hyp['weight_decay'] * g