diff --git a/train.py b/train.py
index 74cebf30..576decf2 100644
--- a/train.py
+++ b/train.py
@@ -64,7 +64,6 @@ def train(
         epochs=100,  # 500200 batches at bs 4, 117263 images = 68 epochs
         batch_size=16,
         accumulate=4,  # effective bs = 64 = batch_size * accumulate
-        multi_scale=True,
         freeze_backbone=False,
         transfer=False  # Transfer learning (train only YOLO layers)
 ):
@@ -73,12 +72,13 @@ def train(
     latest = weights + 'latest.pt'
     best = weights + 'best.pt'
     device = torch_utils.select_device()
-    torch.backends.cudnn.benchmark = True  # unsuitable for multiscale
+    torch.backends.cudnn.benchmark = True  # possibly unsuitable for multiscale
+    img_size_test = img_size  # image size for testing
 
-    if multi_scale:
-        min_size = round(img_size / 32 / 1.5)
-        max_size = round(img_size / 32 * 1.5)
-        img_size = max_size * 32  # initiate with maximum multi_scale size
+    if opt.multi_scale:
+        img_size_min = round(img_size / 32 / 1.5)
+        img_size_max = round(img_size / 32 * 1.5)
+        img_size = img_size_max * 32  # initiate with maximum multi_scale size
         # opt.num_workers = 0  # bug https://github.com/ultralytics/yolov3/issues/174
 
     # Configure run
@@ -87,7 +87,7 @@ def train(
     nc = int(data_dict['classes'])  # number of classes
 
     # Initialize model
-    model = Darknet(cfg, img_size).to(device)
+    model = Darknet(cfg).to(device)
 
     # Optimizer
     optimizer = optim.SGD(model.parameters(), lr=hyp['lr0'], momentum=hyp['momentum'], weight_decay=hyp['weight_decay'])
@@ -144,8 +144,7 @@ def train(
                                   img_size,
                                   batch_size,
                                   augment=True,
-                                  rect=False,
-                                  multi_scale=multi_scale)
+                                  rect=False)
 
     # Initialize distributed training
     if torch.cuda.device_count() > 1:
@@ -204,6 +203,14 @@ def train(
             imgs = imgs.to(device)
             targets = targets.to(device)
 
+            # Multi-Scale training
+            if opt.multi_scale:
+                if (i + 1 + nb * epoch) % 10 == 0:  # adjust (67% - 150%) every 10 batches
+                    img_size = random.choice(range(img_size_min, img_size_max + 1)) * 32
+                    print('multi_scale img_size = %g' % img_size)
+                scale_factor = img_size / max(imgs.shape[-2:])
+                imgs = F.interpolate(imgs, scale_factor=scale_factor, mode='bilinear', align_corners=False)
+
             # Plot images with bounding boxes
             if epoch == 0 and i == 0:
                 plot_images(imgs=imgs, targets=targets, fname='train_batch0.jpg')
@@ -243,22 +250,10 @@ def train(
             t = time.time()
             print(s)
 
-            # Multi-Scale training (67% - 150%) every 10 batches
-            if multi_scale and (i + 1) % 10 == 0:
-                dataset.img_size = random.choice(range(min_size, max_size + 1)) * 32
-                dataloader = DataLoader(dataset,
-                                        batch_size=batch_size,
-                                        num_workers=opt.num_workers,
-                                        shuffle=True,  # disable rectangular training if True
-                                        pin_memory=True,
-                                        collate_fn=dataset.collate_fn)
-
-                print('multi_scale img_size = %g' % dataset.img_size)
-
         # Calculate mAP (always test final epoch, skip first 5 if opt.nosave)
         if not (opt.notest or (opt.nosave and epoch < 10)) or epoch == epochs - 1:
             with torch.no_grad():
-                results, maps = test.test(cfg, data_cfg, batch_size=batch_size, img_size=img_size, model=model,
+                results, maps = test.test(cfg, data_cfg, batch_size=batch_size, img_size=img_size_test, model=model,
                                           conf_thres=0.1)
 
         # Write epoch results
@@ -316,7 +311,7 @@ if __name__ == '__main__':
     parser.add_argument('--accumulate', type=int, default=4, help='accumulate gradient x batches before optimizing')
     parser.add_argument('--cfg', type=str, default='cfg/yolov3-spp.cfg', help='cfg file path')
     parser.add_argument('--data-cfg', type=str, default='data/coco_64img.data', help='coco.data file path')
-    parser.add_argument('--multi-scale', action='store_true', help='random image sizes per batch 320 - 608')
+    parser.add_argument('--multi-scale', action='store_false', help='random image sizes per batch 320 - 608')
     parser.add_argument('--img-size', type=int, default=416, help='inference size (pixels)')
     parser.add_argument('--resume', action='store_true', help='resume training flag')
     parser.add_argument('--transfer', action='store_true', help='transfer learning flag')
@@ -346,7 +341,6 @@ if __name__ == '__main__':
                     epochs=opt.epochs,
                     batch_size=opt.batch_size,
                     accumulate=opt.accumulate,
-                    multi_scale=opt.multi_scale,
                     )
 
     # Evolve hyperparameters (optional)
@@ -383,7 +377,6 @@ if __name__ == '__main__':
                             epochs=opt.epochs,
                             batch_size=opt.batch_size,
                             accumulate=opt.accumulate,
-                            multi_scale=opt.multi_scale,
                             )
 
             mutation_fitness = results[2]
diff --git a/utils/datasets.py b/utils/datasets.py
index 6ad6c564..d9749ca6 100755
--- a/utils/datasets.py
+++ b/utils/datasets.py
@@ -130,8 +130,7 @@ class LoadWebcam:  # for inference
 
 
 class LoadImagesAndLabels(Dataset):  # for training/testing
-    def __init__(self, path, img_size=416, batch_size=16, augment=False, rect=True, image_weights=False,
-                 multi_scale=False):
+    def __init__(self, path, img_size=416, batch_size=16, augment=False, rect=True, image_weights=False):
         with open(path, 'r') as f:
             img_files = f.read().splitlines()
             self.img_files = list(filter(lambda x: len(x) > 0, img_files))
@@ -153,11 +152,6 @@ class LoadImagesAndLabels(Dataset):  # for training/testing
                             replace('.bmp', '.txt').
                             replace('.png', '.txt') for x in self.img_files]
 
-        multi_scale = False
-        if multi_scale:
-            s = img_size / 32
-            self.multi_scale = ((np.linspace(0.5, 1.5, nb) * s).round().astype(np.int) * 32)
-
         # Rectangular Training  https://github.com/ultralytics/yolov3/issues/232
         if self.rect:
             from PIL import Image
@@ -256,7 +250,7 @@ class LoadImagesAndLabels(Dataset):  # for training/testing
             shape = self.batch_shapes[self.batch[index]]
             img, ratio, padw, padh = letterbox(img, new_shape=shape, mode='rect')
         else:
-            shape = int(self.multi_scale[self.batch[index]]) if hasattr(self, 'multi_scale') else self.img_size
+            shape = self.img_size
             img, ratio, padw, padh = letterbox(img, new_shape=shape, mode='square')
 
         # Load labels
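
Note (not part of the patch): a minimal standalone sketch of the in-loop multi-scale resize that the added train.py block performs, assuming a dummy batch already letterboxed to the maximum multi-scale size; the batch shape and the default img_size=416 are illustrative only.

import random

import torch
import torch.nn.functional as F

img_size = 416  # nominal training size
img_size_min = round(img_size / 32 / 1.5)  # 9  -> 288 px
img_size_max = round(img_size / 32 * 1.5)  # 20 -> 640 px

# dummy batch at the maximum multi-scale size (the patch starts training at this size)
imgs = torch.rand(16, 3, img_size_max * 32, img_size_max * 32)

# every 10 batches the patch picks a new stride-32 multiple and rescales the whole batch
new_size = random.choice(range(img_size_min, img_size_max + 1)) * 32
scale_factor = new_size / max(imgs.shape[-2:])
imgs = F.interpolate(imgs, scale_factor=scale_factor, mode='bilinear', align_corners=False)
print(imgs.shape)  # e.g. torch.Size([16, 3, 416, 416])

Resizing the already-loaded batch on the GPU replaces the previous approach of rebuilding the DataLoader with a new dataset.img_size, which is why the multi_scale plumbing in utils/datasets.py is removed.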