diff --git a/cfg/yolov4-tiny.cfg b/cfg/yolov4-tiny.cfg new file mode 100644 index 00000000..dc6f5bfb --- /dev/null +++ b/cfg/yolov4-tiny.cfg @@ -0,0 +1,281 @@ +[net] +# Testing +#batch=1 +#subdivisions=1 +# Training +batch=64 +subdivisions=1 +width=416 +height=416 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.00261 +burn_in=1000 +max_batches = 500200 +policy=steps +steps=400000,450000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1 +groups=2 +group_id=1 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -1,-2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -6,-1 + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1 +groups=2 +group_id=1 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -1,-2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -6,-1 + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1 +groups=2 +group_id=1 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -1,-2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -6,-1 + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +################################## + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + + +[yolo] +mask = 3,4,5 +anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 +classes=80 +num=6 +jitter=.3 +scale_x_y = 1.05 +cls_normalizer=1.0 +iou_normalizer=0.07 +iou_loss=ciou +ignore_thresh = .7 +truth_thresh = 1 +random=0 +resize=1.5 +nms_kind=greedynms +beta_nms=0.6 + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 23 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + +[yolo] +mask = 1,2,3 +anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 +classes=80 +num=6 +jitter=.3 +scale_x_y = 1.05 +cls_normalizer=1.0 +iou_normalizer=0.07 +iou_loss=ciou +ignore_thresh = .7 +truth_thresh = 1 +random=0 +resize=1.5 +nms_kind=greedynms +beta_nms=0.6 diff --git a/models.py b/models.py index 0323b25e..d5cfc02a 100755 --- a/models.py +++ b/models.py @@ -438,7 +438,7 @@ def convert(cfg='cfg/yolov3-spp.cfg', weights='weights/yolov3-spp.weights'): target = weights.rsplit('.', 1)[0] + '.pt' torch.save(chkpt, target) - print("Success: converted '%s' to 's%'" % (weights, target)) + print("Success: converted '%s' to '%s'" % (weights, target)) else: print('Error: extension not supported.') diff --git a/test.py b/test.py index ace09951..3e739d8e 100644 --- a/test.py +++ b/test.py @@ -23,6 +23,7 @@ def test(cfg, multi_label=True): # Initialize/load model and set device if model is None: + is_training = False device = torch_utils.select_device(opt.device, batch_size=batch_size) verbose = opt.task == 'test' @@ -47,6 +48,7 @@ def test(cfg, if device.type != 'cpu' and torch.cuda.device_count() > 1: model = nn.DataParallel(model) else: # called by train.py + is_training = True device = next(model.parameters()).device # get model device verbose = False @@ -61,7 +63,7 @@ def test(cfg, # Dataloader if dataloader is None: - dataset = LoadImagesAndLabels(path, imgsz, batch_size, rect=True, single_cls=opt.single_cls) + dataset = LoadImagesAndLabels(path, imgsz, batch_size, rect=True, single_cls=opt.single_cls, pad=0.5) batch_size = min(batch_size, len(dataset)) dataloader = DataLoader(dataset, batch_size=batch_size, @@ -91,7 +93,7 @@ def test(cfg, t0 += torch_utils.time_synchronized() - t # Compute loss - if hasattr(model, 'hyp'): # if model has loss hyperparameters + if is_training: # if model has loss hyperparameters loss += compute_loss(train_out, targets, model)[1][:3] # GIoU, obj, cls # Run NMS diff --git a/train.py b/train.py index fc967146..ee9938c7 100644 --- a/train.py +++ b/train.py @@ -66,7 +66,7 @@ def train(hyp): imgsz_min, imgsz_max, imgsz_test = opt.img_size # img sizes (min, max, test) # Image Sizes - gs = 64 # (pixels) grid size + gs = 32 # (pixels) grid size assert math.fmod(imgsz_min, gs) == 0, '--img-size %g must be a %g-multiple' % (imgsz_min, gs) opt.multi_scale |= imgsz_min != imgsz_max # multi if different (min, max) if opt.multi_scale: @@ -119,34 +119,49 @@ def train(hyp): if weights.endswith('.pt'): # pytorch format print("LOADIN MODEL") # possible weights are '*.pt', 'yolov3-spp.pt', 'yolov3-tiny.pt' etc. - chkpt = torch.load(weights, map_location=device) + ckpt = torch.load(weights, map_location=device) # load model try: - chkpt['model'] = {k: v for k, v in chkpt['model'].items() if model.state_dict()[k].numel() == v.numel()} - model.load_state_dict(chkpt['model'], strict=False) + ckpt['model'] = {k: v for k, v in ckpt['model'].items() if model.state_dict()[k].numel() == v.numel()} + model.load_state_dict(ckpt['model'], strict=False) except KeyError as e: s = "%s is not compatible with %s. Specify --weights '' or specify a --cfg compatible with %s. " \ "See https://github.com/ultralytics/yolov3/issues/657" % (opt.weights, opt.cfg, opt.weights) raise KeyError(s) from e # load optimizer - if chkpt['optimizer'] is not None: - optimizer.load_state_dict(chkpt['optimizer']) - best_fitness = chkpt['best_fitness'] + if ckpt['optimizer'] is not None: + optimizer.load_state_dict(ckpt['optimizer']) + best_fitness = ckpt['best_fitness'] # load results - if chkpt.get('training_results') is not None: + if ckpt.get('training_results') is not None: with open(results_file, 'w') as file: - file.write(chkpt['training_results']) # write results.txt + file.write(ckpt['training_results']) # write results.txt - start_epoch = chkpt['epoch'] + 1 - del chkpt + # epochs + start_epoch = ckpt['epoch'] + 1 + if epochs < start_epoch: + print('%s has been trained for %g epochs. Fine-tuning for %g additional epochs.' % + (opt.weights, ckpt['epoch'], epochs)) + epochs += ckpt['epoch'] # finetune additional epochs + + del ckpt elif len(weights) > 0: # darknet format # possible weights are '*.weights', 'yolov3-tiny.conv.15', 'darknet53.conv.74' etc. load_darknet_weights(model, weights) + if opt.freeze_layers: + output_layer_indices = [idx - 1 for idx, module in enumerate(model.module_list) if isinstance(module, YOLOLayer)] + freeze_layer_indices = [x for x in range(len(model.module_list)) if + (x not in output_layer_indices) and + (x - 1 not in output_layer_indices)] + for idx in freeze_layer_indices: + for parameter in model.module_list[idx].parameters(): + parameter.requires_grad_(False) + # Mixed precision training https://github.com/NVIDIA/apex if mixed_precision: model, optimizer = amp.initialize(model, optimizer, opt_level='O1', verbosity=0) @@ -343,7 +358,7 @@ def train(hyp): save = (not opt.nosave) or (final_epoch and not opt.evolve) if save: with open(results_file, 'r') as f: # create checkpoint - chkpt = {'epoch': epoch, + ckpt = {'epoch': epoch, 'best_fitness': best_fitness, 'training_results': f.read(), 'model': ema.ema.module.state_dict() if hasattr(model, 'module') else ema.ema.state_dict(), @@ -352,10 +367,10 @@ def train(hyp): if epoch % opt.save_every_nth_epoch == 0: torch.save(chkpt, f'yolo_{epoch}.pt') # Save last, best and delete - torch.save(chkpt, last) + torch.save(ckpt, last) if (best_fitness == fi) and not final_epoch: - torch.save(chkpt, best) - del chkpt + torch.save(ckpt, best) + del ckpt # end epoch ---------------------------------------------------------------------------------------------------- @@ -400,9 +415,10 @@ if __name__ == '__main__': parser.add_argument('--device', default='', help='device id (i.e. 0 or 0,1 or cpu)') parser.add_argument('--adam', action='store_true', help='use adam optimizer') parser.add_argument('--single-cls', action='store_true', help='train as single-class dataset') + parser.add_argument('--freeze-layers', action='store_true', help='Freeze non-output layers') parser.add_argument('--save-every-nth-epoch', type=int, help='Saving every n-th epoth') opt = parser.parse_args() - #opt.weights = last if opt.resume else opt.weights + #opt.weights = last if opt.resume and not opt.weights else opt.weights #check_git_status() opt.cfg = check_file(opt.cfg) # check file opt.data = check_file(opt.data) # check file diff --git a/utils/datasets.py b/utils/datasets.py index b73c9e8c..2e56f217 100755 --- a/utils/datasets.py +++ b/utils/datasets.py @@ -18,7 +18,7 @@ from utils.utils import xyxy2xywh, xywh2xyxy help_url = 'https://github.com/ultralytics/yolov3/wiki/Train-Custom-Data' img_formats = ['.bmp', '.jpg', '.jpeg', '.png', '.tif', '.dng'] -vid_formats = ['.mov', '.avi', '.mp4'] +vid_formats = ['.mov', '.avi', '.mp4', '.mpg', '.mpeg', '.m4v', '.wmv', '.mkv'] # Get orientation exif tag for orientation in ExifTags.TAGS.keys(): @@ -62,7 +62,8 @@ class LoadImages: # for inference self.new_video(videos[0]) # new video else: self.cap = None - assert self.nF > 0, 'No images or videos found in ' + path + assert self.nF > 0, 'No images or videos found in %s. Supported formats are:\nimages: %s\nvideos: %s' % \ + (path, img_formats, vid_formats) def __iter__(self): self.count = 0 @@ -256,7 +257,7 @@ class LoadStreams: # multiple IP or RTSP cameras class LoadImagesAndLabels(Dataset): # for training/testing def __init__(self, path, img_size=416, batch_size=16, augment=False, hyp=None, rect=False, image_weights=False, - cache_images=False, single_cls=False): + cache_images=False, single_cls=False, pad=0.0): try: path = str(Path(path)) # os-agnostic parent = str(Path(path).parent) + os.sep @@ -290,20 +291,22 @@ class LoadImagesAndLabels(Dataset): # for training/testing self.label_files = [x.replace('images', 'labels').replace(os.path.splitext(x)[-1], '.txt') for x in self.img_files] + # Read image shapes (wh) + sp = path.replace('.txt', '') + '.shapes' # shapefile path + try: + with open(sp, 'r') as f: # read existing shapefile + s = [x.split() for x in f.read().splitlines()] + assert len(s) == n, 'Shapefile out of sync' + except: + s = [exif_size(Image.open(f)) for f in tqdm(self.img_files, desc='Reading image shapes')] + np.savetxt(sp, s, fmt='%g') # overwrites existing (if any) + + self.shapes = np.array(s, dtype=np.float64) + # Rectangular Training https://github.com/ultralytics/yolov3/issues/232 if self.rect: - # Read image shapes (wh) - sp = path.replace('.txt', '') + '.shapes' # shapefile path - try: - with open(sp, 'r') as f: # read existing shapefile - s = [x.split() for x in f.read().splitlines()] - assert len(s) == n, 'Shapefile out of sync' - except: - s = [exif_size(Image.open(f)) for f in tqdm(self.img_files, desc='Reading image shapes')] - np.savetxt(sp, s, fmt='%g') # overwrites existing (if any) - # Sort by aspect ratio - s = np.array(s, dtype=np.float64) + s = self.shapes # wh ar = s[:, 1] / s[:, 0] # aspect ratio irect = ar.argsort() self.img_files = [self.img_files[i] for i in irect] @@ -321,7 +324,7 @@ class LoadImagesAndLabels(Dataset): # for training/testing elif mini > 1: shapes[i] = [1, 1 / mini] - self.batch_shapes = np.ceil(np.array(shapes) * img_size / 64.).astype(np.int) * 64 + self.batch_shapes = np.ceil(np.array(shapes) * img_size / 32. + pad).astype(np.int) * 32 # Cache labels self.imgs = [None] * n @@ -529,7 +532,7 @@ def load_image(self, index): assert img is not None, 'Image Not Found ' + path h0, w0 = img.shape[:2] # orig hw r = self.img_size / max(h0, w0) # resize image to img_size - if r < 1 or (self.augment and r != 1): # always resize down, only resize up if training with augmentation + if r != 1: # always resize down, only resize up if training with augmentation interp = cv2.INTER_AREA if r < 1 and not self.augment else cv2.INTER_LINEAR img = cv2.resize(img, (int(w0 * r), int(h0 * r)), interpolation=interp) return img, (h0, w0), img.shape[:2] # img, hw_original, hw_resized