From 1990cd8013720f031a78369792faebf0589aa19e Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Sun, 30 Jun 2019 00:38:32 +0200
Subject: [PATCH 01/19] Update README.md

---
 README.md | 1 -
 1 file changed, 1 deletion(-)

diff --git a/README.md b/README.md
index dee86399..001668ee 100755
--- a/README.md
+++ b/README.md
@@ -211,7 +211,6 @@ Computing mAP: 100%|████████████████████
  Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.331
  Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.517
  Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.618
-
 ```

 # Citation

From 388b66dcd0e3ed0ba2262d901017edbd49217ff7 Mon Sep 17 00:00:00 2001
From: glenn-jocher
Date: Sun, 30 Jun 2019 15:24:34 +0200
Subject: [PATCH 02/19] updates

---
 train.py | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/train.py b/train.py
index bb6f7763..9154e230 100644
--- a/train.py
+++ b/train.py
@@ -148,12 +148,13 @@ def train(
         collate_fn=dataset.collate_fn)

     # Mixed precision training https://github.com/NVIDIA/apex
-    try:
-        from apex import amp
-        model, optimizer = amp.initialize(model, optimizer, opt_level='O1')
-        mixed_precision = True
-    except:  # not installed: install help: https://github.com/NVIDIA/apex/issues/259
-        mixed_precision = False
+    mixed_precision = True
+    if mixed_precision:
+        try:
+            from apex import amp
+            model, optimizer = amp.initialize(model, optimizer, opt_level='O1')
+        except:  # not installed: install help: https://github.com/NVIDIA/apex/issues/259
+            mixed_precision = False

     # Start training
     model.hyp = hyp  # attach hyperparameters to model
@@ -343,10 +344,10 @@ if __name__ == '__main__':
             # Mutate hyperparameters
             old_hyp = hyp.copy()
             init_seeds(seed=int(time.time()))
-            s = [.4, .4, .4, .4, .4, .4, .4, .4, .4 * 0, .4 * 0, .04 * 0, .4 * 0]  # fractional sigmas
+            s = [.2, .2, .2, .2, .2, .2, .2, .2, .2 * 0, .2 * 0, .05 * 0, .2 * 0]  # fractional sigmas
             for i, k in enumerate(hyp.keys()):
-                x = (np.random.randn(1) * s[i] + 1) ** 1.1  # plt.hist(x.ravel(), 100)
-                hyp[k] = hyp[k] * float(x)  # vary by about 30% 1sigma
+                x = (np.random.randn(1) * s[i] + 1) ** 3.0  # plt.hist(x.ravel(), 300)
+                hyp[k] *= float(x)  # vary by about 30% 1sigma

             # Clip to limits
             keys = ['lr0', 'iou_t', 'momentum', 'weight_decay']
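Patch 02 restructures the NVIDIA Apex import guard so mixed precision can be toggled with a single flag. A minimal, self-contained sketch of the same fallback pattern (the model and optimizer here are placeholders, not the repo's Darknet model):

```python
import torch
import torch.nn as nn

device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = nn.Linear(10, 2).to(device)  # placeholder model
optimizer = torch.optim.SGD(model.parameters(), lr=0.001)

# Attempt mixed-precision training with Apex; silently fall back to FP32 if unavailable
mixed_precision = True
if mixed_precision:
    try:
        from apex import amp  # https://github.com/NVIDIA/apex
        model, optimizer = amp.initialize(model, optimizer, opt_level='O1')
    except ImportError:  # Apex not installed
        mixed_precision = False
```

Note that the patch itself keeps a bare `except:`, which also swallows unrelated failures such as a broken CUDA extension build; catching `ImportError` (or at least `Exception`) is the narrower idiom.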
From db2674aa316a8aac7c6a96d450b1f1e63eef98d5 Mon Sep 17 00:00:00 2001
From: glenn-jocher
Date: Sun, 30 Jun 2019 17:34:29 +0200
Subject: [PATCH 03/19] updates

---
 train.py       | 49 +++++++++++++++++++++++++++----------------------
 utils/utils.py | 12 ++++++------
 2 files changed, 33 insertions(+), 28 deletions(-)

diff --git a/train.py b/train.py
index 9154e230..790fc20e 100644
--- a/train.py
+++ b/train.py
@@ -11,27 +11,30 @@ from models import *
 from utils.datasets import *
 from utils.utils import *

-# Hyperparameters: train.py --data data/coco.data --img-size 320 --single-scale --batch-size 64 --accumulate 1 --epochs 1 --evolve 0.087 0.281 0.109 0.121
-hyp = {'giou': .035,  # giou loss gain
-       'xy': 0.20,  # xy loss gain
-       'wh': 0.10,  # wh loss gain
-       'cls': 0.035,  # cls loss gain
-       'cls_pw': 79.0,  # cls BCELoss positive_weight
-       'conf': 1.61,  # conf loss gain
-       'conf_pw': 3.53,  # conf BCELoss positive_weight
-       'iou_t': 0.29,  # iou target-anchor training threshold
+# 0.149 0.241 0.126 0.156 6.85 1.008 1.421 0.07989 16.94 6.215 10.61 4.272 0.251 0.001 -4 0.9 0.0005 320 64-1 giou
+hyp = {'giou': 1.008,  # giou loss gain
+       'xy': 1.421,  # xy loss gain
+       'wh': 0.07989,  # wh loss gain
+       'cls': 16.94,  # cls loss gain
+       'cls_pw': 6.215,  # cls BCELoss positive_weight
+       'conf': 10.61,  # conf loss gain
+       'conf_pw': 4.272,  # conf BCELoss positive_weight
+       'iou_t': 0.251,  # iou target-anchor training threshold
        'lr0': 0.001,  # initial learning rate
        'lrf': -4.,  # final learning rate = lr0 * (10 ** lrf)
        'momentum': 0.90,  # SGD momentum
        'weight_decay': 0.0005}  # optimizer weight decay

-# hyp = {'giou': 1.0,  # giou loss gain
-#        'xy': 1.0,  # xy loss gain
-#        'wh': 1.0,  # wh loss gain
-#        'cls': 1.0,  # cls loss gain
+
+# 0.0945 0.279 0.114 0.131 25 0.035 0.2 0.1 0.035 79 1.61 3.53 0.29 0.001 -4 0.9 0.0005 320 64-1
+# 0.112 0.265 0.111 0.144 12.6 0.035 0.2 0.1 0.035 79 1.61 3.53 0.29 0.001 -4 0.9 0.0005 320 32-2
+# hyp = {'giou': .035,  # giou loss gain
+#        'xy': 0.20,  # xy loss gain
+#        'wh': 0.10,  # wh loss gain
+#        'cls': 0.035,  # cls loss gain
 #        'cls_pw': 79.0,  # cls BCELoss positive_weight
-#        'conf': 1.0,  # conf loss gain
-#        'conf_pw': 6.0,  # conf BCELoss positive_weight
+#        'conf': 1.61,  # conf loss gain
+#        'conf_pw': 3.53,  # conf BCELoss positive_weight
 #        'iou_t': 0.29,  # iou target-anchor training threshold
 #        'lr0': 0.001,  # initial learning rate
 #        'lrf': -4.,  # final learning rate = lr0 * (10 ** lrf)
@@ -167,7 +170,8 @@ def train(
     t, t0 = time.time(), time.time()
     for epoch in range(start_epoch, epochs):
         model.train()
-        print(('\n%8s%12s' + '%10s' * 7) % ('Epoch', 'Batch', 'xy', 'wh', 'conf', 'cls', 'total', 'targets', 'time'))
+        print(('\n%8s%12s' + '%10s' * 7) %
+              ('Epoch', 'Batch', 'xy', 'wh', 'conf', 'cls', 'total', 'targets', 'img_size'))

         # Update scheduler
         scheduler.step()
@@ -184,15 +188,16 @@ def train(
         # dataset.indices = random.choices(range(dataset.n), weights=image_weights, k=dataset.n)  # random weighted index

         mloss = torch.zeros(5).to(device)  # mean losses
-        for i, (imgs, targets, _, _) in enumerate(dataloader):
+        pbar = tqdm(enumerate(dataloader), total=nb)  # progress bar
+        for i, (imgs, targets, _, _) in pbar:
             imgs = imgs.to(device)
             targets = targets.to(device)

             # Multi-Scale training
             if multi_scale:
-                if (i + 1 + nb * epoch) / accumulate % 10 == 0:  #  adjust (67% - 150%) every 10 batches
+                if (i + nb * epoch) / accumulate % 10 == 0:  #  adjust (67% - 150%) every 10 batches
                     img_size = random.choice(range(img_size_min, img_size_max + 1)) * 32
-                    print('img_size = %g' % img_size)
+                    # print('img_size = %g' % img_size)
                     scale_factor = img_size / max(imgs.shape[-2:])
                     imgs = F.interpolate(imgs, scale_factor=scale_factor, mode='bilinear', align_corners=False)
@@ -229,11 +234,11 @@ def train(
             # Print batch results
             mloss = (mloss * i + loss_items) / (i + 1)  # update mean losses
+            # s = ('%8s%12s' + '%10.3g' * 7) % ('%g/%g' % (epoch, epochs - 1), '%g/%g' % (i, nb - 1), *mloss, len(targets), time.time() - t)
             s = ('%8s%12s' + '%10.3g' * 7) % (
-                '%g/%g' % (epoch, epochs - 1),
-                '%g/%g' % (i, nb - 1), *mloss, len(targets), time.time() - t)
+                '%g/%g' % (epoch, epochs - 1), '%g/%g' % (i, nb - 1), *mloss, len(targets), img_size)
             t = time.time()
-            print(s)
+            pbar.set_description(s)  # print(s)

         # Report time
         dt = (time.time() - t0) / 3600
diff --git a/utils/utils.py b/utils/utils.py
index 4c0b179c..7d756f1d 100755
--- a/utils/utils.py
+++ b/utils/utils.py
@@ -284,7 +284,7 @@ def compute_loss(p, targets, model, giou_loss=False):  # predictions, targets, model
     # Compute losses
     bs = p[0].shape[0]  # batch size
-    k = bs  # loss gain
+    k = bs / 64  # loss gain
     for i, pi0 in enumerate(p):  # layer i predictions, i
         b, a, gj, gi = indices[i]  # image, anchor, gridy, gridx
         tconf = torch.zeros_like(pi0[..., 0])  # conf
@@ -303,12 +303,12 @@ def compute_loss(p, targets, model, giou_loss=False):  # predictions, targets, model
             lxy += (k * h['xy']) * MSE(torch.sigmoid(pi[..., 0:2]), txy[i])  # xy loss
             lwh += (k * h['wh']) * MSE(pi[..., 2:4], twh[i])  # wh yolo loss

-            # tclsm = torch.zeros_like(pi[..., 5:])
-            # tclsm[range(len(b)), tcls[i]] = 1.0
-            # lcls += (k * h['cls']) * BCEcls(pi[..., 5:], tclsm)  # class_conf loss
-            lcls += (k * h['cls']) * CE(pi[..., 5:], tcls[i])  # class_conf loss
+            tclsm = torch.zeros_like(pi[..., 5:])
+            tclsm[range(len(b)), tcls[i]] = 1.0
+            lcls += (k * h['cls']) * BCEcls(pi[..., 5:], tclsm)  # cls loss (BCE)
+            # lcls += (k * h['cls']) * CE(pi[..., 5:], tcls[i])  # cls loss (CE)

-            # # Append to text file
+            # Append targets to text file
             # with open('targets.txt', 'a') as file:
             #     [file.write('%11.5g ' * 4 % tuple(x) + '\n') for x in torch.cat((txy[i], twh[i]), 1)]
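Patch 03 also flips the classification term in compute_loss from cross-entropy over class indices to BCE against a one-hot target matrix. A toy comparison of the two target encodings (tensor shapes are illustrative; the repo applies this per YOLO layer to the matched anchors only):

```python
import torch
import torch.nn as nn

n, nc = 4, 80  # 4 matched anchors, 80 classes
logits = torch.randn(n, nc)          # raw class scores
tcls = torch.tensor([3, 17, 0, 42])  # integer class labels

# CE path: expects integer class indices, one exclusive class per anchor
ce = nn.CrossEntropyLoss()(logits, tcls)

# BCE path: expects a one-hot (multi-label capable) target matrix
tclsm = torch.zeros_like(logits)
tclsm[range(n), tcls] = 1.0
bce = nn.BCEWithLogitsLoss()(logits, tclsm)
```

BCE treats each class as an independent binary decision, so it tolerates multi-label boxes and pairs naturally with a positive weight such as the cls_pw hyperparameter; CE instead forces a single mutually exclusive class.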
From 32f5ea955b6c279c1128ad2c523067b3200be9af Mon Sep 17 00:00:00 2001
From: glenn-jocher
Date: Sun, 30 Jun 2019 17:47:10 +0200
Subject: [PATCH 04/19] updates

---
 train.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/train.py b/train.py
index 790fc20e..6ac50a54 100644
--- a/train.py
+++ b/train.py
@@ -47,9 +47,9 @@ def train(
         data_cfg,
         img_size=416,
         resume=False,
-        epochs=100,  # 500200 batches at bs 4, 117263 images = 68 epochs
-        batch_size=16,
-        accumulate=4,  # effective bs = 64 = batch_size * accumulate
+        epochs=100,  # 500200 batches at bs 16, 117263 images = 273 epochs
+        batch_size=8,
+        accumulate=8,  # effective bs = batch_size * accumulate = 8 * 8 = 64
         freeze_backbone=False,
         transfer=False  # Transfer learning (train only YOLO layers)
 ):

From 63036deeb7bced06cd101bf6048e6a9a61b4d513 Mon Sep 17 00:00:00 2001
From: glenn-jocher
Date: Mon, 1 Jul 2019 00:41:13 +0200
Subject: [PATCH 05/19] updates

---
 train.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/train.py b/train.py
index 6ac50a54..151163a2 100644
--- a/train.py
+++ b/train.py
@@ -351,7 +351,7 @@ if __name__ == '__main__':
             init_seeds(seed=int(time.time()))
             s = [.2, .2, .2, .2, .2, .2, .2, .2, .2 * 0, .2 * 0, .05 * 0, .2 * 0]  # fractional sigmas
             for i, k in enumerate(hyp.keys()):
-                x = (np.random.randn(1) * s[i] + 1) ** 3.0  # plt.hist(x.ravel(), 300)
+                x = (np.random.randn(1) * s[i] + 1) ** 2.0  # plt.hist(x.ravel(), 300)
                 hyp[k] *= float(x)  # vary by about 30% 1sigma

             # Clip to limits

From 09d065711a937a5d3f8a266514713a2a6612061f Mon Sep 17 00:00:00 2001
From: glenn-jocher
Date: Mon, 1 Jul 2019 01:27:32 +0200
Subject: [PATCH 06/19] updates

---
 .gitignore                      |  1 +
 data/get_coco_dataset_gdrive.sh | 11 +++++++++++
 2 files changed, 12 insertions(+)
 create mode 100755 data/get_coco_dataset_gdrive.sh

diff --git a/.gitignore b/.gitignore
index ae46812c..997fba97 100755
--- a/.gitignore
+++ b/.gitignore
@@ -30,6 +30,7 @@ data/*
 !data/trainvalno5k.shapes
 !data/5k.shapes
 !data/5k.txt
+!data/*.sh

 pycocotools/*
 results*.txt
diff --git a/data/get_coco_dataset_gdrive.sh b/data/get_coco_dataset_gdrive.sh
new file mode 100755
index 00000000..fabaad2c
--- /dev/null
+++ b/data/get_coco_dataset_gdrive.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+# https://stackoverflow.com/questions/48133080/how-to-download-a-google-drive-url-via-curl-or-wget/48133859
+
+# Download COCO dataset
+fileid="1HaXkef9z6y5l4vUnCYgdmEAj61c6bfWO"
+filename="coco_gdrive.zip"
+curl -c ./cookie -s -L "https://drive.google.com/uc?export=download&id=${fileid}" > /dev/null
+curl -Lb ./cookie "https://drive.google.com/uc?export=download&confirm=`awk '/download/ {print $NF}' ./cookie`&id=${fileid}" -o ${filename}
+
+# Unzip
+unzip -q coco_gdrive.zip
\ No newline at end of file
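Patch 04 halves the batch size while doubling accumulation, so the optimizer still steps once per 64 images. A minimal sketch of that gradient-accumulation pattern (model, data, and loss are stand-ins):

```python
import torch
import torch.nn as nn

model = nn.Linear(10, 2)
optimizer = torch.optim.SGD(model.parameters(), lr=0.001)
accumulate = 8  # optimizer steps once every `accumulate` batches

for i in range(64):  # stand-in for enumerate(dataloader)
    imgs, targets = torch.randn(8, 10), torch.randn(8, 2)  # batch_size = 8
    loss = nn.functional.mse_loss(model(imgs), targets)
    loss.backward()  # gradients from successive backward() calls sum in place

    if (i + 1) % accumulate == 0:  # 8 batches x 8 images = effective batch size 64
        optimizer.step()
        optimizer.zero_grad()
```

Because gradients accumulate in place between steps, the update approximates a single large batch at a fraction of the memory.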
From 5e2b802f68a65f305773a30f8572e7bc0250d87d Mon Sep 17 00:00:00 2001
From: glenn-jocher
Date: Mon, 1 Jul 2019 14:48:44 +0200
Subject: [PATCH 07/19] updates

---
 detect.py | 24 +++++++++++-------------
 test.py   | 20 +++++++++-----------
 train.py  | 49 +++++++++++++++++++------------------------------
 3 files changed, 39 insertions(+), 54 deletions(-)

diff --git a/detect.py b/detect.py
index 192ef310..f33d39f5 100644
--- a/detect.py
+++ b/detect.py
@@ -127,20 +127,18 @@ if __name__ == '__main__':
     parser.add_argument('--img-size', type=int, default=416, help='inference size (pixels)')
     parser.add_argument('--conf-thres', type=float, default=0.5, help='object confidence threshold')
     parser.add_argument('--nms-thres', type=float, default=0.5, help='iou threshold for non-maximum suppression')
-    parser.add_argument('--fourcc', type=str, default='mp4v', help='specifies the fourcc code for output video encoding (make sure ffmpeg supports specified fourcc codec)')
-    parser.add_argument('--output', type=str, default='output',help='specifies the output path for images and videos')
+    parser.add_argument('--fourcc', type=str, default='mp4v', help='fourcc output video codec (verify ffmpeg support)')
+    parser.add_argument('--output', type=str, default='output', help='specifies the output path for images and videos')
     opt = parser.parse_args()
     print(opt)

     with torch.no_grad():
-        detect(
-            opt.cfg,
-            opt.data_cfg,
-            opt.weights,
-            images=opt.images,
-            img_size=opt.img_size,
-            conf_thres=opt.conf_thres,
-            nms_thres=opt.nms_thres,
-            fourcc=opt.fourcc,
-            output=opt.output
-        )
+        detect(opt.cfg,
+               opt.data_cfg,
+               opt.weights,
+               images=opt.images,
+               img_size=opt.img_size,
+               conf_thres=opt.conf_thres,
+               nms_thres=opt.nms_thres,
+               fourcc=opt.fourcc,
+               output=opt.output)
diff --git a/test.py b/test.py
index eb902c5f..eeaadf6f 100644
--- a/test.py
+++ b/test.py
@@ -201,14 +201,12 @@ if __name__ == '__main__':
     print(opt)

     with torch.no_grad():
-        mAP = test(
-            opt.cfg,
-            opt.data_cfg,
-            opt.weights,
-            opt.batch_size,
-            opt.img_size,
-            opt.iou_thres,
-            opt.conf_thres,
-            opt.nms_thres,
-            opt.save_json
-        )
+        mAP = test(opt.cfg,
+                   opt.data_cfg,
+                   opt.weights,
+                   opt.batch_size,
+                   opt.img_size,
+                   opt.iou_thres,
+                   opt.conf_thres,
+                   opt.nms_thres,
+                   opt.save_json)
diff --git a/train.py b/train.py
index 151163a2..3aa24156 100644
--- a/train.py
+++ b/train.py
@@ -46,12 +46,10 @@ def train(
         cfg,
         data_cfg,
         img_size=416,
-        resume=False,
         epochs=100,  # 500200 batches at bs 16, 117263 images = 273 epochs
         batch_size=8,
         accumulate=8,  # effective bs = batch_size * accumulate = 8 * 8 = 64
         freeze_backbone=False,
-        transfer=False  # Transfer learning (train only YOLO layers)
 ):
     init_seeds()
     weights = 'weights' + os.sep
@@ -81,8 +79,8 @@ def train(
     start_epoch = 0
     best_loss = float('inf')
     nf = int(model.module_defs[model.yolo_layers[0] - 1]['filters'])  # yolo layer size (i.e. 255)
-    if resume:  # Load previously saved model
-        if transfer:  # Transfer learning
+    if opt.resume or opt.transfer:  # Load previously saved model
+        if opt.transfer:  # Transfer learning
             chkpt = torch.load(weights + 'yolov3-spp.pt', map_location=device)
             model.load_state_dict({k: v for k, v in chkpt['model'].items() if v.numel() > 1 and v.shape[0] != 255},
                                   strict=False)
@@ -138,7 +136,11 @@ def train(
     # Initialize distributed training
     if torch.cuda.device_count() > 1:
-        dist.init_process_group(backend=opt.backend, init_method=opt.dist_url, world_size=opt.world_size, rank=opt.rank)
+        dist.init_process_group(backend='nccl',  # 'distributed backend'
+                                init_method='tcp://127.0.0.1:9999',  # distributed training init method
+                                world_size=1,  # number of nodes for distributed training
+                                rank=0)  # distributed training node rank
+
         model = torch.nn.parallel.DistributedDataParallel(model)
         # sampler = torch.utils.data.distributed.DistributedSampler(dataset)
@@ -308,10 +310,6 @@ if __name__ == '__main__':
     parser.add_argument('--resume', action='store_true', help='resume training flag')
     parser.add_argument('--transfer', action='store_true', help='transfer learning flag')
     parser.add_argument('--num-workers', type=int, default=4, help='number of Pytorch DataLoader workers')
-    parser.add_argument('--dist-url', default='tcp://127.0.0.1:9999', type=str, help='distributed training init method')
-    parser.add_argument('--rank', default=0, type=int, help='distributed training node rank')
-    parser.add_argument('--world-size', default=1, type=int, help='number of nodes for distributed training')
-    parser.add_argument('--backend', default='nccl', type=str, help='distributed backend')
     parser.add_argument('--nosave', action='store_true', help='only save final checkpoint')
     parser.add_argument('--notest', action='store_true', help='only test final epoch')
    parser.add_argument('--giou', action='store_true', help='use GIoU loss instead of xy, wh loss')
@@ -325,16 +323,12 @@ if __name__ == '__main__':
         opt.nosave = True  # only save final checkpoint

     # Train
-    results = train(
-        opt.cfg,
-        opt.data_cfg,
-        img_size=opt.img_size,
-        resume=opt.resume or opt.transfer,
-        transfer=opt.transfer,
-        epochs=opt.epochs,
-        batch_size=opt.batch_size,
-        accumulate=opt.accumulate,
-    )
+    results = train(opt.cfg,
+                    opt.data_cfg,
+                    img_size=opt.img_size,
+                    epochs=opt.epochs,
+                    batch_size=opt.batch_size,
+                    accumulate=opt.accumulate)

     # Evolve hyperparameters (optional)
     if opt.evolve:
@@ -361,16 +355,12 @@ if __name__ == '__main__':
                 hyp[k] = np.clip(hyp[k], v[0], v[1])

             # Determine mutation fitness
-            results = train(
-                opt.cfg,
-                opt.data_cfg,
-                img_size=opt.img_size,
-                resume=opt.resume or opt.transfer,
-                transfer=opt.transfer,
-                epochs=opt.epochs,
-                batch_size=opt.batch_size,
-                accumulate=opt.accumulate,
-            )
+            results = train(opt.cfg,
+                            opt.data_cfg,
+                            img_size=opt.img_size,
+                            epochs=opt.epochs,
+                            batch_size=opt.batch_size,
+                            accumulate=opt.accumulate)
             mutation_fitness = results[2]

             # Write mutation results
@@ -378,7 +368,6 @@ if __name__ == '__main__':

             # Update hyperparameters if fitness improved
             if mutation_fitness > best_fitness:
-                # Fitness improved!
                 print('Fitness improved!')
                 best_fitness = mutation_fitness
             else:
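Patch 07 removes the distributed-training CLI flags and hardcodes single-node defaults at the call site. Reduced to its essentials, the setup looks like this (placeholder model; the connection values are copied from the patch):

```python
import torch
import torch.distributed as dist
import torch.nn as nn

model = nn.Linear(10, 2).cuda()  # placeholder model

if torch.cuda.device_count() > 1:
    dist.init_process_group(backend='nccl',                      # GPU-optimized collective backend
                            init_method='tcp://127.0.0.1:9999',  # rendezvous address
                            world_size=1,                        # total processes in the group
                            rank=0)                              # index of this process
    model = torch.nn.parallel.DistributedDataParallel(model)
```

One caveat on the inherited comments: world_size counts processes in the group, not nodes, so these defaults describe a one-process group.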
From b0d62e5204e32531860fe1fc5d77a8514c633e6b Mon Sep 17 00:00:00 2001
From: glenn-jocher
Date: Mon, 1 Jul 2019 15:21:06 +0200
Subject: [PATCH 08/19] updates

---
 train.py              | 18 +++++++++++++++++-
 utils/google_utils.py | 32 ++++++++++++++++++++++++++++++++
 utils/utils.py        |  1 +
 3 files changed, 50 insertions(+), 1 deletion(-)
 create mode 100644 utils/google_utils.py

diff --git a/train.py b/train.py
index 3aa24156..f4402680 100644
--- a/train.py
+++ b/train.py
@@ -10,6 +10,7 @@ import test  # import test.py to get mAP after each epoch
 from models import *
 from utils.datasets import *
 from utils.utils import *
+from utils.google_utils import *

 # 0.149 0.241 0.126 0.156 6.85 1.008 1.421 0.07989 16.94 6.215 10.61 4.272 0.251 0.001 -4 0.9 0.0005 320 64-1 giou
 hyp = {'giou': 1.008,  # giou loss gain
@@ -297,6 +298,21 @@ def print_mutation(hyp, results):
     with open('evolve.txt', 'a') as f:
         f.write(c + b + '\n')

+    cloud_evolve = False
+    if cloud_evolve:
+        # download cloud_evolve.txt
+        cloud_file = 'https://storage.googleapis.com/yolov4/cloud_evolve.txt'
+        local_file = cloud_file.replace('https://', '')
+        name = Path(local_file).name
+        download_blob(bucket_name='yolov4', source_blob_name=name, destination_file_name=local_file)
+
+        # add result to local cloud_evolve.txt
+        with open(local_file, 'a') as f:
+            f.write(c + b + '\n')
+
+        # upload cloud_evolve.txt
+        upload_blob(bucket_name='yolov4', source_file_name=local_file, destination_blob_name=name)
+
diff --git a/utils/google_utils.py b/utils/google_utils.py
new file mode 100644
index 00000000..ee939443
--- /dev/null
+++ b/utils/google_utils.py
@@ -0,0 +1,32 @@
+# This file contains google utils: https://cloud.google.com/storage/docs/reference/libraries
+# pip install --upgrade google-cloud-storage
+
+from google.cloud import storage
+
+
+def upload_blob(bucket_name, source_file_name, destination_blob_name):
+    # Uploads a file to a bucket
+    # https://cloud.google.com/storage/docs/uploading-objects#storage-upload-object-python
+
+    storage_client = storage.Client()
+    bucket = storage_client.get_bucket(bucket_name)
+    blob = bucket.blob(destination_blob_name)
+
+    blob.upload_from_filename(source_file_name)
+
+    print('File {} uploaded to {}.'.format(
+        source_file_name,
+        destination_blob_name))
+
+
+def download_blob(bucket_name, source_blob_name, destination_file_name):
+    # Downloads a blob from a bucket
+    storage_client = storage.Client()
+    bucket = storage_client.get_bucket(bucket_name)
+    blob = bucket.blob(source_blob_name)
+
+    blob.download_to_filename(destination_file_name)
+
+    print('Blob {} downloaded to {}.'.format(
+        source_blob_name,
+        destination_file_name))
diff --git a/utils/utils.py b/utils/utils.py
index 7d756f1d..4f8fe604 100755
--- a/utils/utils.py
+++ b/utils/utils.py
@@ -11,6 +11,7 @@ from PIL import Image
 from tqdm import tqdm

 from . import torch_utils
+from . import google_utils

 matplotlib.rc('font', **{'size': 11})
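Patch 08's helpers wrap the google-cloud-storage client in two one-call functions. A hypothetical usage sketch (bucket and file names are illustrative, and storage.Client() must already find credentials, e.g. via GOOGLE_APPLICATION_CREDENTIALS):

```python
from utils import google_utils

# Round-trip a results file through a GCS bucket
google_utils.upload_blob(bucket_name='yolov4',
                         source_file_name='evolve.txt',
                         destination_blob_name='evolve.txt')

google_utils.download_blob(bucket_name='yolov4',
                           source_blob_name='evolve.txt',
                           destination_file_name='evolve.txt')
```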
From c4409aa2edb27ed8ae221620a19014502bb27e96 Mon Sep 17 00:00:00 2001
From: glenn-jocher
Date: Mon, 1 Jul 2019 15:22:22 +0200
Subject: [PATCH 09/19] updates

---
 train.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/train.py b/train.py
index f4402680..24f59a10 100644
--- a/train.py
+++ b/train.py
@@ -10,7 +10,6 @@ import test  # import test.py to get mAP after each epoch
 from models import *
 from utils.datasets import *
 from utils.utils import *
-from utils.google_utils import *

 # 0.149 0.241 0.126 0.156 6.85 1.008 1.421 0.07989 16.94 6.215 10.61 4.272 0.251 0.001 -4 0.9 0.0005 320 64-1 giou
 hyp = {'giou': 1.008,  # giou loss gain
@@ -304,14 +303,14 @@ def print_mutation(hyp, results):
         cloud_file = 'https://storage.googleapis.com/yolov4/cloud_evolve.txt'
         local_file = cloud_file.replace('https://', '')
         name = Path(local_file).name
-        download_blob(bucket_name='yolov4', source_blob_name=name, destination_file_name=local_file)
+        google_utils.download_blob(bucket_name='yolov4', source_blob_name=name, destination_file_name=local_file)

         # add result to local cloud_evolve.txt
         with open(local_file, 'a') as f:
             f.write(c + b + '\n')

         # upload cloud_evolve.txt
-        upload_blob(bucket_name='yolov4', source_file_name=local_file, destination_blob_name=name)
+        google_utils.upload_blob(bucket_name='yolov4', source_file_name=local_file, destination_blob_name=name)

From 05358accbbba5d6af2e10eb287ed46af28ed2f94 Mon Sep 17 00:00:00 2001
From: glenn-jocher
Date: Mon, 1 Jul 2019 15:23:30 +0200
Subject: [PATCH 10/19] updates

---
 train.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/train.py b/train.py
index 24f59a10..b911bf27 100644
--- a/train.py
+++ b/train.py
@@ -319,7 +319,7 @@ if __name__ == '__main__':
     parser.add_argument('--batch-size', type=int, default=8, help='batch size')
     parser.add_argument('--accumulate', type=int, default=8, help='number of batches to accumulate before optimizing')
     parser.add_argument('--cfg', type=str, default='cfg/yolov3-spp.cfg', help='cfg file path')
-    parser.add_argument('--data-cfg', type=str, default='data/coco_16img.data', help='coco.data file path')
+    parser.add_argument('--data-cfg', type=str, default='data/coco_64img.data', help='coco.data file path')
     parser.add_argument('--single-scale', action='store_true', help='train at fixed size (no multi-scale)')
     parser.add_argument('--img-size', type=int, default=416, help='inference size (pixels)')
     parser.add_argument('--resume', action='store_true', help='resume training flag')
From cf51cf9c990e2ff6a3435c5910a7a291cb290138 Mon Sep 17 00:00:00 2001
From: glenn-jocher
Date: Mon, 1 Jul 2019 17:14:42 +0200
Subject: [PATCH 11/19] updates

---
 train.py | 53 ++++++++++++++++++++---------------------------------
 1 file changed, 20 insertions(+), 33 deletions(-)

diff --git a/train.py b/train.py
index b911bf27..76969494 100644
--- a/train.py
+++ b/train.py
@@ -294,28 +294,20 @@ def print_mutation(hyp, results):
     b = '%11.4g' * len(hyp) % tuple(hyp.values())  # hyperparam values
     c = '%11.3g' * len(results) % results  # results (P, R, mAP, F1, test_loss)
     print('\n%s\n%s\nEvolved fitness: %s\n' % (a, b, c))
-    with open('evolve.txt', 'a') as f:
-        f.write(c + b + '\n')
-
-    cloud_evolve = False
-    if cloud_evolve:
-        # download cloud_evolve.txt
-        cloud_file = 'https://storage.googleapis.com/yolov4/cloud_evolve.txt'
-        local_file = cloud_file.replace('https://', '')
-        name = Path(local_file).name
-        google_utils.download_blob(bucket_name='yolov4', source_blob_name=name, destination_file_name=local_file)
-
-        # add result to local cloud_evolve.txt
-        with open(local_file, 'a') as f:
+    if opt.cloud_evolve:
+        os.system('gsutil cp gs://yolov4/evolve.txt .')  # download evolve.txt
+        with open('evolve.txt', 'a') as f:  # append result to evolve.txt
+            f.write(c + b + '\n')
+        os.system('gsutil cp evolve.txt gs://yolov4')  # upload evolve.txt
+    else:
+        with open('evolve.txt', 'a') as f:
             f.write(c + b + '\n')
-
-        # upload cloud_evolve.txt
-        google_utils.upload_blob(bucket_name='yolov4', source_file_name=local_file, destination_blob_name=name)


 if __name__ == '__main__':
     parser = argparse.ArgumentParser()
-    parser.add_argument('--epochs', type=int, default=100, help='number of epochs')
+    parser.add_argument('--epochs', type=int, default=1, help='number of epochs')
     parser.add_argument('--batch-size', type=int, default=8, help='batch size')
     parser.add_argument('--accumulate', type=int, default=8, help='number of batches to accumulate before optimizing')
     parser.add_argument('--cfg', type=str, default='cfg/yolov3-spp.cfg', help='cfg file path')
     parser.add_argument('--data-cfg', type=str, default='data/coco_64img.data', help='coco.data file path')
     parser.add_argument('--single-scale', action='store_true', help='train at fixed size (no multi-scale)')
     parser.add_argument('--img-size', type=int, default=416, help='inference size (pixels)')
     parser.add_argument('--resume', action='store_true', help='resume training flag')
     parser.add_argument('--transfer', action='store_true', help='transfer learning flag')
     parser.add_argument('--num-workers', type=int, default=4, help='number of Pytorch DataLoader workers')
     parser.add_argument('--nosave', action='store_true', help='only save final checkpoint')
     parser.add_argument('--notest', action='store_true', help='only test final epoch')
     parser.add_argument('--giou', action='store_true', help='use GIoU loss instead of xy, wh loss')
     parser.add_argument('--evolve', action='store_true', help='run hyperparameter evolution')
+    parser.add_argument('--cloud_evolve', action='store_true', help='--evolve from a central source')
     parser.add_argument('--var', default=0, type=int, help='debug variable')
     opt = parser.parse_args()
     print(opt)

+    opt.evolve = opt.cloud_evolve or opt.evolve
     if opt.evolve:
         opt.notest = True  # only test final epoch
         opt.nosave = True  # only save final checkpoint

     # Train
     results = train(opt.cfg,
                     opt.data_cfg,
                     img_size=opt.img_size,
                     epochs=opt.epochs,
                     batch_size=opt.batch_size,
                     accumulate=opt.accumulate)

     # Evolve hyperparameters (optional)
     if opt.evolve:
-        best_fitness = results[2]  # use mAP for fitness
-
-        # Write mutation results
-        print_mutation(hyp, results)
-
         gen = 1000  # generations to evolve
-        for _ in range(gen):
+        print_mutation(hyp, results)  # Write mutation results

-            # Mutate hyperparameters
-            old_hyp = hyp.copy()
+        for _ in range(gen):
+            # Get best hyperparameters
+            x = np.loadtxt('evolve.txt', ndmin=2)
+            x = x[x[:, 2].argmax()]  # select best mAP for fitness (col 2)
+            for i, k in enumerate(hyp.keys()):
+                hyp[k] = x[i + 5]
+
+            # Mutate
             init_seeds(seed=int(time.time()))
             s = [.2, .2, .2, .2, .2, .2, .2, .2, .2 * 0, .2 * 0, .05 * 0, .2 * 0]  # fractional sigmas
             for i, k in enumerate(hyp.keys()):
                 x = (np.random.randn(1) * s[i] + 1) ** 2.0  # plt.hist(x.ravel(), 300)
                 hyp[k] *= float(x)  # vary by about 30% 1sigma

             # Clip to limits
             keys = ['lr0', 'iou_t', 'momentum', 'weight_decay']
             for k, v in zip(keys, limits):
                 hyp[k] = np.clip(hyp[k], v[0], v[1])

-            # Determine mutation fitness
+            # Train mutation
             results = train(opt.cfg,
                             opt.data_cfg,
                             img_size=opt.img_size,
                             epochs=opt.epochs,
                             batch_size=opt.batch_size,
                             accumulate=opt.accumulate)
-            mutation_fitness = results[2]

             # Write mutation results
             print_mutation(hyp, results)

-            # Update hyperparameters if fitness improved
-            if mutation_fitness > best_fitness:
-                print('Fitness improved!')
-                best_fitness = mutation_fitness
-            else:
-                hyp = old_hyp.copy()  # reset hyp to

     # # Plot results
     # import numpy as np
     # import matplotlib.pyplot as plt
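Patch 11 also changes the evolution strategy: instead of mutating the previous candidate and reverting on failure, every generation re-seeds from the best row of evolve.txt. The select-mutate-clip cycle in isolation (abbreviated hyp dict and illustrative clip limits; the real bounds come from the limits list in train.py, which this excerpt never shows):

```python
import numpy as np

hyp = {'giou': 1.008, 'xy': 1.421, 'wh': 0.07989, 'cls': 16.94}  # abbreviated stand-in

# Select: restart from the all-time best row of evolve.txt
x = np.loadtxt('evolve.txt', ndmin=2)  # one row per finished mutation
x = x[x[:, 2].argmax()]                # fitness = mAP, stored in column 2
for i, k in enumerate(hyp.keys()):
    hyp[k] = x[i + 5]                  # hyperparameter values start at column 5

# Mutate: multiplicative noise, roughly 20% 1-sigma per active key
s = [.2] * len(hyp)                    # fractional sigmas (train.py zeroes some entries)
for i, k in enumerate(hyp.keys()):
    hyp[k] *= float((np.random.randn(1) * s[i] + 1) ** 2.0)

# Clip: keep sensitive keys inside bounds (values here are illustrative only)
for k, (lo, hi) in {'giou': (0.0, 10.0), 'xy': (0.0, 10.0)}.items():
    hyp[k] = float(np.clip(hyp[k], lo, hi))
```

Squaring the noisy factor skews the multiplicative mutation toward larger excursions while keeping its mode near 1, which is why the exponent was tuned in patches 02 and 05.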
From f43ee6ef94a54570316cb09faa8ee2d33f7b57a6 Mon Sep 17 00:00:00 2001
From: glenn-jocher
Date: Mon, 1 Jul 2019 17:17:29 +0200
Subject: [PATCH 12/19] updates

---
 train.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/train.py b/train.py
index 76969494..1b1e7182 100644
--- a/train.py
+++ b/train.py
@@ -307,7 +307,7 @@ def print_mutation(hyp, results):

 if __name__ == '__main__':
     parser = argparse.ArgumentParser()
-    parser.add_argument('--epochs', type=int, default=1, help='number of epochs')
+    parser.add_argument('--epochs', type=int, default=100, help='number of epochs')
     parser.add_argument('--batch-size', type=int, default=8, help='batch size')
     parser.add_argument('--accumulate', type=int, default=8, help='number of batches to accumulate before optimizing')
     parser.add_argument('--cfg', type=str, default='cfg/yolov3-spp.cfg', help='cfg file path')
@@ -320,8 +320,8 @@ if __name__ == '__main__':
     parser.add_argument('--nosave', action='store_true', help='only save final checkpoint')
     parser.add_argument('--notest', action='store_true', help='only test final epoch')
     parser.add_argument('--giou', action='store_true', help='use GIoU loss instead of xy, wh loss')
-    parser.add_argument('--evolve', action='store_true', help='run hyperparameter evolution')
-    parser.add_argument('--cloud_evolve', action='store_true', help='--evolve from a central source')
+    parser.add_argument('--evolve', action='store_true', help='evolve hyperparameters')
+    parser.add_argument('--cloud_evolve', action='store_true', help='evolve hyperparameters from a cloud source')
     parser.add_argument('--var', default=0, type=int, help='debug variable')
     opt = parser.parse_args()
     print(opt)
@@ -347,7 +347,7 @@ if __name__ == '__main__':
         for _ in range(gen):
             # Get best hyperparameters
             x = np.loadtxt('evolve.txt', ndmin=2)
-            x = x[x[:, 2].argmax()]  # select best mAP for fitness (col 2)
+            x = x[x[:, 2].argmax()]  # select best mAP as genetic fitness (col 2)
             for i, k in enumerate(hyp.keys()):
                 hyp[k] = x[i + 5]
@@ -356,7 +356,7 @@ if __name__ == '__main__':
             s = [.2, .2, .2, .2, .2, .2, .2, .2, .2 * 0, .2 * 0, .05 * 0, .2 * 0]  # fractional sigmas
             for i, k in enumerate(hyp.keys()):
                 x = (np.random.randn(1) * s[i] + 1) ** 2.0  # plt.hist(x.ravel(), 300)
-                hyp[k] *= float(x)  # vary by about 30% 1sigma
+                hyp[k] *= float(x)  # vary by 20% 1sigma

From 1fd871abd8074435c4a9973be3cbb4d78139d12a Mon Sep 17 00:00:00 2001
From: glenn-jocher
Date: Mon, 1 Jul 2019 17:44:42 +0200
Subject: [PATCH 13/19] updates

---
 train.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/train.py b/train.py
index 1b1e7182..06a061dd 100644
--- a/train.py
+++ b/train.py
@@ -321,7 +321,7 @@ if __name__ == '__main__':
     parser.add_argument('--notest', action='store_true', help='only test final epoch')
     parser.add_argument('--giou', action='store_true', help='use GIoU loss instead of xy, wh loss')
     parser.add_argument('--evolve', action='store_true', help='evolve hyperparameters')
-    parser.add_argument('--cloud_evolve', action='store_true', help='evolve hyperparameters from a cloud source')
+    parser.add_argument('--cloud-evolve', action='store_true', help='evolve hyperparameters from a cloud source')
     parser.add_argument('--var', default=0, type=int, help='debug variable')
     opt = parser.parse_args()
     print(opt)
From ccf757b3ea41537bf90b73be0a86b556645906e2 Mon Sep 17 00:00:00 2001
From: Yonghye Kwon
Date: Tue, 2 Jul 2019 19:24:18 +0900
Subject: [PATCH 14/19] changed the criteria for the best weight file (#356)

* changed the criteria for the best weight file

changed the criteria for the best weight file from loss to mAP

I trained the model on my custom dataset, but I failed to get good results
when I loaded the weight file with the lowest loss on the test dataset.
I thought the loss used in YOLO was not a proper criterion for detection
performance, so I changed the criterion from loss to mAP. What do you think
of this?

* Update train.py
---
 train.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/train.py b/train.py
index 06a061dd..1f1aad3a 100644
--- a/train.py
+++ b/train.py
@@ -77,7 +77,7 @@ def train(
     cutoff = -1  # backbone reaches to cutoff layer
     start_epoch = 0
-    best_loss = float('inf')
+    best_map = 0.
     nf = int(model.module_defs[model.yolo_layers[0] - 1]['filters'])  # yolo layer size (i.e. 255)
     if opt.resume or opt.transfer:  # Load previously saved model
         if opt.transfer:  # Transfer learning
@@ -256,17 +256,17 @@ def train(
         with open('results.txt', 'a') as file:
             file.write(s + '%11.3g' * 5 % results + '\n')  # P, R, mAP, F1, test_loss

-        # Update best loss
-        test_loss = results[4]
-        if test_loss < best_loss:
-            best_loss = test_loss
+        # Update best map
+        test_map = results[2]
+        if test_map > best_map:
+            best_map = test_map

         # Save training results
         save = (not opt.nosave) or (epoch == epochs - 1)
         if save:
             # Create checkpoint
             chkpt = {'epoch': epoch,
-                     'best_loss': best_loss,
+                     'best_map': best_map,
                      'model': model.module.state_dict() if type(
                          model) is nn.parallel.DistributedDataParallel else model.state_dict(),
                      'optimizer': optimizer.state_dict()}
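Patch 14's change reduces to tracking a maximum instead of a minimum. A runnable toy version of the selection loop (run_epoch stands in for a real train-plus-test cycle):

```python
def run_epoch(epoch):
    """Placeholder for one train + test cycle; returns (P, R, mAP, F1, test_loss)."""
    return 0.5, 0.6, 0.30 + 0.01 * epoch, 0.55, 2.0 - 0.01 * epoch

best_map = 0.0  # mAP is maximized, so the sentinel is 0.0 (a loss criterion starts at float('inf'))
for epoch in range(10):
    results = run_epoch(epoch)
    test_map = results[2]        # mAP lives in results[2]
    if test_map > best_map:      # '>' replaces the '<' used when tracking best loss
        best_map = test_map
        print('new best at epoch %g, mAP %.3f' % (epoch, test_map))  # a real run would torch.save() here
```

Flipping the criterion also flips the sentinel: a loss criterion starts at infinity and minimizes, while a mAP criterion starts at zero and maximizes.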
From a8cf64af31034dfd408663f76e8af37288cb5f2c Mon Sep 17 00:00:00 2001
From: glenn-jocher
Date: Tue, 2 Jul 2019 18:21:28 +0200
Subject: [PATCH 15/19] updates

---
 train.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/train.py b/train.py
index 1f1aad3a..b318331c 100644
--- a/train.py
+++ b/train.py
@@ -77,7 +77,7 @@ def train(
     cutoff = -1  # backbone reaches to cutoff layer
     start_epoch = 0
-    best_map = 0.
+    best_fitness = 0.0
     nf = int(model.module_defs[model.yolo_layers[0] - 1]['filters'])  # yolo layer size (i.e. 255)
     if opt.resume or opt.transfer:  # Load previously saved model
         if opt.transfer:  # Transfer learning
@@ -94,7 +94,7 @@ def train(
             start_epoch = chkpt['epoch'] + 1
             if chkpt['optimizer'] is not None:
                 optimizer.load_state_dict(chkpt['optimizer'])
-                best_loss = chkpt['best_loss']
+                best_fitness = chkpt['best_fitness']
             del chkpt
     else:  # Initialize model with backbone (optional)
@@ -257,16 +257,16 @@ def train(
             file.write(s + '%11.3g' * 5 % results + '\n')  # P, R, mAP, F1, test_loss

         # Update best map
-        test_map = results[2]
-        if test_map > best_map:
-            best_map = test_map
+        fitness = results[2]
+        if fitness > best_fitness:
+            best_fitness = fitness

         # Save training results
         save = (not opt.nosave) or (epoch == epochs - 1)
         if save:
             # Create checkpoint
             chkpt = {'epoch': epoch,
-                     'best_map': best_map,
+                     'best_fitness': best_fitness,
                      'model': model.module.state_dict() if type(
                          model) is nn.parallel.DistributedDataParallel else model.state_dict(),
                      'optimizer': optimizer.state_dict()}
@@ -275,7 +275,7 @@ def train(
             torch.save(chkpt, latest)

             # Save best checkpoint
-            if best_loss == test_loss:
+            if best_fitness == fitness:
                 torch.save(chkpt, best)

             # Save backup every 10 epochs (optional)
From 1d0a4a3ace94567172e2adbabfba0fa553a07f1a Mon Sep 17 00:00:00 2001
From: glenn-jocher
Date: Wed, 3 Jul 2019 14:42:11 +0200
Subject: [PATCH 16/19] updates

---
 models.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/models.py b/models.py
index 347264fd..6b71989a 100755
--- a/models.py
+++ b/models.py
@@ -15,6 +15,7 @@ def create_modules(module_defs):
     hyperparams = module_defs.pop(0)
     output_filters = [int(hyperparams['channels'])]
     module_list = nn.ModuleList()
+    yolo_index = -1
     for i, module_def in enumerate(module_defs):
         modules = nn.Sequential()

@@ -58,6 +59,7 @@ def create_modules(module_defs):
             modules.add_module('shortcut_%d' % i, EmptyLayer())

         elif module_def['type'] == 'yolo':
+            yolo_index += 1
             anchor_idxs = [int(x) for x in module_def['mask'].split(',')]
             # Extract anchors
             anchors = [float(x) for x in module_def['anchors'].split(',')]
@@ -66,8 +68,7 @@ def create_modules(module_defs):
             nc = int(module_def['classes'])  # number of classes
             img_size = hyperparams['height']
             # Define detection layer
-            yolo_layer = YOLOLayer(anchors, nc, img_size, cfg=hyperparams['cfg'])
-            modules.add_module('yolo_%d' % i, yolo_layer)
+            modules.add_module('yolo_%d' % i, YOLOLayer(anchors, nc, img_size, yolo_index))

         # Register module list and number of output filters
         module_list.append(modules)
@@ -99,7 +100,7 @@ class Upsample(nn.Module):

 class YOLOLayer(nn.Module):
-    def __init__(self, anchors, nc, img_size, cfg):
+    def __init__(self, anchors, nc, img_size, yolo_index):
         super(YOLOLayer, self).__init__()

         self.anchors = torch.Tensor(anchors)
@@ -109,7 +110,7 @@ class YOLOLayer(nn.Module):
         self.ny = 0  # initialize number of y gridpoints

         if ONNX_EXPORT:  # grids must be computed in __init__
-            stride = [32, 16, 8][yolo_layer]  # stride of this layer
+            stride = [32, 16, 8][yolo_index]  # stride of this layer
             nx = int(img_size[1] / stride)  # number x grid points
             ny = int(img_size[0] / stride)  # number y grid points
             create_grids(self, max(img_size), (nx, ny))
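Patch 16 passes each detection head its ordinal so the head can derive its own stride at construction time, which matters for ONNX export where grids are built in __init__. The lookup in isolation (assumes the standard three-head YOLOv3-SPP layout at 416x416):

```python
img_size = (416, 416)

for yolo_index in range(3):
    stride = [32, 16, 8][yolo_index]  # heads are ordered coarse to fine
    nx = int(img_size[1] / stride)    # grid points along x
    ny = int(img_size[0] / stride)    # grid points along y
    print(yolo_index, stride, (nx, ny))  # 13x13, 26x26, 52x52
```

This also fixes a latent bug: the removed line indexed [32, 16, 8][yolo_layer], but no yolo_layer name existed inside __init__, so any ONNX export path would have raised NameError.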
From ab141fcc1ff976fa9d1bd983e11fe43ba4628e2e Mon Sep 17 00:00:00 2001
From: glenn-jocher
Date: Wed, 3 Jul 2019 15:37:04 +0200
Subject: [PATCH 17/19] updates

---
 models.py | 15 +--------------
 train.py  |  1 +
 2 files changed, 2 insertions(+), 14 deletions(-)

diff --git a/models.py b/models.py
index 6b71989a..b1717d99 100755
--- a/models.py
+++ b/models.py
@@ -45,8 +45,7 @@ def create_modules(module_defs):
             modules.add_module('maxpool_%d' % i, maxpool)

         elif module_def['type'] == 'upsample':
-            # upsample = nn.Upsample(scale_factor=int(module_def['stride']), mode='nearest')  # WARNING: deprecated
-            upsample = Upsample(scale_factor=int(module_def['stride']))
+            upsample = nn.Upsample(scale_factor=int(module_def['stride']), mode='nearest')
             modules.add_module('upsample_%d' % i, upsample)

         elif module_def['type'] == 'route':
@@ -87,18 +86,6 @@ class EmptyLayer(nn.Module):
         return x


-class Upsample(nn.Module):
-    # Custom Upsample layer (nn.Upsample gives deprecated warning message)
-
-    def __init__(self, scale_factor=1, mode='nearest'):
-        super(Upsample, self).__init__()
-        self.scale_factor = scale_factor
-        self.mode = mode
-
-    def forward(self, x):
-        return F.interpolate(x, scale_factor=self.scale_factor, mode=self.mode)
-
-
 class YOLOLayer(nn.Module):
     def __init__(self, anchors, nc, img_size, yolo_index):
         super(YOLOLayer, self).__init__()
diff --git a/train.py b/train.py
index b318331c..b5dbd857 100644
--- a/train.py
+++ b/train.py
@@ -12,6 +12,7 @@ from utils.datasets import *
 from utils.utils import *

 # 0.149 0.241 0.126 0.156 6.85 1.008 1.421 0.07989 16.94 6.215 10.61 4.272 0.251 0.001 -4 0.9 0.0005 320 64-1 giou
+# 0.111 0.27 0.132 0.131 3.96 1.276 0.3156 0.1425 21.21 6.224 11.59 8.83 0.376 0.001 -4 0.9 0.0005
 hyp = {'giou': 1.008,  # giou loss gain
        'xy': 1.421,  # xy loss gain
        'wh': 0.07989,  # wh loss gain

From 1e62ee2152203b4175a982995389dbb1a74bb05a Mon Sep 17 00:00:00 2001
From: glenn-jocher
Date: Wed, 3 Jul 2019 16:17:46 +0200
Subject: [PATCH 18/19] updates

---
 train.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/train.py b/train.py
index b5dbd857..40e2381c 100644
--- a/train.py
+++ b/train.py
@@ -128,7 +128,7 @@ def train(
     #     plt.savefig('LR.png', dpi=300)

     # Dataset
-    rectangular_training = False
+    rectangular_training = True
     dataset = LoadImagesAndLabels(train_path,
                                   img_size,
                                   batch_size,
@@ -196,7 +196,7 @@ def train(
             imgs = imgs.to(device)
             targets = targets.to(device)

-            # Multi-Scale training
+            # Multi-Scale training  TODO: short-side to 32-multiple https://github.com/ultralytics/yolov3/issues/358
             if multi_scale:
                 if (i + nb * epoch) / accumulate % 10 == 0:  #  adjust (67% - 150%) every 10 batches
                     img_size = random.choice(range(img_size_min, img_size_max + 1)) * 32

From 109991198c4cfd87902ef5e34f59b9a451a4b95c Mon Sep 17 00:00:00 2001
From: glenn-jocher
Date: Wed, 3 Jul 2019 16:18:08 +0200
Subject: [PATCH 19/19] updates

---
 train.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/train.py b/train.py
index 40e2381c..ea00fc68 100644
--- a/train.py
+++ b/train.py
@@ -128,7 +128,7 @@ def train(
     #     plt.savefig('LR.png', dpi=300)

     # Dataset
-    rectangular_training = True
+    rectangular_training = False
     dataset = LoadImagesAndLabels(train_path,
                                   img_size,
                                   batch_size,
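Patch 17, above, drops the custom Upsample wrapper in favor of nn.Upsample; the wrapper existed only to avoid a deprecation warning that nn.Upsample emitted on older PyTorch releases (per the removed comment), and both paths reduce to the same interpolation call. A quick equivalence check (arbitrary shapes):

```python
import torch
import torch.nn as nn
import torch.nn.functional as F

x = torch.randn(1, 256, 13, 13)
a = nn.Upsample(scale_factor=2, mode='nearest')(x)  # builtin module
b = F.interpolate(x, scale_factor=2, mode='nearest')  # what the deleted wrapper called
assert a.shape == b.shape == (1, 256, 26, 26) and torch.equal(a, b)
```

Patches 18 and 19 then toggle rectangular_training on and immediately back off, leaving only the multi-scale TODO comment behind; the series ends mid-hunk in patch 19.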