updates
This commit is contained in:
		
							parent
							
								
									b7a25e60ce
								
							
						
					
					
						commit
						ba265d91b2
					
				
							
								
								
									
										87
									
								
								train.py
								
								
								
								
							
							
						
						
									
										87
									
								
								train.py
								
								
								
								
							| 
						 | 
				
			
			@ -53,7 +53,7 @@ def train():
 | 
			
		|||
    cfg = opt.cfg
 | 
			
		||||
    data = opt.data
 | 
			
		||||
    img_size = opt.img_size
 | 
			
		||||
    epochs = 1 if opt.prebias else opt.epochs  # 500200 batches at bs 64, 117263 images = 273 epochs
 | 
			
		||||
    epochs = opt.epochs  # 500200 batches at bs 64, 117263 images = 273 epochs
 | 
			
		||||
    batch_size = opt.batch_size
 | 
			
		||||
    accumulate = opt.accumulate  # effective bs = batch_size * accumulate = 16 * 4 = 64
 | 
			
		||||
    weights = opt.weights  # initial training weights
 | 
			
		||||
| 
						 | 
				
			
			@ -65,8 +65,8 @@ def train():
 | 
			
		|||
    # Initialize
 | 
			
		||||
    init_seeds()
 | 
			
		||||
    if opt.multi_scale:
 | 
			
		||||
        img_sz_min = 9  # round(img_size / 32 / 1.5)
 | 
			
		||||
        img_sz_max = 21  # round(img_size / 32 * 1.5)
 | 
			
		||||
        img_sz_min = round(img_size / 32 / 1.5)
 | 
			
		||||
        img_sz_max = round(img_size / 32 * 1.5)
 | 
			
		||||
        img_size = img_sz_max * 32  # initiate with maximum multi_scale size
 | 
			
		||||
        print('Using multi-scale %g - %g' % (img_sz_min * 32, img_size))
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -136,16 +136,6 @@ def train():
 | 
			
		|||
        # possible weights are '*.weights', 'yolov3-tiny.conv.15',  'darknet53.conv.74' etc.
 | 
			
		||||
        cutoff = load_darknet_weights(model, weights)
 | 
			
		||||
 | 
			
		||||
    if opt.prebias:
 | 
			
		||||
        # Update params (bias-only training allows more aggressive settings: i.e. SGD ~0.1 lr0, ~0.9 momentum)
 | 
			
		||||
        for p in optimizer.param_groups:
 | 
			
		||||
            p['lr'] = 0.1  # learning rate
 | 
			
		||||
            if p.get('momentum') is not None:  # for SGD but not Adam
 | 
			
		||||
                p['momentum'] = 0.9
 | 
			
		||||
 | 
			
		||||
        for name, p in model.named_parameters():
 | 
			
		||||
            p.requires_grad = True if name.endswith('.bias') else False
 | 
			
		||||
 | 
			
		||||
    # Scheduler https://github.com/ultralytics/yolov3/issues/238
 | 
			
		||||
    # lf = lambda x: 1 - x / epochs  # linear ramp to zero
 | 
			
		||||
    # lf = lambda x: 10 ** (hyp['lrf'] * x / epochs)  # exp ramp
 | 
			
		||||
| 
						 | 
				
			
			@ -186,7 +176,7 @@ def train():
 | 
			
		|||
                                  rect=opt.rect,  # rectangular training
 | 
			
		||||
                                  image_weights=False,
 | 
			
		||||
                                  cache_labels=epochs > 10,
 | 
			
		||||
                                  cache_images=opt.cache_images and not opt.prebias)
 | 
			
		||||
                                  cache_images=opt.cache_images)
 | 
			
		||||
 | 
			
		||||
    # Dataloader
 | 
			
		||||
    batch_size = min(batch_size, len(dataset))
 | 
			
		||||
| 
						 | 
				
			
			@ -198,17 +188,16 @@ def train():
 | 
			
		|||
                                             pin_memory=True,
 | 
			
		||||
                                             collate_fn=dataset.collate_fn)
 | 
			
		||||
 | 
			
		||||
    # Test Dataloader
 | 
			
		||||
    if not opt.prebias:
 | 
			
		||||
        testloader = torch.utils.data.DataLoader(LoadImagesAndLabels(test_path, opt.img_size, batch_size * 2,
 | 
			
		||||
                                                                     hyp=hyp,
 | 
			
		||||
                                                                     rect=True,
 | 
			
		||||
                                                                     cache_labels=True,
 | 
			
		||||
                                                                     cache_images=opt.cache_images),
 | 
			
		||||
                                                 batch_size=batch_size * 2,
 | 
			
		||||
                                                 num_workers=nw,
 | 
			
		||||
                                                 pin_memory=True,
 | 
			
		||||
                                                 collate_fn=dataset.collate_fn)
 | 
			
		||||
    # Testloader
 | 
			
		||||
    testloader = torch.utils.data.DataLoader(LoadImagesAndLabels(test_path, opt.img_size, batch_size * 2,
 | 
			
		||||
                                                                 hyp=hyp,
 | 
			
		||||
                                                                 rect=True,
 | 
			
		||||
                                                                 cache_labels=True,
 | 
			
		||||
                                                                 cache_images=opt.cache_images),
 | 
			
		||||
                                             batch_size=batch_size * 2,
 | 
			
		||||
                                             num_workers=nw,
 | 
			
		||||
                                             pin_memory=True,
 | 
			
		||||
                                             collate_fn=dataset.collate_fn)
 | 
			
		||||
 | 
			
		||||
    # Start training
 | 
			
		||||
    nb = len(dataloader)
 | 
			
		||||
| 
						 | 
				
			
			@ -222,11 +211,26 @@ def train():
 | 
			
		|||
    t0 = time.time()
 | 
			
		||||
    torch_utils.model_info(model, report='summary')  # 'full' or 'summary'
 | 
			
		||||
    print('Using %g dataloader workers' % nw)
 | 
			
		||||
    print('Starting %s for %g epochs...' % ('prebias' if opt.prebias else 'training', epochs))
 | 
			
		||||
    for epoch in range(start_epoch, epochs):  # epoch ------------------------------------------------------------------
 | 
			
		||||
    print('Starting training for %g epochs...' % epochs)
 | 
			
		||||
    for epoch in range(start_epoch - 1 if opt.prebias else start_epoch, epochs):  # epoch ------------------------------
 | 
			
		||||
        model.train()
 | 
			
		||||
        print(('\n' + '%10s' * 8) % ('Epoch', 'gpu_mem', 'GIoU', 'obj', 'cls', 'total', 'targets', 'img_size'))
 | 
			
		||||
 | 
			
		||||
        # Prebias
 | 
			
		||||
        if opt.prebias:
 | 
			
		||||
            if epoch < 0:  # prebias
 | 
			
		||||
                ps = 0.1, 0.9, False  # prebias settings (lr=0.1, momentum=0.9, requires_grad=False)
 | 
			
		||||
            else:  # normal training
 | 
			
		||||
                ps = hyp['lr0'], hyp['momentum'], True  # normal training settings
 | 
			
		||||
                opt.prebias = False
 | 
			
		||||
 | 
			
		||||
            for p in optimizer.param_groups:
 | 
			
		||||
                p['lr'] = ps[0]  # learning rate
 | 
			
		||||
                if p.get('momentum') is not None:  # for SGD but not Adam
 | 
			
		||||
                    p['momentum'] = ps[1]
 | 
			
		||||
            for name, p in model.named_parameters():
 | 
			
		||||
                p.requires_grad = True if name.endswith('.bias') else ps[2]
 | 
			
		||||
 | 
			
		||||
        # Update image weights (optional)
 | 
			
		||||
        if dataset.image_weights:
 | 
			
		||||
            w = model.class_weights.cpu().numpy() * (1 - maps) ** 2  # class weights
 | 
			
		||||
| 
						 | 
				
			
			@ -300,13 +304,11 @@ def train():
 | 
			
		|||
 | 
			
		||||
            # end batch ------------------------------------------------------------------------------------------------
 | 
			
		||||
 | 
			
		||||
        # Update scheduler
 | 
			
		||||
        scheduler.step()
 | 
			
		||||
 | 
			
		||||
        # Process epoch results
 | 
			
		||||
        final_epoch = epoch + 1 == epochs
 | 
			
		||||
        if opt.prebias:
 | 
			
		||||
            print_model_biases(model)
 | 
			
		||||
            continue
 | 
			
		||||
        elif not opt.notest or final_epoch:  # Calculate mAP
 | 
			
		||||
            is_coco = any([x in data for x in ['coco.data', 'coco2014.data', 'coco2017.data']]) and model.nc == 80
 | 
			
		||||
            results, maps = test.test(cfg,
 | 
			
		||||
| 
						 | 
				
			
			@ -319,10 +321,13 @@ def train():
 | 
			
		|||
                                      save_json=final_epoch and is_coco,
 | 
			
		||||
                                      dataloader=testloader)
 | 
			
		||||
 | 
			
		||||
        # Update scheduler
 | 
			
		||||
        scheduler.step()
 | 
			
		||||
 | 
			
		||||
        # Write epoch results
 | 
			
		||||
        with open(results_file, 'a') as f:
 | 
			
		||||
            f.write(s + '%10.3g' * 7 % results + '\n')  # P, R, mAP, F1, test_losses=(GIoU, obj, cls)
 | 
			
		||||
        if len(opt.name) and opt.bucket and not opt.prebias:
 | 
			
		||||
        if len(opt.name) and opt.bucket:
 | 
			
		||||
            os.system('gsutil cp results.txt gs://%s/results%s.txt' % (opt.bucket, opt.name))
 | 
			
		||||
 | 
			
		||||
        # Write Tensorboard results
 | 
			
		||||
| 
						 | 
				
			
			@ -339,7 +344,7 @@ def train():
 | 
			
		|||
            best_fitness = fitness
 | 
			
		||||
 | 
			
		||||
        # Save training results
 | 
			
		||||
        save = (not opt.nosave) or (final_epoch and not opt.evolve) or opt.prebias
 | 
			
		||||
        save = (not opt.nosave) or (final_epoch and not opt.evolve)
 | 
			
		||||
        if save:
 | 
			
		||||
            with open(results_file, 'r') as f:
 | 
			
		||||
                # Create checkpoint
 | 
			
		||||
| 
						 | 
				
			
			@ -368,7 +373,7 @@ def train():
 | 
			
		|||
 | 
			
		||||
    # end training
 | 
			
		||||
    n = opt.name
 | 
			
		||||
    if len(n) and not opt.prebias:
 | 
			
		||||
    if len(n):
 | 
			
		||||
        n = '_' + n if not n.isnumeric() else n
 | 
			
		||||
        fresults, flast, fbest = 'results%s.txt' % n, 'last%s.pt' % n, 'best%s.pt' % n
 | 
			
		||||
        os.rename('results.txt', fresults)
 | 
			
		||||
| 
						 | 
				
			
			@ -387,20 +392,6 @@ def train():
 | 
			
		|||
    return results
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def prebias():
 | 
			
		||||
    # trains output bias layers for 1 epoch and creates new backbone
 | 
			
		||||
    if opt.prebias:
 | 
			
		||||
        # opt_0 = opt  # save settings
 | 
			
		||||
        # opt.rect = False  # update settings (if any)
 | 
			
		||||
 | 
			
		||||
        train()  # train model biases
 | 
			
		||||
        create_backbone(last)  # saved results as backbone.pt
 | 
			
		||||
 | 
			
		||||
        # opt = opt_0  # reset settings
 | 
			
		||||
        opt.weights = wdir + 'backbone.pt'  # assign backbone
 | 
			
		||||
        opt.prebias = False  # disable prebias
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
if __name__ == '__main__':
 | 
			
		||||
    parser = argparse.ArgumentParser()
 | 
			
		||||
    parser.add_argument('--epochs', type=int, default=273)  # 500200 batches at bs 16, 117263 COCO images = 273 epochs
 | 
			
		||||
| 
						 | 
				
			
			@ -444,7 +435,6 @@ if __name__ == '__main__':
 | 
			
		|||
        except:
 | 
			
		||||
            pass
 | 
			
		||||
 | 
			
		||||
        prebias()  # optional
 | 
			
		||||
        train()  # train normally
 | 
			
		||||
 | 
			
		||||
    else:  # Evolve hyperparameters (optional)
 | 
			
		||||
| 
						 | 
				
			
			@ -483,7 +473,6 @@ if __name__ == '__main__':
 | 
			
		|||
                hyp[k] = np.clip(hyp[k], v[0], v[1])
 | 
			
		||||
 | 
			
		||||
            # Train mutation
 | 
			
		||||
            prebias()
 | 
			
		||||
            results = train()
 | 
			
		||||
 | 
			
		||||
            # Write mutation results
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -8,7 +8,9 @@
 | 
			
		|||
#t=ultralytics/yolov3:v199 && sudo docker pull $t && sudo nvidia-docker run -it --ipc=host -v "$(pwd)"/coco:/usr/src/coco $t python3 train.py --data coco2014.data --img-size 672 --epochs 10 --batch 16 --accum 4 --weights '' --arc defaultpw --device 0 --multi
 | 
			
		||||
 | 
			
		||||
while true; do
 | 
			
		||||
  python3 train.py --data ../data/sm4/out.data --img-size 320 --epochs 100 --batch 64 --accum 1 --weights yolov3-tiny.pt --arc defaultpw --pre --multi --bucket ult/wer --evolve --device $1 --cfg yolov3-tiny-3cls.cfg
 | 
			
		||||
  python3 train.py --data ../data/sm4/out.data --img-size 320 --epochs 100 --batch 64 --accum 1 --weights yolov3-tiny.pt --arc defaultpw --pre --multi --bucket ult/wer --evolve --device $1 --cfg yolov3-tiny-3cls.cfg --cache
 | 
			
		||||
  # python3 train.py --data ../out/data.data --img-size 608 --epochs 10 --batch 8 --accum 8 --weights ultralytics68.pt --arc defaultpw --pre --multi --bucket ult/athena --evolve --device $1
 | 
			
		||||
 | 
			
		||||
  # python3 train.py --data coco2014.data --img-size 640 --epochs 10 --batch 22 --accum 3 --evolve --weights '' --arc defaultpw --pre --bucket yolov4/640ms_coco2014_10e --device $1 --multi
 | 
			
		||||
  # python3 train.py --data coco2014.data --img-size 320 --epochs 27 --batch 64 --accum 1 --evolve --weights '' --arc defaultpw --pre --bucket yolov4/320_coco2014_27e --device $1
 | 
			
		||||
done
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -40,12 +40,13 @@ python3 test.py --save-json
 | 
			
		|||
# Evolve
 | 
			
		||||
t=ultralytics/yolov3:v206
 | 
			
		||||
sudo docker kill $(sudo docker ps -a -q --filter ancestor=$t)
 | 
			
		||||
for i in 0 1 2 3 4 5 6 7
 | 
			
		||||
for i in 0 1 2 3
 | 
			
		||||
do
 | 
			
		||||
  sudo docker pull $t && sudo nvidia-docker run -d --ipc=host -v "$(pwd)"/data:/usr/src/data $t bash utils/evolve.sh $i
 | 
			
		||||
  sudo docker pull $t && sudo docker run --gpus all -d --ipc=host -v "$(pwd)"/data:/usr/src/data $t bash utils/evolve.sh $i
 | 
			
		||||
  # sudo docker pull $t && sudo docker run --gpus all -d --ipc=host -v "$(pwd)"/out:/usr/src/out $t bash utils/evolve.sh $i
 | 
			
		||||
  # sudo docker pull $t && sudo nvidia-docker run -d --ipc=host -v "$(pwd)"/coco:/usr/src/coco $t bash utils/evolve.sh $i
 | 
			
		||||
  # sudo docker pull $t && sudo nvidia-docker run -d --ipc=host -v /mnt/disks/nvme0n1/coco:/usr/src/coco $t bash utils/evolve.sh $i
 | 
			
		||||
  sleep 1
 | 
			
		||||
  sleep 120
 | 
			
		||||
done
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -257,6 +258,7 @@ n=198 && t=ultralytics/yolov3:v$n && sudo docker pull $t && sudo nvidia-docker r
 | 
			
		|||
n=199 && t=ultralytics/yolov3:v$n && sudo docker pull $t && sudo nvidia-docker run --ipc=host -v "$(pwd)"/out:/usr/src/out $t python3 train.py --data ../out/data.data --img-size 608 --epochs 100 --batch 8 --accum 8 --weights ultralytics68.pt --arc defaultpw --pre --multi --bucket ultralytics/athena --name $n --device 0
 | 
			
		||||
n=200 && t=ultralytics/yolov3:v$n && sudo docker pull $t && sudo nvidia-docker run --ipc=host -v "$(pwd)"/out:/usr/src/out $t python3 train.py --data ../out/data.data --img-size 608 --epochs 100 --batch 8 --accum 8 --weights ultralytics68.pt --arc defaultpw --pre --multi --bucket ultralytics/athena --name $n --device 6
 | 
			
		||||
n=207 && t=ultralytics/yolov3:v$n && sudo docker pull $t && sudo nvidia-docker run --ipc=host -v "$(pwd)"/out:/usr/src/out $t python3 train.py --data ../out/data.data --img-size 608 --epochs 100 --batch 8 --accum 8 --weights ultralytics68.pt --arc defaultpw --pre --multi --bucket ultralytics/athena --name $n --device 7
 | 
			
		||||
n=208 && t=ultralytics/yolov3:v$n && sudo docker pull $t && sudo docker run --gpus all --ipc=host -v "$(pwd)"/out:/usr/src/out $t python3 train.py --data ../out/data.data --img-size 608 --epochs 10 --batch 8 --accum 8 --weights ultralytics68.pt --arc defaultpw --pre --multi --device 0
 | 
			
		||||
 | 
			
		||||
# sm4
 | 
			
		||||
n=201 && t=ultralytics/yolov3:v201 && sudo docker pull $t && sudo nvidia-docker run -d --ipc=host -v "$(pwd)"/data:/usr/src/data $t python3 train.py --data ../data/sm4/out.data --img-size 320 --epochs 1000 --batch 64 --accum 1 --weights yolov3-tiny.pt --arc defaultpw --pre --multi --bucket ult/wer --name $n --device 0 --cfg yolov3-tiny-3cls.cfg
 | 
			
		||||
| 
						 | 
				
			
			@ -265,3 +267,4 @@ n=203 && t=ultralytics/yolov3:v201 && sudo docker pull $t && sudo nvidia-docker
 | 
			
		|||
n=204 && t=ultralytics/yolov3:v202 && sudo docker pull $t && sudo nvidia-docker run -d --ipc=host -v "$(pwd)"/data:/usr/src/data $t python3 train.py --data ../data/sm4/out.data --img-size 320 --epochs 1000 --batch 64 --accum 1 --weights yolov3-tiny.pt --arc defaultpw --pre --multi --bucket ult/wer --name $n --device 3 --cfg yolov3-tiny-3cls-sm4.cfg
 | 
			
		||||
n=205 && t=ultralytics/yolov3:v202 && sudo docker pull $t && sudo nvidia-docker run -it --ipc=host -v "$(pwd)"/data:/usr/src/data $t python3 train.py --data ../data/sm4/out.data --img-size 320 --epochs 1000 --batch 64 --accum 1 --weights '' --arc defaultpw --pre --multi --bucket ult/wer --name $n --device 4 --cfg yolov3-tiny-3cls-sm4.cfg
 | 
			
		||||
 | 
			
		||||
n=206 && t=ultralytics/yolov3:v$n && sudo docker pull $t && sudo docker run --gpus all -it --ipc=host -v "$(pwd)"/data:/usr/src/data $t python3 train.py --data ../data/sm4/out.data --img-size 320 --epochs 100 --batch 64 --accum 1 --weights yolov3-tiny.pt --arc defaultpw --pre --multi --notest --nosave --cache --device 0 --cfg yolov3-tiny-3cls.cfg
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -633,7 +633,7 @@ def get_yolo_layers(model):
 | 
			
		|||
 | 
			
		||||
def print_model_biases(model):
 | 
			
		||||
    # prints the bias neurons preceding each yolo layer
 | 
			
		||||
    print('\nModel Bias Summary (per output layer):')
 | 
			
		||||
    print('\nModel Bias Summary:')
 | 
			
		||||
    multi_gpu = type(model) in (nn.parallel.DataParallel, nn.parallel.DistributedDataParallel)
 | 
			
		||||
    for l in model.yolo_layers:  # print pretrained biases
 | 
			
		||||
        if multi_gpu:
 | 
			
		||||
| 
						 | 
				
			
			@ -642,7 +642,7 @@ def print_model_biases(model):
 | 
			
		|||
        else:
 | 
			
		||||
            na = model.module_list[l].na
 | 
			
		||||
            b = model.module_list[l - 1][0].bias.view(na, -1)  # bias 3x85
 | 
			
		||||
        print('regression: %5.2f+/-%-5.2f ' % (b[:, :4].mean(), b[:, :4].std()),
 | 
			
		||||
        print('layer %3g regression: %5.2f+/-%-5.2f ' % (l, b[:, :4].mean(), b[:, :4].std()),
 | 
			
		||||
              'objectness: %5.2f+/-%-5.2f ' % (b[:, 4].mean(), b[:, 4].std()),
 | 
			
		||||
              'classification: %5.2f+/-%-5.2f' % (b[:, 5:].mean(), b[:, 5:].std()))
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in New Issue