updates
This commit is contained in:
		
							parent
							
								
									4e8e39da93
								
							
						
					
					
						commit
						135b38e9ba
					
				
							
								
								
									
										57
									
								
								train.py
								
								
								
								
							
							
						
						
									
										57
									
								
								train.py
								
								
								
								
							|  | @ -62,12 +62,13 @@ def train(): | ||||||
|     epochs = opt.epochs  # 500200 batches at bs 16, 117263 images = 273 epochs |     epochs = opt.epochs  # 500200 batches at bs 16, 117263 images = 273 epochs | ||||||
|     batch_size = opt.batch_size |     batch_size = opt.batch_size | ||||||
|     accumulate = opt.accumulate  # effective bs = batch_size * accumulate = 16 * 4 = 64 |     accumulate = opt.accumulate  # effective bs = batch_size * accumulate = 16 * 4 = 64 | ||||||
|  |     weights = opt.weights  # initial training weights | ||||||
| 
 | 
 | ||||||
|     # Initialize |     # Initialize | ||||||
|     init_seeds() |     init_seeds() | ||||||
|     weights = 'weights' + os.sep |     wdir = 'weights' + os.sep  # weights dir | ||||||
|     last = weights + 'last.pt' |     last = wdir + 'last.pt' | ||||||
|     best = weights + 'best.pt' |     best = wdir + 'best.pt' | ||||||
|     device = torch_utils.select_device(apex=mixed_precision) |     device = torch_utils.select_device(apex=mixed_precision) | ||||||
|     multi_scale = opt.multi_scale |     multi_scale = opt.multi_scale | ||||||
| 
 | 
 | ||||||
|  | @ -94,26 +95,23 @@ def train(): | ||||||
|     cutoff = -1  # backbone reaches to cutoff layer |     cutoff = -1  # backbone reaches to cutoff layer | ||||||
|     start_epoch = 0 |     start_epoch = 0 | ||||||
|     best_fitness = 0. |     best_fitness = 0. | ||||||
|     nf = int(model.module_defs[model.yolo_layers[0] - 1]['filters'])  # yolo layer size (i.e. 255) |     if weights.endswith('.pt'):  # pytorch format | ||||||
|     if opt.resume or opt.transfer:  # Load previously saved model |         # possible weights are 'last.pt', 'yolov3-spp.pt', 'yolov3-tiny.pt' etc. | ||||||
|         if opt.transfer:  # Transfer learning |         if opt.bucket: | ||||||
|             chkpt = torch.load(weights + 'yolov3-spp.pt', map_location=device) |             os.system('gsutil cp gs://%s/last.pt %s' % (opt.bucket, last))  # download from bucket | ||||||
|             model.load_state_dict({k: v for k, v in chkpt['model'].items() if v.numel() > 1 and v.shape[0] != 255}, |         chkpt = torch.load(weights, map_location=device) | ||||||
|                                   strict=False) |  | ||||||
| 
 | 
 | ||||||
|             for p in model.parameters(): |         # load model | ||||||
|                 p.requires_grad = True if p.shape[0] == nf else False |         if opt.transfer: | ||||||
| 
 |             chkpt['model'] = {k: v for k, v in chkpt['model'].items() if model.state_dict()[k].numel() == v.numel()} | ||||||
|         else:  # resume from last.pt |         model.load_state_dict(chkpt['model'], strict=False) | ||||||
|             if opt.bucket: |  | ||||||
|                 os.system('gsutil cp gs://%s/last.pt %s' % (opt.bucket, last))  # download from bucket |  | ||||||
|             chkpt = torch.load(last, map_location=device)  # load checkpoint |  | ||||||
|             model.load_state_dict(chkpt['model']) |  | ||||||
| 
 | 
 | ||||||
|  |         # load optimizer | ||||||
|         if chkpt['optimizer'] is not None: |         if chkpt['optimizer'] is not None: | ||||||
|             optimizer.load_state_dict(chkpt['optimizer']) |             optimizer.load_state_dict(chkpt['optimizer']) | ||||||
|             best_fitness = chkpt['best_fitness'] |             best_fitness = chkpt['best_fitness'] | ||||||
| 
 | 
 | ||||||
|  |         # load results | ||||||
|         if chkpt.get('training_results') is not None: |         if chkpt.get('training_results') is not None: | ||||||
|             with open('results.txt', 'w') as file: |             with open('results.txt', 'w') as file: | ||||||
|                 file.write(chkpt['training_results'])  # write results.txt |                 file.write(chkpt['training_results'])  # write results.txt | ||||||
|  | @ -121,15 +119,14 @@ def train(): | ||||||
|         start_epoch = chkpt['epoch'] + 1 |         start_epoch = chkpt['epoch'] + 1 | ||||||
|         del chkpt |         del chkpt | ||||||
| 
 | 
 | ||||||
|     else:  # Initialize model with backbone (optional) |     elif weights.endswith('.weights'):  # darknet format | ||||||
|         if '-tiny.cfg' in cfg: |         # possible weights are 'yolov3.weights' etc. NOTE: 'yolov3-tiny.conv.15' and 'darknet53.conv.74' do not end in '.weights', so they will not match this branch | ||||||
|             cutoff = load_darknet_weights(model, weights + 'yolov3-tiny.conv.15') |         cutoff = load_darknet_weights(model, weights) | ||||||
|         else: |  | ||||||
|             cutoff = load_darknet_weights(model, weights + 'darknet53.conv.74') |  | ||||||
| 
 | 
 | ||||||
|         # Remove old results |     if opt.transfer:  # transfer learning | ||||||
|         for f in glob.glob('*_batch*.jpg') + glob.glob('results.txt'): |         nf = int(model.module_defs[model.yolo_layers[0] - 1]['filters'])  # yolo layer size (i.e. 255) | ||||||
|             os.remove(f) |         for p in model.parameters(): | ||||||
|  |             p.requires_grad = True if p.shape[0] == nf else False | ||||||
| 
 | 
 | ||||||
|     # Scheduler https://github.com/ultralytics/yolov3/issues/238 |     # Scheduler https://github.com/ultralytics/yolov3/issues/238 | ||||||
|     # lf = lambda x: 1 - x / epochs  # linear ramp to zero |     # lf = lambda x: 1 - x / epochs  # linear ramp to zero | ||||||
|  | @ -181,6 +178,10 @@ def train(): | ||||||
|                                              pin_memory=True, |                                              pin_memory=True, | ||||||
|                                              collate_fn=dataset.collate_fn) |                                              collate_fn=dataset.collate_fn) | ||||||
| 
 | 
 | ||||||
|  |     # Remove previous results | ||||||
|  |     for f in glob.glob('*_batch*.jpg') + glob.glob('results.txt'): | ||||||
|  |         os.remove(f) | ||||||
|  | 
 | ||||||
|     # Start training |     # Start training | ||||||
|     model.nc = nc  # attach number of classes to model |     model.nc = nc  # attach number of classes to model | ||||||
|     model.hyp = hyp  # attach hyperparameters to model |     model.hyp = hyp  # attach hyperparameters to model | ||||||
|  | @ -327,7 +328,7 @@ def train(): | ||||||
| 
 | 
 | ||||||
|             # Save backup every 10 epochs (optional) |             # Save backup every 10 epochs (optional) | ||||||
|             if epoch > 0 and epoch % 10 == 0: |             if epoch > 0 and epoch % 10 == 0: | ||||||
|                 torch.save(chkpt, weights + 'backup%g.pt' % epoch) |                 torch.save(chkpt, wdir + 'backup%g.pt' % epoch) | ||||||
| 
 | 
 | ||||||
|             # Delete checkpoint |             # Delete checkpoint | ||||||
|             del chkpt |             del chkpt | ||||||
|  | @ -345,7 +346,7 @@ if __name__ == '__main__': | ||||||
|     parser.add_argument('--epochs', type=int, default=273)  # 500200 batches at bs 16, 117263 images = 273 epochs |     parser.add_argument('--epochs', type=int, default=273)  # 500200 batches at bs 16, 117263 images = 273 epochs | ||||||
|     parser.add_argument('--batch-size', type=int, default=32)  # effective bs = batch_size * accumulate = 16 * 4 = 64 |     parser.add_argument('--batch-size', type=int, default=32)  # effective bs = batch_size * accumulate = 16 * 4 = 64 | ||||||
|     parser.add_argument('--accumulate', type=int, default=2, help='batches to accumulate before optimizing') |     parser.add_argument('--accumulate', type=int, default=2, help='batches to accumulate before optimizing') | ||||||
|     parser.add_argument('--cfg', type=str, default='cfg/yolov3-spp.cfg', help='cfg file path') |     parser.add_argument('--cfg', type=str, default='cfg/yolov3-spp-1cls.cfg', help='cfg file path') | ||||||
|     parser.add_argument('--data', type=str, default='data/coco.data', help='*.data file path') |     parser.add_argument('--data', type=str, default='data/coco.data', help='*.data file path') | ||||||
|     parser.add_argument('--multi-scale', action='store_true', help='adjust (67% - 150%) img_size every 10 batches') |     parser.add_argument('--multi-scale', action='store_true', help='adjust (67% - 150%) img_size every 10 batches') | ||||||
|     parser.add_argument('--img-size', type=int, default=416, help='inference size (pixels)') |     parser.add_argument('--img-size', type=int, default=416, help='inference size (pixels)') | ||||||
|  | @ -358,7 +359,9 @@ if __name__ == '__main__': | ||||||
|     parser.add_argument('--bucket', type=str, default='', help='gsutil bucket') |     parser.add_argument('--bucket', type=str, default='', help='gsutil bucket') | ||||||
|     parser.add_argument('--img-weights', action='store_true', help='select training images by weight') |     parser.add_argument('--img-weights', action='store_true', help='select training images by weight') | ||||||
|     parser.add_argument('--cache-images', action='store_true', help='cache images for faster training') |     parser.add_argument('--cache-images', action='store_true', help='cache images for faster training') | ||||||
|  |     parser.add_argument('--weights', type=str, default='', help='initial weights')  # e.g. weights/darknet53.conv.74 | ||||||
|     opt = parser.parse_args() |     opt = parser.parse_args() | ||||||
|  |     opt.weights = 'weights/last.pt' if opt.resume else opt.weights | ||||||
|     print(opt) |     print(opt) | ||||||
| 
 | 
 | ||||||
|     tb_writer = None |     tb_writer = None | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue