updates
This commit is contained in: parent 1cde55f7c9, commit 3cfc84a183
@@ -86,10 +86,11 @@ https://cloud.google.com/deep-learning-vm/
 GPUs | `batch_size` | batch time | epoch time | epoch cost
 --- | --- | --- | --- | ---
 1 K80 | 64 (32x2) | 2.9s | 175min | $0.58
-1 T4 | 64 (32x2) | 0.8s | 49min | $0.29
+1 T4 | 64 (32x2) | 0.80s | 49min | $0.29
+2 T4 | 64 (64x1) | 0.52s | 32min | $0.36
 1 2080ti | 64 (32x2) | - | - | -
 1 V100 | 64 (32x2) | 0.38s | 23min | $0.31
-2 V100 | 64 (64x1) | 0.38s | 23min | $0.62
+2 V100 | 64 (64x1) | 0.30s | 18min | $0.46
 
 # Inference
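For reference, the epoch time and epoch cost columns follow directly from the batch time. A minimal sanity-check sketch, assuming roughly 117k COCO train images, 32 images per timed step, and illustrative preemptible GCE hourly rates chosen so the arithmetic reproduces the T4 and V100 rows (the dataset size and prices are assumptions, not values from this commit):

```python
def epoch_estimate(batch_time_s, usd_per_hour, imgs_per_step=32, n_images=117_263):
    """Return (epoch_minutes, epoch_cost_usd) for one pass over the training set."""
    steps = n_images / imgs_per_step             # timed steps per epoch
    epoch_hours = steps * batch_time_s / 3600    # wall-clock hours per epoch
    return epoch_hours * 60, epoch_hours * usd_per_hour


if __name__ == '__main__':
    for name, batch_time_s, rate in [('1 T4', 0.80, 0.35), ('1 V100', 0.38, 0.80)]:
        minutes, cost = epoch_estimate(batch_time_s, rate)
        print('%-7s %4.0f min  $%.2f' % (name, minutes, cost))  # ~49 min / $0.29, ~23 min / $0.31
```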
train.py (40 changed lines)
@@ -3,6 +3,7 @@ import time
 import torch.optim as optim
 import torch.optim.lr_scheduler as lr_scheduler
+import torch.distributed as dist
 
 import test  # import test.py to get mAP after each epoch
 from models import *
@@ -10,6 +11,12 @@ from utils.datasets import *
 from utils.utils import *
 from utils.adabound import *
 
+mixed_precision = True
+try:  # Mixed precision training https://github.com/NVIDIA/apex
+    from apex import amp
+except:  # not installed: install help: https://github.com/NVIDIA/apex/issues/259
+    mixed_precision = False
+
 # 320 --epochs 1
 # 0.109 0.297 0.15 0.126 7.04 1.666 4.062 0.1845 42.6 3.34 12.61 8.338 0.2705 0.001 -4 0.9 0.0005 a 320 giou + best_anchor False
 # 0.223 0.218 0.138 0.189 9.28 1.153 4.376 0.08263 24.28 3.05 20.93 2.842 0.2759 0.001357 -5.036 0.9158 0.0005722 b mAP/F1 - 50/50 weighting
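Side note, not part of this diff: PyTorch 1.6+ ships mixed precision natively in torch.cuda.amp, which removes the optional apex dependency and the fallback flag above. A minimal sketch of the native equivalent of the O1 setup, where compute_loss stands in for the repo's loss function (hypothetical name):

```python
import torch

scaler = torch.cuda.amp.GradScaler()  # dynamic loss scaling, similar to apex O1

def train_step(model, optimizer, imgs, targets, compute_loss):
    optimizer.zero_grad()
    with torch.cuda.amp.autocast():    # forward pass runs in mixed precision
        loss = compute_loss(model(imgs), targets)
    scaler.scale(loss).backward()      # scale the loss so fp16 gradients do not underflow
    scaler.step(optimizer)             # unscales gradients, then calls optimizer.step()
    scaler.update()                    # adapt the loss scale for the next iteration
    return loss.detach()
```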
@@ -152,6 +159,18 @@ def train(cfg,
     # plt.tight_layout()
     # plt.savefig('LR.png', dpi=300)
 
+    # Mixed precision training https://github.com/NVIDIA/apex
+    if mixed_precision:
+        model, optimizer = amp.initialize(model, optimizer, opt_level='O1', verbosity=0)
+
+    # Initialize distributed training
+    if torch.cuda.device_count() > 1:
+        dist.init_process_group(backend='nccl',  # 'distributed backend'
+                                init_method='tcp://127.0.0.1:9999',  # distributed training init method
+                                world_size=1,  # number of nodes for distributed training
+                                rank=0)  # distributed training node rank
+        model = torch.nn.parallel.DistributedDataParallel(model)
+
     # Dataset
     dataset = LoadImagesAndLabels(train_path,
                                   img_size,
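Taken together with the two removals below, this hunk is a reordering: amp.initialize now runs as soon as the model and optimizer exist, and the process group plus the DistributedDataParallel wrapper are set up before the dataloader instead of after it. The apex documentation recommends calling amp.initialize before wrapping the model in DistributedDataParallel, which this order satisfies. A self-contained sketch of the same sequence, using a toy Linear model and SGD optimizer as stand-ins for the repo's Darknet setup:

```python
import torch
import torch.distributed as dist
from apex import amp  # assumes NVIDIA apex is installed

model = torch.nn.Linear(10, 1).cuda()                      # stand-in for Darknet(cfg)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)   # stand-in optimizer

# 1) apex patches model and optimizer for mixed precision, before any DDP wrapping
model, optimizer = amp.initialize(model, optimizer, opt_level='O1', verbosity=0)

# 2) single-node process group, mirroring the hard-coded world_size=1 / rank=0 above
if torch.cuda.device_count() > 1:
    dist.init_process_group(backend='nccl',
                            init_method='tcp://127.0.0.1:9999',
                            world_size=1,
                            rank=0)
    model = torch.nn.parallel.DistributedDataParallel(model)
```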
@@ -160,16 +179,6 @@ def train(cfg,
                                   hyp=hyp,  # augmentation hyperparameters
                                   rect=opt.rect)  # rectangular training
 
-    # Initialize distributed training
-    if torch.cuda.device_count() > 1:
-        torch.distributed.init_process_group(backend='nccl',  # 'distributed backend'
-                                             init_method='tcp://127.0.0.1:9999',  # distributed training init method
-                                             world_size=1,  # number of nodes for distributed training
-                                             rank=0)  # distributed training node rank
-
-        model = torch.nn.parallel.DistributedDataParallel(model)
-        # sampler = torch.utils.data.distributed.DistributedSampler(dataset)
-
     # Dataloader
     dataloader = torch.utils.data.DataLoader(dataset,
                                              batch_size=batch_size,
@@ -178,15 +187,6 @@ def train(cfg,
                                             pin_memory=True,
                                             collate_fn=dataset.collate_fn)
 
-    # Mixed precision training https://github.com/NVIDIA/apex
-    mixed_precision = True
-    if mixed_precision:
-        try:
-            from apex import amp
-            model, optimizer = amp.initialize(model, optimizer, opt_level='O1', verbosity=0)
-        except:  # not installed: install help: https://github.com/NVIDIA/apex/issues/259
-            mixed_precision = False
-
     # Start training
     model.hyp = hyp  # attach hyperparameters to model
     # model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device)  # attach class weights
@@ -315,7 +315,7 @@ def train(cfg,
 
     # Report time
     print('%g epochs completed in %.3f hours.' % (epoch - start_epoch + 1, (time.time() - t0) / 3600))
     del model, optimizer, loss, dataset, dataloader, scheduler
     dist.destroy_process_group() if torch.cuda.is_available() else None
     torch.cuda.empty_cache()
     return results
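A small observation on the teardown above, offered as a sketch rather than a change to this commit: the process group is only created when torch.cuda.device_count() > 1, so guarding the destroy call with dist.is_initialized() instead of torch.cuda.is_available() keeps single-GPU runs from tearing down a group that was never set up. The helper name cleanup is hypothetical:

```python
import torch
import torch.distributed as dist

def cleanup():
    # Tear down the process group only if init_process_group() actually ran
    if dist.is_available() and dist.is_initialized():
        dist.destroy_process_group()
    # Release cached CUDA memory back to the driver
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
```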