Merge remote-tracking branch 'origin/master'

This commit is contained in:
Glenn Jocher 2019-07-29 00:45:37 +02:00
commit 3a7711856e
10 changed files with 792 additions and 199 deletions

View File

@ -5,7 +5,7 @@
</td> </td>
<td align="center"> <td align="center">
<a href="https://www.ultralytics.com" target="_blank"> <a href="https://www.ultralytics.com" target="_blank">
<img src="https://storage.googleapis.com/ultralytics/logo/logoname1000.png" width="200"></a> <img src="https://storage.googleapis.com/ultralytics/logo/logoname1000.png" width="160"></a>
<img src="https://user-images.githubusercontent.com/26833433/61591093-2b4d4480-abc2-11e9-8b46-d88eb1dabba1.jpg"> <img src="https://user-images.githubusercontent.com/26833433/61591093-2b4d4480-abc2-11e9-8b46-d88eb1dabba1.jpg">
<a href="https://itunes.apple.com/app/id1452689527" target="_blank"> <a href="https://itunes.apple.com/app/id1452689527" target="_blank">
<img src="https://user-images.githubusercontent.com/26833433/50044365-9b22ac00-0082-11e9-862f-e77aee7aa7b0.png" width="180"></a> <img src="https://user-images.githubusercontent.com/26833433/50044365-9b22ac00-0082-11e9-862f-e77aee7aa7b0.png" width="180"></a>
@ -43,8 +43,7 @@ Python 3.7 or later with the following `pip3 install -U -r requirements.txt` pac
# Jupyter Notebook # Jupyter Notebook
A jupyter notebook with training, inference and testing examples is available at: Our Jupyter [notebook](https://colab.research.google.com/github/ultralytics/yolov3/blob/master/examples.ipynb) provides quick training, inference and testing examples.
https://colab.research.google.com/drive/1G8T-VFxQkjDe4idzN8F-hbIBqkkkQnxw
# Training # Training
@ -87,10 +86,11 @@ https://cloud.google.com/deep-learning-vm/
GPUs | `batch_size` | batch time | epoch time | epoch cost GPUs | `batch_size` | batch time | epoch time | epoch cost
--- |---| --- | --- | --- --- |---| --- | --- | ---
1 K80 | 64 (32x2) | 2.9s | 175min | $0.58 1 K80 | 64 (32x2) | 2.9s | 175min | $0.58
1 T4 | 64 (32x2) | 0.8s | 49min | $0.29 1 T4 | 64 (32x2) | 0.80s | 49min | $0.29
2 T4 | 64 (64x1) | 0.52s | 32min | $0.36
1 2080ti | 64 (32x2) | - | - | - 1 2080ti | 64 (32x2) | - | - | -
1 V100 | 64 (32x2) | 0.38s | 23min | $0.31 1 V100 | 64 (32x2) | 0.38s | 23min | $0.31
2 V100 | 64 (64x1) | 0.38s | 23min | $0.62 2 V100 | 64 (64x1) | 0.30s | 18min | $0.46
# Inference # Inference

340
examples.ipynb Normal file

File diff suppressed because one or more lines are too long

View File

@ -1,7 +1,3 @@
import os
import torch.nn.functional as F
from utils.parse_config import * from utils.parse_config import *
from utils.utils import * from utils.utils import *
@ -145,6 +141,7 @@ class YOLOLayer(nn.Module):
return torch.cat((xy / ngu, wh, p_conf, p_cls), 2).squeeze().t() return torch.cat((xy / ngu, wh, p_conf, p_cls), 2).squeeze().t()
else: # inference else: # inference
# s = 1.5 # scale_xy (pxy = pxy * s - (s - 1) / 2)
io = p.clone() # inference output io = p.clone() # inference output
io[..., 0:2] = torch.sigmoid(io[..., 0:2]) + self.grid_xy # xy io[..., 0:2] = torch.sigmoid(io[..., 0:2]) + self.grid_xy # xy
io[..., 2:4] = torch.exp(io[..., 2:4]) * self.anchor_wh # wh yolo method io[..., 2:4] = torch.exp(io[..., 2:4]) * self.anchor_wh # wh yolo method

View File

@ -1,7 +1,7 @@
# pip3 install -U -r requirements.txt # pip3 install -U -r requirements.txt
# conda install numpy opencv matplotlib tqdm pillow # conda install numpy opencv matplotlib tqdm pillow
# conda install pytorch torchvision -c pytorch # conda install pytorch torchvision -c pytorch
# conda install -c conda-forge scikit-image # conda install scikit-image -c conda-forge
numpy numpy
opencv-python opencv-python
torch >= 1.1.0 torch >= 1.1.0

254
train.py
View File

@ -4,19 +4,29 @@ import time
import torch.distributed as dist import torch.distributed as dist
import torch.optim as optim import torch.optim as optim
import torch.optim.lr_scheduler as lr_scheduler import torch.optim.lr_scheduler as lr_scheduler
from torch.utils.data import DataLoader
import test # import test.py to get mAP after each epoch import test # import test.py to get mAP after each epoch
from models import * from models import *
from utils.adabound import *
from utils.datasets import * from utils.datasets import *
from utils.utils import * from utils.utils import *
mixed_precision = True
try: # Mixed precision training https://github.com/NVIDIA/apex
from apex import amp
except: # not installed: install help: https://github.com/NVIDIA/apex/issues/259
mixed_precision = False
# 320 --epochs 1 # 320 --epochs 1
# 0.109 0.297 0.15 0.126 7.04 1.666 4.062 0.1845 42.6 3.34 12.61 8.338 0.2705 0.001 -4 0.9 0.0005 a 320 giou + best_anchor False # 0.109 0.297 0.150 0.126 7.04 1.666 4.062 0.1845 42.6 3.34 12.61 8.338 0.2705 0.001 -4 0.9 0.0005 a 320 giou + best_anchor False
# 0.223 0.218 0.138 0.189 9.28 1.153 4.376 0.08263 24.28 3.05 20.93 2.842 0.2759 0.001357 -5.036 0.9158 0.0005722 b mAP/F1 - 50/50 weighting # 0.223 0.218 0.138 0.189 9.28 1.153 4.376 0.08263 24.28 3.05 20.93 2.842 0.2759 0.001357 -5.036 0.9158 0.0005722 b mAP/F1 - 50/50 weighting
# 0.231 0.215 0.135 0.191 9.51 1.432 3.007 0.06082 24.87 3.477 24.13 2.802 0.3436 0.001127 -5.036 0.9232 0.0005874 c # 0.231 0.215 0.135 0.191 9.51 1.432 3.007 0.06082 24.87 3.477 24.13 2.802 0.3436 0.001127 -5.036 0.9232 0.0005874 c
# 0.246 0.194 0.128 0.192 8.12 1.101 3.954 0.0817 22.83 3.967 19.83 1.779 0.3352 0.000895 -5.036 0.9238 0.0007973 d # 0.246 0.194 0.128 0.192 8.12 1.101 3.954 0.0817 22.83 3.967 19.83 1.779 0.3352 0.000895 -5.036 0.9238 0.0007973 d
# 0.187 0.237 0.144 0.186 14.6 1.607 4.202 0.09439 39.27 3.726 31.26 2.634 0.273 0.001542 -5.036 0.8364 0.0008393 e # 0.187 0.237 0.144 0.186 14.6 1.607 4.202 0.09439 39.27 3.726 31.26 2.634 0.273 0.001542 -5.036 0.8364 0.0008393 e
# 0.250 0.217 0.136 0.195 3.3 1.2 2 0.604 15.7 3.67 20 1.36 0.194 0.00128 -4 0.95 0.000201 0.8 0.388 1.2 0.119 0.0589 0.401 f
# 0.269 0.225 0.149 0.218 6.71 1.13 5.25 0.246 22.4 3.64 17.8 1.31 0.256 0.00146 -4 0.936 0.00042 0.123 0.18 1.81 0.0987 0.0788 0.441 g
# 0.179 0.274 0.165 0.187 7.95 1.22 7.62 0.224 17 5.71 17.7 3.28 0.295 0.00136 -4 0.875 0.000319 0.131 0.208 2.14 0.14 0.0773 0.228 h
# 0.296 0.228 0.152 0.220 5.18 1.43 4.27 0.265 11.7 4.81 11.5 1.56 0.281 0.0013 -4 0.944 0.000427 0.0599 0.142 1.03 0.0552 0.0555 0.434 i
# 320 --epochs 2 # 320 --epochs 2
# 0.242 0.296 0.196 0.231 5.67 0.8541 4.286 0.1539 21.61 1.957 22.9 2.894 0.3689 0.001844 -4 0.913 0.000467 # ha 0.417 mAP @ epoch 100 # 0.242 0.296 0.196 0.231 5.67 0.8541 4.286 0.1539 21.61 1.957 22.9 2.894 0.3689 0.001844 -4 0.913 0.000467 # ha 0.417 mAP @ epoch 100
@ -25,40 +35,45 @@ from utils.utils import *
# 0.161 0.327 0.190 0.193 7.82 1.153 4.062 0.1845 24.28 3.05 20.93 2.842 0.2759 0.001357 -4 0.916 0.000572 # hd 0.438 mAP @ epoch 100 # 0.161 0.327 0.190 0.193 7.82 1.153 4.062 0.1845 24.28 3.05 20.93 2.842 0.2759 0.001357 -4 0.916 0.000572 # hd 0.438 mAP @ epoch 100
# Training hyperparameters d # Training hyperparameters g
hyp = {'giou': 1.153, # giou loss gain # hyp = {'giou': 1.13, # giou loss gain
'xy': 4.062, # xy loss gain # 'xy': 5.25, # xy loss gain
'wh': 0.1845, # wh loss gain # 'wh': 0.246, # wh loss gain
'cls': 24.28, # cls loss gain # 'cls': 22.4, # cls loss gain
'cls_pw': 3.05, # cls BCELoss positive_weight # 'cls_pw': 3.64, # cls BCELoss positive_weight
'obj': 20.93, # obj loss gain # 'obj': 17.8, # obj loss gain
'obj_pw': 2.842, # obj BCELoss positive_weight # 'obj_pw': 1.31, # obj BCELoss positive_weight
'iou_t': 0.2759, # iou training threshold # 'iou_t': 0.256, # iou training threshold
'lr0': 0.001357, # initial learning rate # 'lr0': 0.00146, # initial learning rate
'lrf': -4., # final LambdaLR learning rate = lr0 * (10 ** lrf)
'momentum': 0.916, # SGD momentum
'weight_decay': 0.0000572, # optimizer weight decay
'hsv_s': 0.5, # image HSV-Saturation augmentation (fraction)
'hsv_v': 0.5, # image HSV-Value augmentation (fraction)
'degrees': 5, # image rotation (+/- deg)
'translate': 0.1, # image translation (+/- fraction)
'scale': 0.1, # image scale (+/- gain)
'shear': 2} # image shear (+/- deg)
# # Training hyperparameters e
# hyp = {'giou': 1.607, # giou loss gain
# 'xy': 4.062, # xy loss gain
# 'wh': 0.1845, # wh loss gain
# 'cls': 39.27, # cls loss gain
# 'cls_pw': 3.726, # cls BCELoss positive_weight
# 'obj': 31.26, # obj loss gain
# 'obj_pw': 2.634, # obj BCELoss positive_weight
# 'iou_t': 0.273, # iou target-anchor training threshold
# 'lr0': 0.001542, # initial learning rate
# 'lrf': -4., # final LambdaLR learning rate = lr0 * (10 ** lrf) # 'lrf': -4., # final LambdaLR learning rate = lr0 * (10 ** lrf)
# 'momentum': 0.8364, # SGD momentum # 'momentum': 0.936, # SGD momentum
# 'weight_decay': 0.0008393} # optimizer weight decay # 'weight_decay': 0.00042, # optimizer weight decay
# 'hsv_s': 0.123, # image HSV-Saturation augmentation (fraction)
# 'hsv_v': 0.18, # image HSV-Value augmentation (fraction)
# 'degrees': 1.81, # image rotation (+/- deg)
# 'translate': 0.0987, # image translation (+/- fraction)
# 'scale': 0.0788, # image scale (+/- gain)
# 'shear': 0.441} # image shear (+/- deg)
# Training hyperparameters i
hyp = {'giou': 1.43, # giou loss gain
'xy': 4.27, # xy loss gain
'wh': 0.265, # wh loss gain
'cls': 11.7, # cls loss gain
'cls_pw': 4.81, # cls BCELoss positive_weight
'obj': 11.5, # obj loss gain
'obj_pw': 1.56, # obj BCELoss positive_weight
'iou_t': 0.281, # iou training threshold
'lr0': 0.0013, # initial learning rate
'lrf': -4., # final LambdaLR learning rate = lr0 * (10 ** lrf)
'momentum': 0.944, # SGD momentum
'weight_decay': 0.000427, # optimizer weight decay
'hsv_s': 0.0599, # image HSV-Saturation augmentation (fraction)
'hsv_v': 0.142, # image HSV-Value augmentation (fraction)
'degrees': 1.03, # image rotation (+/- deg)
'translate': 0.0552, # image translation (+/- fraction)
'scale': 0.0555, # image scale (+/- gain)
'shear': 0.434} # image shear (+/- deg)
def train(cfg, def train(cfg,
@ -66,13 +81,13 @@ def train(cfg,
img_size=416, img_size=416,
epochs=100, # 500200 batches at bs 16, 117263 images = 273 epochs epochs=100, # 500200 batches at bs 16, 117263 images = 273 epochs
batch_size=16, batch_size=16,
accumulate=4): # effective bs = batch_size * accumulate = 8 * 8 = 64 accumulate=4): # effective bs = batch_size * accumulate = 16 * 4 = 64
# Initialize # Initialize
init_seeds() init_seeds()
weights = 'weights' + os.sep weights = 'weights' + os.sep
last = weights + 'last.pt' last = weights + 'last.pt'
best = weights + 'best.pt' best = weights + 'best.pt'
device = torch_utils.select_device() device = torch_utils.select_device(apex=mixed_precision)
multi_scale = opt.multi_scale multi_scale = opt.multi_scale
if multi_scale: if multi_scale:
@ -89,11 +104,13 @@ def train(cfg,
model = Darknet(cfg).to(device) model = Darknet(cfg).to(device)
# Optimizer # Optimizer
optimizer = optim.SGD(model.parameters(), lr=hyp['lr0'], momentum=hyp['momentum'], weight_decay=hyp['weight_decay']) optimizer = optim.SGD(model.parameters(), lr=hyp['lr0'], momentum=hyp['momentum'], weight_decay=hyp['weight_decay'],
nesterov=True)
# optimizer = AdaBound(model.parameters(), lr=hyp['lr0'], final_lr=0.1)
cutoff = -1 # backbone reaches to cutoff layer cutoff = -1 # backbone reaches to cutoff layer
start_epoch = 0 start_epoch = 0
best_fitness = 0.0 best_fitness = 0.
if opt.resume or opt.transfer: # Load previously saved model if opt.resume or opt.transfer: # Load previously saved model
if opt.transfer: # Transfer learning if opt.transfer: # Transfer learning
nf = int(model.module_defs[model.yolo_layers[0] - 1]['filters']) # yolo layer size (i.e. 255) nf = int(model.module_defs[model.yolo_layers[0] - 1]['filters']) # yolo layer size (i.e. 255)
@ -136,7 +153,7 @@ def train(cfg,
# lf = lambda x: 10 ** (hyp['lrf'] * x / epochs) # exp ramp # lf = lambda x: 10 ** (hyp['lrf'] * x / epochs) # exp ramp
# lf = lambda x: 1 - 10 ** (hyp['lrf'] * (1 - x / epochs)) # inverse exp ramp # lf = lambda x: 1 - 10 ** (hyp['lrf'] * (1 - x / epochs)) # inverse exp ramp
# scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf) # scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)
scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=[round(opt.epochs * x) for x in (0.8, 0.9)], gamma=0.1) scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=[round(opt.epochs * x) for x in [0.8]], gamma=0.1)
scheduler.last_epoch = start_epoch - 1 scheduler.last_epoch = start_epoch - 1
# # Plot lr schedule # # Plot lr schedule
@ -150,6 +167,18 @@ def train(cfg,
# plt.tight_layout() # plt.tight_layout()
# plt.savefig('LR.png', dpi=300) # plt.savefig('LR.png', dpi=300)
# Mixed precision training https://github.com/NVIDIA/apex
if mixed_precision:
model, optimizer = amp.initialize(model, optimizer, opt_level='O1', verbosity=0)
# Initialize distributed training
if torch.cuda.device_count() > 1:
dist.init_process_group(backend='nccl', # 'distributed backend'
init_method='tcp://127.0.0.1:9999', # distributed training init method
world_size=1, # number of nodes for distributed training
rank=0) # distributed training node rank
model = torch.nn.parallel.DistributedDataParallel(model)
# Dataset # Dataset
dataset = LoadImagesAndLabels(train_path, dataset = LoadImagesAndLabels(train_path,
img_size, img_size,
@ -158,32 +187,13 @@ def train(cfg,
hyp=hyp, # augmentation hyperparameters hyp=hyp, # augmentation hyperparameters
rect=opt.rect) # rectangular training rect=opt.rect) # rectangular training
# Initialize distributed training
if torch.cuda.device_count() > 1:
dist.init_process_group(backend='nccl', # 'distributed backend'
init_method='tcp://127.0.0.1:9999', # distributed training init method
world_size=1, # number of nodes for distributed training
rank=0) # distributed training node rank
model = torch.nn.parallel.DistributedDataParallel(model)
# sampler = torch.utils.data.distributed.DistributedSampler(dataset)
# Dataloader # Dataloader
dataloader = DataLoader(dataset, dataloader = torch.utils.data.DataLoader(dataset,
batch_size=batch_size, batch_size=batch_size,
num_workers=opt.num_workers, num_workers=opt.num_workers,
shuffle=not opt.rect, # Shuffle=True unless rectangular training is used shuffle=not opt.rect, # Shuffle=True unless rectangular training is used
pin_memory=True, pin_memory=True,
collate_fn=dataset.collate_fn) collate_fn=dataset.collate_fn)
# Mixed precision training https://github.com/NVIDIA/apex
mixed_precision = True
if mixed_precision:
try:
from apex import amp
model, optimizer = amp.initialize(model, optimizer, opt_level='O1', verbosity=0)
except: # not installed: install help: https://github.com/NVIDIA/apex/issues/259
mixed_precision = False
# Start training # Start training
model.hyp = hyp # attach hyperparameters to model model.hyp = hyp # attach hyperparameters to model
@ -192,7 +202,7 @@ def train(cfg,
nb = len(dataloader) nb = len(dataloader)
maps = np.zeros(nc) # mAP per class maps = np.zeros(nc) # mAP per class
results = (0, 0, 0, 0, 0) # P, R, mAP, F1, test_loss results = (0, 0, 0, 0, 0) # P, R, mAP, F1, test_loss
n_burnin = min(round(nb / 5 + 1), 1000) # burn-in batches # n_burnin = min(round(nb / 5 + 1), 1000) # burn-in batches
t0 = time.time() t0 = time.time()
for epoch in range(start_epoch, epochs): for epoch in range(start_epoch, epochs):
model.train() model.train()
@ -234,11 +244,11 @@ def train(cfg,
plot_images(imgs=imgs, targets=targets, paths=paths, fname='train_batch%g.jpg' % i) plot_images(imgs=imgs, targets=targets, paths=paths, fname='train_batch%g.jpg' % i)
# SGD burn-in # SGD burn-in
if epoch == 0 and i <= n_burnin: # if epoch == 0 and i <= n_burnin:
g = (i / n_burnin) ** 4 # gain # g = (i / n_burnin) ** 4 # gain
for x in optimizer.param_groups: # for x in optimizer.param_groups:
x['lr'] = hyp['lr0'] * g # x['lr'] = hyp['lr0'] * g
x['weight_decay'] = hyp['weight_decay'] * g # x['weight_decay'] = hyp['weight_decay'] * g
# Run model # Run model
pred = model(imgs) pred = model(imgs)
@ -313,33 +323,11 @@ def train(cfg,
# Report time # Report time
print('%g epochs completed in %.3f hours.' % (epoch - start_epoch + 1, (time.time() - t0) / 3600)) print('%g epochs completed in %.3f hours.' % (epoch - start_epoch + 1, (time.time() - t0) / 3600))
del model, optimizer dist.destroy_process_group() if torch.cuda.device_count() > 1 else None
torch.cuda.empty_cache()
return results return results
def print_mutation(hyp, results):
# Write mutation results
a = '%11s' * len(hyp) % tuple(hyp.keys()) # hyperparam keys
b = '%11.3g' * len(hyp) % tuple(hyp.values()) # hyperparam values
c = '%11.3g' * len(results) % results # results (P, R, mAP, F1, test_loss)
print('\n%s\n%s\nEvolved fitness: %s\n' % (a, b, c))
if opt.bucket:
os.system('gsutil cp gs://%s/evolve.txt .' % opt.bucket) # download evolve.txt
with open('evolve.txt', 'a') as f: # append result
f.write(c + b + '\n')
x = np.unique(np.loadtxt('evolve.txt', ndmin=2), axis=0) # load unique rows
np.savetxt('evolve.txt', x[np.argsort(-fitness(x))], '%11.3g') # save sort by fitness
os.system('gsutil cp evolve.txt gs://%s' % opt.bucket) # upload evolve.txt
else:
with open('evolve.txt', 'a') as f:
f.write(c + b + '\n')
def fitness(x): # returns fitness of hyp evolution vectors
return x[:, 2] * 0.5 + x[:, 3] * 0.5 # fitness = weighted combination of mAP and F1
if __name__ == '__main__': if __name__ == '__main__':
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument('--epochs', type=int, default=100, help='number of epochs') parser.add_argument('--epochs', type=int, default=100, help='number of epochs')
@ -362,38 +350,38 @@ if __name__ == '__main__':
opt = parser.parse_args() opt = parser.parse_args()
print(opt) print(opt)
if opt.evolve: if not opt.evolve: # Train normally
results = train(opt.cfg,
opt.data,
img_size=opt.img_size,
epochs=opt.epochs,
batch_size=opt.batch_size,
accumulate=opt.accumulate)
else: # Evolve hyperparameters (optional)
opt.notest = True # only test final epoch opt.notest = True # only test final epoch
opt.nosave = True # only save final checkpoint opt.nosave = True # only save final checkpoint
if opt.bucket:
os.system('gsutil cp gs://%s/evolve.txt .' % opt.bucket) # download evolve.txt if exists
# Train for _ in range(1): # generations to evolve
results = train(opt.cfg, if os.path.exists('evolve.txt'): # if evolve.txt exists: select best hyps and mutate
opt.data, # Get best hyperparameters
img_size=opt.img_size, x = np.loadtxt('evolve.txt', ndmin=2)
epochs=opt.epochs, x = x[fitness(x).argmax()] # select best fitness hyps
batch_size=opt.batch_size, for i, k in enumerate(hyp.keys()):
accumulate=opt.accumulate) hyp[k] = x[i + 5]
# Evolve hyperparameters (optional) # Mutate
if opt.evolve: init_seeds(seed=int(time.time()))
print_mutation(hyp, results) # Write mutation results s = [.15, .15, .15, .15, .15, .15, .15, .15, .15, .00, .05, .20, .20, .20, .20, .20, .20, .20] # sigmas
for _ in range(1000): # generations to evolve for i, k in enumerate(hyp.keys()):
# Get best hyperparameters x = (np.random.randn(1) * s[i] + 1) ** 2.0 # plt.hist(x.ravel(), 300)
x = np.loadtxt('evolve.txt', ndmin=2) hyp[k] *= float(x) # vary by sigmas
x = x[fitness(x).argmax()] # select best fitness hyps
for i, k in enumerate(hyp.keys()):
hyp[k] = x[i + 5]
# Mutate
init_seeds(seed=int(time.time()))
s = [.15, .15, .15, .15, .15, .15, .15, .15, .15, .00, .05, .20, .20, .20, .20, .20, .20, .20] # sigmas
for i, k in enumerate(hyp.keys()):
x = (np.random.randn(1) * s[i] + 1) ** 2.0 # plt.hist(x.ravel(), 300)
hyp[k] *= float(x) # vary by sigmas
# Clip to limits # Clip to limits
keys = ['lr0', 'iou_t', 'momentum', 'weight_decay', 'hsv_s', 'hsv_v', 'translate', 'scale'] keys = ['lr0', 'iou_t', 'momentum', 'weight_decay', 'hsv_s', 'hsv_v', 'translate', 'scale']
limits = [(1e-4, 1e-2), (0.00, 0.70), (0.60, 0.95), (0, 0.001), (0, .8), (0, .8), (0, .8), (0, .8)] limits = [(1e-4, 1e-2), (0.00, 0.70), (0.60, 0.97), (0, 0.001), (0, .9), (0, .9), (0, .9), (0, .9)]
for k, v in zip(keys, limits): for k, v in zip(keys, limits):
hyp[k] = np.clip(hyp[k], v[0], v[1]) hyp[k] = np.clip(hyp[k], v[0], v[1])
@ -406,19 +394,7 @@ if __name__ == '__main__':
accumulate=opt.accumulate) accumulate=opt.accumulate)
# Write mutation results # Write mutation results
print_mutation(hyp, results) print_mutation(hyp, results, opt.bucket)
# # Plot results # Plot results
# import numpy as np # plot_evolution_results(hyp)
# import matplotlib.pyplot as plt
# a = np.loadtxt('evolve.txt')
# x = fitness(a)
# weights = (x - x.min()) ** 2
# fig = plt.figure(figsize=(10, 10))
# for i in range(len(hyp)):
# y = a[:, i + 5]
# mu = (y * weights).sum() / weights.sum()
# plt.subplot(4, 5, i + 1)
# plt.plot(x.max(), mu, 'o')
# plt.plot(x, y, '.')
# print(list(hyp.keys())[i], '%.4g' % mu)

236
utils/adabound.py Normal file
View File

@ -0,0 +1,236 @@
import math
import torch
from torch.optim import Optimizer
class AdaBound(Optimizer):
    """Implements the AdaBound algorithm.

    It has been proposed in `Adaptive Gradient Methods with Dynamic Bound of Learning Rate`_.

    Arguments:
        params (iterable): iterable of parameters to optimize or dicts defining
            parameter groups
        lr (float, optional): Adam learning rate (default: 1e-3)
        betas (Tuple[float, float], optional): coefficients used for computing
            running averages of gradient and its square (default: (0.9, 0.999))
        final_lr (float, optional): final (SGD) learning rate (default: 0.1)
        gamma (float, optional): convergence speed of the bound functions (default: 1e-3)
        eps (float, optional): term added to the denominator to improve
            numerical stability (default: 1e-8)
        weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
        amsbound (boolean, optional): whether to use the AMSBound variant of this algorithm

    .. _Adaptive Gradient Methods with Dynamic Bound of Learning Rate:
        https://openreview.net/forum?id=Bkg3g2R9FX
    """

    def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), final_lr=0.1, gamma=1e-3,
                 eps=1e-8, weight_decay=0, amsbound=False):
        """Validate the hyperparameters and register the parameter groups.

        Raises:
            ValueError: if ``lr``, ``eps`` or ``final_lr`` is negative, or if a
                beta or ``gamma`` lies outside ``[0, 1)``.
        """
        if not 0.0 <= lr:
            raise ValueError("Invalid learning rate: {}".format(lr))
        if not 0.0 <= eps:
            raise ValueError("Invalid epsilon value: {}".format(eps))
        if not 0.0 <= betas[0] < 1.0:
            raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0]))
        if not 0.0 <= betas[1] < 1.0:
            raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1]))
        if not 0.0 <= final_lr:
            raise ValueError("Invalid final learning rate: {}".format(final_lr))
        if not 0.0 <= gamma < 1.0:
            raise ValueError("Invalid gamma parameter: {}".format(gamma))
        defaults = dict(lr=lr, betas=betas, final_lr=final_lr, gamma=gamma, eps=eps,
                        weight_decay=weight_decay, amsbound=amsbound)
        super(AdaBound, self).__init__(params, defaults)

        # Remember each group's initial lr so that step() can rescale final_lr
        # by the same factor an lr scheduler has applied to group['lr'].
        self.base_lrs = [group['lr'] for group in self.param_groups]

    def __setstate__(self, state):
        """Restore optimizer state, defaulting ``amsbound`` to False when absent."""
        super(AdaBound, self).__setstate__(state)
        for group in self.param_groups:
            group.setdefault('amsbound', False)

    def step(self, closure=None):
        """Performs a single optimization step.

        Arguments:
            closure (callable, optional): A closure that reevaluates the model
                and returns the loss.

        Returns:
            The value returned by ``closure``, or None when no closure is given.

        Raises:
            RuntimeError: if a parameter has a sparse gradient.
        """
        loss = None
        if closure is not None:
            loss = closure()

        for group, base_lr in zip(self.param_groups, self.base_lrs):
            beta1, beta2 = group['betas']
            amsbound = group['amsbound']

            for p in group['params']:
                if p.grad is None:
                    continue
                grad = p.grad.data
                if grad.is_sparse:
                    raise RuntimeError(
                        'AdaBound does not support sparse gradients, please consider SparseAdam instead')

                state = self.state[p]

                # State initialization
                if len(state) == 0:
                    state['step'] = 0
                    # Exponential moving average of gradient values
                    state['exp_avg'] = torch.zeros_like(p.data)
                    # Exponential moving average of squared gradient values
                    state['exp_avg_sq'] = torch.zeros_like(p.data)
                    if amsbound:
                        # Maintains max of all exp. moving avg. of sq. grad. values
                        state['max_exp_avg_sq'] = torch.zeros_like(p.data)

                exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']
                state['step'] += 1

                if group['weight_decay'] != 0:
                    # L2 penalty folded into the gradient (out-of-place: p.grad is untouched)
                    grad = grad.add(p.data, alpha=group['weight_decay'])

                # Decay the first and second moment running average coefficient.
                # Keyword alpha=/value= replaces the deprecated scalar-first overloads.
                exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1)
                exp_avg_sq.mul_(beta2).addcmul_(grad, grad, value=1 - beta2)
                if amsbound:
                    max_exp_avg_sq = state['max_exp_avg_sq']
                    # Maintains the maximum of all 2nd moment running avg. till now
                    torch.max(max_exp_avg_sq, exp_avg_sq, out=max_exp_avg_sq)
                    # Use the max. for normalizing running avg. of gradient
                    denom = max_exp_avg_sq.sqrt().add_(group['eps'])
                else:
                    denom = exp_avg_sq.sqrt().add_(group['eps'])

                bias_correction1 = 1 - beta1 ** state['step']
                bias_correction2 = 1 - beta2 ** state['step']
                step_size = group['lr'] * math.sqrt(bias_correction2) / bias_correction1

                # Applies bounds on actual learning rate
                # lr_scheduler cannot affect final_lr, this is a workaround to apply lr decay
                final_lr = group['final_lr'] * group['lr'] / base_lr
                gamma_t = group['gamma'] * state['step']
                lower_bound = final_lr * (1 - 1 / (gamma_t + 1))
                # gamma == 0 is accepted by __init__; its upper bound is the limit
                # of final_lr * (1 + 1 / gamma_t), i.e. unbounded, instead of a
                # ZeroDivisionError.
                upper_bound = final_lr * (1 + 1 / gamma_t) if gamma_t > 0 else math.inf
                step_size = torch.full_like(denom, step_size)
                step_size.div_(denom).clamp_(lower_bound, upper_bound).mul_(exp_avg)

                p.data.add_(-step_size)

        return loss
class AdaBoundW(Optimizer):
    """Implements AdaBound algorithm with Decoupled Weight Decay (arxiv.org/abs/1711.05101)

    It has been proposed in `Adaptive Gradient Methods with Dynamic Bound of Learning Rate`_.

    Arguments:
        params (iterable): iterable of parameters to optimize or dicts defining
            parameter groups
        lr (float, optional): Adam learning rate (default: 1e-3)
        betas (Tuple[float, float], optional): coefficients used for computing
            running averages of gradient and its square (default: (0.9, 0.999))
        final_lr (float, optional): final (SGD) learning rate (default: 0.1)
        gamma (float, optional): convergence speed of the bound functions (default: 1e-3)
        eps (float, optional): term added to the denominator to improve
            numerical stability (default: 1e-8)
        weight_decay (float, optional): decoupled weight decay factor, applied
            directly to the weights rather than to the gradient (default: 0)
        amsbound (boolean, optional): whether to use the AMSBound variant of this algorithm

    .. _Adaptive Gradient Methods with Dynamic Bound of Learning Rate:
        https://openreview.net/forum?id=Bkg3g2R9FX
    """

    def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), final_lr=0.1, gamma=1e-3,
                 eps=1e-8, weight_decay=0, amsbound=False):
        """Validate the hyperparameters and register the parameter groups.

        Raises:
            ValueError: if ``lr``, ``eps`` or ``final_lr`` is negative, or if a
                beta or ``gamma`` lies outside ``[0, 1)``.
        """
        if not 0.0 <= lr:
            raise ValueError("Invalid learning rate: {}".format(lr))
        if not 0.0 <= eps:
            raise ValueError("Invalid epsilon value: {}".format(eps))
        if not 0.0 <= betas[0] < 1.0:
            raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0]))
        if not 0.0 <= betas[1] < 1.0:
            raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1]))
        if not 0.0 <= final_lr:
            raise ValueError("Invalid final learning rate: {}".format(final_lr))
        if not 0.0 <= gamma < 1.0:
            raise ValueError("Invalid gamma parameter: {}".format(gamma))
        defaults = dict(lr=lr, betas=betas, final_lr=final_lr, gamma=gamma, eps=eps,
                        weight_decay=weight_decay, amsbound=amsbound)
        super(AdaBoundW, self).__init__(params, defaults)

        # Remember each group's initial lr so that step() can rescale final_lr
        # by the same factor an lr scheduler has applied to group['lr'].
        self.base_lrs = [group['lr'] for group in self.param_groups]

    def __setstate__(self, state):
        """Restore optimizer state, defaulting ``amsbound`` to False when absent."""
        super(AdaBoundW, self).__setstate__(state)
        for group in self.param_groups:
            group.setdefault('amsbound', False)

    def step(self, closure=None):
        """Performs a single optimization step.

        Arguments:
            closure (callable, optional): A closure that reevaluates the model
                and returns the loss.

        Returns:
            The value returned by ``closure``, or None when no closure is given.

        Raises:
            RuntimeError: if a parameter has a sparse gradient.
        """
        loss = None
        if closure is not None:
            loss = closure()

        for group, base_lr in zip(self.param_groups, self.base_lrs):
            beta1, beta2 = group['betas']
            amsbound = group['amsbound']

            for p in group['params']:
                if p.grad is None:
                    continue
                grad = p.grad.data
                if grad.is_sparse:
                    raise RuntimeError(
                        'AdaBoundW does not support sparse gradients, please consider SparseAdam instead')

                state = self.state[p]

                # State initialization
                if len(state) == 0:
                    state['step'] = 0
                    # Exponential moving average of gradient values
                    state['exp_avg'] = torch.zeros_like(p.data)
                    # Exponential moving average of squared gradient values
                    state['exp_avg_sq'] = torch.zeros_like(p.data)
                    if amsbound:
                        # Maintains max of all exp. moving avg. of sq. grad. values
                        state['max_exp_avg_sq'] = torch.zeros_like(p.data)

                exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']
                state['step'] += 1

                # Decay the first and second moment running average coefficient.
                # Keyword alpha=/value= replaces the deprecated scalar-first overloads.
                exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1)
                exp_avg_sq.mul_(beta2).addcmul_(grad, grad, value=1 - beta2)
                if amsbound:
                    max_exp_avg_sq = state['max_exp_avg_sq']
                    # Maintains the maximum of all 2nd moment running avg. till now
                    torch.max(max_exp_avg_sq, exp_avg_sq, out=max_exp_avg_sq)
                    # Use the max. for normalizing running avg. of gradient
                    denom = max_exp_avg_sq.sqrt().add_(group['eps'])
                else:
                    denom = exp_avg_sq.sqrt().add_(group['eps'])

                bias_correction1 = 1 - beta1 ** state['step']
                bias_correction2 = 1 - beta2 ** state['step']
                step_size = group['lr'] * math.sqrt(bias_correction2) / bias_correction1

                # Applies bounds on actual learning rate
                # lr_scheduler cannot affect final_lr, this is a workaround to apply lr decay
                final_lr = group['final_lr'] * group['lr'] / base_lr
                gamma_t = group['gamma'] * state['step']
                lower_bound = final_lr * (1 - 1 / (gamma_t + 1))
                # gamma == 0 is accepted by __init__; its upper bound is the limit
                # of final_lr * (1 + 1 / gamma_t), i.e. unbounded, instead of a
                # ZeroDivisionError.
                upper_bound = final_lr * (1 + 1 / gamma_t) if gamma_t > 0 else math.inf
                step_size = torch.full_like(denom, step_size)
                step_size.div_(denom).clamp_(lower_bound, upper_bound).mul_(exp_avg)

                if group['weight_decay'] != 0:
                    # Decoupled decay: computed from the weights *before* the
                    # gradient step and subtracted as-is (not scaled by lr).
                    decayed_weights = torch.mul(p.data, group['weight_decay'])
                    p.data.add_(-step_size)
                    p.data.sub_(decayed_weights)
                else:
                    p.data.add_(-step_size)

        return loss

View File

@ -8,9 +8,9 @@ from pathlib import Path
import cv2 import cv2
import numpy as np import numpy as np
import torch import torch
from PIL import Image, ExifTags
from torch.utils.data import Dataset from torch.utils.data import Dataset
from tqdm import tqdm from tqdm import tqdm
from PIL import Image, ExifTags
from utils.utils import xyxy2xywh, xywh2xyxy from utils.utils import xyxy2xywh, xywh2xyxy
@ -40,8 +40,6 @@ def exif_size(img):
class LoadImages: # for inference class LoadImages: # for inference
def __init__(self, path, img_size=416): def __init__(self, path, img_size=416):
self.height = img_size
files = [] files = []
if os.path.isdir(path): if os.path.isdir(path):
files = sorted(glob.glob('%s/*.*' % path)) files = sorted(glob.glob('%s/*.*' % path))
@ -52,6 +50,7 @@ class LoadImages: # for inference
videos = [x for x in files if os.path.splitext(x)[-1].lower() in vid_formats] videos = [x for x in files if os.path.splitext(x)[-1].lower() in vid_formats]
nI, nV = len(images), len(videos) nI, nV = len(images), len(videos)
self.img_size = img_size
self.files = images + videos self.files = images + videos
self.nF = nI + nV # number of files self.nF = nI + nV # number of files
self.video_flag = [False] * nI + [True] * nV self.video_flag = [False] * nI + [True] * nV
@ -96,7 +95,7 @@ class LoadImages: # for inference
print('image %g/%g %s: ' % (self.count, self.nF, path), end='') print('image %g/%g %s: ' % (self.count, self.nF, path), end='')
# Padded resize # Padded resize
img, *_ = letterbox(img0, new_shape=self.height) img, *_ = letterbox(img0, new_shape=self.img_size)
# Normalize RGB # Normalize RGB
img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB
@ -117,8 +116,10 @@ class LoadImages: # for inference
class LoadWebcam: # for inference class LoadWebcam: # for inference
def __init__(self, img_size=416): def __init__(self, img_size=416):
self.cam = cv2.VideoCapture(0) self.img_size = img_size
self.height = img_size self.cam = cv2.VideoCapture(0) # local camera
# self.cam = cv2.VideoCapture('rtsp://192.168.1.64/1') # IP camera
# self.cam = cv2.VideoCapture('rtsp://username:password@192.168.1.64/1') # IP camera with login
def __iter__(self): def __iter__(self):
self.count = -1 self.count = -1
@ -138,7 +139,7 @@ class LoadWebcam: # for inference
print('webcam %g: ' % self.count, end='') print('webcam %g: ' % self.count, end='')
# Padded resize # Padded resize
img, *_ = letterbox(img0, new_shape=self.height) img, *_ = letterbox(img0, new_shape=self.img_size)
# Normalize RGB # Normalize RGB
img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB
@ -154,8 +155,7 @@ class LoadWebcam: # for inference
class LoadImagesAndLabels(Dataset): # for training/testing class LoadImagesAndLabels(Dataset): # for training/testing
def __init__(self, path, img_size=416, batch_size=16, augment=False, hyp=None, rect=False, image_weights=False): def __init__(self, path, img_size=416, batch_size=16, augment=False, hyp=None, rect=False, image_weights=False):
with open(path, 'r') as f: with open(path, 'r') as f:
img_files = f.read().splitlines() self.img_files = [x for x in f.read().splitlines() if os.path.splitext(x)[-1].lower() in img_formats]
self.img_files = [x for x in img_files if os.path.splitext(x)[-1].lower() in img_formats]
n = len(self.img_files) n = len(self.img_files)
bi = np.floor(np.arange(n) / batch_size).astype(np.int) # batch index bi = np.floor(np.arange(n) / batch_size).astype(np.int) # batch index
@ -405,10 +405,11 @@ def letterbox(img, new_shape=416, color=(128, 128, 128), mode='auto'):
new_unpad = (new_shape, new_shape) new_unpad = (new_shape, new_shape)
ratiow, ratioh = new_shape / shape[1], new_shape / shape[0] ratiow, ratioh = new_shape / shape[1], new_shape / shape[0]
if shape[::-1] != new_unpad:
img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_AREA) # resize
top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1)) top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
left, right = int(round(dw - 0.1)), int(round(dw + 0.1)) left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_AREA) # resized, no border img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border
img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # padded square
return img, ratiow, ratioh, dw, dh return img, ratiow, ratioh, dw, dh

View File

@ -28,6 +28,12 @@ python3 detect.py
# Test # Test
python3 test.py --save-json python3 test.py --save-json
# Evolve
for i in {0..500}
do
python3 train.py --data data/coco.data --img-size 320 --epochs 1 --batch-size 64 --accumulate 1 --evolve --bucket yolov4
done
# Git pull # Git pull
git pull https://github.com/ultralytics/yolov3 # master git pull https://github.com/ultralytics/yolov3 # master
git pull https://github.com/ultralytics/yolov3 test # branch git pull https://github.com/ultralytics/yolov3 test # branch

View File

@ -9,24 +9,19 @@ def init_seeds(seed=0):
# torch.backends.cudnn.deterministic = True # https://pytorch.org/docs/stable/notes/randomness.html # torch.backends.cudnn.deterministic = True # https://pytorch.org/docs/stable/notes/randomness.html
def select_device(force_cpu=False): def select_device(force_cpu=False, apex=False):
# apex if mixed precision training https://github.com/NVIDIA/apex
cuda = False if force_cpu else torch.cuda.is_available() cuda = False if force_cpu else torch.cuda.is_available()
device = torch.device('cuda:0' if cuda else 'cpu') device = torch.device('cuda:0' if cuda else 'cpu')
if not cuda: if not cuda:
print('Using CPU') print('Using CPU')
if cuda: if cuda:
try: # Mixed precision training https://github.com/NVIDIA/apex
from apex import amp
apex_str = 'with Apex '
except:
apex_str = ''
torch.backends.cudnn.benchmark = True # set False for reproducible results torch.backends.cudnn.benchmark = True # set False for reproducible results
c = 1024 ** 2 # bytes to MB c = 1024 ** 2 # bytes to MB
ng = torch.cuda.device_count() ng = torch.cuda.device_count()
x = [torch.cuda.get_device_properties(i) for i in range(ng)] x = [torch.cuda.get_device_properties(i) for i in range(ng)]
cuda_str = 'Using CUDA ' + apex_str cuda_str = 'Using CUDA ' + ('Apex ' if apex else '')
for i in range(0, ng): for i in range(0, ng):
if i == 1: if i == 1:
# torch.cuda.set_device(0) # OPTIONAL: Set GPU ID # torch.cuda.set_device(0) # OPTIONAL: Set GPU ID
@ -42,14 +37,12 @@ def fuse_conv_and_bn(conv, bn):
# https://tehnokv.com/posts/fusing-batchnorm-and-conv/ # https://tehnokv.com/posts/fusing-batchnorm-and-conv/
with torch.no_grad(): with torch.no_grad():
# init # init
fusedconv = torch.nn.Conv2d( fusedconv = torch.nn.Conv2d(conv.in_channels,
conv.in_channels, conv.out_channels,
conv.out_channels, kernel_size=conv.kernel_size,
kernel_size=conv.kernel_size, stride=conv.stride,
stride=conv.stride, padding=conv.padding,
padding=conv.padding, bias=True)
bias=True
)
# prepare filters # prepare filters
w_conv = conv.weight.clone().view(conv.out_channels, -1) w_conv = conv.weight.clone().view(conv.out_channels, -1)

View File

@ -1,5 +1,7 @@
import glob import glob
import os
import random import random
from pathlib import Path
import cv2 import cv2
import matplotlib import matplotlib
@ -9,7 +11,6 @@ import torch
import torch.nn as nn import torch.nn as nn
from PIL import Image from PIL import Image
from tqdm import tqdm from tqdm import tqdm
from pathlib import Path
from . import torch_utils # , google_utils from . import torch_utils # , google_utils
@ -303,12 +304,14 @@ def compute_loss(p, targets, model, giou_loss=True): # predictions, targets, mo
tobj[b, a, gj, gi] = 1.0 # obj tobj[b, a, gj, gi] = 1.0 # obj
# pi[..., 2:4] = torch.sigmoid(pi[..., 2:4]) # wh power loss (uncomment) # pi[..., 2:4] = torch.sigmoid(pi[..., 2:4]) # wh power loss (uncomment)
# s = 1.5 # scale_xy
pxy = torch.sigmoid(pi[..., 0:2]) # * s - (s - 1) / 2
if giou_loss: if giou_loss:
pbox = torch.cat((torch.sigmoid(pi[..., 0:2]), torch.exp(pi[..., 2:4]) * anchor_vec[i]), 1) # predicted pbox = torch.cat((pxy, torch.exp(pi[..., 2:4]) * anchor_vec[i]), 1) # predicted
giou = bbox_iou(pbox.t(), tbox[i], x1y1x2y2=False, GIoU=True) # giou computation giou = bbox_iou(pbox.t(), tbox[i], x1y1x2y2=False, GIoU=True) # giou computation
lxy += (k * h['giou']) * (1.0 - giou).mean() # giou loss lxy += (k * h['giou']) * (1.0 - giou).mean() # giou loss
else: else:
lxy += (k * h['xy']) * MSE(torch.sigmoid(pi[..., 0:2]), txy[i]) # xy loss lxy += (k * h['xy']) * MSE(pxy, txy[i]) # xy loss
lwh += (k * h['wh']) * MSE(pi[..., 2:4], twh[i]) # wh yolo loss lwh += (k * h['wh']) * MSE(pi[..., 2:4], twh[i]) # wh yolo loss
tclsm = torch.zeros_like(pi[..., 5:]) tclsm = torch.zeros_like(pi[..., 5:])
@ -542,23 +545,20 @@ def select_best_evolve(path='evolve*.txt'): # from utils.utils import *; select
print(file, x[fitness.argmax()]) print(file, x[fitness.argmax()])
def kmeans_targets(path='./data/coco_64img.txt'): # from utils.utils import *; kmeans_targets() def kmeans_targets(path='./data/coco_64img.txt', n=9, img_size=320): # from utils.utils import *; kmeans_targets()
# Produces a list of target kmeans suitable for use in *.cfg files
img_formats = ['.bmp', '.jpg', '.jpeg', '.png', '.tif']
with open(path, 'r') as f: with open(path, 'r') as f:
img_files = f.read().splitlines() img_files = [x for x in f.read().splitlines() if os.path.splitext(x)[-1].lower() in img_formats]
img_files = list(filter(lambda x: len(x) > 0, img_files))
# Read shapes # Read shapes
n = len(img_files) nf = len(img_files)
assert n > 0, 'No images found in %s' % path assert nf > 0, 'No images found in %s' % path
label_files = [x.replace('images', 'labels'). label_files = [x.replace('images', 'labels').replace(os.path.splitext(x)[-1], '.txt') for x in img_files]
replace('.jpeg', '.txt').
replace('.jpg', '.txt').
replace('.bmp', '.txt').
replace('.png', '.txt') for x in img_files]
s = np.array([Image.open(f).size for f in tqdm(img_files, desc='Reading image shapes')]) # (width, height) s = np.array([Image.open(f).size for f in tqdm(img_files, desc='Reading image shapes')]) # (width, height)
# Read targets # Read targets
labels = [np.zeros((0, 5))] * n labels = [np.zeros((0, 5))] * nf
iter = tqdm(label_files, desc='Reading labels') iter = tqdm(label_files, desc='Reading labels')
for i, file in enumerate(iter): for i, file in enumerate(iter):
try: try:
@ -570,19 +570,43 @@ def kmeans_targets(path='./data/coco_64img.txt'): # from utils.utils import *;
assert (l[:, 1:] <= 1).all(), 'non-normalized or out of bounds coordinate labels: %s' % file assert (l[:, 1:] <= 1).all(), 'non-normalized or out of bounds coordinate labels: %s' % file
l[:, [1, 3]] *= s[i][0] l[:, [1, 3]] *= s[i][0]
l[:, [2, 4]] *= s[i][1] l[:, [2, 4]] *= s[i][1]
l[:, 1:] *= 320 / max(s[i]) l[:, 1:] *= img_size / max(s[i]) # nominal img_size for training here
labels[i] = l labels[i] = l
except: except:
pass # print('Warning: missing labels for %s' % self.img_files[i]) # missing label file pass # print('Warning: missing labels for %s' % self.img_files[i]) # missing label file
assert len(np.concatenate(labels, 0)) > 0, 'No labels found. Incorrect label paths provided.' assert len(np.concatenate(labels, 0)) > 0, 'No labels found. Incorrect label paths provided.'
# kmeans # kmeans calculation
from scipy import cluster from scipy import cluster
wh = np.concatenate(labels, 0)[:, 3:5] wh = np.concatenate(labels, 0)[:, 3:5]
k = cluster.vq.kmeans(wh, 9)[0] k = cluster.vq.kmeans(wh, n)[0]
k = k[np.argsort(k.prod(1))] k = k[np.argsort(k.prod(1))]
for x in k.ravel(): for x in k.ravel():
print('%.1f, ' % x, end='') print('%.1f, ' % x, end='') # drop-in replacement for *.cfg anchors
def print_mutation(hyp, results, bucket=''):
    # Print mutation results to evolve.txt (for use with train.py --evolve)
    #   hyp:     dict mapping hyperparameter name -> numeric value
    #   results: sequence of numeric results (P, R, mAP, F1, test_loss); tuple, list or array
    #   bucket:  optional storage bucket name; when non-empty, evolve.txt is downloaded from it
    #            before appending and the sorted, de-duplicated file is uploaded back via gsutil
    results = tuple(results)  # '%' formatting needs a tuple; a list/ndarray would raise TypeError
    a = '%11s' * len(hyp) % tuple(hyp.keys())  # hyperparam keys
    b = '%11.3g' * len(hyp) % tuple(hyp.values())  # hyperparam values
    c = '%11.3g' * len(results) % results  # results (P, R, mAP, F1, test_loss)
    print('\n%s\n%s\nEvolved fitness: %s\n' % (a, b, c))

    if bucket:
        os.system('gsutil cp gs://%s/evolve.txt .' % bucket)  # download evolve.txt

    # Each row is the results columns followed by the hyperparameter columns
    with open('evolve.txt', 'a') as f:  # append result
        f.write(c + b + '\n')

    if bucket:
        x = np.unique(np.loadtxt('evolve.txt', ndmin=2), axis=0)  # load unique rows
        np.savetxt('evolve.txt', x[np.argsort(-fitness(x))], '%11.3g')  # save sort by fitness
        os.system('gsutil cp evolve.txt gs://%s' % bucket)  # upload evolve.txt
def fitness(x):
    # Returns fitness (for use with results.txt or evolve.txt)
    # x is indexed as a 2D array (one row per result); the return value has one entry per row.
    # Columns 2 and 3 are weighted equally: fitness = 0.5 * col2 + 0.5 * col3
    # (presumably mAP and F1, per the (P, R, mAP, F1, test_loss) row layout of evolve.txt)
    col2, col3 = x[:, 2], x[:, 3]
    return 0.50 * col2 + 0.50 * col3
# Plotting functions --------------------------------------------------------------------------------------------------- # Plotting functions ---------------------------------------------------------------------------------------------------
@ -617,7 +641,7 @@ def plot_wh_methods(): # from utils.utils import *; plot_wh_methods()
plt.ylabel('output') plt.ylabel('output')
plt.legend() plt.legend()
fig.tight_layout() fig.tight_layout()
fig.savefig('comparison.png', dpi=300) fig.savefig('comparison.png', dpi=200)
def plot_images(imgs, targets, paths=None, fname='images.jpg'): def plot_images(imgs, targets, paths=None, fname='images.jpg'):
@ -642,7 +666,7 @@ def plot_images(imgs, targets, paths=None, fname='images.jpg'):
s = Path(paths[i]).name s = Path(paths[i]).name
plt.title(s[:min(len(s), 40)], fontdict={'size': 8}) # limit to 40 characters plt.title(s[:min(len(s), 40)], fontdict={'size': 8}) # limit to 40 characters
fig.tight_layout() fig.tight_layout()
fig.savefig(fname, dpi=300) fig.savefig(fname, dpi=200)
plt.close() plt.close()
@ -662,7 +686,7 @@ def plot_test_txt(): # from utils.utils import *; plot_test()
ax[0].hist(cx, bins=600) ax[0].hist(cx, bins=600)
ax[1].hist(cy, bins=600) ax[1].hist(cy, bins=600)
fig.tight_layout() fig.tight_layout()
plt.savefig('hist1d.jpg', dpi=300) plt.savefig('hist1d.jpg', dpi=200)
def plot_targets_txt(): # from utils.utils import *; plot_targets_txt() def plot_targets_txt(): # from utils.utils import *; plot_targets_txt()
@ -678,7 +702,27 @@ def plot_targets_txt(): # from utils.utils import *; plot_targets_txt()
ax[i].legend() ax[i].legend()
ax[i].set_title(s[i]) ax[i].set_title(s[i])
fig.tight_layout() fig.tight_layout()
plt.savefig('targets.jpg', dpi=300) plt.savefig('targets.jpg', dpi=200)
def plot_evolution_results(hyp):  # from utils.utils import *; plot_evolution_results(hyp)
    # Plot hyperparameter evolution results in evolve.txt
    #   hyp: dict of hyperparameters; only its keys and their order are used, and the order is
    #        assumed to match the hyperparameter columns of evolve.txt
    # Draws one subplot per hyperparameter (value vs. fitness), prints the best value of each,
    # and saves the figure to evolve.png
    x = np.loadtxt('evolve.txt', ndmin=2)  # ndmin=2 keeps a single-row file 2D so column indexing works
    f = fitness(x)
    best = f.argmax()  # row index of the fittest result
    fig = plt.figure(figsize=(12, 10))
    matplotlib.rc('font', **{'size': 8})
    rows = max(4, -(-len(hyp) // 5))  # 4x5 grid as before; taller only when there are more than 20 hyperparameters
    for i, k in enumerate(hyp):
        y = x[:, i + 5]  # hyperparameter columns start at column 5, after the result columns
        mu = y[best]  # best single result (a fitness-weighted mean would be an alternative)
        plt.subplot(rows, 5, i + 1)
        plt.plot(mu, f.max(), 'o', markersize=10)
        plt.plot(y, f, '.')
        plt.title('%s = %.3g' % (k, mu), fontdict={'size': 9})
        print('%15s: %.3g' % (k, mu))
    fig.tight_layout()
    plt.savefig('evolve.png', dpi=200)
def plot_results(start=0, stop=0): # from utils.utils import *; plot_results() def plot_results(start=0, stop=0): # from utils.utils import *; plot_results()
@ -698,4 +742,4 @@ def plot_results(start=0, stop=0): # from utils.utils import *; plot_results()
ax[i].set_title(s[i]) ax[i].set_title(s[i])
fig.tight_layout() fig.tight_layout()
ax[4].legend() ax[4].legend()
fig.savefig('results.png', dpi=300) fig.savefig('results.png', dpi=200)