Merge remote-tracking branch 'origin/master'

This commit is contained in:
Glenn Jocher 2019-07-29 00:45:37 +02:00
commit 3a7711856e
10 changed files with 792 additions and 199 deletions

View File

@ -5,7 +5,7 @@
</td>
<td align="center">
<a href="https://www.ultralytics.com" target="_blank">
<img src="https://storage.googleapis.com/ultralytics/logo/logoname1000.png" width="200"></a>
<img src="https://storage.googleapis.com/ultralytics/logo/logoname1000.png" width="160"></a>
<img src="https://user-images.githubusercontent.com/26833433/61591093-2b4d4480-abc2-11e9-8b46-d88eb1dabba1.jpg">
<a href="https://itunes.apple.com/app/id1452689527" target="_blank">
<img src="https://user-images.githubusercontent.com/26833433/50044365-9b22ac00-0082-11e9-862f-e77aee7aa7b0.png" width="180"></a>
@ -43,8 +43,7 @@ Python 3.7 or later with the following `pip3 install -U -r requirements.txt` pac
# Jupyter Notebook
A jupyter notebook with training, inference and testing examples is available at:
https://colab.research.google.com/drive/1G8T-VFxQkjDe4idzN8F-hbIBqkkkQnxw
Our Jupyter [notebook](https://colab.research.google.com/github/ultralytics/yolov3/blob/master/examples.ipynb) provides quick training, inference and testing examples.
# Training
@ -87,10 +86,11 @@ https://cloud.google.com/deep-learning-vm/
GPUs | `batch_size` | batch time | epoch time | epoch cost
--- |---| --- | --- | ---
1 K80 | 64 (32x2) | 2.9s | 175min | $0.58
1 T4 | 64 (32x2) | 0.8s | 49min | $0.29
1 T4 | 64 (32x2) | 0.80s | 49min | $0.29
2 T4 | 64 (64x1) | 0.52s | 32min | $0.36
1 2080ti | 64 (32x2) | - | - | -
1 V100 | 64 (32x2) | 0.38s | 23min | $0.31
2 V100 | 64 (64x1) | 0.38s | 23min | $0.62
2 V100 | 64 (64x1) | 0.30s | 18min | $0.46
# Inference

340
examples.ipynb Normal file

File diff suppressed because one or more lines are too long

View File

@ -1,7 +1,3 @@
import os
import torch.nn.functional as F
from utils.parse_config import *
from utils.utils import *
@ -145,6 +141,7 @@ class YOLOLayer(nn.Module):
return torch.cat((xy / ngu, wh, p_conf, p_cls), 2).squeeze().t()
else: # inference
# s = 1.5 # scale_xy (pxy = pxy * s - (s - 1) / 2)
io = p.clone() # inference output
io[..., 0:2] = torch.sigmoid(io[..., 0:2]) + self.grid_xy # xy
io[..., 2:4] = torch.exp(io[..., 2:4]) * self.anchor_wh # wh yolo method

View File

@ -1,7 +1,7 @@
# pip3 install -U -r requirements.txt
# conda install numpy opencv matplotlib tqdm pillow
# conda install pytorch torchvision -c pytorch
# conda install -c conda-forge scikit-image
# conda install scikit-image -c conda-forge
numpy
opencv-python
torch >= 1.1.0

254
train.py
View File

@ -4,19 +4,29 @@ import time
import torch.distributed as dist
import torch.optim as optim
import torch.optim.lr_scheduler as lr_scheduler
from torch.utils.data import DataLoader
import test # import test.py to get mAP after each epoch
from models import *
from utils.adabound import *
from utils.datasets import *
from utils.utils import *
# Optional mixed precision training via NVIDIA Apex: https://github.com/NVIDIA/apex
# `mixed_precision` records whether `amp` could be imported; `amp` is only defined when it is True.
# Install help: https://github.com/NVIDIA/apex/issues/259
try:
    from apex import amp

    mixed_precision = True
except Exception:  # Apex missing or unusable; unlike a bare `except:`, Ctrl-C and SystemExit still propagate
    mixed_precision = False
# 320 --epochs 1
# 0.109 0.297 0.15 0.126 7.04 1.666 4.062 0.1845 42.6 3.34 12.61 8.338 0.2705 0.001 -4 0.9 0.0005 a 320 giou + best_anchor False
# 0.223 0.218 0.138 0.189 9.28 1.153 4.376 0.08263 24.28 3.05 20.93 2.842 0.2759 0.001357 -5.036 0.9158 0.0005722 b mAP/F1 - 50/50 weighting
# 0.231 0.215 0.135 0.191 9.51 1.432 3.007 0.06082 24.87 3.477 24.13 2.802 0.3436 0.001127 -5.036 0.9232 0.0005874 c
# 0.246 0.194 0.128 0.192 8.12 1.101 3.954 0.0817 22.83 3.967 19.83 1.779 0.3352 0.000895 -5.036 0.9238 0.0007973 d
# 0.187 0.237 0.144 0.186 14.6 1.607 4.202 0.09439 39.27 3.726 31.26 2.634 0.273 0.001542 -5.036 0.8364 0.0008393 e
# 0.109 0.297 0.150 0.126 7.04 1.666 4.062 0.1845 42.6 3.34 12.61 8.338 0.2705 0.001 -4 0.9 0.0005 a 320 giou + best_anchor False
# 0.223 0.218 0.138 0.189 9.28 1.153 4.376 0.08263 24.28 3.05 20.93 2.842 0.2759 0.001357 -5.036 0.9158 0.0005722 b mAP/F1 - 50/50 weighting
# 0.231 0.215 0.135 0.191 9.51 1.432 3.007 0.06082 24.87 3.477 24.13 2.802 0.3436 0.001127 -5.036 0.9232 0.0005874 c
# 0.246 0.194 0.128 0.192 8.12 1.101 3.954 0.0817 22.83 3.967 19.83 1.779 0.3352 0.000895 -5.036 0.9238 0.0007973 d
# 0.187 0.237 0.144 0.186 14.6 1.607 4.202 0.09439 39.27 3.726 31.26 2.634 0.273 0.001542 -5.036 0.8364 0.0008393 e
# 0.250 0.217 0.136 0.195 3.3 1.2 2 0.604 15.7 3.67 20 1.36 0.194 0.00128 -4 0.95 0.000201 0.8 0.388 1.2 0.119 0.0589 0.401 f
# 0.269 0.225 0.149 0.218 6.71 1.13 5.25 0.246 22.4 3.64 17.8 1.31 0.256 0.00146 -4 0.936 0.00042 0.123 0.18 1.81 0.0987 0.0788 0.441 g
# 0.179 0.274 0.165 0.187 7.95 1.22 7.62 0.224 17 5.71 17.7 3.28 0.295 0.00136 -4 0.875 0.000319 0.131 0.208 2.14 0.14 0.0773 0.228 h
# 0.296 0.228 0.152 0.220 5.18 1.43 4.27 0.265 11.7 4.81 11.5 1.56 0.281 0.0013 -4 0.944 0.000427 0.0599 0.142 1.03 0.0552 0.0555 0.434 i
# 320 --epochs 2
# 0.242 0.296 0.196 0.231 5.67 0.8541 4.286 0.1539 21.61 1.957 22.9 2.894 0.3689 0.001844 -4 0.913 0.000467 # ha 0.417 mAP @ epoch 100
@ -25,40 +35,45 @@ from utils.utils import *
# 0.161 0.327 0.190 0.193 7.82 1.153 4.062 0.1845 24.28 3.05 20.93 2.842 0.2759 0.001357 -4 0.916 0.000572 # hd 0.438 mAP @ epoch 100
# Training hyperparameters d
hyp = {'giou': 1.153, # giou loss gain
'xy': 4.062, # xy loss gain
'wh': 0.1845, # wh loss gain
'cls': 24.28, # cls loss gain
'cls_pw': 3.05, # cls BCELoss positive_weight
'obj': 20.93, # obj loss gain
'obj_pw': 2.842, # obj BCELoss positive_weight
'iou_t': 0.2759, # iou training threshold
'lr0': 0.001357, # initial learning rate
'lrf': -4., # final LambdaLR learning rate = lr0 * (10 ** lrf)
'momentum': 0.916, # SGD momentum
'weight_decay': 0.0000572, # optimizer weight decay
'hsv_s': 0.5, # image HSV-Saturation augmentation (fraction)
'hsv_v': 0.5, # image HSV-Value augmentation (fraction)
'degrees': 5, # image rotation (+/- deg)
'translate': 0.1, # image translation (+/- fraction)
'scale': 0.1, # image scale (+/- gain)
'shear': 2} # image shear (+/- deg)
# # Training hyperparameters e
# hyp = {'giou': 1.607, # giou loss gain
# 'xy': 4.062, # xy loss gain
# 'wh': 0.1845, # wh loss gain
# 'cls': 39.27, # cls loss gain
# 'cls_pw': 3.726, # cls BCELoss positive_weight
# 'obj': 31.26, # obj loss gain
# 'obj_pw': 2.634, # obj BCELoss positive_weight
# 'iou_t': 0.273, # iou target-anchor training threshold
# 'lr0': 0.001542, # initial learning rate
# Training hyperparameters g
# hyp = {'giou': 1.13, # giou loss gain
# 'xy': 5.25, # xy loss gain
# 'wh': 0.246, # wh loss gain
# 'cls': 22.4, # cls loss gain
# 'cls_pw': 3.64, # cls BCELoss positive_weight
# 'obj': 17.8, # obj loss gain
# 'obj_pw': 1.31, # obj BCELoss positive_weight
# 'iou_t': 0.256, # iou training threshold
# 'lr0': 0.00146, # initial learning rate
# 'lrf': -4., # final LambdaLR learning rate = lr0 * (10 ** lrf)
# 'momentum': 0.8364, # SGD momentum
# 'weight_decay': 0.0008393} # optimizer weight decay
# 'momentum': 0.936, # SGD momentum
# 'weight_decay': 0.00042, # optimizer weight decay
# 'hsv_s': 0.123, # image HSV-Saturation augmentation (fraction)
# 'hsv_v': 0.18, # image HSV-Value augmentation (fraction)
# 'degrees': 1.81, # image rotation (+/- deg)
# 'translate': 0.0987, # image translation (+/- fraction)
# 'scale': 0.0788, # image scale (+/- gain)
# 'shear': 0.441} # image shear (+/- deg)
# Training hyperparameters i
# Key order matters: the evolve code indexes hyperparameters by their position in hyp.keys()
hyp = dict(
    giou=1.43,  # giou loss gain
    xy=4.27,  # xy loss gain
    wh=0.265,  # wh loss gain
    cls=11.7,  # cls loss gain
    cls_pw=4.81,  # cls BCELoss positive_weight
    obj=11.5,  # obj loss gain
    obj_pw=1.56,  # obj BCELoss positive_weight
    iou_t=0.281,  # iou training threshold
    lr0=0.0013,  # initial learning rate
    lrf=-4.,  # final LambdaLR learning rate = lr0 * (10 ** lrf)
    momentum=0.944,  # SGD momentum
    weight_decay=0.000427,  # optimizer weight decay
    hsv_s=0.0599,  # image HSV-Saturation augmentation (fraction)
    hsv_v=0.142,  # image HSV-Value augmentation (fraction)
    degrees=1.03,  # image rotation (+/- deg)
    translate=0.0552,  # image translation (+/- fraction)
    scale=0.0555,  # image scale (+/- gain)
    shear=0.434,  # image shear (+/- deg)
)
def train(cfg,
@ -66,13 +81,13 @@ def train(cfg,
img_size=416,
epochs=100, # 500200 batches at bs 16, 117263 images = 273 epochs
batch_size=16,
accumulate=4): # effective bs = batch_size * accumulate = 8 * 8 = 64
accumulate=4): # effective bs = batch_size * accumulate = 16 * 4 = 64
# Initialize
init_seeds()
weights = 'weights' + os.sep
last = weights + 'last.pt'
best = weights + 'best.pt'
device = torch_utils.select_device()
device = torch_utils.select_device(apex=mixed_precision)
multi_scale = opt.multi_scale
if multi_scale:
@ -89,11 +104,13 @@ def train(cfg,
model = Darknet(cfg).to(device)
# Optimizer
optimizer = optim.SGD(model.parameters(), lr=hyp['lr0'], momentum=hyp['momentum'], weight_decay=hyp['weight_decay'])
optimizer = optim.SGD(model.parameters(), lr=hyp['lr0'], momentum=hyp['momentum'], weight_decay=hyp['weight_decay'],
nesterov=True)
# optimizer = AdaBound(model.parameters(), lr=hyp['lr0'], final_lr=0.1)
cutoff = -1 # backbone reaches to cutoff layer
start_epoch = 0
best_fitness = 0.0
best_fitness = 0.
if opt.resume or opt.transfer: # Load previously saved model
if opt.transfer: # Transfer learning
nf = int(model.module_defs[model.yolo_layers[0] - 1]['filters']) # yolo layer size (i.e. 255)
@ -136,7 +153,7 @@ def train(cfg,
# lf = lambda x: 10 ** (hyp['lrf'] * x / epochs) # exp ramp
# lf = lambda x: 1 - 10 ** (hyp['lrf'] * (1 - x / epochs)) # inverse exp ramp
# scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)
scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=[round(opt.epochs * x) for x in (0.8, 0.9)], gamma=0.1)
scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=[round(opt.epochs * x) for x in [0.8]], gamma=0.1)
scheduler.last_epoch = start_epoch - 1
# # Plot lr schedule
@ -150,6 +167,18 @@ def train(cfg,
# plt.tight_layout()
# plt.savefig('LR.png', dpi=300)
# Mixed precision training https://github.com/NVIDIA/apex
if mixed_precision:
model, optimizer = amp.initialize(model, optimizer, opt_level='O1', verbosity=0)
# Initialize distributed training
if torch.cuda.device_count() > 1:
dist.init_process_group(backend='nccl', # 'distributed backend'
init_method='tcp://127.0.0.1:9999', # distributed training init method
world_size=1, # number of nodes for distributed training
rank=0) # distributed training node rank
model = torch.nn.parallel.DistributedDataParallel(model)
# Dataset
dataset = LoadImagesAndLabels(train_path,
img_size,
@ -158,32 +187,13 @@ def train(cfg,
hyp=hyp, # augmentation hyperparameters
rect=opt.rect) # rectangular training
# Initialize distributed training
if torch.cuda.device_count() > 1:
dist.init_process_group(backend='nccl', # 'distributed backend'
init_method='tcp://127.0.0.1:9999', # distributed training init method
world_size=1, # number of nodes for distributed training
rank=0) # distributed training node rank
model = torch.nn.parallel.DistributedDataParallel(model)
# sampler = torch.utils.data.distributed.DistributedSampler(dataset)
# Dataloader
dataloader = DataLoader(dataset,
batch_size=batch_size,
num_workers=opt.num_workers,
shuffle=not opt.rect, # Shuffle=True unless rectangular training is used
pin_memory=True,
collate_fn=dataset.collate_fn)
# Mixed precision training https://github.com/NVIDIA/apex
mixed_precision = True
if mixed_precision:
try:
from apex import amp
model, optimizer = amp.initialize(model, optimizer, opt_level='O1', verbosity=0)
except: # not installed: install help: https://github.com/NVIDIA/apex/issues/259
mixed_precision = False
dataloader = torch.utils.data.DataLoader(dataset,
batch_size=batch_size,
num_workers=opt.num_workers,
shuffle=not opt.rect, # Shuffle=True unless rectangular training is used
pin_memory=True,
collate_fn=dataset.collate_fn)
# Start training
model.hyp = hyp # attach hyperparameters to model
@ -192,7 +202,7 @@ def train(cfg,
nb = len(dataloader)
maps = np.zeros(nc) # mAP per class
results = (0, 0, 0, 0, 0) # P, R, mAP, F1, test_loss
n_burnin = min(round(nb / 5 + 1), 1000) # burn-in batches
# n_burnin = min(round(nb / 5 + 1), 1000) # burn-in batches
t0 = time.time()
for epoch in range(start_epoch, epochs):
model.train()
@ -234,11 +244,11 @@ def train(cfg,
plot_images(imgs=imgs, targets=targets, paths=paths, fname='train_batch%g.jpg' % i)
# SGD burn-in
if epoch == 0 and i <= n_burnin:
g = (i / n_burnin) ** 4 # gain
for x in optimizer.param_groups:
x['lr'] = hyp['lr0'] * g
x['weight_decay'] = hyp['weight_decay'] * g
# if epoch == 0 and i <= n_burnin:
# g = (i / n_burnin) ** 4 # gain
# for x in optimizer.param_groups:
# x['lr'] = hyp['lr0'] * g
# x['weight_decay'] = hyp['weight_decay'] * g
# Run model
pred = model(imgs)
@ -313,33 +323,11 @@ def train(cfg,
# Report time
print('%g epochs completed in %.3f hours.' % (epoch - start_epoch + 1, (time.time() - t0) / 3600))
del model, optimizer
dist.destroy_process_group() if torch.cuda.device_count() > 1 else None
torch.cuda.empty_cache()
return results
def print_mutation(hyp, results):
# Print one hyperparameter-evolution result and append it as a row to evolve.txt.
#   hyp:     dict of hyperparameters; keys and values are formatted in the dict's iteration order
#   results: tuple applied directly to the '%11.3g' format string (P, R, mAP, F1, test_loss)
# Reads the global `opt` (opt.bucket) rather than taking the bucket as an argument.
# Each row written is results first, then hyperparameter values (c + b).
# NOTE(review): indentation was lost in this view; the reload/sort/upload lines are presumably
# siblings of the first `with` block inside `if opt.bucket:` - confirm against the real file.
a = '%11s' * len(hyp) % tuple(hyp.keys()) # hyperparam keys
b = '%11.3g' * len(hyp) % tuple(hyp.values()) # hyperparam values
c = '%11.3g' * len(results) % results # results (P, R, mAP, F1, test_loss)
print('\n%s\n%s\nEvolved fitness: %s\n' % (a, b, c))
if opt.bucket:
# Bucket mode: fetch the shared evolve.txt, append this row, de-duplicate, re-sort, upload.
os.system('gsutil cp gs://%s/evolve.txt .' % opt.bucket) # download evolve.txt
with open('evolve.txt', 'a') as f: # append result
f.write(c + b + '\n')
x = np.unique(np.loadtxt('evolve.txt', ndmin=2), axis=0) # load unique rows
# argsort of the negated fitness puts the highest-fitness rows first
np.savetxt('evolve.txt', x[np.argsort(-fitness(x))], '%11.3g') # save sort by fitness
os.system('gsutil cp evolve.txt gs://%s' % opt.bucket) # upload evolve.txt
else:
# Local mode: append the row only; no de-duplication or sorting is done here.
with open('evolve.txt', 'a') as f:
f.write(c + b + '\n')
def fitness(x):
    """Return the fitness of each row of a 2-D array of hyp evolution vectors.

    Fitness is an equal-weight combination of column 2 (mAP) and column 3 (F1).
    """
    map_score = x[:, 2]
    f1_score = x[:, 3]
    return map_score * 0.5 + f1_score * 0.5
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('--epochs', type=int, default=100, help='number of epochs')
@ -362,38 +350,38 @@ if __name__ == '__main__':
opt = parser.parse_args()
print(opt)
if opt.evolve:
if not opt.evolve: # Train normally
results = train(opt.cfg,
opt.data,
img_size=opt.img_size,
epochs=opt.epochs,
batch_size=opt.batch_size,
accumulate=opt.accumulate)
else: # Evolve hyperparameters (optional)
opt.notest = True # only test final epoch
opt.nosave = True # only save final checkpoint
if opt.bucket:
os.system('gsutil cp gs://%s/evolve.txt .' % opt.bucket) # download evolve.txt if exists
# Train
results = train(opt.cfg,
opt.data,
img_size=opt.img_size,
epochs=opt.epochs,
batch_size=opt.batch_size,
accumulate=opt.accumulate)
for _ in range(1): # generations to evolve
if os.path.exists('evolve.txt'): # if evolve.txt exists: select best hyps and mutate
# Get best hyperparameters
x = np.loadtxt('evolve.txt', ndmin=2)
x = x[fitness(x).argmax()] # select best fitness hyps
for i, k in enumerate(hyp.keys()):
hyp[k] = x[i + 5]
# Evolve hyperparameters (optional)
if opt.evolve:
print_mutation(hyp, results) # Write mutation results
for _ in range(1000): # generations to evolve
# Get best hyperparameters
x = np.loadtxt('evolve.txt', ndmin=2)
x = x[fitness(x).argmax()] # select best fitness hyps
for i, k in enumerate(hyp.keys()):
hyp[k] = x[i + 5]
# Mutate
init_seeds(seed=int(time.time()))
s = [.15, .15, .15, .15, .15, .15, .15, .15, .15, .00, .05, .20, .20, .20, .20, .20, .20, .20] # sigmas
for i, k in enumerate(hyp.keys()):
x = (np.random.randn(1) * s[i] + 1) ** 2.0 # plt.hist(x.ravel(), 300)
hyp[k] *= float(x) # vary by sigmas
# Mutate
init_seeds(seed=int(time.time()))
s = [.15, .15, .15, .15, .15, .15, .15, .15, .15, .00, .05, .20, .20, .20, .20, .20, .20, .20] # sigmas
for i, k in enumerate(hyp.keys()):
x = (np.random.randn(1) * s[i] + 1) ** 2.0 # plt.hist(x.ravel(), 300)
hyp[k] *= float(x) # vary by sigmas
# Clip to limits
keys = ['lr0', 'iou_t', 'momentum', 'weight_decay', 'hsv_s', 'hsv_v', 'translate', 'scale']
limits = [(1e-4, 1e-2), (0.00, 0.70), (0.60, 0.95), (0, 0.001), (0, .8), (0, .8), (0, .8), (0, .8)]
limits = [(1e-4, 1e-2), (0.00, 0.70), (0.60, 0.97), (0, 0.001), (0, .9), (0, .9), (0, .9), (0, .9)]
for k, v in zip(keys, limits):
hyp[k] = np.clip(hyp[k], v[0], v[1])
@ -406,19 +394,7 @@ if __name__ == '__main__':
accumulate=opt.accumulate)
# Write mutation results
print_mutation(hyp, results)
print_mutation(hyp, results, opt.bucket)
# # Plot results
# import numpy as np
# import matplotlib.pyplot as plt
# a = np.loadtxt('evolve.txt')
# x = fitness(a)
# weights = (x - x.min()) ** 2
# fig = plt.figure(figsize=(10, 10))
# for i in range(len(hyp)):
# y = a[:, i + 5]
# mu = (y * weights).sum() / weights.sum()
# plt.subplot(4, 5, i + 1)
# plt.plot(x.max(), mu, 'o')
# plt.plot(x, y, '.')
# print(list(hyp.keys())[i], '%.4g' % mu)
# Plot results
# plot_evolution_results(hyp)

236
utils/adabound.py Normal file
View File

@ -0,0 +1,236 @@
import math
import torch
from torch.optim import Optimizer
class AdaBound(Optimizer):
    """Implements AdaBound algorithm.

    It has been proposed in `Adaptive Gradient Methods with Dynamic Bound of Learning Rate`_.

    Arguments:
        params (iterable): iterable of parameters to optimize or dicts defining
            parameter groups
        lr (float, optional): Adam learning rate (default: 1e-3)
        betas (Tuple[float, float], optional): coefficients used for computing
            running averages of gradient and its square (default: (0.9, 0.999))
        final_lr (float, optional): final (SGD) learning rate (default: 0.1)
        gamma (float, optional): convergence speed of the bound functions (default: 1e-3)
        eps (float, optional): term added to the denominator to improve
            numerical stability (default: 1e-8)
        weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
        amsbound (boolean, optional): whether to use the AMSBound variant of this algorithm

    Raises:
        ValueError: if lr, eps or final_lr is negative, or if a beta or gamma lies outside [0, 1).

    .. Adaptive Gradient Methods with Dynamic Bound of Learning Rate:
        https://openreview.net/forum?id=Bkg3g2R9FX
    """

    def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), final_lr=0.1, gamma=1e-3,
                 eps=1e-8, weight_decay=0, amsbound=False):
        if not 0.0 <= lr:
            raise ValueError("Invalid learning rate: {}".format(lr))
        if not 0.0 <= eps:
            raise ValueError("Invalid epsilon value: {}".format(eps))
        if not 0.0 <= betas[0] < 1.0:
            raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0]))
        if not 0.0 <= betas[1] < 1.0:
            raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1]))
        if not 0.0 <= final_lr:
            raise ValueError("Invalid final learning rate: {}".format(final_lr))
        if not 0.0 <= gamma < 1.0:
            raise ValueError("Invalid gamma parameter: {}".format(gamma))
        defaults = dict(lr=lr, betas=betas, final_lr=final_lr, gamma=gamma, eps=eps,
                        weight_decay=weight_decay, amsbound=amsbound)
        super(AdaBound, self).__init__(params, defaults)

        # Learning rate of each group at construction time; step() uses it to rescale final_lr.
        # NOTE: step() zips param_groups with this list, so groups added after construction
        # have no entry here and are not stepped.
        self.base_lrs = list(map(lambda group: group['lr'], self.param_groups))

    def __setstate__(self, state):
        super(AdaBound, self).__setstate__(state)
        for group in self.param_groups:
            group.setdefault('amsbound', False)

    def step(self, closure=None):
        """Performs a single optimization step.

        Arguments:
            closure (callable, optional): A closure that reevaluates the model
                and returns the loss.

        Returns:
            The value returned by ``closure``, or None when no closure is given.

        Raises:
            RuntimeError: if a parameter has a sparse gradient.
        """
        loss = None
        if closure is not None:
            loss = closure()

        for group, base_lr in zip(self.param_groups, self.base_lrs):
            for p in group['params']:
                if p.grad is None:
                    continue
                grad = p.grad.data
                if grad.is_sparse:
                    raise RuntimeError(
                        'AdaBound does not support sparse gradients, please consider SparseAdam instead')
                amsbound = group['amsbound']

                state = self.state[p]

                # State initialization
                if len(state) == 0:
                    state['step'] = 0
                    # Exponential moving average of gradient values
                    state['exp_avg'] = torch.zeros_like(p.data)
                    # Exponential moving average of squared gradient values
                    state['exp_avg_sq'] = torch.zeros_like(p.data)
                    if amsbound:
                        # Maintains max of all exp. moving avg. of sq. grad. values
                        state['max_exp_avg_sq'] = torch.zeros_like(p.data)

                exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']
                if amsbound:
                    max_exp_avg_sq = state['max_exp_avg_sq']
                beta1, beta2 = group['betas']

                state['step'] += 1

                if group['weight_decay'] != 0:
                    # L2 penalty folded into the gradient (out-of-place, p.grad is left untouched)
                    grad = grad.add(p.data, alpha=group['weight_decay'])

                # Decay the first and second moment running average coefficient
                # (keyword alpha/value forms; the positional-scalar overloads are deprecated)
                exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1)
                exp_avg_sq.mul_(beta2).addcmul_(grad, grad, value=1 - beta2)
                if amsbound:
                    # Maintains the maximum of all 2nd moment running avg. till now
                    torch.max(max_exp_avg_sq, exp_avg_sq, out=max_exp_avg_sq)
                    # Use the max. for normalizing running avg. of gradient
                    denom = max_exp_avg_sq.sqrt().add_(group['eps'])
                else:
                    denom = exp_avg_sq.sqrt().add_(group['eps'])

                bias_correction1 = 1 - beta1 ** state['step']
                bias_correction2 = 1 - beta2 ** state['step']
                step_size = group['lr'] * math.sqrt(bias_correction2) / bias_correction1

                # Applies bounds on actual learning rate
                # lr_scheduler cannot affect final_lr, this is a workaround to apply lr decay
                final_lr = group['final_lr'] * group['lr'] / base_lr
                lower_bound = final_lr * (1 - 1 / (group['gamma'] * state['step'] + 1))
                upper_bound = final_lr * (1 + 1 / (group['gamma'] * state['step']))
                step_size = torch.full_like(denom, step_size)
                step_size.div_(denom).clamp_(lower_bound, upper_bound).mul_(exp_avg)

                p.data.add_(-step_size)

        return loss
class AdaBoundW(Optimizer):
    """Implements AdaBound algorithm with Decoupled Weight Decay (arxiv.org/abs/1711.05101)

    It has been proposed in `Adaptive Gradient Methods with Dynamic Bound of Learning Rate`_.

    Arguments:
        params (iterable): iterable of parameters to optimize or dicts defining
            parameter groups
        lr (float, optional): Adam learning rate (default: 1e-3)
        betas (Tuple[float, float], optional): coefficients used for computing
            running averages of gradient and its square (default: (0.9, 0.999))
        final_lr (float, optional): final (SGD) learning rate (default: 0.1)
        gamma (float, optional): convergence speed of the bound functions (default: 1e-3)
        eps (float, optional): term added to the denominator to improve
            numerical stability (default: 1e-8)
        weight_decay (float, optional): decoupled weight decay; ``weight_decay * p`` is
            subtracted from the parameter directly instead of being added to the gradient
            (default: 0)
        amsbound (boolean, optional): whether to use the AMSBound variant of this algorithm

    Raises:
        ValueError: if lr, eps or final_lr is negative, or if a beta or gamma lies outside [0, 1).

    .. Adaptive Gradient Methods with Dynamic Bound of Learning Rate:
        https://openreview.net/forum?id=Bkg3g2R9FX
    """

    def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), final_lr=0.1, gamma=1e-3,
                 eps=1e-8, weight_decay=0, amsbound=False):
        if not 0.0 <= lr:
            raise ValueError("Invalid learning rate: {}".format(lr))
        if not 0.0 <= eps:
            raise ValueError("Invalid epsilon value: {}".format(eps))
        if not 0.0 <= betas[0] < 1.0:
            raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0]))
        if not 0.0 <= betas[1] < 1.0:
            raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1]))
        if not 0.0 <= final_lr:
            raise ValueError("Invalid final learning rate: {}".format(final_lr))
        if not 0.0 <= gamma < 1.0:
            raise ValueError("Invalid gamma parameter: {}".format(gamma))
        defaults = dict(lr=lr, betas=betas, final_lr=final_lr, gamma=gamma, eps=eps,
                        weight_decay=weight_decay, amsbound=amsbound)
        super(AdaBoundW, self).__init__(params, defaults)

        # Learning rate of each group at construction time; step() uses it to rescale final_lr.
        # NOTE: step() zips param_groups with this list, so groups added after construction
        # have no entry here and are not stepped.
        self.base_lrs = list(map(lambda group: group['lr'], self.param_groups))

    def __setstate__(self, state):
        super(AdaBoundW, self).__setstate__(state)
        for group in self.param_groups:
            group.setdefault('amsbound', False)

    def step(self, closure=None):
        """Performs a single optimization step.

        Arguments:
            closure (callable, optional): A closure that reevaluates the model
                and returns the loss.

        Returns:
            The value returned by ``closure``, or None when no closure is given.

        Raises:
            RuntimeError: if a parameter has a sparse gradient.
        """
        loss = None
        if closure is not None:
            loss = closure()

        for group, base_lr in zip(self.param_groups, self.base_lrs):
            for p in group['params']:
                if p.grad is None:
                    continue
                grad = p.grad.data
                if grad.is_sparse:
                    raise RuntimeError(
                        'AdaBoundW does not support sparse gradients, please consider SparseAdam instead')
                amsbound = group['amsbound']

                state = self.state[p]

                # State initialization
                if len(state) == 0:
                    state['step'] = 0
                    # Exponential moving average of gradient values
                    state['exp_avg'] = torch.zeros_like(p.data)
                    # Exponential moving average of squared gradient values
                    state['exp_avg_sq'] = torch.zeros_like(p.data)
                    if amsbound:
                        # Maintains max of all exp. moving avg. of sq. grad. values
                        state['max_exp_avg_sq'] = torch.zeros_like(p.data)

                exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']
                if amsbound:
                    max_exp_avg_sq = state['max_exp_avg_sq']
                beta1, beta2 = group['betas']

                state['step'] += 1

                # Decay the first and second moment running average coefficient
                # (keyword alpha/value forms; the positional-scalar overloads are deprecated)
                exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1)
                exp_avg_sq.mul_(beta2).addcmul_(grad, grad, value=1 - beta2)
                if amsbound:
                    # Maintains the maximum of all 2nd moment running avg. till now
                    torch.max(max_exp_avg_sq, exp_avg_sq, out=max_exp_avg_sq)
                    # Use the max. for normalizing running avg. of gradient
                    denom = max_exp_avg_sq.sqrt().add_(group['eps'])
                else:
                    denom = exp_avg_sq.sqrt().add_(group['eps'])

                bias_correction1 = 1 - beta1 ** state['step']
                bias_correction2 = 1 - beta2 ** state['step']
                step_size = group['lr'] * math.sqrt(bias_correction2) / bias_correction1

                # Applies bounds on actual learning rate
                # lr_scheduler cannot affect final_lr, this is a workaround to apply lr decay
                final_lr = group['final_lr'] * group['lr'] / base_lr
                lower_bound = final_lr * (1 - 1 / (group['gamma'] * state['step'] + 1))
                upper_bound = final_lr * (1 + 1 / (group['gamma'] * state['step']))
                step_size = torch.full_like(denom, step_size)
                step_size.div_(denom).clamp_(lower_bound, upper_bound).mul_(exp_avg)

                # Decoupled weight decay: the decay term is taken from the weights as they
                # were before this step's gradient update, then applied after it.
                decayed_weights = None
                if group['weight_decay'] != 0:
                    decayed_weights = torch.mul(p.data, group['weight_decay'])
                p.data.add_(-step_size)
                if decayed_weights is not None:
                    p.data.sub_(decayed_weights)

        return loss

View File

@ -8,9 +8,9 @@ from pathlib import Path
import cv2
import numpy as np
import torch
from PIL import Image, ExifTags
from torch.utils.data import Dataset
from tqdm import tqdm
from PIL import Image, ExifTags
from utils.utils import xyxy2xywh, xywh2xyxy
@ -40,8 +40,6 @@ def exif_size(img):
class LoadImages: # for inference
def __init__(self, path, img_size=416):
self.height = img_size
files = []
if os.path.isdir(path):
files = sorted(glob.glob('%s/*.*' % path))
@ -52,6 +50,7 @@ class LoadImages: # for inference
videos = [x for x in files if os.path.splitext(x)[-1].lower() in vid_formats]
nI, nV = len(images), len(videos)
self.img_size = img_size
self.files = images + videos
self.nF = nI + nV # number of files
self.video_flag = [False] * nI + [True] * nV
@ -96,7 +95,7 @@ class LoadImages: # for inference
print('image %g/%g %s: ' % (self.count, self.nF, path), end='')
# Padded resize
img, *_ = letterbox(img0, new_shape=self.height)
img, *_ = letterbox(img0, new_shape=self.img_size)
# Normalize RGB
img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB
@ -117,8 +116,10 @@ class LoadImages: # for inference
class LoadWebcam: # for inference
def __init__(self, img_size=416):
self.cam = cv2.VideoCapture(0)
self.height = img_size
self.img_size = img_size
self.cam = cv2.VideoCapture(0) # local camera
# self.cam = cv2.VideoCapture('rtsp://192.168.1.64/1') # IP camera
# self.cam = cv2.VideoCapture('rtsp://username:password@192.168.1.64/1') # IP camera with login
def __iter__(self):
self.count = -1
@ -138,7 +139,7 @@ class LoadWebcam: # for inference
print('webcam %g: ' % self.count, end='')
# Padded resize
img, *_ = letterbox(img0, new_shape=self.height)
img, *_ = letterbox(img0, new_shape=self.img_size)
# Normalize RGB
img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB
@ -154,8 +155,7 @@ class LoadWebcam: # for inference
class LoadImagesAndLabels(Dataset): # for training/testing
def __init__(self, path, img_size=416, batch_size=16, augment=False, hyp=None, rect=False, image_weights=False):
with open(path, 'r') as f:
img_files = f.read().splitlines()
self.img_files = [x for x in img_files if os.path.splitext(x)[-1].lower() in img_formats]
self.img_files = [x for x in f.read().splitlines() if os.path.splitext(x)[-1].lower() in img_formats]
n = len(self.img_files)
bi = np.floor(np.arange(n) / batch_size).astype(np.int) # batch index
@ -405,10 +405,11 @@ def letterbox(img, new_shape=416, color=(128, 128, 128), mode='auto'):
new_unpad = (new_shape, new_shape)
ratiow, ratioh = new_shape / shape[1], new_shape / shape[0]
if shape[::-1] != new_unpad:
img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_AREA) # resize
top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_AREA) # resized, no border
img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # padded square
img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border
return img, ratiow, ratioh, dw, dh

View File

@ -28,6 +28,12 @@ python3 detect.py
# Test
python3 test.py --save-json
# Evolve
for i in {0..500}
do
python3 train.py --data data/coco.data --img-size 320 --epochs 1 --batch-size 64 --accumulate 1 --evolve --bucket yolov4
done
# Git pull
git pull https://github.com/ultralytics/yolov3 # master
git pull https://github.com/ultralytics/yolov3 test # branch

View File

@ -9,24 +9,19 @@ def init_seeds(seed=0):
# torch.backends.cudnn.deterministic = True # https://pytorch.org/docs/stable/notes/randomness.html
def select_device(force_cpu=False):
def select_device(force_cpu=False, apex=False):
# apex if mixed precision training https://github.com/NVIDIA/apex
cuda = False if force_cpu else torch.cuda.is_available()
device = torch.device('cuda:0' if cuda else 'cpu')
if not cuda:
print('Using CPU')
if cuda:
try: # Mixed precision training https://github.com/NVIDIA/apex
from apex import amp
apex_str = 'with Apex '
except:
apex_str = ''
torch.backends.cudnn.benchmark = True # set False for reproducible results
c = 1024 ** 2 # bytes to MB
ng = torch.cuda.device_count()
x = [torch.cuda.get_device_properties(i) for i in range(ng)]
cuda_str = 'Using CUDA ' + apex_str
cuda_str = 'Using CUDA ' + ('Apex ' if apex else '')
for i in range(0, ng):
if i == 1:
# torch.cuda.set_device(0) # OPTIONAL: Set GPU ID
@ -42,14 +37,12 @@ def fuse_conv_and_bn(conv, bn):
# https://tehnokv.com/posts/fusing-batchnorm-and-conv/
with torch.no_grad():
# init
fusedconv = torch.nn.Conv2d(
conv.in_channels,
conv.out_channels,
kernel_size=conv.kernel_size,
stride=conv.stride,
padding=conv.padding,
bias=True
)
fusedconv = torch.nn.Conv2d(conv.in_channels,
conv.out_channels,
kernel_size=conv.kernel_size,
stride=conv.stride,
padding=conv.padding,
bias=True)
# prepare filters
w_conv = conv.weight.clone().view(conv.out_channels, -1)

View File

@ -1,5 +1,7 @@
import glob
import os
import random
from pathlib import Path
import cv2
import matplotlib
@ -9,7 +11,6 @@ import torch
import torch.nn as nn
from PIL import Image
from tqdm import tqdm
from pathlib import Path
from . import torch_utils # , google_utils
@ -303,12 +304,14 @@ def compute_loss(p, targets, model, giou_loss=True): # predictions, targets, mo
tobj[b, a, gj, gi] = 1.0 # obj
# pi[..., 2:4] = torch.sigmoid(pi[..., 2:4]) # wh power loss (uncomment)
# s = 1.5 # scale_xy
pxy = torch.sigmoid(pi[..., 0:2]) # * s - (s - 1) / 2
if giou_loss:
pbox = torch.cat((torch.sigmoid(pi[..., 0:2]), torch.exp(pi[..., 2:4]) * anchor_vec[i]), 1) # predicted
pbox = torch.cat((pxy, torch.exp(pi[..., 2:4]) * anchor_vec[i]), 1) # predicted
giou = bbox_iou(pbox.t(), tbox[i], x1y1x2y2=False, GIoU=True) # giou computation
lxy += (k * h['giou']) * (1.0 - giou).mean() # giou loss
else:
lxy += (k * h['xy']) * MSE(torch.sigmoid(pi[..., 0:2]), txy[i]) # xy loss
lxy += (k * h['xy']) * MSE(pxy, txy[i]) # xy loss
lwh += (k * h['wh']) * MSE(pi[..., 2:4], twh[i]) # wh yolo loss
tclsm = torch.zeros_like(pi[..., 5:])
@ -542,23 +545,20 @@ def select_best_evolve(path='evolve*.txt'): # from utils.utils import *; select
print(file, x[fitness.argmax()])
def kmeans_targets(path='./data/coco_64img.txt'): # from utils.utils import *; kmeans_targets()
def kmeans_targets(path='./data/coco_64img.txt', n=9, img_size=320): # from utils.utils import *; kmeans_targets()
# Produces a list of target kmeans suitable for use in *.cfg files
img_formats = ['.bmp', '.jpg', '.jpeg', '.png', '.tif']
with open(path, 'r') as f:
img_files = f.read().splitlines()
img_files = list(filter(lambda x: len(x) > 0, img_files))
img_files = [x for x in f.read().splitlines() if os.path.splitext(x)[-1].lower() in img_formats]
# Read shapes
n = len(img_files)
assert n > 0, 'No images found in %s' % path
label_files = [x.replace('images', 'labels').
replace('.jpeg', '.txt').
replace('.jpg', '.txt').
replace('.bmp', '.txt').
replace('.png', '.txt') for x in img_files]
nf = len(img_files)
assert nf > 0, 'No images found in %s' % path
label_files = [x.replace('images', 'labels').replace(os.path.splitext(x)[-1], '.txt') for x in img_files]
s = np.array([Image.open(f).size for f in tqdm(img_files, desc='Reading image shapes')]) # (width, height)
# Read targets
labels = [np.zeros((0, 5))] * n
labels = [np.zeros((0, 5))] * nf
iter = tqdm(label_files, desc='Reading labels')
for i, file in enumerate(iter):
try:
@ -570,19 +570,43 @@ def kmeans_targets(path='./data/coco_64img.txt'): # from utils.utils import *;
assert (l[:, 1:] <= 1).all(), 'non-normalized or out of bounds coordinate labels: %s' % file
l[:, [1, 3]] *= s[i][0]
l[:, [2, 4]] *= s[i][1]
l[:, 1:] *= 320 / max(s[i])
l[:, 1:] *= img_size / max(s[i]) # nominal img_size for training here
labels[i] = l
except:
pass # print('Warning: missing labels for %s' % self.img_files[i]) # missing label file
assert len(np.concatenate(labels, 0)) > 0, 'No labels found. Incorrect label paths provided.'
# kmeans
# kmeans calculation
from scipy import cluster
wh = np.concatenate(labels, 0)[:, 3:5]
k = cluster.vq.kmeans(wh, 9)[0]
k = cluster.vq.kmeans(wh, n)[0]
k = k[np.argsort(k.prod(1))]
for x in k.ravel():
print('%.1f, ' % x, end='')
print('%.1f, ' % x, end='') # drop-in replacement for *.cfg anchors
def print_mutation(hyp, results, bucket=''):
    # Print mutation results and append them to evolve.txt (for use with train.py --evolve)
    #   hyp:     dict of hyperparameter name -> numeric value
    #   results: sequence of numeric results (P, R, mAP, F1, test_loss); tuple, list or array
    #   bucket:  optional gs:// bucket name; when set, evolve.txt is downloaded before the
    #            append, then de-duplicated, sorted by fitness and uploaded back
    a = '%11s' * len(hyp) % tuple(hyp.keys())  # hyperparam keys
    b = '%11.3g' * len(hyp) % tuple(hyp.values())  # hyperparam values
    # tuple() is required: the % operator only unpacks tuples, so a list/array would raise TypeError
    c = '%11.3g' * len(results) % tuple(results)  # results (P, R, mAP, F1, test_loss)
    print('\n%s\n%s\nEvolved fitness: %s\n' % (a, b, c))

    if bucket:
        os.system('gsutil cp gs://%s/evolve.txt .' % bucket)  # download evolve.txt

    with open('evolve.txt', 'a') as f:  # append result (results columns first, then hyperparameters)
        f.write(c + b + '\n')

    if bucket:
        x = np.unique(np.loadtxt('evolve.txt', ndmin=2), axis=0)  # load unique rows
        np.savetxt('evolve.txt', x[np.argsort(-fitness(x))], '%11.3g')  # save sort by fitness
        os.system('gsutil cp evolve.txt gs://%s' % bucket)  # upload evolve.txt
def fitness(x):
    # Returns fitness (for use with results.txt or evolve.txt)
    # x holds one result row per line; columns 2 and 3 are weighted equally. Per the column
    # order written by print_mutation (P, R, mAP, F1, test_loss) these are mAP and F1.
    # Ellipsis indexing matches x[:, 2] for 2-D input and also accepts a single 1-D row.
    return 0.50 * x[..., 2] + 0.50 * x[..., 3]  # fitness = 0.5 * mAP + 0.5 * F1
# Plotting functions ---------------------------------------------------------------------------------------------------
@ -617,7 +641,7 @@ def plot_wh_methods(): # from utils.utils import *; plot_wh_methods()
plt.ylabel('output')
plt.legend()
fig.tight_layout()
fig.savefig('comparison.png', dpi=300)
fig.savefig('comparison.png', dpi=200)
def plot_images(imgs, targets, paths=None, fname='images.jpg'):
@ -642,7 +666,7 @@ def plot_images(imgs, targets, paths=None, fname='images.jpg'):
s = Path(paths[i]).name
plt.title(s[:min(len(s), 40)], fontdict={'size': 8}) # limit to 40 characters
fig.tight_layout()
fig.savefig(fname, dpi=300)
fig.savefig(fname, dpi=200)
plt.close()
@ -662,7 +686,7 @@ def plot_test_txt(): # from utils.utils import *; plot_test()
ax[0].hist(cx, bins=600)
ax[1].hist(cy, bins=600)
fig.tight_layout()
plt.savefig('hist1d.jpg', dpi=300)
plt.savefig('hist1d.jpg', dpi=200)
def plot_targets_txt(): # from utils.utils import *; plot_targets_txt()
@ -678,7 +702,27 @@ def plot_targets_txt(): # from utils.utils import *; plot_targets_txt()
ax[i].legend()
ax[i].set_title(s[i])
fig.tight_layout()
plt.savefig('targets.jpg', dpi=300)
plt.savefig('targets.jpg', dpi=200)
def plot_evolution_results(hyp):  # from utils.utils import *; plot_evolution_results(hyp)
    # Plot hyperparameter evolution results in evolve.txt and save the figure to evolve.png
    #   hyp: dict of hyperparameters; only its keys and their order are used, to label subplots
    # ndmin=2 keeps a single-row evolve.txt 2-D (same as print_mutation), so column indexing works
    x = np.loadtxt('evolve.txt', ndmin=2)
    f = fitness(x)
    best = f.argmax()  # row index of the fittest generation
    fig = plt.figure(figsize=(12, 10))
    matplotlib.rc('font', **{'size': 8})
    for i, k in enumerate(hyp):
        # assumes rows as written by print_mutation: 5 result columns, then the hyperparameters
        y = x[:, i + 5]
        # best single result; an alternative is the fitness-weighted mean
        # (y * w).sum() / w.sum() with w = (f - f.min()) ** 2
        mu = y[best]
        plt.subplot(4, 5, i + 1)  # 4 x 5 grid, i.e. room for up to 20 hyperparameters
        plt.plot(mu, f.max(), 'o', markersize=10)
        plt.plot(y, f, '.')
        plt.title('%s = %.3g' % (k, mu), fontdict={'size': 9})
        print('%15s: %.3g' % (k, mu))
    fig.tight_layout()
    plt.savefig('evolve.png', dpi=200)
def plot_results(start=0, stop=0): # from utils.utils import *; plot_results()
@ -698,4 +742,4 @@ def plot_results(start=0, stop=0): # from utils.utils import *; plot_results()
ax[i].set_title(s[i])
fig.tight_layout()
ax[4].legend()
fig.savefig('results.png', dpi=300)
fig.savefig('results.png', dpi=200)