Merge remote-tracking branch 'origin/master'
Commit: a5592093ef

@@ -30,6 +30,7 @@ data/*
 !data/trainvalno5k.shapes
 !data/5k.shapes
 !data/5k.txt
+!data/*.sh
 pycocotools/*
 results*.txt
@@ -211,7 +211,6 @@ Computing mAP: 100%|████████████████████
 Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.331
 Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.517
 Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.618
-
 ```

 # Citation
New file (COCO dataset download script):

@@ -0,0 +1,11 @@
+#!/bin/bash
+# https://stackoverflow.com/questions/48133080/how-to-download-a-google-drive-url-via-curl-or-wget/48133859
+
+# Download COCO dataset
+fileid="1HaXkef9z6y5l4vUnCYgdmEAj61c6bfWO"
+filename="coco_gdrive.zip"
+curl -c ./cookie -s -L "https://drive.google.com/uc?export=download&id=${fileid}" > /dev/null
+curl -Lb ./cookie "https://drive.google.com/uc?export=download&confirm=`awk '/download/ {print $NF}' ./cookie`&id=${fileid}" -o ${filename}
+
+# Unzip
+unzip -q coco_gdrive.zip
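
Note on the script above: the first curl only harvests Google Drive's cookies, and the awk pulls the confirm token out of the cookie jar so the second curl can get past the "can't scan for viruses" page on large files. A minimal Python sketch of the same two-step flow, assuming the requests package (the function name and chunk size are illustrative, not part of this commit):

import requests

def gdrive_download(fileid, filename):
    # First GET: collect cookies; large files set a 'download_warning'
    # cookie whose value is the confirm token.
    url = 'https://drive.google.com/uc'
    session = requests.Session()
    r = session.get(url, params={'export': 'download', 'id': fileid}, stream=True)
    token = next((v for k, v in r.cookies.items() if k.startswith('download_warning')), None)

    # Second GET: repeat the request with the confirm token and stream to disk.
    params = {'export': 'download', 'id': fileid}
    if token:
        params['confirm'] = token
    r = session.get(url, params=params, stream=True)
    with open(filename, 'wb') as f:
        for chunk in r.iter_content(chunk_size=32768):
            if chunk:
                f.write(chunk)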
detect.py (24 changed lines)

@@ -127,20 +127,18 @@ if __name__ == '__main__':
     parser.add_argument('--img-size', type=int, default=416, help='inference size (pixels)')
     parser.add_argument('--conf-thres', type=float, default=0.5, help='object confidence threshold')
     parser.add_argument('--nms-thres', type=float, default=0.5, help='iou threshold for non-maximum suppression')
-    parser.add_argument('--fourcc', type=str, default='mp4v', help='specifies the fourcc code for output video encoding (make sure ffmpeg supports specified fourcc codec)')
-    parser.add_argument('--output', type=str, default='output',help='specifies the output path for images and videos')
+    parser.add_argument('--fourcc', type=str, default='mp4v', help='fourcc output video codec (verify ffmpeg support)')
+    parser.add_argument('--output', type=str, default='output', help='specifies the output path for images and videos')
     opt = parser.parse_args()
     print(opt)

     with torch.no_grad():
-        detect(
-            opt.cfg,
-            opt.data_cfg,
-            opt.weights,
-            images=opt.images,
-            img_size=opt.img_size,
-            conf_thres=opt.conf_thres,
-            nms_thres=opt.nms_thres,
-            fourcc=opt.fourcc,
-            output=opt.output
-        )
+        detect(opt.cfg,
+               opt.data_cfg,
+               opt.weights,
+               images=opt.images,
+               img_size=opt.img_size,
+               conf_thres=opt.conf_thres,
+               nms_thres=opt.nms_thres,
+               fourcc=opt.fourcc,
+               output=opt.output)
models.py (24 changed lines)

@@ -15,6 +15,7 @@ def create_modules(module_defs):
     hyperparams = module_defs.pop(0)
     output_filters = [int(hyperparams['channels'])]
     module_list = nn.ModuleList()
+    yolo_index = -1

     for i, module_def in enumerate(module_defs):
         modules = nn.Sequential()
@@ -44,8 +45,7 @@ def create_modules(module_defs):
             modules.add_module('maxpool_%d' % i, maxpool)

         elif module_def['type'] == 'upsample':
-            # upsample = nn.Upsample(scale_factor=int(module_def['stride']), mode='nearest')  # WARNING: deprecated
-            upsample = Upsample(scale_factor=int(module_def['stride']))
+            upsample = nn.Upsample(scale_factor=int(module_def['stride']), mode='nearest')
             modules.add_module('upsample_%d' % i, upsample)

         elif module_def['type'] == 'route':
@@ -58,6 +58,7 @@ def create_modules(module_defs):
             modules.add_module('shortcut_%d' % i, EmptyLayer())

         elif module_def['type'] == 'yolo':
+            yolo_index += 1
             anchor_idxs = [int(x) for x in module_def['mask'].split(',')]
             # Extract anchors
             anchors = [float(x) for x in module_def['anchors'].split(',')]
@@ -66,8 +67,7 @@ def create_modules(module_defs):
             nc = int(module_def['classes'])  # number of classes
             img_size = hyperparams['height']
             # Define detection layer
-            yolo_layer = YOLOLayer(anchors, nc, img_size, cfg=hyperparams['cfg'])
-            modules.add_module('yolo_%d' % i, yolo_layer)
+            modules.add_module('yolo_%d' % i, YOLOLayer(anchors, nc, img_size, yolo_index))

         # Register module list and number of output filters
         module_list.append(modules)
@@ -86,20 +86,8 @@ class EmptyLayer(nn.Module):
         return x


-class Upsample(nn.Module):
-    # Custom Upsample layer (nn.Upsample gives deprecated warning message)
-
-    def __init__(self, scale_factor=1, mode='nearest'):
-        super(Upsample, self).__init__()
-        self.scale_factor = scale_factor
-        self.mode = mode
-
-    def forward(self, x):
-        return F.interpolate(x, scale_factor=self.scale_factor, mode=self.mode)
-
-
 class YOLOLayer(nn.Module):
-    def __init__(self, anchors, nc, img_size, cfg):
+    def __init__(self, anchors, nc, img_size, yolo_index):
         super(YOLOLayer, self).__init__()

         self.anchors = torch.Tensor(anchors)
@@ -109,7 +97,7 @@ class YOLOLayer(nn.Module):
         self.ny = 0  # initialize number of y gridpoints

         if ONNX_EXPORT:  # grids must be computed in __init__
-            stride = [32, 16, 8][yolo_layer]  # stride of this layer
+            stride = [32, 16, 8][yolo_index]  # stride of this layer
             nx = int(img_size[1] / stride)  # number x grid points
             ny = int(img_size[0] / stride)  # number y grid points
             create_grids(self, max(img_size), (nx, ny))
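
Note on the models.py change: each YOLOLayer is now handed its ordinal index (yolo_index) instead of a cfg value, because in ONNX export mode the grids must be built in __init__ and the layer's stride follows from its position in the network. A small illustrative check of that arithmetic (values assume the usual three-scale YOLOv3 head and a 416x416 input; this is not code from the commit):

# Strides of the three detection layers and the grids they imply at 416x416.
img_size = (416, 416)  # (height, width)
for yolo_index in range(3):
    stride = [32, 16, 8][yolo_index]  # stride of this layer
    nx = int(img_size[1] / stride)    # x grid points: 13, 26, 52
    ny = int(img_size[0] / stride)    # y grid points: 13, 26, 52
    print(yolo_index, stride, (nx, ny))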
test.py (20 changed lines)

@@ -201,14 +201,12 @@ if __name__ == '__main__':
print(opt)
|
print(opt)
|
||||||
|
|
||||||
with torch.no_grad():
|
with torch.no_grad():
|
||||||
mAP = test(
|
mAP = test(opt.cfg,
|
||||||
opt.cfg,
|
opt.data_cfg,
|
||||||
opt.data_cfg,
|
opt.weights,
|
||||||
opt.weights,
|
opt.batch_size,
|
||||||
opt.batch_size,
|
opt.img_size,
|
||||||
opt.img_size,
|
opt.iou_thres,
|
||||||
opt.iou_thres,
|
opt.conf_thres,
|
||||||
opt.conf_thres,
|
opt.nms_thres,
|
||||||
opt.nms_thres,
|
opt.save_json)
|
||||||
opt.save_json
|
|
||||||
)
|
|
||||||
|
|
train.py (186 changed lines)

@@ -11,27 +11,31 @@ from models import *
 from utils.datasets import *
 from utils.utils import *

-# Hyperparameters: train.py --data data/coco.data --img-size 320 --single-scale --batch-size 64 --accumulate 1 --epochs 1 --evolve 0.087 0.281 0.109 0.121
-hyp = {'giou': .035,  # giou loss gain
-       'xy': 0.20,  # xy loss gain
-       'wh': 0.10,  # wh loss gain
-       'cls': 0.035,  # cls loss gain
-       'cls_pw': 79.0,  # cls BCELoss positive_weight
-       'conf': 1.61,  # conf loss gain
-       'conf_pw': 3.53,  # conf BCELoss positive_weight
-       'iou_t': 0.29,  # iou target-anchor training threshold
+# 0.149 0.241 0.126 0.156 6.85 1.008 1.421 0.07989 16.94 6.215 10.61 4.272 0.251 0.001 -4 0.9 0.0005 320 64-1 giou
+# 0.111 0.27 0.132 0.131 3.96 1.276 0.3156 0.1425 21.21 6.224 11.59 8.83 0.376 0.001 -4 0.9 0.0005
+hyp = {'giou': 1.008,  # giou loss gain
+       'xy': 1.421,  # xy loss gain
+       'wh': 0.07989,  # wh loss gain
+       'cls': 16.94,  # cls loss gain
+       'cls_pw': 6.215,  # cls BCELoss positive_weight
+       'conf': 10.61,  # conf loss gain
+       'conf_pw': 4.272,  # conf BCELoss positive_weight
+       'iou_t': 0.251,  # iou target-anchor training threshold
        'lr0': 0.001,  # initial learning rate
        'lrf': -4.,  # final learning rate = lr0 * (10 ** lrf)
        'momentum': 0.90,  # SGD momentum
        'weight_decay': 0.0005}  # optimizer weight decay

-# hyp = {'giou': 1.0,  # giou loss gain
-#        'xy': 1.0,  # xy loss gain
-#        'wh': 1.0,  # wh loss gain
-#        'cls': 1.0,  # cls loss gain
+# 0.0945 0.279 0.114 0.131 25 0.035 0.2 0.1 0.035 79 1.61 3.53 0.29 0.001 -4 0.9 0.0005 320 64-1
+# 0.112 0.265 0.111 0.144 12.6 0.035 0.2 0.1 0.035 79 1.61 3.53 0.29 0.001 -4 0.9 0.0005 320 32-2
+# hyp = {'giou': .035,  # giou loss gain
+#        'xy': 0.20,  # xy loss gain
+#        'wh': 0.10,  # wh loss gain
+#        'cls': 0.035,  # cls loss gain
 #        'cls_pw': 79.0,  # cls BCELoss positive_weight
-#        'conf': 1.0,  # conf loss gain
-#        'conf_pw': 6.0,  # conf BCELoss positive_weight
+#        'conf': 1.61,  # conf loss gain
+#        'conf_pw': 3.53,  # conf BCELoss positive_weight
 #        'iou_t': 0.29,  # iou target-anchor training threshold
 #        'lr0': 0.001,  # initial learning rate
 #        'lrf': -4.,  # final learning rate = lr0 * (10 ** lrf)
@@ -43,12 +47,10 @@ def train(
         cfg,
         data_cfg,
         img_size=416,
-        resume=False,
-        epochs=100,  # 500200 batches at bs 4, 117263 images = 68 epochs
-        batch_size=16,
-        accumulate=4,  # effective bs = 64 = batch_size * accumulate
+        epochs=100,  # 500200 batches at bs 16, 117263 images = 273 epochs
+        batch_size=8,
+        accumulate=8,  # effective bs = batch_size * accumulate = 8 * 8 = 64
         freeze_backbone=False,
-        transfer=False  # Transfer learning (train only YOLO layers)
 ):
     init_seeds()
     weights = 'weights' + os.sep
@@ -76,10 +78,10 @@ def train(

     cutoff = -1  # backbone reaches to cutoff layer
     start_epoch = 0
-    best_loss = float('inf')
+    best_fitness = 0.0
     nf = int(model.module_defs[model.yolo_layers[0] - 1]['filters'])  # yolo layer size (i.e. 255)
-    if resume:  # Load previously saved model
-        if transfer:  # Transfer learning
+    if opt.resume or opt.transfer:  # Load previously saved model
+        if opt.transfer:  # Transfer learning
             chkpt = torch.load(weights + 'yolov3-spp.pt', map_location=device)
             model.load_state_dict({k: v for k, v in chkpt['model'].items() if v.numel() > 1 and v.shape[0] != 255},
                                   strict=False)
@@ -93,7 +95,7 @@ def train(
         start_epoch = chkpt['epoch'] + 1
         if chkpt['optimizer'] is not None:
             optimizer.load_state_dict(chkpt['optimizer'])
-            best_loss = chkpt['best_loss']
+            best_fitness = chkpt['best_fitness']
         del chkpt

     else:  # Initialize model with backbone (optional)
@@ -135,7 +137,11 @@ def train(

     # Initialize distributed training
     if torch.cuda.device_count() > 1:
-        dist.init_process_group(backend=opt.backend, init_method=opt.dist_url, world_size=opt.world_size, rank=opt.rank)
+        dist.init_process_group(backend='nccl',  # 'distributed backend'
+                                init_method='tcp://127.0.0.1:9999',  # distributed training init method
+                                world_size=1,  # number of nodes for distributed training
+                                rank=0)  # distributed training node rank

         model = torch.nn.parallel.DistributedDataParallel(model)
         # sampler = torch.utils.data.distributed.DistributedSampler(dataset)
@@ -148,12 +154,13 @@ def train(
                                             collate_fn=dataset.collate_fn)

     # Mixed precision training https://github.com/NVIDIA/apex
-    try:
-        from apex import amp
-        model, optimizer = amp.initialize(model, optimizer, opt_level='O1')
-        mixed_precision = True
-    except:  # not installed: install help: https://github.com/NVIDIA/apex/issues/259
-        mixed_precision = False
+    mixed_precision = True
+    if mixed_precision:
+        try:
+            from apex import amp
+            model, optimizer = amp.initialize(model, optimizer, opt_level='O1')
+        except:  # not installed: install help: https://github.com/NVIDIA/apex/issues/259
+            mixed_precision = False

     # Start training
     model.hyp = hyp  # attach hyperparameters to model
@@ -166,7 +173,8 @@ def train(
     t, t0 = time.time(), time.time()
     for epoch in range(start_epoch, epochs):
         model.train()
-        print(('\n%8s%12s' + '%10s' * 7) % ('Epoch', 'Batch', 'xy', 'wh', 'conf', 'cls', 'total', 'targets', 'time'))
+        print(('\n%8s%12s' + '%10s' * 7) %
+              ('Epoch', 'Batch', 'xy', 'wh', 'conf', 'cls', 'total', 'targets', 'img_size'))

         # Update scheduler
         scheduler.step()
@@ -183,15 +191,16 @@ def train(
         # dataset.indices = random.choices(range(dataset.n), weights=image_weights, k=dataset.n)  # random weighted index

         mloss = torch.zeros(5).to(device)  # mean losses
-        for i, (imgs, targets, _, _) in enumerate(dataloader):
+        pbar = tqdm(enumerate(dataloader), total=nb)  # progress bar
+        for i, (imgs, targets, _, _) in pbar:
             imgs = imgs.to(device)
             targets = targets.to(device)

-            # Multi-Scale training
+            # Multi-Scale training TODO: short-side to 32-multiple https://github.com/ultralytics/yolov3/issues/358
             if multi_scale:
-                if (i + 1 + nb * epoch) / accumulate % 10 == 0:  # adjust (67% - 150%) every 10 batches
+                if (i + nb * epoch) / accumulate % 10 == 0:  # adjust (67% - 150%) every 10 batches
                     img_size = random.choice(range(img_size_min, img_size_max + 1)) * 32
-                    print('img_size = %g' % img_size)
+                    # print('img_size = %g' % img_size)
                     scale_factor = img_size / max(imgs.shape[-2:])
                     imgs = F.interpolate(imgs, scale_factor=scale_factor, mode='bilinear', align_corners=False)

@@ -228,11 +237,11 @@ def train(

             # Print batch results
             mloss = (mloss * i + loss_items) / (i + 1)  # update mean losses
+            # s = ('%8s%12s' + '%10.3g' * 7) % ('%g/%g' % (epoch, epochs - 1), '%g/%g' % (i, nb - 1), *mloss, len(targets), time.time() - t)
             s = ('%8s%12s' + '%10.3g' * 7) % (
-                '%g/%g' % (epoch, epochs - 1),
-                '%g/%g' % (i, nb - 1), *mloss, len(targets), time.time() - t)
+                '%g/%g' % (epoch, epochs - 1), '%g/%g' % (i, nb - 1), *mloss, len(targets), img_size)
             t = time.time()
-            print(s)
+            pbar.set_description(s)  # print(s)

             # Report time
             dt = (time.time() - t0) / 3600
@@ -248,17 +257,17 @@ def train(
             with open('results.txt', 'a') as file:
                 file.write(s + '%11.3g' * 5 % results + '\n')  # P, R, mAP, F1, test_loss

-        # Update best loss
-        test_loss = results[4]
-        if test_loss < best_loss:
-            best_loss = test_loss
+        # Update best map
+        fitness = results[2]
+        if fitness > best_fitness:
+            best_fitness = fitness

         # Save training results
         save = (not opt.nosave) or (epoch == epochs - 1)
         if save:
             # Create checkpoint
             chkpt = {'epoch': epoch,
-                     'best_loss': best_loss,
+                     'best_fitness': best_fitness,
                      'model': model.module.state_dict() if type(
                          model) is nn.parallel.DistributedDataParallel else model.state_dict(),
                      'optimizer': optimizer.state_dict()}
@@ -267,7 +276,7 @@ def train(
             torch.save(chkpt, latest)

             # Save best checkpoint
-            if best_loss == test_loss:
+            if best_fitness == fitness:
                 torch.save(chkpt, best)

             # Save backup every 10 epochs (optional)
@@ -286,8 +295,15 @@ def print_mutation(hyp, results):
     b = '%11.4g' * len(hyp) % tuple(hyp.values())  # hyperparam values
     c = '%11.3g' * len(results) % results  # results (P, R, mAP, F1, test_loss)
     print('\n%s\n%s\nEvolved fitness: %s\n' % (a, b, c))
-    with open('evolve.txt', 'a') as f:
-        f.write(c + b + '\n')
+    if opt.cloud_evolve:
+        os.system('gsutil cp gs://yolov4/evolve.txt .')  # download evolve.txt
+        with open('evolve.txt', 'a') as f:  # append result to evolve.txt
+            f.write(c + b + '\n')
+        os.system('gsutil cp evolve.txt gs://yolov4')  # upload evolve.txt
+    else:
+        with open('evolve.txt', 'a') as f:
+            f.write(c + b + '\n')


 if __name__ == '__main__':
@@ -302,51 +318,46 @@ if __name__ == '__main__':
     parser.add_argument('--resume', action='store_true', help='resume training flag')
     parser.add_argument('--transfer', action='store_true', help='transfer learning flag')
     parser.add_argument('--num-workers', type=int, default=4, help='number of Pytorch DataLoader workers')
-    parser.add_argument('--dist-url', default='tcp://127.0.0.1:9999', type=str, help='distributed training init method')
-    parser.add_argument('--rank', default=0, type=int, help='distributed training node rank')
-    parser.add_argument('--world-size', default=1, type=int, help='number of nodes for distributed training')
-    parser.add_argument('--backend', default='nccl', type=str, help='distributed backend')
     parser.add_argument('--nosave', action='store_true', help='only save final checkpoint')
     parser.add_argument('--notest', action='store_true', help='only test final epoch')
     parser.add_argument('--giou', action='store_true', help='use GIoU loss instead of xy, wh loss')
-    parser.add_argument('--evolve', action='store_true', help='run hyperparameter evolution')
+    parser.add_argument('--evolve', action='store_true', help='evolve hyperparameters')
+    parser.add_argument('--cloud-evolve', action='store_true', help='evolve hyperparameters from a cloud source')
     parser.add_argument('--var', default=0, type=int, help='debug variable')
     opt = parser.parse_args()
     print(opt)

+    opt.evolve = opt.cloud_evolve or opt.evolve
     if opt.evolve:
         opt.notest = True  # only test final epoch
         opt.nosave = True  # only save final checkpoint

     # Train
-    results = train(
-        opt.cfg,
-        opt.data_cfg,
-        img_size=opt.img_size,
-        resume=opt.resume or opt.transfer,
-        transfer=opt.transfer,
-        epochs=opt.epochs,
-        batch_size=opt.batch_size,
-        accumulate=opt.accumulate,
-    )
+    results = train(opt.cfg,
+                    opt.data_cfg,
+                    img_size=opt.img_size,
+                    epochs=opt.epochs,
+                    batch_size=opt.batch_size,
+                    accumulate=opt.accumulate)

     # Evolve hyperparameters (optional)
     if opt.evolve:
-        best_fitness = results[2]  # use mAP for fitness
-
-        # Write mutation results
-        print_mutation(hyp, results)
-
         gen = 1000  # generations to evolve
+        print_mutation(hyp, results)  # Write mutation results
+
         for _ in range(gen):
-            # Mutate hyperparameters
-            old_hyp = hyp.copy()
-            init_seeds(seed=int(time.time()))
-            s = [.4, .4, .4, .4, .4, .4, .4, .4, .4 * 0, .4 * 0, .04 * 0, .4 * 0]  # fractional sigmas
+            # Get best hyperparameters
+            x = np.loadtxt('evolve.txt', ndmin=2)
+            x = x[x[:, 2].argmax()]  # select best mAP as genetic fitness (col 2)
             for i, k in enumerate(hyp.keys()):
-                x = (np.random.randn(1) * s[i] + 1) ** 1.1  # plt.hist(x.ravel(), 100)
-                hyp[k] = hyp[k] * float(x)  # vary by about 30% 1sigma
+                hyp[k] = x[i + 5]
+
+            # Mutate
+            init_seeds(seed=int(time.time()))
+            s = [.2, .2, .2, .2, .2, .2, .2, .2, .2 * 0, .2 * 0, .05 * 0, .2 * 0]  # fractional sigmas
+            for i, k in enumerate(hyp.keys()):
+                x = (np.random.randn(1) * s[i] + 1) ** 2.0  # plt.hist(x.ravel(), 300)
+                hyp[k] *= float(x)  # vary by 20% 1sigma

             # Clip to limits
             keys = ['lr0', 'iou_t', 'momentum', 'weight_decay']
@@ -354,30 +365,17 @@ if __name__ == '__main__':
             for k, v in zip(keys, limits):
                 hyp[k] = np.clip(hyp[k], v[0], v[1])

-            # Determine mutation fitness
-            results = train(
-                opt.cfg,
-                opt.data_cfg,
-                img_size=opt.img_size,
-                resume=opt.resume or opt.transfer,
-                transfer=opt.transfer,
-                epochs=opt.epochs,
-                batch_size=opt.batch_size,
-                accumulate=opt.accumulate,
-            )
-            mutation_fitness = results[2]
+            # Train mutation
+            results = train(opt.cfg,
+                            opt.data_cfg,
+                            img_size=opt.img_size,
+                            epochs=opt.epochs,
+                            batch_size=opt.batch_size,
+                            accumulate=opt.accumulate)

             # Write mutation results
             print_mutation(hyp, results)

-            # Update hyperparameters if fitness improved
-            if mutation_fitness > best_fitness:
-                # Fitness improved!
-                print('Fitness improved!')
-                best_fitness = mutation_fitness
-            else:
-                hyp = old_hyp.copy()  # reset hyp to
-
             # # Plot results
             # import numpy as np
             # import matplotlib.pyplot as plt
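
The evolution loop above also changes strategy: instead of tracking best_fitness in memory and reverting failed mutations via old_hyp, each generation re-seeds from the best row of evolve.txt (highest mAP, column 2) and mutates that. A self-contained sketch of the select-and-mutate step, assuming evolve.txt rows hold 5 result columns (P, R, mAP, F1, test loss) followed by the 12 hyp values, which is what the i + 5 offset implies; the dict literal is illustrative:

import numpy as np

hyp = {'giou': 1.008, 'xy': 1.421, 'wh': 0.07989, 'cls': 16.94,
       'cls_pw': 6.215, 'conf': 10.61, 'conf_pw': 4.272, 'iou_t': 0.251,
       'lr0': 0.001, 'lrf': -4., 'momentum': 0.90, 'weight_decay': 0.0005}

# Select: reload the all-time best generation from evolve.txt by mAP.
x = np.loadtxt('evolve.txt', ndmin=2)
x = x[x[:, 2].argmax()]  # row with best mAP (column 2)
for i, k in enumerate(hyp.keys()):
    hyp[k] = x[i + 5]  # hyp values start after the 5 result columns

# Mutate: multiplicative noise, ~20% 1-sigma; the zeroed sigmas freeze
# lr0, lrf, momentum and weight_decay during evolution.
s = [.2, .2, .2, .2, .2, .2, .2, .2, .2 * 0, .2 * 0, .05 * 0, .2 * 0]
for i, k in enumerate(hyp.keys()):
    hyp[k] *= float((np.random.randn(1) * s[i] + 1) ** 2.0)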
New file (Google Cloud Storage helpers; imported as google_utils in the hunk below):

@@ -0,0 +1,32 @@
+# This file contains google utils: https://cloud.google.com/storage/docs/reference/libraries
+# pip install --upgrade google-cloud-storage
+
+from google.cloud import storage
+
+
+def upload_blob(bucket_name, source_file_name, destination_blob_name):
+    # Uploads a file to a bucket
+    # https://cloud.google.com/storage/docs/uploading-objects#storage-upload-object-python
+
+    storage_client = storage.Client()
+    bucket = storage_client.get_bucket(bucket_name)
+    blob = bucket.blob(destination_blob_name)
+
+    blob.upload_from_filename(source_file_name)
+
+    print('File {} uploaded to {}.'.format(
+        source_file_name,
+        destination_blob_name))
+
+
+def download_blob(bucket_name, source_blob_name, destination_file_name):
+    # Downloads a blob from a bucket
+    storage_client = storage.Client()
+    bucket = storage_client.get_bucket(bucket_name)
+    blob = bucket.blob(source_blob_name)
+
+    blob.download_to_filename(destination_file_name)
+
+    print('Blob {} downloaded to {}.'.format(
+        source_blob_name,
+        destination_file_name))
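
A hedged usage sketch for the helpers above, assuming the module lands in the utils package (as the `from . import google_utils` line below suggests); the bucket and object names are placeholders, and google-cloud-storage authenticates via Application Default Credentials (e.g. GOOGLE_APPLICATION_CREDENTIALS pointing at a service-account key):

from utils import google_utils

# Copy a local checkpoint to a bucket, then fetch it back.
google_utils.upload_blob('my-bucket', 'weights/best.pt', 'runs/best.pt')
google_utils.download_blob('my-bucket', 'runs/best.pt', 'weights/best.pt')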
@@ -11,6 +11,7 @@ from PIL import Image
 from tqdm import tqdm

 from . import torch_utils
+from . import google_utils

 matplotlib.rc('font', **{'size': 11})

@@ -284,7 +285,7 @@ def compute_loss(p, targets, model, giou_loss=False):  # predictions, targets, model

     # Compute losses
     bs = p[0].shape[0]  # batch size
-    k = bs  # loss gain
+    k = bs / 64  # loss gain
     for i, pi0 in enumerate(p):  # layer i predictions, i
         b, a, gj, gi = indices[i]  # image, anchor, gridy, gridx
         tconf = torch.zeros_like(pi0[..., 0])  # conf
@@ -303,12 +304,12 @@ def compute_loss(p, targets, model, giou_loss=False):  # predictions, targets, model
             lxy += (k * h['xy']) * MSE(torch.sigmoid(pi[..., 0:2]), txy[i])  # xy loss
             lwh += (k * h['wh']) * MSE(pi[..., 2:4], twh[i])  # wh yolo loss

-            # tclsm = torch.zeros_like(pi[..., 5:])
-            # tclsm[range(len(b)), tcls[i]] = 1.0
-            # lcls += (k * h['cls']) * BCEcls(pi[..., 5:], tclsm)  # class_conf loss
-            lcls += (k * h['cls']) * CE(pi[..., 5:], tcls[i])  # class_conf loss
+            tclsm = torch.zeros_like(pi[..., 5:])
+            tclsm[range(len(b)), tcls[i]] = 1.0
+            lcls += (k * h['cls']) * BCEcls(pi[..., 5:], tclsm)  # cls loss (BCE)
+            # lcls += (k * h['cls']) * CE(pi[..., 5:], tcls[i])  # cls loss (CE)

-            # # Append to text file
+            # Append targets to text file
             # with open('targets.txt', 'a') as file:
             #     [file.write('%11.5g ' * 4 % tuple(x) + '\n') for x in torch.cat((txy[i], twh[i]), 1)]
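
Two things change in compute_loss: the loss gain k is normalized to bs / 64 so gains tuned at an effective batch size of 64 keep their meaning at other batch sizes, and the class loss switches from softmax cross-entropy over integer labels to per-class sigmoid BCE over a one-hot target matrix. An illustrative contrast of the two class-loss variants (shapes and values are placeholders, not this repo's tensors):

import torch
import torch.nn as nn

n, nc = 8, 80                      # matched anchors, number of classes
logits = torch.randn(n, nc)        # stands in for pi[..., 5:]
tcls = torch.randint(0, nc, (n,))  # integer class targets

# Old: softmax cross-entropy on integer labels (single-label).
ce = nn.CrossEntropyLoss()(logits, tcls)

# New: sigmoid BCE against a one-hot matrix (multi-label capable,
# and pairs naturally with the 'cls_pw' positive weight).
tclsm = torch.zeros_like(logits)
tclsm[range(n), tcls] = 1.0
bce = nn.BCEWithLogitsLoss()(logits, tclsm)
print(ce.item(), bce.item())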