multi_gpu (#135)

* updates
Glenn Jocher authored 2019-03-17 23:45:39 +02:00, committed by GitHub
parent 8c730e03cd
commit 45fac6bff1
7 changed files with 261 additions and 304 deletions
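Note: the heart of this change is multi-GPU training. train.py now wraps the model in nn.DataParallel whenever more than one GPU is visible, and unwraps it at checkpoint time so saved weights load identically on any device count. A minimal sketch of that pattern, assuming the repo's Darknet model and cfg path (the save path is illustrative):

import torch
import torch.nn as nn

from models import Darknet

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = Darknet('cfg/yolov3.cfg', img_size=416)

if torch.cuda.device_count() > 1:
    model = nn.DataParallel(model)  # replicate the model across all visible GPUs
model.to(device).train()

# Strip the DataParallel wrapper when saving so the checkpoint is device-agnostic
state = model.module.state_dict() if type(model) is nn.DataParallel else model.state_dict()
torch.save({'model': state}, 'latest.pt')  # illustrative path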

models.py (149 changed lines)

@@ -1,7 +1,4 @@
 import os
-from collections import defaultdict
-
-import torch.nn as nn
 
 from utils.parse_config import *
 from utils.utils import *
@@ -104,106 +101,63 @@ class YOLOLayer(nn.Module):
     def __init__(self, anchors, nC, img_size, yolo_layer, cfg):
         super(YOLOLayer, self).__init__()
 
-        nA = len(anchors)
         self.anchors = torch.FloatTensor(anchors)
-        self.nA = nA  # number of anchors (3)
+        self.nA = len(anchors)  # number of anchors (3)
         self.nC = nC  # number of classes (80)
         self.img_size = 0
-        # self.coco_class_weights = coco_class_weights()
 
-        if ONNX_EXPORT:  # grids must be computed in __init__
-            stride = [32, 16, 8][yolo_layer]  # stride of this layer
-            if cfg.endswith('yolov3-tiny.cfg'):
-                stride *= 2
+        # if ONNX_EXPORT:  # grids must be computed in __init__
+        stride = [32, 16, 8][yolo_layer]  # stride of this layer
+        if cfg.endswith('yolov3-tiny.cfg'):
+            stride *= 2
 
-            self.nG = int(img_size / stride)  # number grid points
-            create_grids(self, img_size, self.nG)
+        nG = int(img_size / stride)  # number grid points
+        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+        create_grids(self, img_size, nG, device)
 
-    def forward(self, p, img_size, targets=None, var=None):
+    def forward(self, p, img_size, var=None):
         if ONNX_EXPORT:
             bs, nG = 1, self.nG  # batch size, grid size
         else:
             bs, nG = p.shape[0], p.shape[-1]
 
             if self.img_size != img_size:
-                create_grids(self, img_size, nG)
+                create_grids(self, img_size, nG, p.device)
 
-                if p.is_cuda:
-                    self.grid_xy = self.grid_xy.cuda()
-                    self.anchor_wh = self.anchor_wh.cuda()
-
-        # p.view(bs, 255, 13, 13) -- > (bs, 3, 13, 13, 80)  # (bs, anchors, grid, grid, classes + xywh)
+        # p.view(bs, 255, 13, 13) -- > (bs, 3, 13, 13, 85)  # (bs, anchors, grid, grid, classes + xywh)
         p = p.view(bs, self.nA, self.nC + 5, nG, nG).permute(0, 1, 3, 4, 2).contiguous()  # prediction
 
-        # xy, width and height
-        xy = torch.sigmoid(p[..., 0:2])
-        wh = p[..., 2:4]  # wh (yolo method)
-        # wh = torch.sigmoid(p[..., 2:4])  # wh (power method)
-
-        # Training
-        if targets is not None:
-            MSELoss = nn.MSELoss()
-            BCEWithLogitsLoss = nn.BCEWithLogitsLoss()
-            CrossEntropyLoss = nn.CrossEntropyLoss()
-
-            # Get outputs
-            p_conf = p[..., 4]  # Conf
-            p_cls = p[..., 5:]  # Class
-
-            txy, twh, mask, tcls = build_targets(targets, self.anchor_vec, self.nA, self.nC, nG)
-
-            tcls = tcls[mask]
-            if p.is_cuda:
-                txy, twh, mask, tcls = txy.cuda(), twh.cuda(), mask.cuda(), tcls.cuda()
-
-            # Compute losses
-            nT = sum([len(x) for x in targets])  # number of targets
-            nM = mask.sum().float()  # number of anchors (assigned to targets)
-            k = 1  # nM / bs
-            if nM > 0:
-                lxy = k * MSELoss(xy[mask], txy[mask])
-                lwh = k * MSELoss(wh[mask], twh[mask])
-                lcls = (k / 4) * CrossEntropyLoss(p_cls[mask], torch.argmax(tcls, 1))
-                # lcls = (k * 10) * BCEWithLogitsLoss(p_cls[mask], tcls.float())
-            else:
-                FT = torch.cuda.FloatTensor if p.is_cuda else torch.FloatTensor
-                lxy, lwh, lcls, lconf = FT([0]), FT([0]), FT([0]), FT([0])
-
-            lconf = (k * 64) * BCEWithLogitsLoss(p_conf, mask.float())
-
-            # Sum loss components
-            loss = lxy + lwh + lconf + lcls
-
-            return loss, loss.item(), lxy.item(), lwh.item(), lconf.item(), lcls.item(), nT
-
-        else:
-            if ONNX_EXPORT:
-                grid_xy = self.grid_xy.repeat((1, self.nA, 1, 1, 1)).view((1, -1, 2))
-                anchor_wh = self.anchor_wh.repeat((1, 1, nG, nG, 1)).view((1, -1, 2)) / nG
-
-                # p = p.view(-1, 85)
-                # xy = xy + self.grid_xy[0]  # x, y
-                # wh = torch.exp(wh) * self.anchor_wh[0]  # width, height
-                # p_conf = torch.sigmoid(p[:, 4:5])  # Conf
-                # p_cls = F.softmax(p[:, 5:85], 1) * p_conf  # SSD-like conf
-                # return torch.cat((xy / nG, wh, p_conf, p_cls), 1).t()
-
-                p = p.view(1, -1, 5 + self.nC)
-                xy = xy.view(bs, self.nA * nG * nG, 2) + grid_xy  # x, y
-                wh = torch.exp(p[..., 2:4]) * anchor_wh  # width, height
-                p_conf = torch.sigmoid(p[..., 4:5])  # Conf
-                p_cls = p[..., 5:5 + self.nC]
-                # Broadcasting only supported on first dimension in CoreML. See onnx-coreml/_operators.py
-                # p_cls = F.softmax(p_cls, 2) * p_conf  # SSD-like conf
-                p_cls = torch.exp(p_cls).permute((2, 1, 0))
-                p_cls = p_cls / p_cls.sum(0).unsqueeze(0) * p_conf.permute((2, 1, 0))  # F.softmax() equivalent
-                p_cls = p_cls.permute(2, 1, 0)
-                return torch.cat((xy / nG, wh, p_conf, p_cls), 2).squeeze().t()
-
-            p[..., 0:2] = xy + self.grid_xy  # xy
-            p[..., 2:4] = torch.exp(wh) * self.anchor_wh  # wh yolo method
-            # p[..., 2:4] = ((wh * 2) ** 2) * self.anchor_wh  # wh power method
+        if self.training:
+            return p
+
+        elif ONNX_EXPORT:
+            grid_xy = self.grid_xy.repeat((1, self.nA, 1, 1, 1)).view((1, -1, 2))
+            anchor_wh = self.anchor_wh.repeat((1, 1, nG, nG, 1)).view((1, -1, 2)) / nG
+
+            # p = p.view(-1, 5 + self.nC)
+            # xy = xy + self.grid_xy[0]  # x, y
+            # wh = torch.exp(wh) * self.anchor_wh[0]  # width, height
+            # p_conf = torch.sigmoid(p[:, 4:5])  # Conf
+            # p_cls = F.softmax(p[:, 5:], 1) * p_conf  # SSD-like conf
+            # return torch.cat((xy / nG, wh, p_conf, p_cls), 1).t()
+
+            p = p.view(1, -1, 5 + self.nC)
+            xy = torch.sigmoid(p[..., 0:2]) + grid_xy  # x, y
+            wh = torch.exp(p[..., 2:4]) * anchor_wh  # width, height
+            p_conf = torch.sigmoid(p[..., 4:5])  # Conf
+            p_cls = p[..., 5:]
+            # Broadcasting only supported on first dimension in CoreML. See onnx-coreml/_operators.py
+            # p_cls = F.softmax(p_cls, 2) * p_conf  # SSD-like conf
+            p_cls = torch.exp(p_cls).permute((2, 1, 0))
+            p_cls = p_cls / p_cls.sum(0).unsqueeze(0) * p_conf.permute((2, 1, 0))  # F.softmax() equivalent
+            p_cls = p_cls.permute(2, 1, 0)
+            return torch.cat((xy / nG, wh, p_conf, p_cls), 2).squeeze().t()
+
+        else:  # inference
+            p[..., 0:2] = torch.sigmoid(p[..., 0:2]) + self.grid_xy  # xy
+            p[..., 2:4] = torch.exp(p[..., 2:4]) * self.anchor_wh  # wh yolo method
+            # p[..., 2:4] = ((torch.sigmoid(p[..., 2:4]) * 2) ** 2) * self.anchor_wh  # wh power method
             p[..., 4] = torch.sigmoid(p[..., 4])  # p_conf
             p[..., :4] *= self.stride
@@ -225,9 +179,7 @@ class Darknet(nn.Module):
         self.loss_names = ['loss', 'xy', 'wh', 'conf', 'cls', 'nT']
         self.losses = []
 
-    def forward(self, x, targets=None, var=0):
-        self.losses = defaultdict(float)
-        is_training = targets is not None
+    def forward(self, x, var=None):
         img_size = x.shape[-1]
         layer_outputs = []
         output = []
@@ -246,23 +198,15 @@ class Darknet(nn.Module):
                 layer_i = int(module_def['from'])
                 x = layer_outputs[-1] + layer_outputs[layer_i]
             elif mtype == 'yolo':
-                if is_training:  # get loss
-                    x, *losses = module[0](x, img_size, targets, var)
-                    for name, loss in zip(self.loss_names, losses):
-                        self.losses[name] += loss
-                else:  # get detections
-                    x = module[0](x, img_size)
+                x = module[0](x, img_size)
                 output.append(x)
             layer_outputs.append(x)
 
-        if is_training:
-            self.losses['nT'] /= 3
-
         if ONNX_EXPORT:
             output = torch.cat(output, 1)  # merge the 3 layers 85 x (507, 2028, 8112) to 85 x 10647
             return output[5:].t(), output[:4].t()  # ONNX scores, boxes
-        else:
-            return sum(output) if is_training else torch.cat(output, 1)
+
+        return output if self.training else torch.cat(output, 1)
 
 
 def get_yolo_layers(model):
@@ -270,17 +214,18 @@ def get_yolo_layers(model):
     return [i for i, x in enumerate(a) if x]  # [82, 94, 106] for yolov3
 
 
-def create_grids(self, img_size, nG):
+def create_grids(self, img_size, nG, device):
     self.stride = img_size / nG
 
     # build xy offsets
     grid_x = torch.arange(nG).repeat((nG, 1)).view((1, 1, nG, nG)).float()
     grid_y = grid_x.permute(0, 1, 3, 2)
-    self.grid_xy = torch.stack((grid_x, grid_y), 4)
+    self.grid_xy = torch.stack((grid_x, grid_y), 4).to(device)
 
     # build wh gains
-    self.anchor_vec = self.anchors / self.stride
-    self.anchor_wh = self.anchor_vec.view(1, self.nA, 1, 1, 2)
+    self.anchor_vec = self.anchors.to(device) / self.stride
+    self.anchor_wh = self.anchor_vec.view(1, self.nA, 1, 1, 2).to(device)
+    self.nG = torch.FloatTensor([nG]).to(device)
 
 
 def load_darknet_weights(self, weights, cutoff=-1):
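Note: with the loss computation moved out of YOLOLayer, forward() now has a three-way contract: raw per-layer predictions in training mode, a CoreML-friendly flat tensor under ONNX_EXPORT, and decoded boxes at inference. A minimal sketch of the first and last modes, assuming a 416-pixel input and the repo's cfg path:

import torch

from models import Darknet

model = Darknet('cfg/yolov3.cfg', img_size=416)
imgs = torch.zeros((1, 3, 416, 416))  # dummy batch

model.train()
pred = model(imgs)  # list of raw per-layer tensors for build_targets()/compute_loss()

model.eval()
with torch.no_grad():
    detections = model(imgs)  # decoded boxes, all 3 layers concatenated: (1, 10647, 85)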

test.py (78 changed lines)

@@ -17,7 +17,8 @@ def test(
         iou_thres=0.5,
         conf_thres=0.3,
         nms_thres=0.45,
-        save_json=False
+        save_json=False,
+        model=None
 ):
     device = torch_utils.select_device()
@@ -26,14 +27,15 @@ def test(
     nC = int(data_cfg_dict['classes'])  # number of classes (80 for COCO)
     test_path = data_cfg_dict['valid']
 
-    # Initialize model
-    model = Darknet(cfg, img_size)
-
-    # Load weights
-    if weights.endswith('.pt'):  # pytorch format
-        model.load_state_dict(torch.load(weights, map_location='cpu')['model'])
-    else:  # darknet format
-        load_darknet_weights(model, weights)
+    if model is None:
+        # Initialize model
+        model = Darknet(cfg, img_size)
+
+        # Load weights
+        if weights.endswith('.pt'):  # pytorch format
+            model.load_state_dict(torch.load(weights, map_location='cpu')['model'])
+        else:  # darknet format
+            load_darknet_weights(model, weights)
 
     model.to(device).eval()
@@ -43,32 +45,31 @@ def test(
     mean_mAP, mean_R, mean_P, seen = 0.0, 0.0, 0.0, 0
     print('%11s' * 5 % ('Image', 'Total', 'P', 'R', 'mAP'))
-    outputs, mAPs, mR, mP, TP, confidence, pred_class, target_class, jdict = \
-        [], [], [], [], [], [], [], [], []
+    mP, mR, mAPs, TP, jdict = [], [], [], [], []
     AP_accum, AP_accum_count = np.zeros(nC), np.zeros(nC)
     coco91class = coco80_to_coco91_class()
-    for batch_i, (imgs, targets, paths, shapes) in enumerate(dataloader):
+    for (imgs, targets, paths, shapes) in dataloader:
         t = time.time()
         output = model(imgs.to(device))
         output = non_max_suppression(output, conf_thres=conf_thres, nms_thres=nms_thres)
 
         # Compute average precision for each sample
-        for si, (labels, detections) in enumerate(zip(targets, output)):
+        for si, detections in enumerate(output):
+            labels = targets[targets[:, 0] == si, 1:]
             seen += 1
 
             if detections is None:
                 # If there are labels but no detections mark as zero AP
-                if labels.size(0) != 0:
-                    mAPs.append(0), mR.append(0), mP.append(0)
+                if len(labels) != 0:
+                    mP.append(0), mR.append(0), mAPs.append(0)
                 continue
 
             # Get detections sorted by decreasing confidence scores
-            detections = detections.cpu().numpy()
-            detections = detections[np.argsort(-detections[:, 4])]
+            detections = detections[(-detections[:, 4]).argsort()]
 
             if save_json:
                 # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ...
-                box = torch.from_numpy(detections[:, :4]).clone()  # xyxy
+                box = detections[:, :4].clone()  # xyxy
                 scale_coords(img_size, box, shapes[si])  # to original shape
                 box = xyxy2xywh(box)  # xywh
                 box[:, :2] -= box[:, 2:] / 2  # xy center to top-left corner
@@ -84,28 +85,24 @@ def test(
             # If no labels add number of detections as incorrect
             correct = []
-            if labels.size(0) == 0:
+            if len(labels) == 0:
                 # correct.extend([0 for _ in range(len(detections))])
-                mAPs.append(0), mR.append(0), mP.append(0)
+                mP.append(0), mR.append(0), mAPs.append(0)
                 continue
             else:
+                # Extract target boxes as (x1, y1, x2, y2)
+                target_box = xywh2xyxy(labels[:, 1:5]) * img_size
                 target_cls = labels[:, 0]
-
-                # Extract target boxes as (x1, y1, x2, y2)
-                target_boxes = xywh2xyxy(labels[:, 1:5]) * img_size
 
                 detected = []
-                for *pred_bbox, conf, obj_conf, obj_pred in detections:
-
-                    pred_bbox = torch.FloatTensor(pred_bbox).view(1, -1)
-                    # Compute iou with target boxes
-                    iou = bbox_iou(pred_bbox, target_boxes)
-                    # Extract index of largest overlap
-                    best_i = np.argmax(iou)
-                    # If overlap exceeds threshold and classification is correct mark as correct
-                    if iou[best_i] > iou_thres and obj_pred == labels[best_i, 0] and best_i not in detected:
+                for *pred_box, conf, cls_conf, cls_pred in detections:
+                    # Best iou, index between pred and targets
+                    iou, bi = bbox_iou(pred_box, target_box).max(0)
+
+                    # If iou > threshold and class is correct mark as correct
+                    if iou > iou_thres and cls_pred == target_cls[bi] and bi not in detected:
                         correct.append(1)
-                        detected.append(best_i)
+                        detected.append(bi)
                     else:
                         correct.append(0)
@@ -120,24 +117,24 @@ def test(
             AP_accum += np.bincount(AP_class, minlength=nC, weights=AP)
 
             # Compute mean AP across all classes in this image, and append to image list
-            mAPs.append(AP.mean())
-            mR.append(R.mean())
             mP.append(P.mean())
+            mR.append(R.mean())
+            mAPs.append(AP.mean())
 
             # Means of all images
-            mean_mAP = np.mean(mAPs)
-            mean_R = np.mean(mR)
             mean_P = np.mean(mP)
+            mean_R = np.mean(mR)
+            mean_mAP = np.mean(mAPs)
 
         # Print image mAP and running mean mAP
         print(('%11s%11s' + '%11.3g' * 4 + 's') %
               (seen, dataloader.nF, mean_P, mean_R, mean_mAP, time.time() - t))
 
     # Print mAP per class
-    print('%11s' * 5 % ('Image', 'Total', 'P', 'R', 'mAP') + '\n\nmAP Per Class:')
+    print('\nmAP Per Class:')
     for i, c in enumerate(load_classes(data_cfg_dict['names'])):
-        print('%15s: %-.4f' % (c, AP_accum[i] / (AP_accum_count[i] + 1E-16)))
+        if AP_accum_count[i]:
+            print('%15s: %-.4f' % (c, AP_accum[i] / (AP_accum_count[i])))
 
     # Save JSON
     if save_json:
@@ -159,7 +156,7 @@ def test(
         cocoEval.summarize()
 
     # Return mAP
-    return mean_mAP, mean_R, mean_P
+    return mean_P, mean_R, mean_mAP
 
 
 if __name__ == '__main__':
@@ -186,8 +183,7 @@ if __name__ == '__main__':
         opt.iou_thres,
         opt.conf_thres,
         opt.nms_thres,
-        opt.save_json
-    )
+        opt.save_json)
 
 # Image      Total          P          R        mAP  # YOLOv3 320
 # 32         5000       0.66      0.597      0.591
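Note: test() now accepts an optional model argument, so train.py can evaluate the live in-memory model at epoch end instead of rebuilding one from a weights file, and the return order changed from (mAP, R, P) to (P, R, mAP). Hypothetical calls, using the defaults that appear elsewhere in this diff:

import test  # this repo's test.py

# Standalone: test() builds a Darknet model and loads weights itself
P, R, mAP = test.test('cfg/yolov3.cfg', 'cfg/coco.data', weights='weights/latest.pt')

# From a training loop: pass the live model and skip the reload, as train.py now does
P, R, mAP = test.test('cfg/yolov3.cfg', 'cfg/coco.data', batch_size=16, img_size=416, model=model)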

train.py

@@ -17,7 +17,6 @@ def train(
         accumulated_batches=1,
         multi_scale=False,
         freeze_backbone=False,
-        var=0,
 ):
     weights = 'weights' + os.sep
     latest = weights + 'latest.pt'
@@ -48,10 +47,6 @@ def train(
         # Load weights to resume from
         model.load_state_dict(checkpoint['model'])
 
-        # if torch.cuda.device_count() > 1:
-        #     model = nn.DataParallel(model)
-        model.to(device).train()
-
         # Transfer learning (train only YOLO layers)
         # for i, (name, p) in enumerate(model.named_parameters()):
         #     p.requires_grad = True if (p.shape[0] == 255) else False
@@ -75,13 +70,13 @@ def train(
             load_darknet_weights(model, weights + 'yolov3-tiny.conv.15')
             cutoff = 15
 
-        # if torch.cuda.device_count() > 1:
-        #     model = nn.DataParallel(model)
-        model.to(device).train()
-
     # Set optimizer
     optimizer = torch.optim.SGD(filter(lambda x: x.requires_grad, model.parameters()), lr=lr0, momentum=.9)
 
+    if torch.cuda.device_count() > 1:
+        model = nn.DataParallel(model)
+    model.to(device).train()
+
     # Set scheduler
     # scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[54, 61], gamma=0.1)
@@ -90,16 +85,17 @@ def train(
     model_info(model)
     n_burnin = min(round(dataloader.nB / 5 + 1), 1000)  # number of burn-in batches
     for epoch in range(epochs):
+        model.train()
         epoch += start_epoch
 
-        print(('%8s%12s' + '%10s' * 7) % (
+        print(('\n%8s%12s' + '%10s' * 7) % (
             'Epoch', 'Batch', 'xy', 'wh', 'conf', 'cls', 'total', 'nTargets', 'time'))
 
         # Update scheduler (automatic)
         # scheduler.step()
 
        # Update scheduler (manual) at 0, 54, 61 epochs to 1e-3, 1e-4, 1e-5
-        if epoch > 50:
+        if epoch > 250:
             lr = lr0 / 10
         else:
             lr = lr0
@@ -113,10 +109,12 @@ def train(
                 p.requires_grad = False if (epoch == 0) else True
 
         ui = -1
-        rloss = defaultdict(float)  # running loss
         optimizer.zero_grad()
+        rloss = defaultdict(float)
         for i, (imgs, targets, _, _) in enumerate(dataloader):
-            if sum([len(x) for x in targets]) < 1:  # if no targets continue
+            targets = targets.to(device)
+            nT = targets.shape[0]
+            if nT == 0:  # if no targets continue
                 continue
 
             # SGD burn-in
@@ -125,8 +123,14 @@ def train(
                 for g in optimizer.param_groups:
                     g['lr'] = lr
 
+            # Run model
+            pred = model(imgs.to(device))
+
+            # Build targets
+            target_list = build_targets(model, targets, pred)
+
             # Compute loss
-            loss = model(imgs.to(device), targets, var=var)
+            loss, loss_dict = compute_loss(pred, target_list)
 
             # Compute gradient
             loss.backward()
@@ -138,49 +142,51 @@ def train(
             # Running epoch-means of tracked metrics
             ui += 1
-            for key, val in model.losses.items():
+            for key, val in loss_dict.items():
                 rloss[key] = (rloss[key] * ui + val) / (ui + 1)
 
             s = ('%8s%12s' + '%10.3g' * 7) % (
                 '%g/%g' % (epoch, epochs - 1),
                 '%g/%g' % (i, len(dataloader) - 1),
                 rloss['xy'], rloss['wh'], rloss['conf'],
-                rloss['cls'], rloss['loss'],
-                model.losses['nT'], time.time() - t0)
+                rloss['cls'], rloss['total'],
+                nT, time.time() - t0)
             t0 = time.time()
             print(s)
 
         # Update best loss
-        if rloss['loss'] < best_loss:
-            best_loss = rloss['loss']
-
-        # Save latest checkpoint
-        checkpoint = {'epoch': epoch,
-                      'best_loss': best_loss,
-                      'model': model.state_dict(),
-                      'optimizer': optimizer.state_dict()}
-        torch.save(checkpoint, latest)
+        if rloss['total'] < best_loss:
+            best_loss = rloss['total']
+
+        save = True  # save training results
+        if save:
+            # Save latest checkpoint
+            checkpoint = {'epoch': epoch,
+                          'best_loss': best_loss,
+                          'model': model.module.state_dict() if type(model) is nn.DataParallel else model.state_dict(),
+                          'optimizer': optimizer.state_dict()}
+            torch.save(checkpoint, latest)
 
-        # Save best checkpoint
-        if best_loss == rloss['loss']:
-            os.system('cp ' + latest + ' ' + best)
+            # Save best checkpoint
+            if best_loss == rloss['total']:
+                os.system('cp ' + latest + ' ' + best)
 
-        # Save backup weights every 5 epochs (optional)
-        # if (epoch > 0) & (epoch % 5 == 0):
-        #     os.system('cp ' + latest + ' ' + weights + 'backup{}.pt'.format(epoch))
+            # Save backup weights every 5 epochs (optional)
+            if (epoch > 0) & (epoch % 5 == 0):
+                os.system('cp ' + latest + ' ' + weights + 'backup{}.pt'.format(epoch))
 
         # Calculate mAP
         with torch.no_grad():
-            mAP, R, P = test.test(cfg, data_cfg, weights=latest, batch_size=batch_size, img_size=img_size)
+            P, R, mAP = test.test(cfg, data_cfg, weights=latest, batch_size=batch_size, img_size=img_size, model=model)
 
         # Write epoch results
         with open('results.txt', 'a') as file:
-            file.write(s + '%11.3g' * 3 % (mAP, P, R) + '\n')
+            file.write(s + '%11.3g' * 3 % (P, R, mAP) + '\n')
 
 if __name__ == '__main__':
     parser = argparse.ArgumentParser()
-    parser.add_argument('--epochs', type=int, default=100, help='number of epochs')
+    parser.add_argument('--epochs', type=int, default=270, help='number of epochs')
     parser.add_argument('--batch-size', type=int, default=16, help='size of each image batch')
     parser.add_argument('--accumulated-batches', type=int, default=1, help='number of batches before optimizer step')
     parser.add_argument('--cfg', type=str, default='cfg/yolov3.cfg', help='cfg file path')
@@ -188,7 +194,6 @@ if __name__ == '__main__':
     parser.add_argument('--multi-scale', action='store_true', help='random image sizes per batch 320 - 608')
     parser.add_argument('--img-size', type=int, default=32 * 13, help='pixels')
     parser.add_argument('--resume', action='store_true', help='resume training flag')
-    parser.add_argument('--var', type=float, default=0, help='test variable')
     opt = parser.parse_args()
     print(opt, end='\n\n')
@@ -203,5 +208,4 @@ if __name__ == '__main__':
         batch_size=opt.batch_size,
         accumulated_batches=opt.accumulated_batches,
         multi_scale=opt.multi_scale,
-        var=opt.var,
     )
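Note: the training step is now an explicit four-stage sequence instead of a single model call that hid the loss inside forward(): forward pass for raw predictions, target construction against those predictions, loss computation, then backprop. Restated in isolation with the identifiers this diff introduces (setup elided; the accumulation condition is an assumed sketch of the existing --accumulated-batches behavior):

# Sketch of one epoch of the new loop
for i, (imgs, targets, _, _) in enumerate(dataloader):
    targets = targets.to(device)                       # single (nT, 6) label tensor
    pred = model(imgs.to(device))                      # raw per-layer predictions
    target_list = build_targets(model, targets, pred)  # txy, twh, tcls, tconf, indices
    loss, loss_dict = compute_loss(pred, target_list)  # scalar loss + per-component dict
    loss.backward()
    if (i + 1) % accumulated_batches == 0:             # assumed accumulation rule
        optimizer.step()
        optimizer.zero_grad()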

utils/datasets.py

@@ -206,8 +206,11 @@ class LoadImagesAndLabels:  # for training
                 if nL > 0:
                     labels[:, 2] = 1 - labels[:, 2]
 
+            if nL > 0:
+                labels = np.concatenate((np.zeros((nL, 1), dtype='float32') + index, labels), 1)
+                labels_all.append(labels)
+
             img_all.append(img)
-            labels_all.append(torch.from_numpy(labels))
             img_paths.append(img_path)
             img_shapes.append((h, w))
@@ -216,6 +219,7 @@ class LoadImagesAndLabels:  # for training
         img_all = np.ascontiguousarray(img_all, dtype=np.float32)
         img_all /= 255.0
 
+        labels_all = torch.from_numpy(np.concatenate(labels_all, 0))
         return torch.from_numpy(img_all), labels_all, img_paths, img_shapes
     def __len__(self):
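Note: labels are now collated into one (nT, 6) tensor per batch, each row [image_index, class, x, y, w, h], instead of a Python list of per-image tensors; a single tensor can be moved to the GPU once and indexed per image. A small illustration with invented numbers:

import numpy as np
import torch

# Hypothetical two-image batch: per-image (n, 5) arrays of [class, x, y, w, h]
batch = [np.array([[0, .5, .5, .2, .2]], dtype='float32'),
         np.array([[1, .3, .3, .1, .1], [2, .7, .7, .4, .4]], dtype='float32')]

labels_all = []
for index, labels in enumerate(batch):
    nL = len(labels)
    if nL > 0:
        # prepend the image index, as the collate code above now does
        labels_all.append(np.concatenate((np.zeros((nL, 1), dtype='float32') + index, labels), 1))

targets = torch.from_numpy(np.concatenate(labels_all, 0))  # shape (3, 6)
per_image = targets[targets[:, 0] == 1, 1:]  # how test.py recovers image 1's labels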

utils/gcp.sh

@@ -4,6 +4,7 @@
 sudo rm -rf yolov3 && git clone https://github.com/ultralytics/yolov3
 bash yolov3/data/get_coco_dataset.sh
 sudo rm -rf cocoapi && git clone https://github.com/cocodataset/cocoapi && cd cocoapi/PythonAPI && make && cd ../.. && cp -r cocoapi/PythonAPI/pycocotools yolov3
+sudo shutdown
 
 # Start
 python3 train.py
@@ -15,6 +16,14 @@ python3 train.py --resume
 gsutil cp gs://ultralytics/yolov3.pt yolov3/weights
 python3 detect.py
 
+# Clone branch
+sudo rm -rf yolov3 && git clone -b multi_gpu --depth 1 https://github.com/ultralytics/yolov3
+cd yolov3 && python3 train.py --batch-size 104
+
+sudo rm -rf yolov3 && git clone -b multigpu --depth 1 https://github.com/alexpolichroniadis/yolov3
+cp coco.data yolov3/cfg
+cd yolov3 && python3 train.py --batch-size 104
+
 # Test
 sudo rm -rf yolov3 && git clone https://github.com/ultralytics/yolov3
 sudo rm -rf cocoapi && git clone https://github.com/cocodataset/cocoapi && cd cocoapi/PythonAPI && make && cd ../.. && cp -r cocoapi/PythonAPI/pycocotools yolov3

utils/torch_utils.py

@@ -16,10 +16,11 @@ def select_device(force_cpu=False):
     device = torch.device('cuda:0' if cuda else 'cpu')
 
     if torch.cuda.device_count() > 1:
+        device = torch.device('cuda' if cuda else 'cpu')
         print('Found %g GPUs' % torch.cuda.device_count())
-        print('WARNING Multi-GPU Issue: https://github.com/ultralytics/yolov3/issues/21')
-        torch.cuda.set_device(0)  # OPTIONAL: Set your GPU if multiple available
-        # # print('Using ', torch.cuda.device_count(), ' GPUs')
+        # print('Multi-GPU Issue: https://github.com/ultralytics/yolov3/issues/21')
+        # torch.cuda.set_device(0)  # OPTIONAL: Set your GPU if multiple available
+        # print('Using ', torch.cuda.device_count(), ' GPUs')
 
     print('Using %s %s\n' % (device.type, torch.cuda.get_device_properties(0) if cuda else ''))
     return device
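Note: the device choice matters for DataParallel: 'cuda:0' pins work to a single GPU, while a bare 'cuda' device lets the wrapper fan out across every visible GPU. Restating the new selection logic on its own:

import torch

cuda = torch.cuda.is_available()
device = torch.device('cuda:0' if cuda else 'cpu')  # single-GPU (or CPU) default
if torch.cuda.device_count() > 1:
    device = torch.device('cuda')  # unindexed, so nn.DataParallel can use all GPUs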

utils/utils.py

@@ -1,10 +1,12 @@
 import glob
 import random
+from collections import defaultdict
 
 import cv2
 import matplotlib.pyplot as plt
 import numpy as np
 import torch
+import torch.nn as nn
 import torch.nn.functional as F
 
 from utils import torch_utils
@@ -25,15 +27,14 @@ def init_seeds(seed=0):
 def load_classes(path):
-    """
-    Loads class labels at 'path'
-    """
+    # Loads class labels at 'path'
     fp = open(path, 'r')
     names = fp.read().split('\n')
     return list(filter(None, names))  # filter removes empty strings (such as last line)
 
 
-def model_info(model):  # Plots a line-by-line description of a PyTorch model
+def model_info(model):
+    # Plots a line-by-line description of a PyTorch model
     n_p = sum(x.numel() for x in model.parameters())  # number parameters
     n_g = sum(x.numel() for x in model.parameters() if x.requires_grad)  # number gradients
     print('\n%5s %50s %9s %12s %20s %12s %12s' % ('layer', 'name', 'gradient', 'parameters', 'shape', 'mu', 'sigma'))
@@ -41,7 +42,7 @@ def model_info(model):
         name = name.replace('module_list.', '')
         print('%5g %50s %9s %12g %20s %12.3g %12.3g' % (
             i, name, p.requires_grad, p.numel(), list(p.shape), p.mean(), p.std()))
-    print('Model Summary: %g layers, %g parameters, %g gradients\n' % (i + 1, n_p, n_g))
+    print('Model Summary: %g layers, %g parameters, %g gradients' % (i + 1, n_p, n_g))
 def coco_class_weights():  # frequency of each class in coco train2014
@@ -66,7 +67,8 @@ def coco80_to_coco91_class():  # converts 80-index (val2014) to 91-index (paper)
     return x
 
 
-def plot_one_box(x, img, color=None, label=None, line_thickness=None):  # Plots one bounding box on image img
+def plot_one_box(x, img, color=None, label=None, line_thickness=None):
+    # Plots one bounding box on image img
     tl = line_thickness or round(0.002 * max(img.shape[0:2])) + 1  # line thickness
     color = color or [random.randint(0, 255) for _ in range(3)]
     c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3]))
@@ -122,7 +124,7 @@ def scale_coords(img_size, coords, img0_shape):
 def ap_per_class(tp, conf, pred_cls, target_cls):
     """ Compute the average precision, given the recall and precision curves.
-    Method originally from https://github.com/rafaelpadilla/Object-Detection-Metrics.
+    Source: https://github.com/rafaelpadilla/Object-Detection-Metrics.
     # Arguments
         tp:    True positives (list).
         conf:  Objectness value from 0-1 (list).
@@ -176,7 +178,7 @@ def ap_per_class(tp, conf, pred_cls, target_cls):
 def compute_ap(recall, precision):
     """ Compute the average precision, given the recall and precision curves.
-    Code originally from https://github.com/rbgirshick/py-faster-rcnn.
+    Source: https://github.com/rbgirshick/py-faster-rcnn.
     # Arguments
         recall:    The recall curve (list).
         precision: The precision curve (list).
@@ -203,105 +205,127 @@ def compute_ap(recall, precision):
 def bbox_iou(box1, box2, x1y1x2y2=True):
-    box1 = box1.t()
+    # Returns the IoU of box1 to box2. box1 is 4, box2 is nx4
     box2 = box2.t()
-    """
-    Returns the IoU of two bounding boxes
-    """
+
+    # Get the coordinates of bounding boxes
     if x1y1x2y2:
-        # Get the coordinates of bounding boxes
+        # x1, y1, x2, y2 = box1
         b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3]
         b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3]
     else:
-        # x1, y1, w1, h1 = box1
-        # Transform from center and width to exact coordinates
+        # x, y, w, h = box1
         b1_x1, b1_x2 = box1[0] - box1[2] / 2, box1[0] + box1[2] / 2
         b1_y1, b1_y2 = box1[1] - box1[3] / 2, box1[1] + box1[3] / 2
         b2_x1, b2_x2 = box2[0] - box2[2] / 2, box2[0] + box2[2] / 2
         b2_y1, b2_y2 = box2[1] - box2[3] / 2, box2[1] + box2[3] / 2
 
-    # get the coordinates of the intersection rectangle
-    inter_rect_x1 = torch.max(b1_x1, b2_x1)
-    inter_rect_y1 = torch.max(b1_y1, b2_y1)
-    inter_rect_x2 = torch.min(b1_x2, b2_x2)
-    inter_rect_y2 = torch.min(b1_y2, b2_y2)
     # Intersection area
-    inter_area = torch.clamp(inter_rect_x2 - inter_rect_x1, 0) * torch.clamp(inter_rect_y2 - inter_rect_y1, 0)
+    inter_area = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * \
+                 (torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(0)
+
     # Union Area
-    b1_area = (b1_x2 - b1_x1) * (b1_y2 - b1_y1)
-    b2_area = (b2_x2 - b2_x1) * (b2_y2 - b2_y1)
-
-    return inter_area / (b1_area + b2_area - inter_area + 1e-16)
-
-
-def build_targets(target, anchor_vec, nA, nC, nG):
-    """
-    returns nT, nCorrect, tx, ty, tw, th, tconf, tcls
-    """
-    nB = len(target)  # number of images in batch
-    txy = torch.zeros(nB, nA, nG, nG, 2)  # batch size, anchors, grid size
-    twh = torch.zeros(nB, nA, nG, nG, 2)
-    tconf = torch.ByteTensor(nB, nA, nG, nG).fill_(0)
-    tcls = torch.ByteTensor(nB, nA, nG, nG, nC).fill_(0)  # nC = number of classes
-    for b in range(nB):
-        t = target[b]
-        nTb = len(t)  # number of targets
-        if nTb == 0:
-            continue
-        gxy, gwh = t[:, 1:3] * nG, t[:, 3:5] * nG
-
-        # Get grid box indices and prevent overflows (i.e. 13.01 on 13 anchors)
-        gi, gj = torch.clamp(gxy.long(), min=0, max=nG - 1).t()
-
-        # iou of targets-anchors (using wh only)
-        box1 = gwh
-        box2 = anchor_vec.unsqueeze(1)
-        inter_area = torch.min(box1, box2).prod(2)
-        iou = inter_area / (box1.prod(1) + box2.prod(2) - inter_area + 1e-16)
-
-        # Select best iou_pred and anchor
-        iou_best, a = iou.max(0)  # best anchor [0-2] for each target
-
-        # Select best unique target-anchor combinations
-        if nTb > 1:
-            iou_order = torch.argsort(-iou_best)  # best to worst
-
-            # Unique anchor selection
-            u = torch.stack((gi, gj, a), 0)[:, iou_order]
-            # _, first_unique = np.unique(u, axis=1, return_index=True)  # first unique indices
-            first_unique = return_torch_unique_index(u, torch.unique(u, dim=1))  # torch alternative
-            i = iou_order[first_unique]
-
-            # best anchor must share significant commonality (iou) with target
-            i = i[iou_best[i] > 0.10]  # TODO: examine arbitrary threshold
-            if len(i) == 0:
-                continue
-
-            a, gj, gi, t = a[i], gj[i], gi[i], t[i]
-            if len(t.shape) == 1:
-                t = t.view(1, 5)
-        else:
-            if iou_best < 0.10:
-                continue
-
-        tc, gxy, gwh = t[:, 0].long(), t[:, 1:3] * nG, t[:, 3:5] * nG
-
-        # XY coordinates
-        txy[b, a, gj, gi] = gxy - gxy.floor()
-
-        # Width and height
-        twh[b, a, gj, gi] = torch.log(gwh / anchor_vec[a])  # yolo method
-        # twh[b, a, gj, gi] = torch.sqrt(gwh / anchor_vec[a]) / 2  # power method
-
-        # One-hot encoding of label
-        tcls[b, a, gj, gi, tc] = 1
-        tconf[b, a, gj, gi] = 1
-
-    return txy, twh, tconf, tcls
+    union_area = ((b1_x2 - b1_x1) * (b1_y2 - b1_y1) + 1e-16) + \
+                 (b2_x2 - b2_x1) * (b2_y2 - b2_y1) - inter_area
+
+    return inter_area / union_area  # iou
+
+
+def wh_iou(box1, box2):
+    # Returns the IoU of wh1 to wh2. wh1 is 2, wh2 is nx2
+    box2 = box2.t()
+
+    # w, h = box1
+    w1, h1 = box1[0], box1[1]
+    w2, h2 = box2[0], box2[1]
+
+    # Intersection area
+    inter_area = torch.min(w1, w2) * torch.min(h1, h2)
+
+    # Union Area
+    union_area = (w1 * h1 + 1e-16) + w2 * h2 - inter_area
+
+    return inter_area / union_area  # iou
+
+
+def compute_loss(p, targets):  # predictions, targets
+    FT = torch.cuda.FloatTensor if p[0].is_cuda else torch.FloatTensor
+    loss, lxy, lwh, lcls, lconf = FT([0]), FT([0]), FT([0]), FT([0]), FT([0])
+    txy, twh, tcls, tconf, indices = targets
+    MSE = nn.MSELoss()
+    CE = nn.CrossEntropyLoss()
+    BCE = nn.BCEWithLogitsLoss()
+
+    # Compute losses
+    # gp = [x.numel() for x in tconf]  # grid points
+    for i, pi0 in enumerate(p):  # layer i predictions, i
+        b, a, gj, gi = indices[i]  # image, anchor, gridx, gridy
+
+        # Compute losses
+        k = 1  # nT / bs
+        if len(b) > 0:
+            pi = pi0[b, a, gj, gi]  # predictions closest to anchors
+            lxy += k * MSE(torch.sigmoid(pi[..., 0:2]), txy[i])  # xy
+            lwh += k * MSE(pi[..., 2:4], twh[i])  # wh
+            lcls += (k / 4) * CE(pi[..., 5:], tcls[i])
+
+        # pos_weight = FT([gp[i] / min(gp) * 4.])
+        # BCE = nn.BCEWithLogitsLoss(pos_weight=pos_weight)
+        lconf += (k * 64) * BCE(pi0[..., 4], tconf[i])
+    loss = lxy + lwh + lconf + lcls
+
+    # Add to dictionary
+    d = defaultdict(float)
+    losses = [loss.item(), lxy.item(), lwh.item(), lconf.item(), lcls.item()]
+    for name, x in zip(['total', 'xy', 'wh', 'conf', 'cls'], losses):
+        d[name] = x
+
+    return loss, d
+
+
+def build_targets(model, targets, pred):
+    # targets = [image, class, x, y, w, h]
+    if isinstance(model, nn.DataParallel):
+        model = model.module
+    yolo_layers = get_yolo_layers(model)
+
+    # anchors = closest_anchor(model, targets)  # [layer, anchor, i, j]
+    txy, twh, tcls, tconf, indices = [], [], [], [], []
+    for i, layer in enumerate(yolo_layers):
+        nG = model.module_list[layer][0].nG  # grid size
+        anchor_vec = model.module_list[layer][0].anchor_vec
+
+        # iou of targets-anchors
+        gwh = targets[:, 4:6] * nG
+        iou = [wh_iou(x, gwh) for x in anchor_vec]
+        iou, a = torch.stack(iou, 0).max(0)  # best iou and anchor
+
+        # reject below threshold ious (OPTIONAL)
+        j = iou > 0.01
+        t, a, gwh = targets[j], a[j], gwh[j]
+
+        # Indices
+        b, c = t[:, 0:2].long().t()  # target image, class
+        gxy = t[:, 2:4] * nG
+        gi, gj = gxy.long().t()  # grid_i, grid_j
+        indices.append((b, a, gj, gi))
+
+        # XY coordinates
+        txy.append(gxy - gxy.floor())
+
+        # Width and height
+        twh.append(torch.log(gwh / anchor_vec[a]))  # yolo method
+        # twh.append(torch.sqrt(gwh / anchor_vec[a]) / 2)  # power method
+
+        # Class
+        tcls.append(c)
+
+        # Conf
+        tci = torch.zeros_like(pred[i][..., 0])
+        tci[b, a, gj, gi] = 1  # conf
+        tconf.append(tci)
+
+    return txy, twh, tcls, tconf, indices
 
 
 def non_max_suppression(prediction, conf_thres=0.5, nms_thres=0.4):
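Note: build_targets() above matches each ground-truth box to its best anchor by width-height IoU alone (position-independent), then records (image, anchor, grid_y, grid_x) index tuples so compute_loss() can gather the responsible predictions with one indexing step per YOLO layer. A toy check of wh_iou with invented numbers:

import torch

def wh_iou(box1, box2):  # copy of the helper added above
    box2 = box2.t()
    w1, h1 = box1[0], box1[1]
    w2, h2 = box2[0], box2[1]
    inter_area = torch.min(w1, w2) * torch.min(h1, h2)
    union_area = (w1 * h1 + 1e-16) + w2 * h2 - inter_area
    return inter_area / union_area

anchor = torch.tensor([3.6, 2.8])             # one anchor, in grid units
gwh = torch.tensor([[4.0, 3.0], [1.0, 1.0]])  # two target boxes, in grid units
print(wh_iou(anchor, gwh))                    # tensor([0.8400, 0.0992])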
@@ -314,34 +338,6 @@ def non_max_suppression(prediction, conf_thres=0.5, nms_thres=0.4):
     output = [None for _ in range(len(prediction))]
     for image_i, pred in enumerate(prediction):
-        # Filter out confidence scores below threshold
-        # Get score and class with highest confidence
-
-        # cross-class NMS (experimental)
-        cross_class_nms = False
-        if cross_class_nms:
-            a = pred.clone()
-            _, indices = torch.sort(-a[:, 4], 0)  # sort best to worst
-            a = a[indices]
-            radius = 30  # area to search for cross-class ious
-            for i in range(len(a)):
-                if i >= len(a) - 1:
-                    break
-                close = (torch.abs(a[i, 0] - a[i + 1:, 0]) < radius) & (torch.abs(a[i, 1] - a[i + 1:, 1]) < radius)
-                close = close.nonzero()
-                if len(close) > 0:
-                    close = close + i + 1
-                    iou = bbox_iou(a[i:i + 1, :4], a[close.squeeze(), :4].reshape(-1, 4), x1y1x2y2=False)
-                    bad = close[iou > nms_thres]
-                    if len(bad) > 0:
-                        mask = torch.ones(len(a)).type(torch.ByteTensor)
-                        mask[bad] = 0
-                        a = a[mask]
-            pred = a
-
         # Experiment: Prior class size rejection
         # x, y, w, h = pred[:, 0], pred[:, 1], pred[:, 2], pred[:, 3]
         # a = w * h  # area
@@ -355,6 +351,7 @@ def non_max_suppression(prediction, conf_thres=0.5, nms_thres=0.4):
             #     shape_likelihood[:, c] =
             #         multivariate_normal.pdf(x, mean=mat['class_mu'][c, :2], cov=mat['class_cov'][c, :2, :2])
 
+        # Filter out confidence scores below threshold
         class_prob, class_pred = torch.max(F.softmax(pred[:, 5:], 1), 1)
         v = pred[:, 4] > conf_thres
         v = v.nonzero().squeeze()
@@ -376,9 +373,7 @@ def non_max_suppression(prediction, conf_thres=0.5, nms_thres=0.4):
         # Detections ordered as (x1, y1, x2, y2, obj_conf, class_prob, class_pred)
         detections = torch.cat((pred[:, :5], class_prob.float().unsqueeze(1), class_pred.float().unsqueeze(1)), 1)
         # Iterate through all predicted classes
-        unique_labels = detections[:, -1].cpu().unique()
-        if prediction.is_cuda:
-            unique_labels = unique_labels.cuda(prediction.device)
+        unique_labels = detections[:, -1].cpu().unique().to(prediction.device)
 
         nms_style = 'OR'  # 'OR' (default), 'AND', 'MERGE' (experimental)
         for c in unique_labels:
@@ -393,15 +388,15 @@ def non_max_suppression(prediction, conf_thres=0.5, nms_thres=0.4):
             ind = list(range(len(dc)))
             if nms_style == 'OR':  # default
                 while len(ind):
-                    di = dc[ind[0]:ind[0] + 1]
-                    det_max.append(di)  # save highest conf detection
-                    reject = bbox_iou(di, dc[ind]) > nms_thres
+                    j = ind[0]
+                    det_max.append(dc[j:j + 1])  # save highest conf detection
+                    reject = bbox_iou(dc[j], dc[ind]) > nms_thres
                     [ind.pop(i) for i in reversed(reject.nonzero())]
 
-                # while dc.shape[0]:  # SLOWER
+                # while dc.shape[0]:  # SLOWER METHOD
                 #     det_max.append(dc[:1])  # save highest conf detection
                 #     if len(dc) == 1:  # Stop if we're at the last detection
                 #         break
-                #     iou = bbox_iou(dc[:1], dc[1:])  # iou with other boxes
+                #     iou = bbox_iou(dc[0], dc[1:])  # iou with other boxes
                 #     dc = dc[1:][iou < nms_thres]  # remove ious > threshold
 
                 # Image      Total          P          R        mAP
@@ -409,14 +404,14 @@ def non_max_suppression(prediction, conf_thres=0.5, nms_thres=0.4):
             elif nms_style == 'AND':  # requires overlap, single boxes erased
                 while len(dc) > 1:
-                    iou = bbox_iou(dc[:1], dc[1:])  # iou with other boxes
+                    iou = bbox_iou(dc[0], dc[1:])  # iou with other boxes
                     if iou.max() > 0.5:
                         det_max.append(dc[:1])
                     dc = dc[1:][iou < nms_thres]  # remove ious > threshold
 
             elif nms_style == 'MERGE':  # weighted mixture box
                 while len(dc) > 0:
-                    iou = bbox_iou(dc[:1], dc[0:])  # iou with other boxes
+                    iou = bbox_iou(dc[0], dc[0:])  # iou with other boxes
                     i = iou > nms_thres
 
                     weights = dc[i, 4:5] * dc[i, 5:6]
@@ -435,6 +430,11 @@ def non_max_suppression(prediction, conf_thres=0.5, nms_thres=0.4):
     return output
 
 
+def get_yolo_layers(model):
+    bool_vec = [x['type'] == 'yolo' for x in model.module_defs]
+    return [i for i, x in enumerate(bool_vec) if x]  # [82, 94, 106] for yolov3
+
+
 def return_torch_unique_index(u, uv):
     n = uv.shape[1]  # number of columns
     first_unique = torch.zeros(n, device=u.device).long()
@@ -446,15 +446,13 @@ def return_torch_unique_index(u, uv):
 
 def strip_optimizer_from_checkpoint(filename='weights/best.pt'):
     # Strip optimizer from *.pt files for lighter files (reduced by 2/3 size)
-
     a = torch.load(filename, map_location='cpu')
     a['optimizer'] = []
     torch.save(a, filename.replace('.pt', '_lite.pt'))
 
 
 def coco_class_count(path='../coco/labels/train2014/'):
-    # histogram of occurrences per class
-
+    # Histogram of occurrences per class
     nC = 80  # number classes
     x = np.zeros(nC, dtype='int32')
     files = sorted(glob.glob('%s/*.*' % path))
@@ -465,8 +463,7 @@ def coco_class_count(path='../coco/labels/train2014/'):
 def coco_only_people(path='../coco/labels/val2014/'):
-    # find images with only people
-
+    # Find images with only people
     files = sorted(glob.glob('%s/*.*' % path))
     for i, file in enumerate(files):
         labels = np.loadtxt(file, dtype=np.float32).reshape(-1, 5)
@@ -474,19 +471,20 @@ def coco_only_people(path='../coco/labels/val2014/'):
             print(labels.shape[0], file)
 
 
-def plot_results():
+def plot_results(start=0):
     # Plot YOLO training results file 'results.txt'
-    # import os; os.system('wget https://storage.googleapis.com/ultralytics/yolov3/results_v1.txt')
+    # import os; os.system('wget https://storage.googleapis.com/ultralytics/yolov3/results_v3.txt')
+    # from utils.utils import *; plot_results()
     plt.figure(figsize=(14, 7))
-    s = ['X + Y', 'Width + Height', 'Confidence', 'Classification', 'Total Loss', 'mAP', 'Recall', 'Precision']
+    s = ['X + Y', 'Width + Height', 'Confidence', 'Classification', 'Total Loss', 'Precision', 'Recall', 'mAP']
     files = sorted(glob.glob('results*.txt'))
     for f in files:
         results = np.loadtxt(f, usecols=[2, 3, 4, 5, 6, 9, 10, 11]).T  # column 11 is mAP
         x = range(1, results.shape[1])
         for i in range(8):
             plt.subplot(2, 4, i + 1)
-            plt.plot(x, results[i, x], marker='.', label=f)
+            plt.plot(results[i, x[start:]], marker='.', label=f)
             plt.title(s[i])
             if i == 0:
                 plt.legend()