This commit is contained in:
Glenn Jocher 2019-02-10 22:01:53 +01:00
parent 62761cffe6
commit 22dc8c0ea6
3 changed files with 16 additions and 102 deletions

View File

@ -141,7 +141,7 @@ class YOLOLayer(nn.Module):
self.grid_xy = torch.cat((self.grid_x, self.grid_y), 2)
self.anchor_wh = torch.cat((self.anchor_w, self.anchor_h), 2) / nG
def forward(self, p, targets=None, batch_report=False, var=None):
def forward(self, p, targets=None, var=None):
FT = torch.cuda.FloatTensor if p.is_cuda else torch.FloatTensor
bs = 1 if ONNX_EXPORT else p.shape[0] # batch size
nG = self.nG # number of grid points
@ -178,18 +178,7 @@ class YOLOLayer(nn.Module):
# width = ((w.data * 2) ** 2) * self.anchor_w
# height = ((h.data * 2) ** 2) * self.anchor_h
p_boxes = None
if batch_report:
# Predicted boxes: add offset and scale with anchors (in grid space, i.e. 0-13)
gx = x.data + self.grid_x[:, :, :nG, :nG]
gy = y.data + self.grid_y[:, :, :nG, :nG]
p_boxes = torch.stack((gx - width / 2,
gy - height / 2,
gx + width / 2,
gy + height / 2), 4) # x1y1x2y2
tx, ty, tw, th, mask, tcls, TP, FP, FN, TC = \
build_targets(p_boxes, p_conf, p_cls, targets, self.anchor_wh, self.nA, self.nC, nG, batch_report)
tx, ty, tw, th, mask, tcls = build_targets(targets, self.anchor_wh, self.nA, self.nC, nG)
tcls = tcls[mask]
if x.is_cuda:
@ -214,26 +203,9 @@ class YOLOLayer(nn.Module):
lconf = (k * 64) * BCEWithLogitsLoss(p_conf, mask.float())
# Sum loss components
balance_losses_flag = False
if balance_losses_flag:
k = 1 / self.loss_means.clone()
loss = (lx * k[0] + ly * k[1] + lw * k[2] + lh * k[3] + lconf * k[4] + lcls * k[5]) / k.mean()
loss = lx + ly + lw + lh + lconf + lcls
self.loss_means = self.loss_means * 0.99 + \
FT([lx.data, ly.data, lw.data, lh.data, lconf.data, lcls.data]) * 0.01
loss = lx + ly + lw + lh + lconf + lcls
# Sum False Positives from unassigned anchors
FPe = torch.zeros(self.nC)
if batch_report:
i = torch.sigmoid(p_conf[~mask]) > 0.5
if i.sum() > 0:
FP_classes = torch.argmax(p_cls[~mask][i], 1)
FPe = torch.bincount(FP_classes, minlength=self.nC).float().cpu() # extra FPs
return loss, loss.item(), lx.item(), ly.item(), lw.item(), lh.item(), lconf.item(), lcls.item(), \
nT, TP, FP, FPe, FN, TC
return loss, loss.item(), lx.item(), ly.item(), lw.item(), lh.item(), lconf.item(), lcls.item(), nT
@ -273,9 +245,9 @@ class Darknet(nn.Module):
self.module_defs[0]['height'] = img_size
self.hyperparams, self.module_list = create_modules(self.module_defs)
self.img_size = img_size
self.loss_names = ['loss', 'x', 'y', 'w', 'h', 'conf', 'cls', 'nT', 'TP', 'FP', 'FPe', 'FN', 'TC']
self.loss_names = ['loss', 'x', 'y', 'w', 'h', 'conf', 'cls', 'nT']
def forward(self, x, targets=None, batch_report=False, var=0):
def forward(self, x, targets=None, var=0):
self.losses = defaultdict(float)
is_training = targets is not None
layer_outputs = []
@ -296,7 +268,7 @@ class Darknet(nn.Module):
elif module_def['type'] == 'yolo':
# Train phase: get loss
if is_training:
x, *losses = module[0](x, targets, batch_report, var)
x, *losses = module[0](x, targets, var)
for name, loss in zip(self.loss_names, losses):
self.losses[name] += loss
# Test phase: Get detections
@ -306,29 +278,7 @@ class Darknet(nn.Module):
if is_training:
if batch_report:
self.losses['TC'] /= 3 # target category
metrics = torch.zeros(3, len(self.losses['FPe'])) # TP, FP, FN
ui = np.unique(self.losses['TC'])[1:]
for i in ui:
j = self.losses['TC'] == float(i)
metrics[0, i] = (self.losses['TP'][j] > 0).sum().float() # TP
metrics[1, i] = (self.losses['FP'][j] > 0).sum().float() # FP
metrics[2, i] = (self.losses['FN'][j] == 3).sum().float() # FN
metrics[1] += self.losses['FPe']
self.losses['TP'] = metrics[0].sum()
self.losses['FP'] = metrics[1].sum()
self.losses['FN'] = metrics[2].sum()
self.losses['metrics'] = metrics
self.losses['TP'] = 0
self.losses['FP'] = 0
self.losses['FN'] = 0
self.losses['nT'] /= 3
self.losses['TC'] = 0
output = torch.cat(output, 1) # merge the 3 layers 85 x (507, 2028, 8112) to 85 x 10647

View File

@ -20,7 +20,6 @@ def train(
@ -30,7 +29,7 @@ def train(
if multi_scale: # pass maximum multi_scale size
img_size = 608
torch.backends.cudnn.benchmark = True
torch.backends.cudnn.benchmark = True # unsuitable for multiscale
latest = os.path.join(weights, 'latest.pt')
best = os.path.join(weights, 'best.pt')
@ -93,12 +92,11 @@ def train(
t0 = time.time()
mean_recall, mean_precision = 0, 0
for epoch in range(epochs):
epoch += start_epoch
print(('%8s%12s' + '%10s' * 14) % ('Epoch', 'Batch', 'x', 'y', 'w', 'h', 'conf', 'cls', 'total', 'P', 'R',
'nTargets', 'TP', 'FP', 'FN', 'time'))
print(('%8s%12s' + '%10s' * 9) % (
'Epoch', 'Batch', 'x', 'y', 'w', 'h', 'conf', 'cls', 'total', 'nTargets', 'time'))
# Update scheduler (automatic)
# scheduler.step()
@ -124,7 +122,6 @@ def train(
ui = -1
rloss = defaultdict(float) # running loss
metrics = torch.zeros(3, num_classes)
for i, (imgs, targets) in enumerate(dataloader):
if sum([len(x) for x in targets]) < 1: # if no targets continue
@ -137,7 +134,7 @@ def train(
g['lr'] = lr
# Compute loss, compute gradient, update parameters
loss = model(imgs.to(device), targets, batch_report=report, var=var)
loss = model(imgs.to(device), targets, var=var)
# accumulate gradient for x batches before optimizing
@ -150,27 +147,10 @@ def train(
for key, val in model.losses.items():
rloss[key] = (rloss[key] * ui + val) / (ui + 1)
if report:
TP, FP, FN = metrics
metrics += model.losses['metrics']
# Precision
precision = TP / (TP + FP)
k = (TP + FP) > 0
if k.sum() > 0:
mean_precision = precision[k].mean()
# Recall
recall = TP / (TP + FN)
k = (TP + FN) > 0
if k.sum() > 0:
mean_recall = recall[k].mean()
s = ('%8s%12s' + '%10.3g' * 14) % (
s = ('%8s%12s' + '%10.3g' * 9) % (
'%g/%g' % (epoch, epochs - 1), '%g/%g' % (i, len(dataloader) - 1), rloss['x'],
rloss['y'], rloss['w'], rloss['h'], rloss['conf'], rloss['cls'],
rloss['loss'], mean_precision, mean_recall, model.losses['nT'], model.losses['TP'],
model.losses['FP'], model.losses['FN'], time.time() - t0)
rloss['loss'], model.losses['nT'], time.time() - t0)
t0 = time.time()
@ -214,9 +194,8 @@ if __name__ == '__main__':
parser.add_argument('--img-size', type=int, default=32 * 13, help='pixels')
parser.add_argument('--weights', type=str, default='weights', help='path to store weights')
parser.add_argument('--resume', action='store_true', help='resume training flag')
parser.add_argument('--report', action='store_true', help='report TP, FP, FN, P and R per batch (slower)')
parser.add_argument('--freeze', action='store_true', help='freeze darknet53.conv.74 layers for first epoch')
parser.add_argument('--var', type=float, default=0, help='optional test variable')
parser.add_argument('--var', type=float, default=0, help='test variable')
opt = parser.parse_args()
print(opt, end='\n\n')
@ -231,7 +210,6 @@ if __name__ == '__main__':

View File

@ -214,7 +214,7 @@ def bbox_iou(box1, box2, x1y1x2y2=True):
return inter_area / (b1_area + b2_area - inter_area + 1e-16)
def build_targets(pred_boxes, pred_conf, pred_cls, target, anchor_wh, nA, nC, nG, batch_report):
def build_targets(target, anchor_wh, nA, nC, nG):
returns nT, nCorrect, tx, ty, tw, th, tconf, tcls
@ -226,9 +226,6 @@ def build_targets(pred_boxes, pred_conf, pred_cls, target, anchor_wh, nA, nC, nG
th = torch.zeros(nB, nA, nG, nG)
tconf = torch.ByteTensor(nB, nA, nG, nG).fill_(0)
tcls = torch.ByteTensor(nB, nA, nG, nG, nC).fill_(0) # nC = number of classes
TP = torch.ByteTensor(nB, max(nT)).fill_(0)
FP = torch.ByteTensor(nB, max(nT)).fill_(0)
FN = torch.ByteTensor(nB, max(nT)).fill_(0)
TC = torch.ShortTensor(nB, max(nT)).fill_(-1) # target category
for b in range(nB):
@ -293,18 +290,7 @@ def build_targets(pred_boxes, pred_conf, pred_cls, target, anchor_wh, nA, nC, nG
tcls[b, a, gj, gi, tc] = 1
tconf[b, a, gj, gi] = 1
if batch_report:
# predicted classes and confidence
tb = torch.cat((gx - gw / 2, gy - gh / 2, gx + gw / 2, gy + gh / 2)).view(4, -1).t() # target boxes
pcls = torch.argmax(pred_cls[b, a, gj, gi], 1).cpu()
pconf = torch.sigmoid(pred_conf[b, a, gj, gi]).cpu()
iou_pred = bbox_iou(tb, pred_boxes[b, a, gj, gi].cpu())
TP[b, i] = (pconf > 0.5) & (iou_pred > 0.5) & (pcls == tc)
FP[b, i] = (pconf > 0.5) & (TP[b, i] == 0) # coordinates or class are wrong
FN[b, i] = pconf <= 0.5 # confidence score is too low (set to zero)
return tx, ty, tw, th, tconf, tcls, TP, FP, FN, TC
return tx, ty, tw, th, tconf, tcls
def non_max_suppression(prediction, conf_thres=0.5, nms_thres=0.4):