diff --git a/models.py b/models.py
index c72c10a4..a1988d52 100755
--- a/models.py
+++ b/models.py
@@ -1,7 +1,4 @@
 import os
-from collections import defaultdict
-
-import torch.nn as nn
 
 from utils.parse_config import *
 from utils.utils import *
@@ -104,106 +101,63 @@ class YOLOLayer(nn.Module):
 
     def __init__(self, anchors, nC, img_size, yolo_layer, cfg):
         super(YOLOLayer, self).__init__()
 
-        nA = len(anchors)
         self.anchors = torch.FloatTensor(anchors)
-        self.nA = nA  # number of anchors (3)
+        self.nA = len(anchors)  # number of anchors (3)
         self.nC = nC  # number of classes (80)
         self.img_size = 0
-        # self.coco_class_weights = coco_class_weights()
 
-        if ONNX_EXPORT:  # grids must be computed in __init__
-            stride = [32, 16, 8][yolo_layer]  # stride of this layer
-            if cfg.endswith('yolov3-tiny.cfg'):
-                stride *= 2
+        # if ONNX_EXPORT:  # grids must be computed in __init__
+        stride = [32, 16, 8][yolo_layer]  # stride of this layer
+        if cfg.endswith('yolov3-tiny.cfg'):
+            stride *= 2
 
-            self.nG = int(img_size / stride)  # number of grid points
-            create_grids(self, img_size, self.nG)
+        nG = int(img_size / stride)  # number of grid points
 
-    def forward(self, p, img_size, targets=None, var=None):
+        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+        create_grids(self, img_size, nG, device)
+
+    def forward(self, p, img_size, var=None):
         if ONNX_EXPORT:
             bs, nG = 1, self.nG  # batch size, grid size
         else:
             bs, nG = p.shape[0], p.shape[-1]
 
             if self.img_size != img_size:
-                create_grids(self, img_size, nG)
+                create_grids(self, img_size, nG, p.device)
 
-            if p.is_cuda:
-                self.grid_xy = self.grid_xy.cuda()
-                self.anchor_wh = self.anchor_wh.cuda()
-
-        # p.view(bs, 255, 13, 13) -> (bs, 3, 13, 13, 80)  # (bs, anchors, grid, grid, classes + xywh)
+        # p.view(bs, 255, 13, 13) -> (bs, 3, 13, 13, 85)  # (bs, anchors, grid, grid, classes + xywh)
         p = p.view(bs, self.nA, self.nC + 5, nG, nG).permute(0, 1, 3, 4, 2).contiguous()  # prediction
 
-        # xy, width and height
-        xy = torch.sigmoid(p[..., 0:2])
-        wh = p[..., 2:4]  # wh (yolo method)
-        # wh = torch.sigmoid(p[..., 2:4])  # wh (power method)
+        if self.training:
+            return p
 
-        # Training
-        if targets is not None:
-            MSELoss = nn.MSELoss()
-            BCEWithLogitsLoss = nn.BCEWithLogitsLoss()
-            CrossEntropyLoss = nn.CrossEntropyLoss()
+        elif ONNX_EXPORT:
+            grid_xy = self.grid_xy.repeat((1, self.nA, 1, 1, 1)).view((1, -1, 2))
+            anchor_wh = self.anchor_wh.repeat((1, 1, nG, nG, 1)).view((1, -1, 2)) / nG
 
-            # Get outputs
-            p_conf = p[..., 4]  # Conf
-            p_cls = p[..., 5:]  # Class
+            # p = p.view(-1, 5 + self.nC)
+            # xy = xy + self.grid_xy[0]  # x, y
+            # wh = torch.exp(wh) * self.anchor_wh[0]  # width, height
+            # p_conf = torch.sigmoid(p[:, 4:5])  # Conf
+            # p_cls = F.softmax(p[:, 5:], 1) * p_conf  # SSD-like conf
+            # return torch.cat((xy / nG, wh, p_conf, p_cls), 1).t()
 
-            txy, twh, mask, tcls = build_targets(targets, self.anchor_vec, self.nA, self.nC, nG)
+            p = p.view(1, -1, 5 + self.nC)
+            xy = torch.sigmoid(p[..., 0:2]) + grid_xy  # x, y
+            wh = torch.exp(p[..., 2:4]) * anchor_wh  # width, height
+            p_conf = torch.sigmoid(p[..., 4:5])  # Conf
+            p_cls = p[..., 5:]
+            # Broadcasting only supported on first dimension in CoreML. See onnx-coreml/_operators.py
+            # p_cls = F.softmax(p_cls, 2) * p_conf  # SSD-like conf
+            p_cls = torch.exp(p_cls).permute((2, 1, 0))
+            p_cls = p_cls / p_cls.sum(0).unsqueeze(0) * p_conf.permute((2, 1, 0))  # F.softmax() equivalent
+            p_cls = p_cls.permute(2, 1, 0)
+            return torch.cat((xy / nG, wh, p_conf, p_cls), 2).squeeze().t()
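The permute / exp / sum sequence above reproduces `F.softmax` over the class dimension while only ever broadcasting over the first dimension, which is the CoreML constraint the comment refers to. A self-contained equivalence check (shapes illustrative):

```python
import torch
import torch.nn.functional as F

p_cls = torch.randn(1, 507, 80)  # (1, nA * nG * nG, nC) class logits
p_conf = torch.rand(1, 507, 1)   # sigmoid objectness

a = F.softmax(p_cls, 2) * p_conf                         # SSD-like conf (broadcasts on dim 2)

b = torch.exp(p_cls).permute(2, 1, 0)                    # (nC, 507, 1)
b = b / b.sum(0).unsqueeze(0) * p_conf.permute(2, 1, 0)  # normalize; broadcast on dim 0 only
b = b.permute(2, 1, 0)                                   # back to (1, 507, nC)

print(torch.allclose(a, b, atol=1e-6))  # True
```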
-            tcls = tcls[mask]
-            if p.is_cuda:
-                txy, twh, mask, tcls = txy.cuda(), twh.cuda(), mask.cuda(), tcls.cuda()
-
-            # Compute losses
-            nT = sum([len(x) for x in targets])  # number of targets
-            nM = mask.sum().float()  # number of anchors (assigned to targets)
-            k = 1  # nM / bs
-            if nM > 0:
-                lxy = k * MSELoss(xy[mask], txy[mask])
-                lwh = k * MSELoss(wh[mask], twh[mask])
-
-                lcls = (k / 4) * CrossEntropyLoss(p_cls[mask], torch.argmax(tcls, 1))
-                # lcls = (k * 10) * BCEWithLogitsLoss(p_cls[mask], tcls.float())
-            else:
-                FT = torch.cuda.FloatTensor if p.is_cuda else torch.FloatTensor
-                lxy, lwh, lcls, lconf = FT([0]), FT([0]), FT([0]), FT([0])
-
-            lconf = (k * 64) * BCEWithLogitsLoss(p_conf, mask.float())
-
-            # Sum loss components
-            loss = lxy + lwh + lconf + lcls
-
-            return loss, loss.item(), lxy.item(), lwh.item(), lconf.item(), lcls.item(), nT
-
-        else:
-            if ONNX_EXPORT:
-                grid_xy = self.grid_xy.repeat((1, self.nA, 1, 1, 1)).view((1, -1, 2))
-                anchor_wh = self.anchor_wh.repeat((1, 1, nG, nG, 1)).view((1, -1, 2)) / nG
-
-                # p = p.view(-1, 85)
-                # xy = xy + self.grid_xy[0]  # x, y
-                # wh = torch.exp(wh) * self.anchor_wh[0]  # width, height
-                # p_conf = torch.sigmoid(p[:, 4:5])  # Conf
-                # p_cls = F.softmax(p[:, 5:85], 1) * p_conf  # SSD-like conf
-                # return torch.cat((xy / nG, wh, p_conf, p_cls), 1).t()
-
-                p = p.view(1, -1, 5 + self.nC)
-                xy = xy.view(bs, self.nA * nG * nG, 2) + grid_xy  # x, y
-                wh = torch.exp(p[..., 2:4]) * anchor_wh  # width, height
-                p_conf = torch.sigmoid(p[..., 4:5])  # Conf
-                p_cls = p[..., 5:5 + self.nC]
-                # Broadcasting only supported on first dimension in CoreML. See onnx-coreml/_operators.py
-                # p_cls = F.softmax(p_cls, 2) * p_conf  # SSD-like conf
-                p_cls = torch.exp(p_cls).permute((2, 1, 0))
-                p_cls = p_cls / p_cls.sum(0).unsqueeze(0) * p_conf.permute((2, 1, 0))  # F.softmax() equivalent
-                p_cls = p_cls.permute(2, 1, 0)
-                return torch.cat((xy / nG, wh, p_conf, p_cls), 2).squeeze().t()
-
-            p[..., 0:2] = xy + self.grid_xy  # xy
-            p[..., 2:4] = torch.exp(wh) * self.anchor_wh  # wh yolo method
-            # p[..., 2:4] = ((wh * 2) ** 2) * self.anchor_wh  # wh power method
+        else:  # inference
+            p[..., 0:2] = torch.sigmoid(p[..., 0:2]) + self.grid_xy  # xy
+            p[..., 2:4] = torch.exp(p[..., 2:4]) * self.anchor_wh  # wh yolo method
+            # p[..., 2:4] = ((torch.sigmoid(p[..., 2:4]) * 2) ** 2) * self.anchor_wh  # wh power method
             p[..., 4] = torch.sigmoid(p[..., 4])  # p_conf
             p[..., :4] *= self.stride
@@ -225,9 +179,7 @@ class Darknet(nn.Module):
         self.loss_names = ['loss', 'xy', 'wh', 'conf', 'cls', 'nT']
         self.losses = []
 
-    def forward(self, x, targets=None, var=0):
-        self.losses = defaultdict(float)
-        is_training = targets is not None
+    def forward(self, x, var=None):
         img_size = x.shape[-1]
         layer_outputs = []
         output = []
@@ -246,23 +198,15 @@ class Darknet(nn.Module):
                 layer_i = int(module_def['from'])
                 x = layer_outputs[-1] + layer_outputs[layer_i]
             elif mtype == 'yolo':
-                if is_training:  # get loss
-                    x, *losses = module[0](x, img_size, targets, var)
-                    for name, loss in zip(self.loss_names, losses):
-                        self.losses[name] += loss
-                else:  # get detections
-                    x = module[0](x, img_size)
+                x = module[0](x, img_size)
                 output.append(x)
             layer_outputs.append(x)
 
-        if is_training:
-            self.losses['nT'] /= 3
-
         if ONNX_EXPORT:
             output = torch.cat(output, 1)  # merge the 3 layers 85 x (507, 2028, 8112) to 85 x 10647
             return output[5:].t(), output[:4].t()  # ONNX scores, boxes
-
-        return sum(output) if is_training else torch.cat(output, 1)
+        else:
+            return output if self.training else torch.cat(output, 1)
 
 
 def get_yolo_layers(model):
@@ -270,17 +214,18 @@ def get_yolo_layers(model):
     return [i for i, x in enumerate(a) if x]  # [82, 94, 106] for yolov3
 
 
-def create_grids(self, img_size, nG):
+def create_grids(self, img_size, nG, device):
     self.stride = img_size / nG
 
     # build xy offsets
     grid_x = torch.arange(nG).repeat((nG, 1)).view((1, 1, nG, nG)).float()
     grid_y = grid_x.permute(0, 1, 3, 2)
-    self.grid_xy = torch.stack((grid_x, grid_y), 4)
+    self.grid_xy = torch.stack((grid_x, grid_y), 4).to(device)
 
     # build wh gains
-    self.anchor_vec = self.anchors / self.stride
-    self.anchor_wh = self.anchor_vec.view(1, self.nA, 1, 1, 2)
+    self.anchor_vec = self.anchors.to(device) / self.stride
+    self.anchor_wh = self.anchor_vec.view(1, self.nA, 1, 1, 2).to(device)
+    self.nG = torch.FloatTensor([nG]).to(device)
 
 
 def load_darknet_weights(self, weights, cutoff=-1):
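Taken together, `create_grids()` and the new inference branch decode raw activations into boxes; a minimal standalone sketch (anchor values and shapes here are illustrative, not the cfg values):

```python
import torch

nA, nG, nC, stride = 3, 13, 80, 32
anchors = torch.tensor([[116., 90.], [156., 198.], [373., 326.]])  # illustrative

# what create_grids() precomputes
grid_x = torch.arange(nG).repeat(nG, 1).view(1, 1, nG, nG).float()
grid_y = grid_x.permute(0, 1, 3, 2)
grid_xy = torch.stack((grid_x, grid_y), 4)           # (1, 1, nG, nG, 2) cell offsets
anchor_wh = (anchors / stride).view(1, nA, 1, 1, 2)  # anchor priors in grid units

# the inference decode from YOLOLayer.forward()
p = torch.randn(1, nA, nG, nG, nC + 5)               # raw layer output
xy = torch.sigmoid(p[..., 0:2]) + grid_xy            # cell-relative center + cell offset
wh = torch.exp(p[..., 2:4]) * anchor_wh              # anchor-relative width/height
boxes = torch.cat((xy, wh), -1) * stride             # scale back to pixels
```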
diff --git a/test.py b/test.py
index a334efcc..031baa05 100644
--- a/test.py
+++ b/test.py
@@ -17,7 +17,8 @@ def test(
         iou_thres=0.5,
         conf_thres=0.3,
         nms_thres=0.45,
-        save_json=False
+        save_json=False,
+        model=None
 ):
     device = torch_utils.select_device()
 
@@ -26,14 +27,15 @@ def test(
     nC = int(data_cfg_dict['classes'])  # number of classes (80 for COCO)
     test_path = data_cfg_dict['valid']
 
-    # Initialize model
-    model = Darknet(cfg, img_size)
+    if model is None:
+        # Initialize model
+        model = Darknet(cfg, img_size)
 
-    # Load weights
-    if weights.endswith('.pt'):  # pytorch format
-        model.load_state_dict(torch.load(weights, map_location='cpu')['model'])
-    else:  # darknet format
-        load_darknet_weights(model, weights)
+        # Load weights
+        if weights.endswith('.pt'):  # pytorch format
+            model.load_state_dict(torch.load(weights, map_location='cpu')['model'])
+        else:  # darknet format
+            load_darknet_weights(model, weights)
 
     model.to(device).eval()
 
@@ -43,32 +45,31 @@ def test(
     mean_mAP, mean_R, mean_P, seen = 0.0, 0.0, 0.0, 0
     print('%11s' * 5 % ('Image', 'Total', 'P', 'R', 'mAP'))
-    outputs, mAPs, mR, mP, TP, confidence, pred_class, target_class, jdict = \
-        [], [], [], [], [], [], [], [], []
+    mP, mR, mAPs, TP, jdict = [], [], [], [], []
     AP_accum, AP_accum_count = np.zeros(nC), np.zeros(nC)
     coco91class = coco80_to_coco91_class()
-    for batch_i, (imgs, targets, paths, shapes) in enumerate(dataloader):
+    for (imgs, targets, paths, shapes) in dataloader:
         t = time.time()
         output = model(imgs.to(device))
         output = non_max_suppression(output, conf_thres=conf_thres, nms_thres=nms_thres)
 
         # Compute average precision for each sample
-        for si, (labels, detections) in enumerate(zip(targets, output)):
+        for si, detections in enumerate(output):
+            labels = targets[targets[:, 0] == si, 1:]
             seen += 1
 
             if detections is None:
                 # If there are labels but no detections mark as zero AP
-                if labels.size(0) != 0:
-                    mAPs.append(0), mR.append(0), mP.append(0)
+                if len(labels) != 0:
+                    mP.append(0), mR.append(0), mAPs.append(0)
                 continue
 
             # Get detections sorted by decreasing confidence scores
-            detections = detections.cpu().numpy()
-            detections = detections[np.argsort(-detections[:, 4])]
+            detections = detections[(-detections[:, 4]).argsort()]
 
             if save_json:
                 # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ...
-                box = torch.from_numpy(detections[:, :4]).clone()  # xyxy
+                box = detections[:, :4].clone()  # xyxy
                 scale_coords(img_size, box, shapes[si])  # to original shape
                 box = xyxy2xywh(box)  # xywh
                 box[:, :2] -= box[:, 2:] / 2  # xy center to top-left corner
@@ -84,28 +85,24 @@ def test(
 
             # If no labels add number of detections as incorrect
             correct = []
-            if labels.size(0) == 0:
+            if len(labels) == 0:
                 # correct.extend([0 for _ in range(len(detections))])
-                mAPs.append(0), mR.append(0), mP.append(0)
+                mP.append(0), mR.append(0), mAPs.append(0)
                 continue
             else:
+                # Extract target boxes as (x1, y1, x2, y2)
+                target_box = xywh2xyxy(labels[:, 1:5]) * img_size
                 target_cls = labels[:, 0]
 
-                # Extract target boxes as (x1, y1, x2, y2)
-                target_boxes = xywh2xyxy(labels[:, 1:5]) * img_size
-
                 detected = []
-                for *pred_bbox, conf, obj_conf, obj_pred in detections:
+                for *pred_box, conf, cls_conf, cls_pred in detections:
+                    # Best iou, index between pred and targets
+                    iou, bi = bbox_iou(pred_box, target_box).max(0)
 
-                    pred_bbox = torch.FloatTensor(pred_bbox).view(1, -1)
-                    # Compute iou with target boxes
-                    iou = bbox_iou(pred_bbox, target_boxes)
-                    # Extract index of largest overlap
-                    best_i = np.argmax(iou)
-                    # If overlap exceeds threshold and classification is correct mark as correct
-                    if iou[best_i] > iou_thres and obj_pred == labels[best_i, 0] and best_i not in detected:
+                    # If iou > threshold and class is correct mark as correct
+                    if iou > iou_thres and cls_pred == target_cls[bi] and bi not in detected:
                         correct.append(1)
-                        detected.append(best_i)
+                        detected.append(bi)
                     else:
                         correct.append(0)
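The matching loop above now works on the flat target tensor introduced in utils/datasets.py below: every row is [image_index, class, x, y, w, h], so a boolean mask recovers one image's labels. A toy illustration:

```python
import torch

targets = torch.tensor([[0, 5, 0.50, 0.50, 0.20, 0.30],   # image 0
                        [0, 7, 0.10, 0.20, 0.10, 0.10],   # image 0
                        [1, 5, 0.40, 0.40, 0.30, 0.30]])  # image 1
labels = targets[targets[:, 0] == 0, 1:]  # (2, 5): class + xywh rows of image 0
```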
@@ -120,24 +117,24 @@ def test(
                 AP_accum += np.bincount(AP_class, minlength=nC, weights=AP)
 
             # Compute mean AP across all classes in this image, and append to image list
-            mAPs.append(AP.mean())
-            mR.append(R.mean())
             mP.append(P.mean())
+            mR.append(R.mean())
+            mAPs.append(AP.mean())
 
             # Means of all images
-            mean_mAP = np.mean(mAPs)
-            mean_R = np.mean(mR)
             mean_P = np.mean(mP)
+            mean_R = np.mean(mR)
+            mean_mAP = np.mean(mAPs)
 
         # Print image mAP and running mean mAP
         print(('%11s%11s' + '%11.3g' * 4 + 's') % (seen, dataloader.nF, mean_P, mean_R, mean_mAP, time.time() - t))
 
     # Print mAP per class
-    print('%11s' * 5 % ('Image', 'Total', 'P', 'R', 'mAP') + '\n\nmAP Per Class:')
-
+    print('\nmAP Per Class:')
     for i, c in enumerate(load_classes(data_cfg_dict['names'])):
-        print('%15s: %-.4f' % (c, AP_accum[i] / (AP_accum_count[i] + 1E-16)))
+        if AP_accum_count[i]:
+            print('%15s: %-.4f' % (c, AP_accum[i] / AP_accum_count[i]))
 
     # Save JSON
     if save_json:
@@ -159,7 +156,7 @@ def test(
         cocoEval.summarize()
 
     # Return mAP
-    return mean_mAP, mean_R, mean_P
+    return mean_P, mean_R, mean_mAP
 
 
 if __name__ == '__main__':
@@ -186,8 +183,7 @@ if __name__ == '__main__':
             opt.iou_thres,
             opt.conf_thres,
             opt.nms_thres,
-            opt.save_json
-        )
+            opt.save_json)
 
 # Image Total P R mAP
 # YOLOv3 320
 # 32 5000 0.66 0.597 0.591
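The new `model` argument lets train.py evaluate the live network between epochs instead of rebuilding one and reloading weights from disk. Both call styles, sketched with illustrative paths (when `model` is passed, the `weights` argument is not used):

```python
import test

# standalone evaluation: constructs a Darknet and loads weights from disk
P, R, mAP = test.test('cfg/yolov3.cfg', 'cfg/coco.data', weights='weights/latest.pt')

# from a training loop holding `model`: skips construction and weight loading
P, R, mAP = test.test('cfg/yolov3.cfg', 'cfg/coco.data', weights='weights/latest.pt', model=model)
```

Note the return order is also reversed here to (P, R, mAP), matching the new column order everywhere else.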
diff --git a/train.py b/train.py
index 6b7b6181..c8ea97c3 100644
--- a/train.py
+++ b/train.py
@@ -17,7 +17,6 @@ def train(
         accumulated_batches=1,
         multi_scale=False,
         freeze_backbone=False,
-        var=0,
 ):
     weights = 'weights' + os.sep
     latest = weights + 'latest.pt'
@@ -48,10 +47,6 @@ def train(
         # Load weights to resume from
         model.load_state_dict(checkpoint['model'])
 
-        # if torch.cuda.device_count() > 1:
-        #     model = nn.DataParallel(model)
-        model.to(device).train()
-
         # Transfer learning (train only YOLO layers)
         # for i, (name, p) in enumerate(model.named_parameters()):
         #     p.requires_grad = True if (p.shape[0] == 255) else False
@@ -75,13 +70,13 @@ def train(
             load_darknet_weights(model, weights + 'yolov3-tiny.conv.15')
             cutoff = 15
 
-        # if torch.cuda.device_count() > 1:
-        #     model = nn.DataParallel(model)
-        model.to(device).train()
-
     # Set optimizer
     optimizer = torch.optim.SGD(filter(lambda x: x.requires_grad, model.parameters()), lr=lr0, momentum=.9)
 
+    if torch.cuda.device_count() > 1:
+        model = nn.DataParallel(model)
+    model.to(device).train()
+
     # Set scheduler
     # scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[54, 61], gamma=0.1)
 
@@ -90,16 +85,17 @@ def train(
     model_info(model)
     n_burnin = min(round(dataloader.nB / 5 + 1), 1000)  # number of burn-in batches
     for epoch in range(epochs):
+        model.train()
         epoch += start_epoch
 
-        print(('%8s%12s' + '%10s' * 7) % (
+        print(('\n%8s%12s' + '%10s' * 7) % (
             'Epoch', 'Batch', 'xy', 'wh', 'conf', 'cls', 'total', 'nTargets', 'time'))
 
         # Update scheduler (automatic)
         # scheduler.step()
 
         # Update scheduler (manual) at 0, 54, 61 epochs to 1e-3, 1e-4, 1e-5
-        if epoch > 50:
+        if epoch > 250:
             lr = lr0 / 10
         else:
             lr = lr0
@@ -113,10 +109,12 @@ def train(
                 p.requires_grad = False if (epoch == 0) else True
 
         ui = -1
-        rloss = defaultdict(float)  # running loss
        optimizer.zero_grad()
+        rloss = defaultdict(float)
        for i, (imgs, targets, _, _) in enumerate(dataloader):
-            if sum([len(x) for x in targets]) < 1:  # if no targets continue
+            targets = targets.to(device)
+            nT = targets.shape[0]
+            if nT == 0:  # if no targets continue
                 continue
 
             # SGD burn-in
@@ -125,8 +123,14 @@ def train(
                 for g in optimizer.param_groups:
                     g['lr'] = lr
 
+            # Run model
+            pred = model(imgs.to(device))
+
+            # Build targets
+            target_list = build_targets(model, targets, pred)
+
             # Compute loss
-            loss = model(imgs.to(device), targets, var=var)
+            loss, loss_dict = compute_loss(pred, target_list)
 
             # Compute gradient
             loss.backward()
@@ -138,49 +142,51 @@ def train(
 
             # Running epoch-means of tracked metrics
             ui += 1
-            for key, val in model.losses.items():
+            for key, val in loss_dict.items():
                 rloss[key] = (rloss[key] * ui + val) / (ui + 1)
 
             s = ('%8s%12s' + '%10.3g' * 7) % (
                 '%g/%g' % (epoch, epochs - 1),
                 '%g/%g' % (i, len(dataloader) - 1),
                 rloss['xy'], rloss['wh'], rloss['conf'],
-                rloss['cls'], rloss['loss'],
-                model.losses['nT'], time.time() - t0)
+                rloss['cls'], rloss['total'],
+                nT, time.time() - t0)
             t0 = time.time()
             print(s)
 
         # Update best loss
-        if rloss['loss'] < best_loss:
-            best_loss = rloss['loss']
+        if rloss['total'] < best_loss:
+            best_loss = rloss['total']
 
-        # Save latest checkpoint
-        checkpoint = {'epoch': epoch,
-                      'best_loss': best_loss,
-                      'model': model.state_dict(),
-                      'optimizer': optimizer.state_dict()}
-        torch.save(checkpoint, latest)
+        save = True  # save training results
+        if save:
+            # Save latest checkpoint
+            checkpoint = {'epoch': epoch,
+                          'best_loss': best_loss,
+                          'model': model.module.state_dict() if type(model) is nn.DataParallel else model.state_dict(),
+                          'optimizer': optimizer.state_dict()}
+            torch.save(checkpoint, latest)
 
-        # Save best checkpoint
-        if best_loss == rloss['loss']:
-            os.system('cp ' + latest + ' ' + best)
+            # Save best checkpoint
+            if best_loss == rloss['total']:
+                os.system('cp ' + latest + ' ' + best)
 
-        # Save backup weights every 5 epochs (optional)
-        # if (epoch > 0) & (epoch % 5 == 0):
-        #     os.system('cp ' + latest + ' ' + weights + 'backup{}.pt'.format(epoch))
+            # Save backup weights every 5 epochs (optional)
+            if (epoch > 0) & (epoch % 5 == 0):
+                os.system('cp ' + latest + ' ' + weights + 'backup{}.pt'.format(epoch))
 
         # Calculate mAP
         with torch.no_grad():
-            mAP, R, P = test.test(cfg, data_cfg, weights=latest, batch_size=batch_size, img_size=img_size)
+            P, R, mAP = test.test(cfg, data_cfg, weights=latest, batch_size=batch_size, img_size=img_size, model=model)
 
         # Write epoch results
        with open('results.txt', 'a') as file:
-            file.write(s + '%11.3g' * 3 % (mAP, P, R) + '\n')
+            file.write(s + '%11.3g' * 3 % (P, R, mAP) + '\n')
 
 
 if __name__ == '__main__':
     parser = argparse.ArgumentParser()
-    parser.add_argument('--epochs', type=int, default=100, help='number of epochs')
+    parser.add_argument('--epochs', type=int, default=270, help='number of epochs')
     parser.add_argument('--batch-size', type=int, default=16, help='size of each image batch')
     parser.add_argument('--accumulated-batches', type=int, default=1, help='number of batches before optimizer step')
     parser.add_argument('--cfg', type=str, default='cfg/yolov3.cfg', help='cfg file path')
@@ -188,7 +194,6 @@ if __name__ == '__main__':
     parser.add_argument('--multi-scale', action='store_true', help='random image sizes per batch 320 - 608')
     parser.add_argument('--img-size', type=int, default=32 * 13, help='pixels')
     parser.add_argument('--resume', action='store_true', help='resume training flag')
-    parser.add_argument('--var', type=float, default=0, help='test variable')
     opt = parser.parse_args()
     print(opt, end='\n\n')
 
@@ -203,5 +208,4 @@ if __name__ == '__main__':
         batch_size=opt.batch_size,
         accumulated_batches=opt.accumulated_batches,
         multi_scale=opt.multi_scale,
-        var=opt.var,
     )
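The loss path is now split into three explicit calls instead of being buried inside `Darknet.forward()`; condensed, the new inner-loop step reads (a sketch using the names from this diff):

```python
targets = targets.to(device)        # flat (nT, 6) tensor: [image, class, x, y, w, h]
pred = model(imgs.to(device))       # training mode: list of raw per-layer outputs

target_list = build_targets(model, targets, pred)  # match targets to layers/anchors/cells
loss, loss_dict = compute_loss(pred, target_list)  # scalar loss + per-component dict
loss.backward()

if (i + 1) % accumulated_batches == 0:  # accumulate gradients over several batches
    optimizer.step()
    optimizer.zero_grad()
```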
diff --git a/utils/datasets.py b/utils/datasets.py
index 3a0d6bd3..05d1f98d 100755
--- a/utils/datasets.py
+++ b/utils/datasets.py
@@ -206,8 +206,11 @@ class LoadImagesAndLabels:  # for training
                 if nL > 0:
                     labels[:, 2] = 1 - labels[:, 2]
 
+            if nL > 0:
+                labels = np.concatenate((np.zeros((nL, 1), dtype='float32') + index, labels), 1)
+                labels_all.append(labels)
+
             img_all.append(img)
-            labels_all.append(torch.from_numpy(labels))
             img_paths.append(img_path)
             img_shapes.append((h, w))
 
@@ -216,6 +219,7 @@ class LoadImagesAndLabels:  # for training
         img_all = np.ascontiguousarray(img_all, dtype=np.float32)
         img_all /= 255.0
 
+        labels_all = torch.from_numpy(np.concatenate(labels_all, 0))
         return torch.from_numpy(img_all), labels_all, img_paths, img_shapes
 
     def __len__(self):
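This collate change is what produces the flat label tensor consumed by test.py and `build_targets()`: the batch index is prepended to each image's (nL, 5) label array before concatenation. A minimal sketch (`batch_labels` is a hypothetical stand-in for the per-image arrays):

```python
import numpy as np
import torch

batch_labels = [np.random.rand(2, 5).astype('float32'),   # image 0: 2 labels
                np.random.rand(3, 5).astype('float32')]   # image 1: 3 labels

labels_all = []
for index, labels in enumerate(batch_labels):
    nL = len(labels)
    if nL > 0:
        labels = np.concatenate((np.zeros((nL, 1), dtype='float32') + index, labels), 1)
        labels_all.append(labels)

labels_all = torch.from_numpy(np.concatenate(labels_all, 0))  # (5, 6): [image, class, x, y, w, h]
```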
diff --git a/utils/gcp.sh b/utils/gcp.sh
index 93308d61..074579c4 100755
--- a/utils/gcp.sh
+++ b/utils/gcp.sh
@@ -4,6 +4,7 @@
 sudo rm -rf yolov3 && git clone https://github.com/ultralytics/yolov3
 bash yolov3/data/get_coco_dataset.sh
 sudo rm -rf cocoapi && git clone https://github.com/cocodataset/cocoapi && cd cocoapi/PythonAPI && make && cd ../.. && cp -r cocoapi/PythonAPI/pycocotools yolov3
+sudo shutdown
 
 # Start
 python3 train.py
@@ -15,6 +16,14 @@
 python3 train.py --resume
 gsutil cp gs://ultralytics/yolov3.pt yolov3/weights
 python3 detect.py
 
+# Clone branch
+sudo rm -rf yolov3 && git clone -b multi_gpu --depth 1 https://github.com/ultralytics/yolov3
+cd yolov3 && python3 train.py --batch-size 104
+
+sudo rm -rf yolov3 && git clone -b multigpu --depth 1 https://github.com/alexpolichroniadis/yolov3
+cp coco.data yolov3/cfg
+cd yolov3 && python3 train.py --batch-size 104
+
 # Test
 sudo rm -rf yolov3 && git clone https://github.com/ultralytics/yolov3
 sudo rm -rf cocoapi && git clone https://github.com/cocodataset/cocoapi && cd cocoapi/PythonAPI && make && cd ../.. && cp -r cocoapi/PythonAPI/pycocotools yolov3
diff --git a/utils/torch_utils.py b/utils/torch_utils.py
index f98da7fb..a4a26fd4 100644
--- a/utils/torch_utils.py
+++ b/utils/torch_utils.py
@@ -16,10 +16,11 @@ def select_device(force_cpu=False):
     device = torch.device('cuda:0' if cuda else 'cpu')
 
     if torch.cuda.device_count() > 1:
+        device = torch.device('cuda' if cuda else 'cpu')
         print('Found %g GPUs' % torch.cuda.device_count())
-        print('WARNING Multi-GPU Issue: https://github.com/ultralytics/yolov3/issues/21')
-        torch.cuda.set_device(0)  # OPTIONAL: Set your GPU if multiple available
-        # # print('Using ', torch.cuda.device_count(), ' GPUs')
+        # print('Multi-GPU Issue: https://github.com/ultralytics/yolov3/issues/21')
+        # torch.cuda.set_device(0)  # OPTIONAL: Set your GPU if multiple available
+        # print('Using ', torch.cuda.device_count(), ' GPUs')
 
     print('Using %s %s\n' % (device.type, torch.cuda.get_device_properties(0) if cuda else ''))
     return device
diff --git a/utils/utils.py b/utils/utils.py
index 8e54f7cb..c1a1969a 100755
--- a/utils/utils.py
+++ b/utils/utils.py
@@ -1,10 +1,12 @@
 import glob
 import random
+from collections import defaultdict
 
 import cv2
 import matplotlib.pyplot as plt
 import numpy as np
 import torch
+import torch.nn as nn
 import torch.nn.functional as F
 
 from utils import torch_utils
@@ -25,15 +27,14 @@ def init_seeds(seed=0):
 
 
 def load_classes(path):
-    """
-    Loads class labels at 'path'
-    """
+    # Loads class labels at 'path'
     fp = open(path, 'r')
     names = fp.read().split('\n')
     return list(filter(None, names))  # filter removes empty strings (such as last line)
 
 
-def model_info(model):  # Plots a line-by-line description of a PyTorch model
+def model_info(model):
+    # Plots a line-by-line description of a PyTorch model
     n_p = sum(x.numel() for x in model.parameters())  # number parameters
     n_g = sum(x.numel() for x in model.parameters() if x.requires_grad)  # number gradients
     print('\n%5s %50s %9s %12s %20s %12s %12s' % ('layer', 'name', 'gradient', 'parameters', 'shape', 'mu', 'sigma'))
@@ -41,7 +42,7 @@ def model_info(model):
         name = name.replace('module_list.', '')
         print('%5g %50s %9s %12g %20s %12.3g %12.3g' % (
             i, name, p.requires_grad, p.numel(), list(p.shape), p.mean(), p.std()))
-    print('Model Summary: %g layers, %g parameters, %g gradients\n' % (i + 1, n_p, n_g))
+    print('Model Summary: %g layers, %g parameters, %g gradients' % (i + 1, n_p, n_g))
 
 
 def coco_class_weights():  # frequency of each class in coco train2014
@@ -66,7 +67,8 @@ def coco80_to_coco91_class():  # converts 80-index (val2014) to 91-index (paper)
     return x
 
 
-def plot_one_box(x, img, color=None, label=None, line_thickness=None):  # Plots one bounding box on image img
+def plot_one_box(x, img, color=None, label=None, line_thickness=None):
+    # Plots one bounding box on image img
     tl = line_thickness or round(0.002 * max(img.shape[0:2])) + 1  # line thickness
     color = color or [random.randint(0, 255) for _ in range(3)]
     c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3]))
@@ -122,7 +124,7 @@ def scale_coords(img_size, coords, img0_shape):
 
 def ap_per_class(tp, conf, pred_cls, target_cls):
     """ Compute the average precision, given the recall and precision curves.
-    Method originally from https://github.com/rafaelpadilla/Object-Detection-Metrics.
+    Source: https://github.com/rafaelpadilla/Object-Detection-Metrics.
     # Arguments
         tp:    True positives (list).
         conf:  Objectness value from 0-1 (list).
@@ -176,7 +178,7 @@ def ap_per_class(tp, conf, pred_cls, target_cls):
 
 def compute_ap(recall, precision):
     """ Compute the average precision, given the recall and precision curves.
-    Code originally from https://github.com/rbgirshick/py-faster-rcnn.
+    Source: https://github.com/rbgirshick/py-faster-rcnn.
     # Arguments
         recall:    The recall curve (list).
         precision: The precision curve (list).
@@ -203,105 +205,127 @@ def compute_ap(recall, precision):
 
 
 def bbox_iou(box1, box2, x1y1x2y2=True):
-    box1 = box1.t()
+    # Returns the IoU of box1 to box2. box1 is 4, box2 is nx4
     box2 = box2.t()
-    """
-    Returns the IoU of two bounding boxes
-    """
+
+    # Get the coordinates of bounding boxes
     if x1y1x2y2:
-        # Get the coordinates of bounding boxes
+        # x1, y1, x2, y2 = box1
         b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3]
         b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3]
     else:
-        # x1, y1, w1, h1 = box1
-        # Transform from center and width to exact coordinates
+        # x, y, w, h = box1
         b1_x1, b1_x2 = box1[0] - box1[2] / 2, box1[0] + box1[2] / 2
         b1_y1, b1_y2 = box1[1] - box1[3] / 2, box1[1] + box1[3] / 2
         b2_x1, b2_x2 = box2[0] - box2[2] / 2, box2[0] + box2[2] / 2
         b2_y1, b2_y2 = box2[1] - box2[3] / 2, box2[1] + box2[3] / 2
 
-    # get the coordinates of the intersection rectangle
-    inter_rect_x1 = torch.max(b1_x1, b2_x1)
-    inter_rect_y1 = torch.max(b1_y1, b2_y1)
-    inter_rect_x2 = torch.min(b1_x2, b2_x2)
-    inter_rect_y2 = torch.min(b1_y2, b2_y2)
     # Intersection area
-    inter_area = torch.clamp(inter_rect_x2 - inter_rect_x1, 0) * torch.clamp(inter_rect_y2 - inter_rect_y1, 0)
+    inter_area = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * \
+                 (torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(0)
+
     # Union Area
-    b1_area = (b1_x2 - b1_x1) * (b1_y2 - b1_y1)
-    b2_area = (b2_x2 - b2_x1) * (b2_y2 - b2_y1)
+    union_area = ((b1_x2 - b1_x1) * (b1_y2 - b1_y1) + 1e-16) + \
+                 (b2_x2 - b2_x1) * (b2_y2 - b2_y1) - inter_area
 
-    return inter_area / (b1_area + b2_area - inter_area + 1e-16)
+    return inter_area / union_area  # iou
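The rewritten `bbox_iou` follows the one-against-many shape contract stated in its comment; a quick numeric check (assuming `utils.utils` is importable):

```python
import torch
from utils.utils import bbox_iou

box1 = torch.tensor([0., 0., 10., 10.])    # one box, shape (4,)
box2 = torch.tensor([[0., 0., 10., 10.],
                     [5., 5., 15., 15.]])  # n boxes, shape (n, 4)

iou = bbox_iou(box1, box2)  # tensor([1.0000, 0.1429]): 25 / (100 + 100 - 25) for the second box
```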
 
 
-def build_targets(target, anchor_vec, nA, nC, nG):
-    """
-    returns nT, nCorrect, tx, ty, tw, th, tconf, tcls
-    """
-    nB = len(target)  # number of images in batch
+def wh_iou(box1, box2):
+    # Returns the IoU of wh1 to wh2. wh1 is 2, wh2 is nx2
+    box2 = box2.t()
 
-    txy = torch.zeros(nB, nA, nG, nG, 2)  # batch size, anchors, grid size
-    twh = torch.zeros(nB, nA, nG, nG, 2)
-    tconf = torch.ByteTensor(nB, nA, nG, nG).fill_(0)
-    tcls = torch.ByteTensor(nB, nA, nG, nG, nC).fill_(0)  # nC = number of classes
+    # w, h = box1
+    w1, h1 = box1[0], box1[1]
+    w2, h2 = box2[0], box2[1]
 
-    for b in range(nB):
-        t = target[b]
-        nTb = len(t)  # number of targets
-        if nTb == 0:
-            continue
+    # Intersection area
+    inter_area = torch.min(w1, w2) * torch.min(h1, h2)
 
-        gxy, gwh = t[:, 1:3] * nG, t[:, 3:5] * nG
+    # Union Area
+    union_area = (w1 * h1 + 1e-16) + w2 * h2 - inter_area
 
-        # Get grid box indices and prevent overflows (i.e. 13.01 on 13 anchors)
-        gi, gj = torch.clamp(gxy.long(), min=0, max=nG - 1).t()
+    return inter_area / union_area  # iou
 
-        # iou of targets-anchors (using wh only)
-        box1 = gwh
-        box2 = anchor_vec.unsqueeze(1)
-        inter_area = torch.min(box1, box2).prod(2)
-        iou = inter_area / (box1.prod(1) + box2.prod(2) - inter_area + 1e-16)
 
+def compute_loss(p, targets):  # predictions, targets
+    FT = torch.cuda.FloatTensor if p[0].is_cuda else torch.FloatTensor
+    loss, lxy, lwh, lcls, lconf = FT([0]), FT([0]), FT([0]), FT([0]), FT([0])
+    txy, twh, tcls, tconf, indices = targets
+    MSE = nn.MSELoss()
+    CE = nn.CrossEntropyLoss()
+    BCE = nn.BCEWithLogitsLoss()
 
-        # Select best iou_pred and anchor
-        iou_best, a = iou.max(0)  # best anchor [0-2] for each target
+    # Compute losses
+    # gp = [x.numel() for x in tconf]  # grid points
+    for i, pi0 in enumerate(p):  # layer i predictions, i
+        b, a, gj, gi = indices[i]  # image, anchor, gridx, gridy
 
-        # Select best unique target-anchor combinations
-        if nTb > 1:
-            iou_order = torch.argsort(-iou_best)  # best to worst
+        # Compute losses
+        k = 1  # nT / bs
+        if len(b) > 0:
+            pi = pi0[b, a, gj, gi]  # predictions closest to anchors
+            lxy += k * MSE(torch.sigmoid(pi[..., 0:2]), txy[i])  # xy
+            lwh += k * MSE(pi[..., 2:4], twh[i])  # wh
+            lcls += (k / 4) * CE(pi[..., 5:], tcls[i])
 
-            # Unique anchor selection
-            u = torch.stack((gi, gj, a), 0)[:, iou_order]
-            # _, first_unique = np.unique(u, axis=1, return_index=True)  # first unique indices
-            first_unique = return_torch_unique_index(u, torch.unique(u, dim=1))  # torch alternative
+        # pos_weight = FT([gp[i] / min(gp) * 4.])
+        # BCE = nn.BCEWithLogitsLoss(pos_weight=pos_weight)
+        lconf += (k * 64) * BCE(pi0[..., 4], tconf[i])
+    loss = lxy + lwh + lconf + lcls
 
-            i = iou_order[first_unique]
-            # best anchor must share significant commonality (iou) with target
-            i = i[iou_best[i] > 0.10]  # TODO: examine arbitrary threshold
-            if len(i) == 0:
-                continue
+    # Add to dictionary
+    d = defaultdict(float)
+    losses = [loss.item(), lxy.item(), lwh.item(), lconf.item(), lcls.item()]
+    for name, x in zip(['total', 'xy', 'wh', 'conf', 'cls'], losses):
+        d[name] = x
 
-            a, gj, gi, t = a[i], gj[i], gi[i], t[i]
-            if len(t.shape) == 1:
-                t = t.view(1, 5)
-        else:
-            if iou_best < 0.10:
-                continue
+    return loss, d
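Inside `compute_loss()`, the per-layer `indices` tuple pulls the handful of predictions responsible for targets out of the full grid with a single advanced-indexing gather; illustrated with made-up indices:

```python
import torch

pi0 = torch.randn(4, 3, 13, 13, 85)  # one layer output: (bs, nA, nG, nG, 5 + nC)
b = torch.tensor([0, 2])    # image index of each matched target
a = torch.tensor([1, 0])    # matched anchor index
gj = torch.tensor([6, 3])   # grid row (y)
gi = torch.tensor([7, 9])   # grid column (x)

pi = pi0[b, a, gj, gi]      # (2, 85): one prediction vector per matched target
```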
-        tc, gxy, gwh = t[:, 0].long(), t[:, 1:3] * nG, t[:, 3:5] * nG
+
+def build_targets(model, targets, pred):
+    # targets = [image, class, x, y, w, h]
+    if isinstance(model, nn.DataParallel):
+        model = model.module
+    yolo_layers = get_yolo_layers(model)
+
+    # anchors = closest_anchor(model, targets)  # [layer, anchor, i, j]
+    txy, twh, tcls, tconf, indices = [], [], [], [], []
+    for i, layer in enumerate(yolo_layers):
+        nG = model.module_list[layer][0].nG  # grid size
+        anchor_vec = model.module_list[layer][0].anchor_vec
+
+        # iou of targets-anchors
+        gwh = targets[:, 4:6] * nG
+        iou = [wh_iou(x, gwh) for x in anchor_vec]
+        iou, a = torch.stack(iou, 0).max(0)  # best iou and anchor
+
+        # reject below threshold ious (OPTIONAL)
+        j = iou > 0.01
+        t, a, gwh = targets[j], a[j], gwh[j]
+
+        # Indices
+        b, c = t[:, 0:2].long().t()  # target image, class
+        gxy = t[:, 2:4] * nG
+        gi, gj = gxy.long().t()  # grid_i, grid_j
+        indices.append((b, a, gj, gi))
 
         # XY coordinates
-        txy[b, a, gj, gi] = gxy - gxy.floor()
+        txy.append(gxy - gxy.floor())
 
         # Width and height
-        twh[b, a, gj, gi] = torch.log(gwh / anchor_vec[a])  # yolo method
-        # twh[b, a, gj, gi] = torch.sqrt(gwh / anchor_vec[a]) / 2  # power method
+        twh.append(torch.log(gwh / anchor_vec[a]))  # yolo method
+        # twh.append(torch.sqrt(gwh / anchor_vec[a]) / 2)  # power method
 
-        # One-hot encoding of label
-        tcls[b, a, gj, gi, tc] = 1
-        tconf[b, a, gj, gi] = 1
+        # Class
+        tcls.append(c)
 
-    return txy, twh, tconf, tcls
+        # Conf
+        tci = torch.zeros_like(pred[i][..., 0])
+        tci[b, a, gj, gi] = 1  # conf
+        tconf.append(tci)
+
+    return txy, twh, tcls, tconf, indices
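The anchor assignment in `build_targets()` reduces to a width/height-only IoU between each target and the layer's anchor templates; a sketch with illustrative numbers (assuming `wh_iou` from above):

```python
import torch
from utils.utils import wh_iou

anchor_vec = torch.tensor([[3.6, 2.8], [4.9, 6.2], [11.7, 10.1]])  # anchors / stride (illustrative)
gwh = torch.tensor([[4.0, 3.0], [10.0, 9.0]])                      # target wh in grid units

iou = torch.stack([wh_iou(x, gwh) for x in anchor_vec], 0)  # (nA, nT) IoU matrix
iou, a = iou.max(0)  # best IoU and anchor per target -> a = tensor([0, 2])
```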
 
 
 def non_max_suppression(prediction, conf_thres=0.5, nms_thres=0.4):
@@ -314,34 +338,6 @@ def non_max_suppression(prediction, conf_thres=0.5, nms_thres=0.4):
 
     output = [None for _ in range(len(prediction))]
     for image_i, pred in enumerate(prediction):
-        # Filter out confidence scores below threshold
-        # Get score and class with highest confidence
-
-        # cross-class NMS (experimental)
-        cross_class_nms = False
-        if cross_class_nms:
-            a = pred.clone()
-            _, indices = torch.sort(-a[:, 4], 0)  # sort best to worst
-            a = a[indices]
-            radius = 30  # area to search for cross-class ious
-            for i in range(len(a)):
-                if i >= len(a) - 1:
-                    break
-
-                close = (torch.abs(a[i, 0] - a[i + 1:, 0]) < radius) & (torch.abs(a[i, 1] - a[i + 1:, 1]) < radius)
-                close = close.nonzero()
-
-                if len(close) > 0:
-                    close = close + i + 1
-                    iou = bbox_iou(a[i:i + 1, :4], a[close.squeeze(), :4].reshape(-1, 4), x1y1x2y2=False)
-                    bad = close[iou > nms_thres]
-
-                    if len(bad) > 0:
-                        mask = torch.ones(len(a)).type(torch.ByteTensor)
-                        mask[bad] = 0
-                        a = a[mask]
-                        pred = a
-
         # Experiment: Prior class size rejection
         # x, y, w, h = pred[:, 0], pred[:, 1], pred[:, 2], pred[:, 3]
         # a = w * h  # area
@@ -355,6 +351,7 @@ def non_max_suppression(prediction, conf_thres=0.5, nms_thres=0.4):
         #     shape_likelihood[:, c] =
         #         multivariate_normal.pdf(x, mean=mat['class_mu'][c, :2], cov=mat['class_cov'][c, :2, :2])
 
+        # Filter out confidence scores below threshold
         class_prob, class_pred = torch.max(F.softmax(pred[:, 5:], 1), 1)
         v = pred[:, 4] > conf_thres
         v = v.nonzero().squeeze()
@@ -376,9 +373,7 @@ def non_max_suppression(prediction, conf_thres=0.5, nms_thres=0.4):
         # Detections ordered as (x1, y1, x2, y2, obj_conf, class_prob, class_pred)
         detections = torch.cat((pred[:, :5], class_prob.float().unsqueeze(1), class_pred.float().unsqueeze(1)), 1)
         # Iterate through all predicted classes
-        unique_labels = detections[:, -1].cpu().unique()
-        if prediction.is_cuda:
-            unique_labels = unique_labels.cuda(prediction.device)
+        unique_labels = detections[:, -1].cpu().unique().to(prediction.device)
 
         nms_style = 'OR'  # 'OR' (default), 'AND', 'MERGE' (experimental)
         for c in unique_labels:
@@ -393,15 +388,15 @@ def non_max_suppression(prediction, conf_thres=0.5, nms_thres=0.4):
             ind = list(range(len(dc)))
             if nms_style == 'OR':  # default
                 while len(ind):
-                    di = dc[ind[0]:ind[0] + 1]
-                    det_max.append(di)  # save highest conf detection
-                    reject = bbox_iou(di, dc[ind]) > nms_thres
+                    j = ind[0]
+                    det_max.append(dc[j:j + 1])  # save highest conf detection
+                    reject = bbox_iou(dc[j], dc[ind]) > nms_thres
                     [ind.pop(i) for i in reversed(reject.nonzero())]
 
-                # while dc.shape[0]:  # SLOWER
+                # while dc.shape[0]:  # SLOWER METHOD
                 #     det_max.append(dc[:1])  # save highest conf detection
                 #     if len(dc) == 1:  # Stop if we're at the last detection
                 #         break
-                #     iou = bbox_iou(dc[:1], dc[1:])  # iou with other boxes
+                #     iou = bbox_iou(dc[0], dc[1:])  # iou with other boxes
                 #     dc = dc[1:][iou < nms_thres]  # remove ious > threshold
 
                 # Image Total P R mAP
@@ -409,14 +404,14 @@ def non_max_suppression(prediction, conf_thres=0.5, nms_thres=0.4):
 
             elif nms_style == 'AND':  # requires overlap, single boxes erased
                 while len(dc) > 1:
-                    iou = bbox_iou(dc[:1], dc[1:])  # iou with other boxes
+                    iou = bbox_iou(dc[0], dc[1:])  # iou with other boxes
                     if iou.max() > 0.5:
                         det_max.append(dc[:1])
                     dc = dc[1:][iou < nms_thres]  # remove ious > threshold
 
             elif nms_style == 'MERGE':  # weighted mixture box
                 while len(dc) > 0:
-                    iou = bbox_iou(dc[:1], dc[0:])  # iou with other boxes
+                    iou = bbox_iou(dc[0], dc[0:])  # iou with other boxes
                     i = iou > nms_thres
 
                     weights = dc[i, 4:5] * dc[i, 5:6]
@@ -435,6 +430,11 @@ def non_max_suppression(prediction, conf_thres=0.5, nms_thres=0.4):
     return output
 
 
+def get_yolo_layers(model):
+    bool_vec = [x['type'] == 'yolo' for x in model.module_defs]
+    return [i for i, x in enumerate(bool_vec) if x]  # [82, 94, 106] for yolov3
+
+
 def return_torch_unique_index(u, uv):
     n = uv.shape[1]  # number of columns
     first_unique = torch.zeros(n, device=u.device).long()
@@ -446,15 +446,13 @@ def return_torch_unique_index(u, uv):
 
 def strip_optimizer_from_checkpoint(filename='weights/best.pt'):
     # Strip optimizer from *.pt files for lighter files (reduced by 2/3 size)
-
     a = torch.load(filename, map_location='cpu')
     a['optimizer'] = []
     torch.save(a, filename.replace('.pt', '_lite.pt'))
 
 
 def coco_class_count(path='../coco/labels/train2014/'):
-    # histogram of occurrences per class
-
+    # Histogram of occurrences per class
     nC = 80  # number classes
     x = np.zeros(nC, dtype='int32')
     files = sorted(glob.glob('%s/*.*' % path))
@@ -465,8 +463,7 @@ def coco_class_count(path='../coco/labels/train2014/'):
 
 
 def coco_only_people(path='../coco/labels/val2014/'):
-    # find images with only people
-
+    # Find images with only people
     files = sorted(glob.glob('%s/*.*' % path))
     for i, file in enumerate(files):
         labels = np.loadtxt(file, dtype=np.float32).reshape(-1, 5)
@@ -474,19 +471,20 @@ def coco_only_people(path='../coco/labels/val2014/'):
         print(labels.shape[0], file)
 
 
-def plot_results():
+def plot_results(start=0):
     # Plot YOLO training results file 'results.txt'
-    # import os; os.system('wget https://storage.googleapis.com/ultralytics/yolov3/results_v1.txt')
+    # import os; os.system('wget https://storage.googleapis.com/ultralytics/yolov3/results_v3.txt')
+    # from utils.utils import *; plot_results()
     plt.figure(figsize=(14, 7))
-    s = ['X + Y', 'Width + Height', 'Confidence', 'Classification', 'Total Loss', 'mAP', 'Recall', 'Precision']
+    s = ['X + Y', 'Width + Height', 'Confidence', 'Classification', 'Total Loss', 'Precision', 'Recall', 'mAP']
     files = sorted(glob.glob('results*.txt'))
     for f in files:
         results = np.loadtxt(f, usecols=[2, 3, 4, 5, 6, 9, 10, 11]).T  # column 11 is mAP
         x = range(1, results.shape[1])
         for i in range(8):
             plt.subplot(2, 4, i + 1)
-            plt.plot(x, results[i, x], marker='.', label=f)
+            plt.plot(results[i, x[start:]], marker='.', label=f)
             plt.title(s[i])
             if i == 0:
                 plt.legend()