diff --git a/README.md b/README.md
index dfa50321..e323ea40 100755
--- a/README.md
+++ b/README.md
@@ -30,6 +30,7 @@ Python 3.7 or later with the following `pip3 install -U -r requirements.txt` pac
 # Tutorials
 
+* [GCP Quickstart](https://github.com/ultralytics/yolov3/wiki/GCP-Quickstart)
 * [Transfer Learning](https://github.com/ultralytics/yolov3/wiki/Example:-Transfer-Learning)
 * [Train Single Image](https://github.com/ultralytics/yolov3/wiki/Example:-Train-Single-Image)
 * [Train Single Class](https://github.com/ultralytics/yolov3/wiki/Example:-Train-Single-Class)
@@ -67,13 +68,16 @@ HS**V** Intensity | +/- 50%
 https://cloud.google.com/deep-learning-vm/
 **Machine type:** n1-standard-8 (8 vCPUs, 30 GB memory)
 **CPU platform:** Intel Skylake
-**GPUs:** 1-4x P100 ($0.493/hr), 1-8x V100 ($0.803/hr)
+**GPUs:** K80 ($0.198/hr), P4 ($0.279/hr), T4 ($0.353/hr), P100 ($0.493/hr), V100 ($0.803/hr)
 **HDD:** 100 GB SSD
 **Dataset:** COCO train 2014
 
 GPUs | `batch_size` | batch time | epoch time | epoch cost
 --- |---| --- | --- | ---
  | (images) | (s/batch) | |
+1 K80 | 16 | 1.43s | 175min | $0.58
+1 P4 | 8 | 0.51s | 125min | $0.58
+1 T4 | 16 | 0.78s | 94min | $0.55
 1 P100 | 16 | 0.39s | 48min | $0.39
 2 P100 | 32 | 0.48s | 29min | $0.47
 4 P100 | 64 | 0.65s | 20min | $0.65
@@ -108,13 +112,32 @@ Run `detect.py` with `webcam=True` to show a live webcam feed.
 
 - Use `test.py --weights weights/yolov3.weights` to test the official YOLOv3 weights.
 - Use `test.py --weights weights/latest.pt` to test the latest training results.
-- Compare to official darknet results from https://arxiv.org/abs/1804.02767.
+- Compare to darknet published results https://arxiv.org/abs/1804.02767.
-
- | ultralytics/yolov3 | darknet
---- | ---| ---
-YOLOv3-320 | 51.3 | 51.5
-YOLOv3-416 | 54.9 | 55.3
-YOLOv3-608 | 57.9 | 57.9
+
+ | [ultralytics/yolov3](https://github.com/ultralytics/yolov3) with `pycocotools` | [darknet/yolov3](https://arxiv.org/abs/1804.02767)
+--- | --- | ---
+YOLOv3-320 | 51.8 | 51.5
+YOLOv3-416 | 55.4 | 55.3
+YOLOv3-608 | 58.2 | 57.9
 
 ``` bash
 sudo rm -rf yolov3 && git clone https://github.com/ultralytics/yolov3
@@ -123,34 +146,42 @@ sudo rm -rf cocoapi && git clone https://github.com/cocodataset/cocoapi && cd co
 cd yolov3
 python3 test.py --save-json --conf-thres 0.001 --img-size 416
-Namespace(batch_size=32, cfg='cfg/yolov3.cfg', conf_thres=0.001, data_cfg='cfg/coco.data', img_size=416, iou_thres=0.5, nms_thres=0.45, save_json=True, weights='weights/yolov3.weights')
- Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.308
- Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.549
- Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.310
- Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.141
- Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.334
- Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.454
- Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.267
- Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.403
- Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.428
- Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.237
- Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.464
- Average Recall     (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.585
+Namespace(batch_size=32, cfg='cfg/yolov3.cfg', conf_thres=0.001, data_cfg='cfg/coco.data', img_size=416, iou_thres=0.5, nms_thres=0.5, save_json=True, weights='weights/yolov3.weights')
+Using cuda _CudaDeviceProperties(name='Tesla V100-SXM2-16GB', major=7, minor=0, total_memory=16130MB, multi_processor_count=80)
+      Image      Total          P          R        mAP
+Calculating mAP: 100%|█████████████████████████████████| 157/157 [08:34<00:00,  2.53s/it]
+       5000       5000     0.0896      0.756      0.555
+ Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.312
+ Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.554
+ Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.317
+ Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.145
+ Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.343
+ Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.452
+ Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.268
+ Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.411
+ Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.435
+ Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.244
+ Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.477
+ Average Recall     (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.587
 
 python3 test.py --save-json --conf-thres 0.001 --img-size 608 --batch-size 16
-Namespace(batch_size=16, cfg='cfg/yolov3.cfg', conf_thres=0.001, data_cfg='cfg/coco.data', img_size=608, iou_thres=0.5, nms_thres=0.45, save_json=True, weights='weights/yolov3.weights')
- Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.328
- Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.579
- Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.335
- Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.190
- Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.357
- Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.428
- Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.279
- Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.429
- Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.456
- Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.299
- Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.483
- Average Recall     (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.572
+Namespace(batch_size=16, cfg='cfg/yolov3.cfg', conf_thres=0.001, data_cfg='cfg/coco.data', img_size=608, iou_thres=0.5, nms_thres=0.5, save_json=True, weights='weights/yolov3.weights')
+Using cuda _CudaDeviceProperties(name='Tesla V100-SXM2-16GB', major=7, minor=0, total_memory=16130MB, multi_processor_count=80)
+      Image      Total          P          R        mAP
+Calculating mAP: 100%|█████████████████████████████████| 313/313 [08:54<00:00,  1.55s/it]
+       5000       5000     0.0966      0.786      0.579
+ Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.331
+ Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.582
+ Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.344
+ Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.198
+ Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.362
+ Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.427
+ Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.281
+ Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.437
+ Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.463
+ Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.309
+ Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.494
+ Average Recall     (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.577
 ```
 
 # Contact
diff --git a/detect.py b/detect.py
index 32c41c55..36e2138e 100644
--- a/detect.py
+++ b/detect.py
@@ -14,7 +14,7 @@ def detect(
         output='output',  # output folder
         img_size=416,
         conf_thres=0.3,
-        nms_thres=0.45,
+        nms_thres=0.5,
         save_txt=False,
         save_images=True,
         webcam=False
@@ -29,9 +29,6 @@ def detect(
 
     # Load weights
     if weights.endswith('.pt'):  # pytorch format
-        if weights.endswith('yolov3.pt') and not os.path.exists(weights):
-            if platform in ('darwin', 'linux'):  # linux/macos
-                os.system('wget https://storage.googleapis.com/ultralytics/yolov3.pt -O ' + weights)
         model.load_state_dict(torch.load(weights, map_location=device)['model'])
     else:  # darknet format
         _ = load_darknet_weights(model, weights)
@@ -63,26 +60,22 @@ def detect(
             torch.onnx.export(model, img, 'weights/model.onnx', verbose=True)
             return
         pred = model(img)
-        pred = pred[pred[:, :, 4] > conf_thres]  # remove boxes < threshold
-
-        if len(pred) > 0:
-            # Run NMS on predictions
-            detections = non_max_suppression(pred.unsqueeze(0), conf_thres, nms_thres)[0]
+        detections = non_max_suppression(pred, conf_thres, nms_thres)[0]
+        if len(detections) > 0:
 
             # Rescale boxes from 416 to true image size
             scale_coords(img_size, detections[:, :4], im0.shape).round()
 
             # Print results to screen
-            unique_classes = detections[:, -1].cpu().unique()
-            for c in unique_classes:
-                n = (detections[:, -1].cpu() == c).sum()
+            for c in detections[:, -1].unique():
+                n = (detections[:, -1] == c).sum()
                 print('%g %ss' % (n, classes[int(c)]), end=', ')
 
             # Draw bounding boxes and labels of detections
             for *xyxy, conf, cls_conf, cls in detections:
                 if save_txt:  # Write to file
                     with open(save_path + '.txt', 'a') as file:
-                        file.write(('%g ' * 6 + '\n') % (*xyxy, cls, cls_conf * conf))
+                        file.write(('%g ' * 6 + '\n') % (*xyxy, cls, conf))
 
                 # Add bbox to the image
                 label = '%s %.2f' % (classes[int(cls)], conf)
@@ -106,8 +99,8 @@ if __name__ == '__main__':
     parser.add_argument('--weights', type=str, default='weights/yolov3.weights', help='path to weights file')
    parser.add_argument('--images', type=str, default='data/samples', help='path to images')
     parser.add_argument('--img-size', type=int, default=32 * 13, help='size of each image dimension')
-    parser.add_argument('--conf-thres', type=float, default=0.50, help='object confidence threshold')
-    parser.add_argument('--nms-thres', type=float, default=0.45, help='iou threshold for non-maximum suppression')
+    parser.add_argument('--conf-thres', type=float, default=0.3, help='object confidence threshold')
+    parser.add_argument('--nms-thres', type=float, default=0.5, help='iou threshold for non-maximum suppression')
     opt = parser.parse_args()
     print(opt)
diff --git a/models.py b/models.py
index 67e8a83d..0e92c79f 100755
--- a/models.py
+++ b/models.py
@@ -1,5 +1,7 @@
 import os
 
+import torch.nn.functional as F
+
 from utils.parse_config import *
 from utils.utils import *
 
@@ -158,6 +160,8 @@ class YOLOLayer(nn.Module):
             p[..., 2:4] = torch.exp(p[..., 2:4]) * self.anchor_wh  # wh yolo method
             # p[..., 2:4] = ((torch.sigmoid(p[..., 2:4]) * 2) ** 2) * self.anchor_wh  # wh power method
             p[..., 4] = torch.sigmoid(p[..., 4])  # p_conf
+            p[..., 5:] = torch.sigmoid(p[..., 5:])  # p_class
+            # p[..., 5:] = F.softmax(p[..., 5:], dim=4)  # p_class
             p[..., :4] *= self.stride
 
             # reshape from [1, 3, 13, 13, 85] to [1, 507, 85]
diff --git a/test.py b/test.py
index c1cc3d2a..171159b6 100644
--- a/test.py
+++ b/test.py
@@ -1,6 +1,5 @@
 import argparse
 import json
-import time
 
 from torch.utils.data import DataLoader
 
@@ -12,18 +11,18 @@ from utils.utils import *
 def test(
         cfg,
         data_cfg,
-        weights,
+        weights=None,
         batch_size=16,
         img_size=416,
         iou_thres=0.5,
-        conf_thres=0.3,
-        nms_thres=0.45,
+        conf_thres=0.1,
+        nms_thres=0.5,
         save_json=False,
         model=None
 ):
-    device = torch_utils.select_device()
-
     if model is None:
+        device = torch_utils.select_device()
+
         # Initialize model
         model = Darknet(cfg, img_size).to(device)
 
@@ -33,13 +32,16 @@ def test(
         else:  # darknet format
             _ = load_darknet_weights(model, weights)
 
-    if torch.cuda.device_count() > 1:
-        model = nn.DataParallel(model)
+        if torch.cuda.device_count() > 1:
+            model = nn.DataParallel(model)
+    else:
+        device = next(model.parameters()).device
 
     # Configure run
     data_cfg = parse_data_cfg(data_cfg)
-    nC = int(data_cfg['classes'])  # number of classes (80 for COCO)
     test_path = data_cfg['valid']
+    if (os.sep + 'coco' + os.sep) in test_path:  # COCO dataset probable
+        save_json = True  # use pycocotools
 
     # Dataloader
     dataset = LoadImagesAndLabels(test_path, img_size=img_size)
     dataloader = DataLoader(dataset,
@@ -50,104 +52,111 @@ def test(
                             collate_fn=dataset.collate_fn)
 
     model.eval()
-    mean_mAP, mean_R, mean_P, seen = 0.0, 0.0, 0.0, 0
+    seen = 0
     print('%11s' * 5 % ('Image', 'Total', 'P', 'R', 'mAP'))
-    mP, mR, mAPs, TP, jdict = [], [], [], [], []
-    AP_accum, AP_accum_count = np.zeros(nC), np.zeros(nC)
+    mP, mR, mAP, mAPj = 0.0, 0.0, 0.0, 0.0
+    jdict, tdict, stats, AP, AP_class = [], [], [], [], []
     coco91class = coco80_to_coco91_class()
-    for imgs, targets, paths, shapes in tqdm(dataloader):
-        t = time.time()
+    for batch_i, (imgs, targets, paths, shapes) in enumerate(tqdm(dataloader, desc='Calculating mAP')):
         targets = targets.to(device)
         imgs = imgs.to(device)
 
         output = model(imgs)
         output = non_max_suppression(output, conf_thres=conf_thres, nms_thres=nms_thres)
 
-        # Compute average precision for each sample
-        for si, detections in enumerate(output):
+        # Per image
+        for si, pred in enumerate(output):
+            image_id = int(Path(paths[si]).stem.split('_')[-1])
             labels = targets[targets[:, 0] == si, 1:]
             seen += 1
 
-            if detections is None:
-                # If there are labels but no detections mark as zero AP
-                if len(labels) != 0:
-                    mP.append(0), mR.append(0), mAPs.append(0)
+            if pred is None:
                 continue
 
-            # Get detections sorted by decreasing confidence scores
-            detections = detections[(-detections[:, 4]).argsort()]
-
             if save_json:
+                # add to json pred dictionary
                 # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ...
-                box = detections[:, :4].clone()  # xyxy
+                box = pred[:, :4].clone()  # xyxy
                 scale_coords(img_size, box, shapes[si])  # to original shape
                 box = xyxy2xywh(box)  # xywh
                 box[:, :2] -= box[:, 2:] / 2  # xy center to top-left corner
-
-                # add to json dictionary
-                for di, d in enumerate(detections):
+                for di, d in enumerate(pred):
                     jdict.append({
-                        'image_id': int(Path(paths[si]).stem.split('_')[-1]),
+                        'image_id': image_id,
                         'category_id': coco91class[int(d[6])],
                         'bbox': [float3(x) for x in box[di]],
-                        'score': float3(d[4] * d[5])
+                        'score': float(d[4])
                     })
 
+            # if len(labels) > 0:
+            #     # add to json targets dictionary
+            #     # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], ...
+            #     box = labels[:, 1:].clone()
+            #     box[:, [0, 2]] *= shapes[si][1]  # scale width
+            #     box[:, [1, 3]] *= shapes[si][0]  # scale height
+            #     box[:, :2] -= box[:, 2:] / 2  # xy center to top-left corner
+            #     for di, d in enumerate(labels):
+            #         tdict.append({
+            #             'segmentation': [[]],
+            #             'iscrowd': 0,
+            #             'image_id': image_id,
+            #             'category_id': coco91class[int(d[0])],
+            #             'id': seen,
+            #             'bbox': [float3(x) for x in box[di]],
+            #             'area': float3(box[di][2:4].prod())
+            #         })
+
             # If no labels add number of detections as incorrect
             correct = []
+            detected = []
             if len(labels) == 0:
                 # correct.extend([0 for _ in range(len(detections))])
-                mP.append(0), mR.append(0), mAPs.append(0)
                 continue
             else:
                 # Extract target boxes as (x1, y1, x2, y2)
                 target_box = xywh2xyxy(labels[:, 1:5]) * img_size
                 target_cls = labels[:, 0]
 
-                detected = []
-                for *pred_box, conf, cls_conf, cls_pred in detections:
+                for *pred_box, conf, cls_conf, cls_pred in pred:
+                    if cls_pred not in target_cls:
+                        correct.append(0)
+                        continue
+
                     # Best iou, index between pred and targets
                     iou, bi = bbox_iou(pred_box, target_box).max(0)
 
                     # If iou > threshold and class is correct mark as correct
-                    if iou > iou_thres and cls_pred == target_cls[bi] and bi not in detected:
+                    if iou > iou_thres and bi not in detected:
                         correct.append(1)
                         detected.append(bi)
                     else:
                         correct.append(0)
 
-            # Compute Average Precision (AP) per class
-            AP, AP_class, R, P = ap_per_class(tp=np.array(correct),
-                                              conf=detections[:, 4].cpu().numpy(),
-                                              pred_cls=detections[:, 6].cpu().numpy(),
-                                              target_cls=target_cls.cpu().numpy())
+            # Convert to Numpy
+            tp = np.array(correct)
+            conf = pred[:, 4].cpu().numpy()
+            pred_cls = pred[:, 6].cpu().numpy()
+            target_cls = target_cls.cpu().numpy()
+            stats.append((tp, conf, pred_cls, target_cls))
 
-            # Accumulate AP per class
-            AP_accum_count += np.bincount(AP_class, minlength=nC)
-            AP_accum += np.bincount(AP_class, minlength=nC, weights=AP)
+    # Compute means
+    stats_np = [np.concatenate(x, 0) for x in list(zip(*stats))]
+    if len(stats_np):
+        AP, AP_class, R, P = ap_per_class(*stats_np)
+        mP, mR, mAP = P.mean(), R.mean(), AP.mean()
 
-            # Compute mean AP across all classes in this image, and append to image list
-            mP.append(P.mean())
-            mR.append(R.mean())
-            mAPs.append(AP.mean())
-
-            # Means of all images
-            mean_P = np.mean(mP)
-            mean_R = np.mean(mR)
-            mean_mAP = np.mean(mAPs)
-
-            # Print image mAP and running mean mAP
-            print(('%11s%11s' + '%11.3g' * 4 + 's') %
-                  (seen, len(dataset), mean_P, mean_R, mean_mAP, time.time() - t))
+    # Print P, R, mAP
+    print(('%11s%11s' + '%11.3g' * 3) % (seen, len(dataset), mP, mR, mAP))
 
     # Print mAP per class
-    print('\nmAP Per Class:')
-    for i, c in enumerate(load_classes(data_cfg['names'])):
-        if AP_accum_count[i]:
-            print('%15s: %-.4f' % (c, AP_accum[i] / (AP_accum_count[i])))
+    if len(stats_np):
+        print('\nmAP Per Class:')
+        names = load_classes(data_cfg['names'])
+        for c, a in zip(AP_class, AP):
+            print('%15s: %-.4f' % (names[c], a))
 
     # Save JSON
-    if save_json:
+    if save_json and mAP and len(jdict):
         imgIds = [int(Path(x).stem.split('_')[-1]) for x in dataset.img_files]
         with open('results.json', 'w') as file:
             json.dump(jdict, file)
@@ -157,16 +166,20 @@ def test(
 
         # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb
         cocoGt = COCO('../coco/annotations/instances_val2014.json')  # initialize COCO ground truth api
-        cocoDt = cocoGt.loadRes('results.json')  # initialize COCO detections api
+        cocoDt = cocoGt.loadRes('results.json')  # initialize COCO pred api
 
         cocoEval = COCOeval(cocoGt, cocoDt, 'bbox')
        cocoEval.params.imgIds = imgIds  # [:32]  # only evaluate these images
         cocoEval.evaluate()
         cocoEval.accumulate()
         cocoEval.summarize()
+        mAP = cocoEval.stats[1]  # update mAP to pycocotools mAP
+
+    # F1 score = harmonic mean of precision and recall
+    # F1 = 2 * (mP * mR) / (mP + mR)
 
     # Return mAP
-    return mean_P, mean_R, mean_mAP
+    return mP, mR, mAP
 
 
 if __name__ == '__main__':
@@ -176,8 +189,8 @@ if __name__ == '__main__':
     parser.add_argument('--data-cfg', type=str, default='cfg/coco.data', help='coco.data file path')
     parser.add_argument('--weights', type=str, default='weights/yolov3.weights', help='path to weights file')
     parser.add_argument('--iou-thres', type=float, default=0.5, help='iou threshold required to qualify as detected')
-    parser.add_argument('--conf-thres', type=float, default=0.3, help='object confidence threshold')
-    parser.add_argument('--nms-thres', type=float, default=0.45, help='iou threshold for non-maximum suppression')
+    parser.add_argument('--conf-thres', type=float, default=0.001, help='object confidence threshold')
+    parser.add_argument('--nms-thres', type=float, default=0.5, help='iou threshold for non-maximum suppression')
     parser.add_argument('--save-json', action='store_true', help='save a cocoapi-compatible JSON results file')
     parser.add_argument('--img-size', type=int, default=416, help='size of each image dimension')
     opt = parser.parse_args()
diff --git a/train.py b/train.py
index 30a07f3e..c714bc27 100644
--- a/train.py
+++ b/train.py
@@ -40,7 +40,7 @@ def train(
 
     # Optimizer
     lr0 = 0.001  # initial learning rate
-    optimizer = torch.optim.SGD(model.parameters(), lr=lr0, momentum=.9, weight_decay=0.0005)
+    optimizer = torch.optim.SGD(model.parameters(), lr=lr0, momentum=0.9, weight_decay=0.0005)
 
     cutoff = -1  # backbone reaches to cutoff layer
     start_epoch = 0
@@ -119,9 +119,9 @@ def train(
             if plot_images:
                 fig = plt.figure(figsize=(10, 10))
                 for ip in range(batch_size):
-                    labels = xywh2xyxy(targets[targets[:, 0] == ip, 2:6]).numpy() * img_size
+                    boxes = xywh2xyxy(targets[targets[:, 0] == ip, 2:6]).numpy().T * img_size
                     plt.subplot(4, 4, ip + 1).imshow(imgs[ip].numpy().transpose(1, 2, 0))
-                    plt.plot(labels[:, [0, 2, 2, 0, 0]].T, labels[:, [1, 1, 3, 3, 1]].T, '.-')
+                    plt.plot(boxes[[0, 2, 2, 0, 0]], boxes[[1, 1, 3, 3, 1]], '.-')
                     plt.axis('off')
                 fig.tight_layout()
                 fig.savefig('batch_%g.jpg' % i, dpi=fig.dpi)
@@ -170,7 +170,7 @@ def train(
             best_loss = mloss['total']
 
         # Save training results
-        save = True
+        save = False
         if save:
             # Save latest checkpoint
             checkpoint = {'epoch': epoch,
@@ -190,11 +190,11 @@ def train(
 
         # Calculate mAP
         with torch.no_grad():
-            P, R, mAP = test.test(cfg, data_cfg, weights=latest, batch_size=batch_size, img_size=img_size)
+            results = test.test(cfg, data_cfg, batch_size=batch_size, img_size=img_size, model=model)
 
         # Write epoch results
         with open('results.txt', 'a') as file:
-            file.write(s + '%11.3g' * 3 % (P, R, mAP) + '\n')
+            file.write(s + '%11.3g' * 3 % results + '\n')  # append P, R, mAP
 
 
 if __name__ == '__main__':
diff --git a/utils/gcp.sh b/utils/gcp.sh
index 3d398a46..e644ef7e 100755
--- a/utils/gcp.sh
+++ b/utils/gcp.sh
@@ -10,8 +10,8 @@ sudo reboot now
 
 # Re-clone
 sudo rm -rf yolov3
-git clone https://github.com/ultralytics/yolov3  # master
-# git clone -b multi_gpu --depth 1 https://github.com/ultralytics/yolov3  # branch
+# git clone https://github.com/ultralytics/yolov3  # master
+git clone -b map_update --depth 1 https://github.com/ultralytics/yolov3 yolov3  # branch
 cp -r weights yolov3
 cp -r cocoapi/PythonAPI/pycocotools yolov3
 cd yolov3
@@ -26,11 +26,11 @@ python3 train.py --resume
 python3 detect.py
 
 # Test
-python3 detect.py --save-json --conf-thres 0.001 --img-size 416
+python3 test.py --save-json
 
 # Git pull
 git pull https://github.com/ultralytics/yolov3  # master
-git pull https://github.com/ultralytics/yolov3 multi_gpu  # branch
+git pull https://github.com/ultralytics/yolov3 map_update  # branch
 
 # Test Darknet training
 python3 test.py --weights ../darknet/backup/yolov3.backup
@@ -40,10 +40,16 @@ gsutil cp yolov3/weights/latest1gpu.pt gs://ultralytics
 
 # Copy latest.pt FROM bucket
 gsutil cp gs://ultralytics/latest.pt yolov3/weights/latest.pt
-wget https://storage.googleapis.com/ultralytics/latest.pt -O weights/latest.pt
+wget https://storage.googleapis.com/ultralytics/yolov3/latest_v1_0.pt -O weights/latest_v1_0.pt
+wget https://storage.googleapis.com/ultralytics/yolov3/best_v1_0.pt -O weights/best_v1_0.pt
 
-# Trade Studies
-sudo rm -rf yolov3 && git clone https://github.com/ultralytics/yolov3
+# Debug/Development
+sudo rm -rf yolov3
+# git clone https://github.com/ultralytics/yolov3  # master
+git clone -b map_update --depth 1 https://github.com/ultralytics/yolov3 yolov3  # branch
 cp -r weights yolov3
-cd yolov3 && python3 train.py --batch-size 16 --epochs 1
-sudo shutdown
+cp -r cocoapi/PythonAPI/pycocotools yolov3
+cd yolov3
+
+#git pull https://github.com/ultralytics/yolov3 map_update  # branch
+python3 test.py --img-size 320
diff --git a/utils/utils.py b/utils/utils.py
index f33ac340..dbc9e82a 100755
--- a/utils/utils.py
+++ b/utils/utils.py
@@ -7,7 +7,6 @@ import matplotlib.pyplot as plt
 import numpy as np
 import torch
 import torch.nn as nn
-import torch.nn.functional as F
 
 from utils import torch_utils
 
@@ -106,10 +105,10 @@ def xyxy2xywh(x):
 def xywh2xyxy(x):
     # Convert bounding box format from [x, y, w, h] to [x1, y1, x2, y2]
     y = torch.zeros_like(x) if isinstance(x, torch.Tensor) else np.zeros_like(x)
-    y[:, 0] = (x[:, 0] - x[:, 2] / 2)
-    y[:, 1] = (x[:, 1] - x[:, 3] / 2)
-    y[:, 2] = (x[:, 0] + x[:, 2] / 2)
-    y[:, 3] = (x[:, 1] + x[:, 3] / 2)
+    y[:, 0] = x[:, 0] - x[:, 2] / 2
+    y[:, 1] = x[:, 1] - x[:, 3] / 2
+    y[:, 2] = x[:, 0] + x[:, 2] / 2
+    y[:, 3] = x[:, 1] + x[:, 3] / 2
     return y
 
@@ -142,25 +141,25 @@ def ap_per_class(tp, conf, pred_cls, target_cls):
     tp, conf, pred_cls = tp[i], conf[i], pred_cls[i]
 
     # Find unique classes
-    unique_classes = np.unique(np.concatenate((pred_cls, target_cls), 0))
+    unique_classes = np.unique(target_cls)
 
     # Create Precision-Recall curve and compute AP for each class
     ap, p, r = [], [], []
     for c in unique_classes:
         i = pred_cls == c
-        n_gt = sum(target_cls == c)  # Number of ground truth objects
-        n_p = sum(i)  # Number of predicted objects
+        n_gt = (target_cls == c).sum()  # Number of ground truth objects
+        n_p = i.sum()  # Number of predicted objects
 
-        if (n_p == 0) and (n_gt == 0):
+        if n_p == 0 and n_gt == 0:
             continue
-        elif (n_p == 0) or (n_gt == 0):
+        elif n_p == 0 or n_gt == 0:
             ap.append(0)
             r.append(0)
             p.append(0)
         else:
             # Accumulate FPs and TPs
-            fpc = np.cumsum(1 - tp[i])
-            tpc = np.cumsum(tp[i])
+            fpc = (1 - tp[i]).cumsum()
+            tpc = (tp[i]).cumsum()
 
             # Recall
             recall_curve = tpc / (n_gt + 1e-16)
@@ -328,15 +327,18 @@ def build_targets(model, targets):
     return txy, twh, tcls, indices
 
 
+# @profile
 def non_max_suppression(prediction, conf_thres=0.5, nms_thres=0.4):
     """
     Removes detections with lower object confidence score than 'conf_thres'
     Non-Maximum Suppression to further filter detections.
    Returns detections with shape:
-        (x1, y1, x2, y2, object_conf, class_score, class_pred)
+        (x1, y1, x2, y2, object_conf, class_conf, class)
     """
 
-    output = [None for _ in range(len(prediction))]
+    min_wh = 2  # (pixels) minimum box width and height
+
+    output = [None] * len(prediction)
     for image_i, pred in enumerate(prediction):
         # Experiment: Prior class size rejection
         # x, y, w, h = pred[:, 0], pred[:, 1], pred[:, 2], pred[:, 3]
@@ -352,56 +354,53 @@ def non_max_suppression(prediction, conf_thres=0.5, nms_thres=0.4):
         # multivariate_normal.pdf(x, mean=mat['class_mu'][c, :2], cov=mat['class_cov'][c, :2, :2])
 
         # Filter out confidence scores below threshold
-        class_prob, class_pred = torch.max(F.softmax(pred[:, 5:], 1), 1)
-        v = pred[:, 4] > conf_thres
-        v = v.nonzero().squeeze()
-        if len(v.shape) == 0:
-            v = v.unsqueeze(0)
+        class_conf, class_pred = pred[:, 5:].max(1)
+        # pred[:, 4] *= class_conf
 
-        pred = pred[v]
-        class_prob = class_prob[v]
-        class_pred = class_pred[v]
+        i = (pred[:, 4] > conf_thres) & (pred[:, 2] > min_wh) & (pred[:, 3] > min_wh)
+        pred = pred[i]
 
         # If none are remaining => process next image
-        nP = pred.shape[0]
-        if not nP:
+        if len(pred) == 0:
             continue
 
-        # From (center x, center y, width, height) to (x1, y1, x2, y2)
+        # Select predicted classes
+        class_conf = class_conf[i]
+        class_pred = class_pred[i].unsqueeze(1).float()
+
+        # Box (center x, center y, width, height) to (x1, y1, x2, y2)
         pred[:, :4] = xywh2xyxy(pred[:, :4])
+        pred[:, 4] *= class_conf  # improves mAP from 0.549 to 0.551
 
-        # Detections ordered as (x1, y1, x2, y2, obj_conf, class_prob, class_pred)
-        detections = torch.cat((pred[:, :5], class_prob.float().unsqueeze(1), class_pred.float().unsqueeze(1)), 1)
-
-        # Iterate through all predicted classes
-        unique_labels = detections[:, -1].cpu().unique().to(prediction.device)
+        # Detections ordered as (x1y1x2y2, obj_conf, class_conf, class_pred)
+        pred = torch.cat((pred[:, :5], class_conf.unsqueeze(1), class_pred), 1)
 
-        nms_style = 'OR'  # 'OR' (default), 'AND', 'MERGE' (experimental)
-        for c in unique_labels:
-            # Get the detections with class c
-            dc = detections[detections[:, -1] == c]
-            # Sort the detections by maximum object confidence
-            _, conf_sort_index = torch.sort(dc[:, 4] * dc[:, 5], descending=True)
-            dc = dc[conf_sort_index]
+        # Get detections sorted by decreasing confidence scores
+        pred = pred[(-pred[:, 4]).argsort()]
+
+        det_max = []
+        nms_style = 'MERGE'  # 'OR' (default), 'AND', 'MERGE' (experimental)
+        for c in pred[:, -1].unique():
+            dc = pred[pred[:, -1] == c]  # select class c
             dc = dc[:min(len(dc), 100)]  # limit to first 100 boxes: https://github.com/ultralytics/yolov3/issues/117
 
             # Non-maximum suppression
-            det_max = []
-            ind = list(range(len(dc)))
             if nms_style == 'OR':  # default
-                while len(ind):
-                    j = ind[0]
-                    det_max.append(dc[j:j + 1])  # save highest conf detection
-                    reject = bbox_iou(dc[j], dc[ind]) > nms_thres
-                    [ind.pop(i) for i in reversed(reject.nonzero())]
-                # while dc.shape[0]:  # SLOWER METHOD
-                #     det_max.append(dc[:1])  # save highest conf detection
-                #     if len(dc) == 1:  # Stop if we're at the last detection
-                #         break
-                #     iou = bbox_iou(dc[0], dc[1:])  # iou with other boxes
-                #     dc = dc[1:][iou < nms_thres]  # remove ious > threshold
+                # METHOD1
+                # ind = list(range(len(dc)))
+                # while len(ind):
+                #     j = ind[0]
+                #     det_max.append(dc[j:j + 1])  # save highest conf detection
+                #     reject = (bbox_iou(dc[j], dc[ind]) > nms_thres).nonzero()
+                #     [ind.pop(i) for i in reversed(reject)]
 
-                # Image      Total          P          R        mAP
-                #  4964       5000      0.629      0.594      0.586
+                # METHOD2
+                while dc.shape[0]:
+                    det_max.append(dc[:1])  # save highest conf detection
+                    if len(dc) == 1:  # Stop if we're at the last detection
+                        break
+                    iou = bbox_iou(dc[0], dc[1:])  # iou with other boxes
+                    dc = dc[1:][iou < nms_thres]  # remove ious > threshold
 
             elif nms_style == 'AND':  # requires overlap, single boxes erased
                 while len(dc) > 1:
@@ -411,22 +410,16 @@ def non_max_suppression(prediction, conf_thres=0.5, nms_thres=0.4):
                     dc = dc[1:][iou < nms_thres]  # remove ious > threshold
 
             elif nms_style == 'MERGE':  # weighted mixture box
-                while len(dc) > 0:
-                    iou = bbox_iou(dc[0], dc[0:])  # iou with other boxes
-                    i = iou > nms_thres
-
-                    weights = dc[i, 4:5] * dc[i, 5:6]
+                while len(dc):
+                    i = bbox_iou(dc[0], dc) > nms_thres  # iou with other boxes
+                    weights = dc[i, 4:5]
                     dc[0, :4] = (weights * dc[i, :4]).sum(0) / weights.sum()
                     det_max.append(dc[:1])
-                    dc = dc[iou < nms_thres]
+                    dc = dc[i == 0]
 
-            # Image      Total          P          R        mAP
-            #  4964       5000      0.633      0.598      0.589  # normal
-
-        if len(det_max) > 0:
-            det_max = torch.cat(det_max)
-            # Add max detections to outputs
-            output[image_i] = det_max if output[image_i] is None else torch.cat((output[image_i], det_max))
+        if len(det_max):
+            det_max = torch.cat(det_max)  # concatenate
+            output[image_i] = det_max[(-det_max[:, 4]).argsort()]  # sort
 
     return output
 
@@ -463,20 +456,42 @@ def coco_only_people(path='../coco/labels/val2014/'):
             print(labels.shape[0], file)
 
 
-def plot_results(start=0):
+def plot_wh_methods():  # from utils.utils import *; plot_wh_methods()
+    # Compares the two methods for width-height anchor multiplication
+    # https://github.com/ultralytics/yolov3/issues/168
+    x = np.arange(-4.0, 4.0, .1)
+    ya = np.exp(x)
+    yb = (torch.sigmoid(torch.from_numpy(x)).numpy() * 2)
+
+    fig = plt.figure(figsize=(6, 3), dpi=150)
+    plt.plot(x, ya, '.-', label='yolo method')
+    plt.plot(x, yb ** 2, '.-', label='^2 power method')
+    plt.plot(x, yb ** 2.5, '.-', label='^2.5 power method')
+    plt.xlim(left=-4, right=4)
+    plt.ylim(bottom=0, top=6)
+    plt.xlabel('input')
+    plt.ylabel('output')
+    plt.legend()
+    fig.tight_layout()
+    fig.savefig('comparison.jpg', dpi=fig.dpi)
+
+
+def plot_results(start=0):  # from utils.utils import *; plot_results()
     # Plot YOLO training results file 'results.txt'
     # import os; os.system('wget https://storage.googleapis.com/ultralytics/yolov3/results_v3.txt')
-    # from utils.utils import *; plot_results()
 
     fig = plt.figure(figsize=(14, 7))
     s = ['X + Y', 'Width + Height', 'Confidence', 'Classification', 'Total Loss', 'Precision', 'Recall', 'mAP']
     for f in sorted(glob.glob('results*.txt')):
-        results = np.loadtxt(f, usecols=[2, 3, 4, 5, 6, 9, 10, 11]).T  # column 11 is mAP
-        x = range(1, results.shape[1])
+        results = np.loadtxt(f, usecols=[2, 3, 4, 5, 6, 9, 10, 11, 12]).T  # column 11 is mAP
+        x = range(start, results.shape[1])
         for i in range(8):
             plt.subplot(2, 4, i + 1)
-            plt.plot(results[i, x[start:]], marker='.', label=f)
+            plt.plot(x, results[i, x], marker='.', label=f)
             plt.title(s[i])
             if i == 0:
                 plt.legend()
+            if i == 7:
+                plt.plot(x, results[i + 1, x], marker='.', label=f)
     fig.tight_layout()
+    fig.savefig('results.jpg', dpi=fig.dpi)
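
The epoch-cost column added to the README's GPU table follows directly from the hourly rates and epoch times in the same hunk: cost = epoch hours × GPU count × hourly rate per GPU. A quick sanity check of that arithmetic (cent-level differences against the table likely come from unrounded epoch times):

```python
# Epoch cost check for the GPU table in the README hunk above.
rates = {'K80': 0.198, 'P4': 0.279, 'T4': 0.353, 'P100': 0.493}  # $/hr per GPU, from the table
runs = [('K80', 1, 175), ('P4', 1, 125), ('T4', 1, 94),
        ('P100', 1, 48), ('P100', 2, 29), ('P100', 4, 20)]  # (gpu, count, epoch minutes)
for gpu, n, minutes in runs:
    print('%d %s: $%.2f' % (n, gpu, minutes / 60 * n * rates[gpu]))
# 1 K80: $0.58, 1 P4: $0.58, 1 T4: $0.55, 1 P100: $0.39, 2 P100: $0.48, 4 P100: $0.66
```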
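The models.py hunk keeps darknet's unbounded `exp()` width-height decode and leaves the bounded "power" alternative commented out; `plot_wh_methods()` graphs the difference. A tiny numeric sketch of the two decodings, using the exact formulas from the diff (the anchor width here is an arbitrary example value, not taken from the repo's cfg):

```python
import torch

t = torch.linspace(-4, 4, 9)  # raw network outputs for box width
anchor_w = 3.625              # hypothetical anchor width in grid units

wh_yolo = torch.exp(t) * anchor_w                     # yolo method: unbounded above
wh_power = ((torch.sigmoid(t) * 2) ** 2) * anchor_w   # power method: capped at 4x anchor
print('%.1f vs %.1f at t=4' % (wh_yolo[-1], wh_power[-1]))  # ~197.9 vs ~14.0
```

The cap at 4× the anchor is why the power method can tame exploding width-height gradients, at the cost of not being able to represent very large boxes from a small anchor.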
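The test.py hunk ends by handing `results.json` to pycocotools and reading the mAP back out of `cocoEval.stats`. A minimal standalone version of that evaluation step, assuming the repo layout used throughout the diff (COCO 2014 annotations in `../coco`, and a `results.json` already written by `test.py --save-json`):

```python
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval

cocoGt = COCO('../coco/annotations/instances_val2014.json')  # ground-truth annotations
cocoDt = cocoGt.loadRes('results.json')  # detections saved by test.py --save-json
cocoEval = COCOeval(cocoGt, cocoDt, 'bbox')
cocoEval.evaluate()
cocoEval.accumulate()
cocoEval.summarize()  # prints an AP/AR table like the one in the README hunk above
mAP = cocoEval.stats[1]  # stats[1] = AP at IoU=0.50, the value test.py returns as mAP
```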
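In the new test.py flow, each image contributes a `(tp, conf, pred_cls, target_cls)` tuple to `stats`, and `ap_per_class` runs once over the concatenated arrays instead of once per image, which is what fixes the running-mean mAP. A simplified NumPy sketch of that computation (the AP interpolation is condensed relative to the repo's version, so exact values may differ slightly):

```python
import numpy as np

def ap_per_class(tp, conf, pred_cls, target_cls):
    # tp: 1/0 per detection; conf: confidence; pred_cls/target_cls: class ids
    i = np.argsort(-conf)                 # rank all detections by confidence
    tp, pred_cls = tp[i], pred_cls[i]
    ap, p, r, classes = [], [], [], np.unique(target_cls)
    for c in classes:
        m = pred_cls == c
        n_gt = (target_cls == c).sum()    # ground-truth count for class c
        if m.sum() == 0:
            ap.append(0), r.append(0), p.append(0)
            continue
        tpc, fpc = tp[m].cumsum(), (1 - tp[m]).cumsum()  # cumulative TP/FP
        recall, precision = tpc / (n_gt + 1e-16), tpc / (tpc + fpc)
        r.append(recall[-1]), p.append(precision[-1])
        # AP = area under the monotone-envelope precision-recall curve
        mrec = np.concatenate(([0.0], recall, [1.0]))
        mpre = np.concatenate(([1.0], precision, [0.0]))
        mpre = np.maximum.accumulate(mpre[::-1])[::-1]
        j = np.where(mrec[1:] != mrec[:-1])[0]
        ap.append(((mrec[j + 1] - mrec[j]) * mpre[j + 1]).sum())
    return np.array(ap), classes.astype(int), np.array(r), np.array(p)

# Example: two detections for class 0, one correct, two ground-truth boxes
AP, AP_class, R, P = ap_per_class(np.array([1, 0]), np.array([0.9, 0.8]),
                                  np.array([0, 0]), np.array([0, 0]))
print(AP, R, P)  # [0.5] [0.5] [0.5]
```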
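The tidied utils.py conversions are easy to verify by hand. A standalone NumPy version of `xywh2xyxy` with a worked example (the repo version additionally accepts torch tensors):

```python
import numpy as np

def xywh2xyxy(x):
    # x: (n, 4) array of [x_center, y_center, w, h] -> [x1, y1, x2, y2]
    y = np.zeros_like(x)
    y[:, 0] = x[:, 0] - x[:, 2] / 2  # x1 = xc - w/2
    y[:, 1] = x[:, 1] - x[:, 3] / 2  # y1 = yc - h/2
    y[:, 2] = x[:, 0] + x[:, 2] / 2  # x2 = xc + w/2
    y[:, 3] = x[:, 1] + x[:, 3] / 2  # y2 = yc + h/2
    return y

print(xywh2xyxy(np.array([[100.0, 100.0, 50.0, 20.0]])))  # [[75. 90. 125. 110.]]
```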
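Finally, the utils.py hunk switches `nms_style` to the experimental 'MERGE' mode: instead of keeping only the single highest-confidence box, every box overlapping it above `nms_thres` is averaged into it, weighted by confidence. A self-contained NumPy sketch of that logic with a local IoU helper (`box_iou` and `merge_nms` are illustrative names; the repo operates on torch tensors via `bbox_iou`, per class, with extra columns for class confidence and class index):

```python
import numpy as np

def box_iou(box, boxes):
    # IoU of one xyxy box against an (n, 4) array of xyxy boxes
    x1 = np.maximum(box[0], boxes[:, 0])
    y1 = np.maximum(box[1], boxes[:, 1])
    x2 = np.minimum(box[2], boxes[:, 2])
    y2 = np.minimum(box[3], boxes[:, 3])
    inter = np.clip(x2 - x1, 0, None) * np.clip(y2 - y1, 0, None)
    a1 = (box[2] - box[0]) * (box[3] - box[1])
    a2 = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
    return inter / (a1 + a2 - inter + 1e-16)

def merge_nms(dets, nms_thres=0.5):
    # dets: (n, 5) rows of [x1, y1, x2, y2, conf], sorted by decreasing conf
    out = []
    while len(dets):
        i = box_iou(dets[0, :4], dets[:, :4]) > nms_thres  # cluster around top box
        w = dets[i, 4:5]                                   # confidence weights
        dets[0, :4] = (w * dets[i, :4]).sum(0) / w.sum()   # weighted box merge
        out.append(dets[0].copy())
        dets = dets[~i]                                    # drop the merged cluster
    return np.array(out)

d = np.array([[10., 10., 50., 50., 0.9],
              [12., 12., 52., 52., 0.6],
              [80., 80., 120., 120., 0.8]])
print(merge_nms(d))  # two survivors: one merged box, one untouched box
```

The merged coordinates sit between the overlapping candidates, which is the small localization refinement the `# improves mAP` comments in the diff are measuring.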