From 90a20f93e51a1b65da438e1419aece4b97cdf26e Mon Sep 17 00:00:00 2001
From: Glenn Jocher <glenn.jocher@ultralytics.com>
Date: Tue, 26 Feb 2019 02:53:11 +0100
Subject: [PATCH] Add COCO JSON export and pycocotools evaluation to test.py; use COCO paper class names

---
 data/coco.names      | 12 +++++------
 detect.py            |  2 +-
 test.py              | 51 +++++++++++++++++++++++++++++++++++++++++---
 train.py             |  2 +-
 utils/datasets.py    |  7 +++---
 utils/torch_utils.py |  2 +-
 utils/utils.py       | 10 ++++++++-
 7 files changed, 70 insertions(+), 16 deletions(-)

diff --git a/data/coco.names b/data/coco.names
index ca76c80..941cb4e 100755
--- a/data/coco.names
+++ b/data/coco.names
@@ -1,8 +1,8 @@
 person
 bicycle
 car
-motorbike
-aeroplane
+motorcycle
+airplane
 bus
 train
 truck
@@ -55,12 +55,12 @@ pizza
 donut
 cake
 chair
-sofa
-pottedplant
+couch
+potted plant
 bed
-diningtable
+dining table
 toilet
-tvmonitor
+tv
 laptop
 mouse
 remote
diff --git a/detect.py b/detect.py
index aad9ab5..6f706b4 100644
--- a/detect.py
+++ b/detect.py
@@ -72,7 +72,7 @@ def detect(
             detections = non_max_suppression(pred.unsqueeze(0), conf_thres, nms_thres)[0]
 
             # Rescale boxes from 416 to true image size
-            detections[:, :4] = scale_coords(img_size, detections[:, :4], im0.shape)
+            detections[:, :4] = scale_coords(img_size, detections[:, :4], im0.shape).round()  # .round() is out-of-place: assign it back
 
             # Print results to screen
             unique_classes = detections[:, -1].cpu().unique()
diff --git a/test.py b/test.py
index c8c9502..d62f4aa 100644
--- a/test.py
+++ b/test.py
@@ -1,4 +1,6 @@
 import argparse
+import json
+from pathlib import Path
 
 from models import *
 from utils.datasets import *
@@ -13,7 +15,8 @@ def test(
         img_size=416,
         iou_thres=0.5,
         conf_thres=0.3,
-        nms_thres=0.45
+        nms_thres=0.45,
+        save_json=False
 ):
     device = torch_utils.select_device()
 
@@ -37,16 +40,21 @@ def test(
     # dataloader = torch.utils.data.DataLoader(LoadImagesAndLabels(test_path), batch_size=batch_size)  # pytorch
     dataloader = LoadImagesAndLabels(test_path, batch_size=batch_size, img_size=img_size)
 
+    # Create JSON
+    jdict = []
+    float3 = lambda x: float(format(x, '.3f'))  # print json to 3 decimals
+    # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ...
+
     mean_mAP, mean_R, mean_P, seen = 0.0, 0.0, 0.0, 0
     print('%11s' * 5 % ('Image', 'Total', 'P', 'R', 'mAP'))
     outputs, mAPs, mR, mP, TP, confidence, pred_class, target_class = [], [], [], [], [], [], [], []
     AP_accum, AP_accum_count = np.zeros(nC), np.zeros(nC)
-    for batch_i, (imgs, targets) in enumerate(dataloader):
+    for batch_i, (imgs, targets, paths, shapes) in enumerate(dataloader):
         output = model(imgs.to(device))
         output = non_max_suppression(output, conf_thres=conf_thres, nms_thres=nms_thres)
 
         # Compute average precision for each sample
-        for sample_i, (labels, detections) in enumerate(zip(targets, output)):
+        for si, (labels, detections) in enumerate(zip(targets, output)):
             seen += 1
 
             if detections is None:
@@ -59,6 +67,22 @@ def test(
             detections = detections.cpu().numpy()
             detections = detections[np.argsort(-detections[:, 4])]
 
+            # Save JSON
+            if save_json:
+                # rescale box to original image size, top left origin
+                sbox = torch.from_numpy(detections[:, :4]).clone()  # x1y1x2y2
+                scale_coords(img_size, sbox, shapes[si])
+                sbox = xyxy2xywh(sbox)
+                sbox[:, :2] -= sbox[:, 2:] / 2  # origin from center to corner
+
+                for di, d in enumerate(detections):
+                    jdict.append({  # add to json dictionary
+                        'image_id': int(Path(paths[si]).stem.split('_')[-1]),
+                        'category_id': darknet2coco_class(int(d[6])),
+                        'bbox': [float3(x) for x in sbox[di]],
+                        'score': float3(d[4] * d[5])
+                    })
+
             # If no labels add number of detections as incorrect
             correct = []
             if labels.size(0) == 0:
@@ -116,6 +140,27 @@ def test(
     for i, c in enumerate(classes):
         print('%15s: %-.4f' % (c, AP_accum[i] / (AP_accum_count[i] + 1E-16)))
 
+    # Save JSON
+    if save_json:
+        imgIds = [int(Path(x).stem.split('_')[-1]) for x in dataloader.img_files]
+        with open('results.json', 'w') as file:
+            json.dump(jdict, file)
+
+        from utils.pycocotools.coco import COCO
+        from utils.pycocotools.cocoeval import COCOeval
+
+        # initialize COCO ground truth api
+        cocoGt = COCO('../coco/annotations/instances_val2014.json')
+
+        # initialize COCO detections api
+        cocoDt = cocoGt.loadRes('results.json')
+
+        cocoEval = COCOeval(cocoGt, cocoDt, 'bbox')
+        cocoEval.params.imgIds = imgIds  # [:32]  # only evaluate these images
+        cocoEval.evaluate()
+        cocoEval.accumulate()
+        cocoEval.summarize()
+
     # Return mAP
     return mean_mAP, mean_R, mean_P
 
diff --git a/train.py b/train.py
index aa355c6..86253e5 100644
--- a/train.py
+++ b/train.py
@@ -113,7 +113,7 @@ def train(
         ui = -1
         rloss = defaultdict(float)  # running loss
         optimizer.zero_grad()
-        for i, (imgs, targets) in enumerate(dataloader):
+        for i, (imgs, targets, _, _) in enumerate(dataloader):
             if sum([len(x) for x in targets]) < 1:  # if no targets continue
                 continue
 
diff --git a/utils/datasets.py b/utils/datasets.py
index 17ebbfa..e274fab 100755
--- a/utils/datasets.py
+++ b/utils/datasets.py
@@ -128,8 +128,7 @@ class LoadImagesAndLabels:  # for training
             # Fixed-Scale YOLO Training
             height = self.height
 
-        img_all = []
-        labels_all = []
+        img_all, labels_all, img_paths, img_shapes = [], [], [], []
         for index, files_index in enumerate(range(ia, ib)):
             img_path = self.img_files[self.shuffled_vector[files_index]]
             label_path = self.label_files[self.shuffled_vector[files_index]]
@@ -210,13 +209,15 @@ class LoadImagesAndLabels:  # for training
 
             img_all.append(img)
             labels_all.append(torch.from_numpy(labels))
+            img_paths.append(img_path)
+            img_shapes.append((h, w))
 
         # Normalize
         img_all = np.stack(img_all)[:, :, :, ::-1].transpose(0, 3, 1, 2)  # BGR to RGB and cv2 to pytorch
         img_all = np.ascontiguousarray(img_all, dtype=np.float32)
         img_all /= 255.0
 
-        return torch.from_numpy(img_all), labels_all
+        return torch.from_numpy(img_all), labels_all, img_paths, img_shapes
 
     def __len__(self):
         return self.nB  # number of batches
diff --git a/utils/torch_utils.py b/utils/torch_utils.py
index 2eb6544..a45be2c 100644
--- a/utils/torch_utils.py
+++ b/utils/torch_utils.py
@@ -20,5 +20,5 @@ def select_device(force_cpu=False):
             torch.cuda.set_device(0)  # OPTIONAL: Set your GPU if multiple available
             # print('Using ', torch.cuda.device_count(), ' GPUs')
 
-    print('Using %s  %s\n' % (device.type, torch.cuda.get_device_properties(0) if cuda else ''))
+    print('Using %s %s\n' % (device.type, torch.cuda.get_device_properties(0) if cuda else ''))
     return device
diff --git a/utils/utils.py b/utils/utils.py
index fc2d1b4..fa9df88 100755
--- a/utils/utils.py
+++ b/utils/utils.py
@@ -49,6 +49,14 @@ def coco_class_weights():  # frequency of each class in coco train2014
     return weights
 
 
+def darknet2coco_class(c):  # maps darknet class index c to its 1-based position in data/coco_paper.names
+    # https://tech.amikelive.com/node-718/what-object-categories-labels-are-in-coco-dataset/
+    if not hasattr(darknet2coco_class, 'lookup'):  # build the table once; test.py calls this for every detection
+        a, b = (np.loadtxt(f, dtype='str', delimiter='\n') for f in ('data/coco.names', 'data/coco_paper.names'))
+        darknet2coco_class.lookup = [list(a[i] == b).index(True) + 1 for i in range(80)]  # darknet to coco
+    return darknet2coco_class.lookup[c]
+
+
 def plot_one_box(x, img, color=None, label=None, line_thickness=None):  # Plots one bounding box on image img
     tl = line_thickness or round(0.002 * max(img.shape[0:2])) + 1  # line thickness
     color = color or [random.randint(0, 255) for _ in range(3)]
@@ -99,7 +107,7 @@ def scale_coords(img_size, coords, img0_shape):
     coords[:, [0, 2]] -= pad_x
     coords[:, [1, 3]] -= pad_y
     coords[:, :4] /= gain
-    coords[:, :4] = torch.round(torch.clamp(coords[:, :4], min=0))
+    coords[:, :4] = torch.clamp(coords[:, :4], min=0)
     return coords