From 01569d15e3997f7672aa952ff4441f183c3b8d49 Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Tue, 2 Apr 2019 13:43:18 +0200
Subject: [PATCH] updates

---
 detect.py         | 32 +++++++++++++++-------
 test.py           | 55 +++++++++++--------------
 train.py          |  2 +-
 utils/datasets.py | 69 +++++++++++++++++++++++++++++++++++------------
 utils/utils.py    |  7 +++--
 5 files changed, 95 insertions(+), 70 deletions(-)

diff --git a/detect.py b/detect.py
index a308cd20..4461af5b 100644
--- a/detect.py
+++ b/detect.py
@@ -9,6 +9,7 @@ from utils.utils import *
 
 def detect(
         cfg,
+        data_cfg,
         weights,
         images,
         output='output',  # output folder
@@ -36,6 +37,7 @@
     model.to(device).eval()
 
     # Set Dataloader
+    vid_path, vid_writer = None, None
     if webcam:
         save_images = False
         dataloader = LoadWebcam(img_size=img_size)
@@ -43,16 +45,12 @@
         dataloader = LoadImages(images, img_size=img_size)
 
     # Get classes and colors
-    classes = load_classes(parse_data_cfg('cfg/coco.data')['names'])
+    classes = load_classes(parse_data_cfg(data_cfg)['names'])
     colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(classes))]
 
-    for i, (path, img, im0) in enumerate(dataloader):
+    for i, (path, img, im0, vid_cap) in enumerate(dataloader):
         t = time.time()
         save_path = str(Path(output) / Path(path).name)
-        if webcam:
-            print('webcam frame %g: ' % (i + 1), end='')
-        else:
-            print('image %g/%g %s: ' % (i + 1, len(dataloader), path), end='')
 
         # Get detections
         img = torch.from_numpy(img).unsqueeze(0).to(device)
@@ -83,12 +81,24 @@
 
         print('Done. (%.3fs)' % (time.time() - t))
 
-        if save_images:  # Save generated image with detections
-            cv2.imwrite(save_path, im0)
-
         if webcam:  # Show live webcam
             cv2.imshow(weights, im0)
 
+        if save_images:  # Save generated image with detections
+            if dataloader.mode == 'video':
+                if vid_path != save_path:  # new video
+                    vid_path = save_path
+                    if isinstance(vid_writer, cv2.VideoWriter):
+                        vid_writer.release()  # release previous video writer
+                    width = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+                    height = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+                    fps = vid_cap.get(cv2.CAP_PROP_FPS)
+                    vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'avc1'), fps, (width, height))
+                vid_writer.write(im0)
+
+            else:
+                cv2.imwrite(save_path, im0)
+
     if save_images and platform == 'darwin':  # macos
         os.system('open ' + output + ' ' + save_path)
 
@@ -96,10 +106,11 @@
 if __name__ == '__main__':
     parser = argparse.ArgumentParser()
     parser.add_argument('--cfg', type=str, default='cfg/yolov3.cfg', help='cfg file path')
+    parser.add_argument('--data-cfg', type=str, default='cfg/coco.data', help='coco.data file path')
     parser.add_argument('--weights', type=str, default='weights/yolov3.weights', help='path to weights file')
     parser.add_argument('--images', type=str, default='data/samples', help='path to images')
     parser.add_argument('--img-size', type=int, default=32 * 13, help='size of each image dimension')
-    parser.add_argument('--conf-thres', type=float, default=0.3, help='object confidence threshold')
+    parser.add_argument('--conf-thres', type=float, default=0.5, help='object confidence threshold')
     parser.add_argument('--nms-thres', type=float, default=0.5, help='iou threshold for non-maximum suppression')
     opt = parser.parse_args()
     print(opt)
@@ -107,6 +118,7 @@ if __name__ == '__main__':
     with torch.no_grad():
         detect(
             opt.cfg,
+            opt.data_cfg,
             opt.weights,
             opt.images,
             img_size=opt.img_size,

diff --git a/test.py b/test.py
index 0a5a877a..4ed93b17 100644
--- a/test.py
+++ b/test.py
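Note: the detect.py hunk above lazily opens one cv2.VideoWriter per source video, releasing the previous writer whenever the save path changes. A minimal standalone sketch of that rollover pattern (save_video_frames and its frames argument are illustrative stand-ins for the dataloader, not part of this patch):

    import cv2

    def save_video_frames(frames):
        # frames: iterable of (save_path, frame, cv2.VideoCapture) tuples,
        # shaped like the loop variables consumed in detect.py above
        vid_path, vid_writer = None, None
        for save_path, im0, vid_cap in frames:
            if vid_path != save_path:  # first frame of a new source video
                vid_path = save_path
                if isinstance(vid_writer, cv2.VideoWriter):
                    vid_writer.release()  # close the previous video's writer
                w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                fps = vid_cap.get(cv2.CAP_PROP_FPS)
                vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'avc1'), fps, (w, h))
            vid_writer.write(im0)
        if isinstance(vid_writer, cv2.VideoWriter):
            vid_writer.release()  # flush the final video
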
@@ -35,19 +35,19 @@ def test(
 
         if torch.cuda.device_count() > 1:
             model = nn.DataParallel(model)
     else:
-        device = next(model.parameters()).device
+        device = next(model.parameters()).device  # get model device
 
     # Configure run
     data_cfg = parse_data_cfg(data_cfg)
     test_path = data_cfg['valid']
-    # if (os.sep + 'coco' + os.sep) in test_path:  # COCO dataset probable
-    #     save_json = True  # use pycocotools
+    if (os.sep + 'coco' + os.sep) in test_path:  # COCO dataset probable
+        save_json = True  # use pycocotools
 
     # Dataloader
     dataset = LoadImagesAndLabels(test_path, img_size=img_size)
     dataloader = DataLoader(dataset,
                             batch_size=batch_size,
-                            num_workers=4,
+                            num_workers=0,
                             pin_memory=False,
                             collate_fn=dataset.collate_fn)
@@ -66,16 +66,16 @@
 
         # Per image
         for si, pred in enumerate(output):
-            image_id = int(Path(paths[si]).stem.split('_')[-1])
             labels = targets[targets[:, 0] == si, 1:]
+            correct, detected, tcls = [], [], []
             seen += 1
 
             if pred is None:
                 continue
 
-            if save_json:
-                # add to json pred dictionary
+            if save_json:  # add to json pred dictionary
                 # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ...
+                image_id = int(Path(paths[si]).stem.split('_')[-1])
                 box = pred[:, :4].clone()  # xyxy
                 scale_coords(img_size, box, shapes[si])  # to original shape
                 box = xyxy2xywh(box)  # xywh
@@ -88,42 +88,21 @@
                         'score': float(d[4])
                     })
 
-            # if len(labels) > 0:
-            #     # add to json targets dictionary
-            #     # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], ...
-            #     box = labels[:, 1:].clone()
-            #     box[:, [0, 2]] *= shapes[si][1]  # scale width
-            #     box[:, [1, 3]] *= shapes[si][0]  # scale height
-            #     box[:, :2] -= box[:, 2:] / 2  # xy center to top-left corner
-            #     for di, d in enumerate(labels):
-            #         tdict.append({
-            #             'segmentation': [[]],
-            #             'iscrowd': 0,
-            #             'image_id': image_id,
-            #             'category_id': coco91class[int(d[0])],
-            #             'id': seen,
-            #             'bbox': [float3(x) for x in box[di]],
-            #             'area': float3(box[di][2:4].prod())
-            #         })
-
             # If no labels add number of detections as incorrect
-            correct = []
-            detected = []
             if len(labels) == 0:
-                # correct.extend([0 for _ in range(len(detections))])
-                continue
+                correct.extend([0] * len(pred))
             else:
                 # Extract target boxes as (x1, y1, x2, y2)
-                target_box = xywh2xyxy(labels[:, 1:5]) * img_size
-                target_cls = labels[:, 0]
+                tbox = xywh2xyxy(labels[:, 1:5]) * img_size
+                tcls = labels[:, 0].cpu()
 
-                for *pred_box, conf, cls_conf, cls_pred in pred:
-                    if cls_pred not in target_cls:
+                for *pbox, pconf, pcls_conf, pcls in pred:
+                    if pcls not in tcls:
                         correct.append(0)
                         continue
 
                     # Best iou, index between pred and targets
-                    iou, bi = bbox_iou(pred_box, target_box).max(0)
+                    iou, bi = bbox_iou(pbox, tbox).max(0)
 
                     # If iou > threshold and class is correct mark as correct
                     if iou > iou_thres and bi not in detected:
@@ -132,12 +111,8 @@
                     else:
                         correct.append(0)
 
-            # Convert to Numpy
-            tp = np.array(correct)
-            conf = pred[:, 4].cpu().numpy()
-            pred_cls = pred[:, 6].cpu().numpy()
-            target_cls = target_cls.cpu().numpy()
-            stats.append((tp, conf, pred_cls, target_cls))
+            # Append Statistics (correct, conf, pcls, tcls)
+            stats.append((correct, pred[:, 4].cpu(), pred[:, 6].cpu(), tcls))
 
     # Compute means
     stats_np = [np.concatenate(x, 0) for x in list(zip(*stats))]
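Note: test.py now appends one (correct, conf, pred_cls, target_cls) tuple per image and flattens them after the loop, as the "Compute means" context line shows. A toy illustration of that regrouping, with made-up values:

    import numpy as np

    # Two images' worth of per-detection stats, shaped like the tuples above:
    # ([correct flags], confidences, predicted classes, target classes)
    stats = [([1, 0], np.array([0.9, 0.6]), np.array([0, 2]), np.array([0.0])),
             ([1], np.array([0.8]), np.array([1]), np.array([1.0]))]

    # zip(*stats) regroups by field; concatenating gives one flat array per field
    tp, conf, pred_cls, target_cls = [np.concatenate(x, 0) for x in zip(*stats)]
    print(tp, conf)  # [1 0 1] [0.9 0.6 0.8]
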
diff --git a/train.py b/train.py
index d0fbd837..58196e65 100644
--- a/train.py
+++ b/train.py
@@ -119,7 +119,7 @@
         plot_images = False
         if plot_images:
             fig = plt.figure(figsize=(10, 10))
-            for ip in range(batch_size):
+            for ip in range(len(imgs)):
                 boxes = xywh2xyxy(targets[targets[:, 0] == ip, 2:6]).numpy().T * img_size
                 plt.subplot(4, 4, ip + 1).imshow(imgs[ip].numpy().transpose(1, 2, 0))
                 plt.plot(boxes[[0, 2, 2, 0, 0]], boxes[[1, 1, 3, 3, 1]], '.-')

diff --git a/utils/datasets.py b/utils/datasets.py
index b6397fbd..9e808e83 100755
--- a/utils/datasets.py
+++ b/utils/datasets.py
@@ -16,31 +16,61 @@ from utils.utils import xyxy2xywh
 
 class LoadImages:  # for inference
     def __init__(self, path, img_size=416):
-        if os.path.isdir(path):
-            image_format = ['.jpg', '.jpeg', '.png', '.tif']
-            self.files = sorted(glob.glob('%s/*.*' % path))
-            self.files = list(filter(lambda x: os.path.splitext(x)[1].lower() in image_format, self.files))
-        elif os.path.isfile(path):
-            self.files = [path]
-
-        self.nF = len(self.files)  # number of image files
         self.height = img_size
+        img_formats = ['.jpg', '.jpeg', '.png', '.tif']
+        vid_formats = ['.mov', '.avi', '.mp4']
 
-        assert self.nF > 0, 'No images found in ' + path
+        files = []
+        if os.path.isdir(path):
+            files = sorted(glob.glob('%s/*.*' % path))
+        elif os.path.isfile(path):
+            files = [path]
+
+        # self.files = list(filter(lambda x: os.path.splitext(x)[1].lower() in img_formats, files))
+        images = [x for x in files if os.path.splitext(x)[-1].lower() in img_formats]
+        videos = [x for x in files if os.path.splitext(x)[-1].lower() in vid_formats]
+        self.files = images + videos
+        self.nI, self.nV = len(images), len(videos)
+        self.nF = self.nI + self.nV  # number of files
+        self.video_flag = [False] * self.nI + [True] * self.nV
+        self.mode = 'images'
+        if any(videos):
+            self.new_video(videos[0])  # new video
+        else:
+            self.cap = None
+        assert self.nF > 0, 'No images or videos found in ' + path
 
     def __iter__(self):
-        self.count = -1
+        self.count = 0
         return self
 
     def __next__(self):
-        self.count += 1
         if self.count == self.nF:
             raise StopIteration
-        img_path = self.files[self.count]
+        path = self.files[self.count]
 
-        # Read image
-        img0 = cv2.imread(img_path)  # BGR
-        assert img0 is not None, 'File Not Found ' + img_path
+        if self.video_flag[self.count]:
+            self.mode = 'video'
+            ret_val, img0 = self.cap.read()
+            if not ret_val:
+                self.count += 1
+                self.cap.release()
+                if self.count == self.nF:  # last video
+                    raise StopIteration
+                else:
+                    path = self.files[self.count]
+                    self.new_video(path)
+                    ret_val, img0 = self.cap.read()
+
+            self.frame += 1
+            print('video %g/%g (%g/%g) %s: ' % (self.count + 1, self.nF, self.frame, self.nframes, path), end='')
+
+        else:
+            # Read image
+            self.count += 1
+            img0 = cv2.imread(path)  # BGR
+            assert img0 is not None, 'File Not Found ' + path
+            print('image %g/%g %s: ' % (self.count, self.nF, path), end='')
 
         # Padded resize
         img, _, _, _ = letterbox(img0, height=self.height)
@@ -50,8 +80,13 @@
         img = np.ascontiguousarray(img, dtype=np.float32)  # uint8 to float32
         img /= 255.0  # 0 - 255 to 0.0 - 1.0
 
-        # cv2.imwrite(img_path + '.letterbox.jpg', 255 * img.transpose((1, 2, 0))[:, :, ::-1])  # save letterbox image
-        return img_path, img, img0
+        # cv2.imwrite(path + '.letterbox.jpg', 255 * img.transpose((1, 2, 0))[:, :, ::-1])  # save letterbox image
+        return path, img, img0, self.cap
+
+    def new_video(self, path):
+        self.frame = 0
+        self.cap = cv2.VideoCapture(path)
+        self.nframes = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))
 
     def __len__(self):
         return self.nF  # number of files
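Note: LoadImages now yields a 4-tuple whose last element is the active cv2.VideoCapture (None for still images), which detect.py uses to size its VideoWriter. A hedged usage sketch (the sample path is a placeholder and assumes this repo is on PYTHONPATH):

    import cv2
    from utils.datasets import LoadImages

    dataloader = LoadImages('data/samples', img_size=416)  # images and/or videos
    for path, img, im0, vid_cap in dataloader:
        # img: letterboxed CHW float32 in [0, 1]; im0: original BGR frame
        if dataloader.mode == 'video':
            print(path, vid_cap.get(cv2.CAP_PROP_FPS))  # capture is set for videos only
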
diff --git a/utils/utils.py b/utils/utils.py
index d00b89d1..22561c3c 100755
--- a/utils/utils.py
+++ b/utils/utils.py
@@ -163,15 +163,18 @@ def ap_per_class(tp, conf, pred_cls, target_cls):
 
             # Recall
             recall_curve = tpc / (n_gt + 1e-16)
-            r.append(tpc[-1] / (n_gt + 1e-16))
+            r.append(recall_curve[-1])
 
             # Precision
             precision_curve = tpc / (tpc + fpc)
-            p.append(tpc[-1] / (tpc[-1] + fpc[-1]))
+            p.append(precision_curve[-1])
 
             # AP from recall-precision curve
             ap.append(compute_ap(recall_curve, precision_curve))
 
+            # Plot
+            # plt.plot(recall_curve, precision_curve)
+
     return np.array(ap), unique_classes.astype('int32'), np.array(r), np.array(p)
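Note: the r/p change above is behavior-preserving, since tpc[-1] / (n_gt + 1e-16) equals recall_curve[-1] and tpc[-1] / (tpc[-1] + fpc[-1]) equals precision_curve[-1]; it simply reuses the curves already computed for compute_ap. A toy check with made-up detections sorted by descending confidence:

    import numpy as np

    tp = np.array([1, 1, 0, 1, 0])  # correctness of each detection for one class
    n_gt = 4                        # ground-truth objects of this class
    tpc, fpc = np.cumsum(tp), np.cumsum(1 - tp)
    recall_curve = tpc / (n_gt + 1e-16)   # [0.25 0.5  0.5   0.75 0.75]
    precision_curve = tpc / (tpc + fpc)   # [1.   1.   0.667 0.75 0.6 ]
    print(recall_curve[-1], precision_curve[-1])  # 0.75 0.6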