Glenn Jocher 2019-04-02 13:43:18 +02:00
parent bd32517528
commit 01569d15e3
5 changed files with 95 additions and 70 deletions

detect.py

@@ -9,6 +9,7 @@ from utils.utils import *
 def detect(
         cfg,
+        data_cfg,
         weights,
         images,
         output='output',  # output folder
@@ -36,6 +37,7 @@ def detect(
     model.to(device).eval()

     # Set Dataloader
+    vid_path, vid_writer = None, None
     if webcam:
         save_images = False
         dataloader = LoadWebcam(img_size=img_size)
@@ -43,16 +45,12 @@ def detect(
         dataloader = LoadImages(images, img_size=img_size)

     # Get classes and colors
-    classes = load_classes(parse_data_cfg('cfg/coco.data')['names'])
+    classes = load_classes(parse_data_cfg(data_cfg)['names'])
     colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(classes))]

-    for i, (path, img, im0) in enumerate(dataloader):
+    for i, (path, img, im0, vid_cap) in enumerate(dataloader):
         t = time.time()
         save_path = str(Path(output) / Path(path).name)

-        if webcam:
-            print('webcam frame %g: ' % (i + 1), end='')
-        else:
-            print('image %g/%g %s: ' % (i + 1, len(dataloader), path), end='')

         # Get detections
         img = torch.from_numpy(img).unsqueeze(0).to(device)
@@ -83,12 +81,24 @@ def detect(
         print('Done. (%.3fs)' % (time.time() - t))

-        if save_images:  # Save generated image with detections
-            cv2.imwrite(save_path, im0)

         if webcam:  # Show live webcam
             cv2.imshow(weights, im0)

+        if save_images:  # Save generated image with detections
+            if dataloader.mode == 'video':
+                if vid_path != save_path:  # new video
+                    vid_path = save_path
+                    if isinstance(vid_writer, cv2.VideoWriter):
+                        vid_writer.release()  # release previous video writer
+
+                    width = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+                    height = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+                    fps = vid_cap.get(cv2.CAP_PROP_FPS)
+                    vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'avc1'), fps, (width, height))
+                vid_writer.write(im0)
+            else:
+                cv2.imwrite(save_path, im0)

     if save_images and platform == 'darwin':  # macos
         os.system('open ' + output + ' ' + save_path)
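Note: the writer above is created lazily from the source stream's own properties and released whenever a new output path starts. A self-contained sketch of the same pattern (placeholder file names; 'mp4v' substituted for 'avc1', whose availability depends on platform codecs):

import cv2

cap = cv2.VideoCapture('input.mp4')  # placeholder input path
fps = cap.get(cv2.CAP_PROP_FPS)
size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
        int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
writer = cv2.VideoWriter('out.mp4', cv2.VideoWriter_fourcc(*'mp4v'), fps, size)

while True:
    ok, frame = cap.read()
    if not ok:
        break
    writer.write(frame)  # frames pass through unchanged in this sketch

cap.release()
writer.release()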
@@ -96,10 +106,11 @@ def detect(
 if __name__ == '__main__':
     parser = argparse.ArgumentParser()
     parser.add_argument('--cfg', type=str, default='cfg/yolov3.cfg', help='cfg file path')
+    parser.add_argument('--data-cfg', type=str, default='cfg/coco.data', help='coco.data file path')
     parser.add_argument('--weights', type=str, default='weights/yolov3.weights', help='path to weights file')
     parser.add_argument('--images', type=str, default='data/samples', help='path to images')
     parser.add_argument('--img-size', type=int, default=32 * 13, help='size of each image dimension')
-    parser.add_argument('--conf-thres', type=float, default=0.3, help='object confidence threshold')
+    parser.add_argument('--conf-thres', type=float, default=0.5, help='object confidence threshold')
     parser.add_argument('--nms-thres', type=float, default=0.5, help='iou threshold for non-maximum suppression')
     opt = parser.parse_args()
     print(opt)
@@ -107,6 +118,7 @@ if __name__ == '__main__':
     with torch.no_grad():
         detect(
             opt.cfg,
+            opt.data_cfg,
             opt.weights,
             opt.images,
             img_size=opt.img_size,
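Note: the new --data-cfg flag points detect() at a darknet-style .data file instead of the hard-coded cfg/coco.data. The real parse_data_cfg lives in utils; a hypothetical minimal stand-in, assuming the standard darknet 'key=value' format:

def parse_data_cfg(path):
    # Hypothetical stand-in: read 'key=value' pairs from a darknet-style
    # .data file, e.g. classes=80, valid=..., names=data/coco.names
    options = {}
    with open(path) as f:
        for line in f:
            line = line.strip()
            if not line or line.startswith('#'):
                continue
            key, value = line.split('=', 1)
            options[key.strip()] = value.strip()
    return options

# Used as in detect(): the class-names file comes from the 'names' key
# classes = load_classes(parse_data_cfg('cfg/coco.data')['names'])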

test.py

@@ -35,19 +35,19 @@ def test(
         if torch.cuda.device_count() > 1:
             model = nn.DataParallel(model)
     else:
-        device = next(model.parameters()).device
+        device = next(model.parameters()).device  # get model device

     # Configure run
     data_cfg = parse_data_cfg(data_cfg)
     test_path = data_cfg['valid']
-    # if (os.sep + 'coco' + os.sep) in test_path:  # COCO dataset probable
-    #     save_json = True  # use pycocotools
+    if (os.sep + 'coco' + os.sep) in test_path:  # COCO dataset probable
+        save_json = True  # use pycocotools

     # Dataloader
     dataset = LoadImagesAndLabels(test_path, img_size=img_size)
     dataloader = DataLoader(dataset,
                             batch_size=batch_size,
-                            num_workers=4,
+                            num_workers=0,
                             pin_memory=False,
                             collate_fn=dataset.collate_fn)
@@ -66,16 +66,16 @@ def test(
         # Per image
         for si, pred in enumerate(output):
-            image_id = int(Path(paths[si]).stem.split('_')[-1])
             labels = targets[targets[:, 0] == si, 1:]
+            correct, detected, tcls = [], [], []
             seen += 1

             if pred is None:
                 continue

-            if save_json:
-                # add to json pred dictionary
+            if save_json:  # add to json pred dictionary
                 # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ...
+                image_id = int(Path(paths[si]).stem.split('_')[-1])
                 box = pred[:, :4].clone()  # xyxy
                 scale_coords(img_size, box, shapes[si])  # to original shape
                 box = xyxy2xywh(box)  # xywh
@@ -88,42 +88,21 @@ def test(
                         'score': float(d[4])
                     })
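Note: each jdict record follows the COCO detection-results schema that pycocotools scores. A hypothetical, self-contained example of the structure being appended:

# Hypothetical illustration only: one record per detection, bbox as
# [x, y, w, h] in original-image pixels, category_id in 91-class COCO ids.
jdict = []
detections = [(258.15, 41.29, 348.26, 243.78, 0.236, 18)]  # x, y, w, h, score, category
for x, y, w, h, score, category_id in detections:
    jdict.append({'image_id': 42,
                  'category_id': category_id,
                  'bbox': [x, y, w, h],
                  'score': score})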
-            # if len(labels) > 0:
-            #     # add to json targets dictionary
-            #     # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], ...
-            #     box = labels[:, 1:].clone()
-            #     box[:, [0, 2]] *= shapes[si][1]  # scale width
-            #     box[:, [1, 3]] *= shapes[si][0]  # scale height
-            #     box[:, :2] -= box[:, 2:] / 2  # xy center to top-left corner
-            #     for di, d in enumerate(labels):
-            #         tdict.append({
-            #             'segmentation': [[]],
-            #             'iscrowd': 0,
-            #             'image_id': image_id,
-            #             'category_id': coco91class[int(d[0])],
-            #             'id': seen,
-            #             'bbox': [float3(x) for x in box[di]],
-            #             'area': float3(box[di][2:4].prod())
-            #         })

             # If no labels add number of detections as incorrect
-            correct = []
-            detected = []
             if len(labels) == 0:
-                # correct.extend([0 for _ in range(len(detections))])
-                continue
+                correct.extend([0] * len(pred))
+            else:
                 # Extract target boxes as (x1, y1, x2, y2)
-                target_box = xywh2xyxy(labels[:, 1:5]) * img_size
-                target_cls = labels[:, 0]
+                tbox = xywh2xyxy(labels[:, 1:5]) * img_size
+                tcls = labels[:, 0].cpu()

-                for *pred_box, conf, cls_conf, cls_pred in pred:
-                    if cls_pred not in target_cls:
+                for *pbox, pconf, pcls_conf, pcls in pred:
+                    if pcls not in tcls:
                         correct.append(0)
                         continue

                     # Best iou, index between pred and targets
-                    iou, bi = bbox_iou(pred_box, target_box).max(0)
+                    iou, bi = bbox_iou(pbox, tbox).max(0)

                     # If iou > threshold and class is correct mark as correct
                     if iou > iou_thres and bi not in detected:
@@ -132,12 +111,8 @@ def test(
                     else:
                         correct.append(0)

-            # Convert to Numpy
-            tp = np.array(correct)
-            conf = pred[:, 4].cpu().numpy()
-            pred_cls = pred[:, 6].cpu().numpy()
-            target_cls = target_cls.cpu().numpy()
-            stats.append((tp, conf, pred_cls, target_cls))
+            # Append Statistics (correct, conf, pcls, tcls)
+            stats.append((correct, pred[:, 4].cpu(), pred[:, 6].cpu(), tcls))

     # Compute means
     stats_np = [np.concatenate(x, 0) for x in list(zip(*stats))]
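Note: the matching rule in this file is compact but easy to misread. A standalone sketch (hypothetical helper with invented names; pred rows are [x1, y1, x2, y2, conf, cls_conf, cls] lists): a prediction counts as a true positive only when its class occurs among the targets, its best-IoU target is still unclaimed, and that IoU clears the threshold.

import torch

def box_iou_1n(box, boxes):
    # IoU of one xyxy box against an (n, 4) xyxy tensor
    # (self-contained stand-in for the repo's bbox_iou helper).
    x1 = torch.max(box[0], boxes[:, 0])
    y1 = torch.max(box[1], boxes[:, 1])
    x2 = torch.min(box[2], boxes[:, 2])
    y2 = torch.min(box[3], boxes[:, 3])
    inter = (x2 - x1).clamp(0) * (y2 - y1).clamp(0)
    area1 = (box[2] - box[0]) * (box[3] - box[1])
    area2 = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
    return inter / (area1 + area2 - inter + 1e-16)

def match_predictions(pred, tbox, tcls, iou_thres=0.5):
    # Hypothetical sketch of the loop above: greedy one-to-one matching.
    correct, detected = [], []
    for *pbox, pconf, pcls_conf, pcls in pred:
        if float(pcls) not in tcls:
            correct.append(0)  # class absent from targets: cannot be correct
            continue
        iou, bi = box_iou_1n(torch.tensor(pbox), tbox).max(0)
        bi = int(bi)
        if iou > iou_thres and bi not in detected:
            correct.append(1)
            detected.append(bi)  # each target may be claimed once
        else:
            correct.append(0)
    return correct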

train.py

@@ -119,7 +119,7 @@ def train(
         plot_images = False
         if plot_images:
             fig = plt.figure(figsize=(10, 10))
-            for ip in range(batch_size):
+            for ip in range(len(imgs)):
                 boxes = xywh2xyxy(targets[targets[:, 0] == ip, 2:6]).numpy().T * img_size
                 plt.subplot(4, 4, ip + 1).imshow(imgs[ip].numpy().transpose(1, 2, 0))
                 plt.plot(boxes[[0, 2, 2, 0, 0]], boxes[[1, 1, 3, 3, 1]], '.-')
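Note: this guards the debug plot against a final batch smaller than batch_size, since indexing by len(imgs) can never run past the tensors actually loaded. A standalone sketch with dummy data (random images, one hand-placed box; savefig added so it runs headless):

import numpy as np
import matplotlib.pyplot as plt

# Dummy stand-ins for a real batch: 4 CHW float images and one xyxy box.
imgs = np.random.rand(4, 3, 416, 416).astype(np.float32)
boxes = np.array([[50.0, 60.0, 200.0, 220.0]]).T  # shape (4, 1): x1, y1, x2, y2

fig = plt.figure(figsize=(10, 10))
for ip in range(len(imgs)):  # len(imgs), not batch_size: safe on a short last batch
    plt.subplot(2, 2, ip + 1).imshow(imgs[ip].transpose(1, 2, 0))
    plt.plot(boxes[[0, 2, 2, 0, 0]], boxes[[1, 1, 3, 3, 1]], '.-')  # closed box outline
plt.savefig('batch0.jpg')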

utils/datasets.py

@@ -16,31 +16,61 @@ from utils.utils import xyxy2xywh
 class LoadImages:  # for inference
     def __init__(self, path, img_size=416):
-        if os.path.isdir(path):
-            image_format = ['.jpg', '.jpeg', '.png', '.tif']
-            self.files = sorted(glob.glob('%s/*.*' % path))
-            self.files = list(filter(lambda x: os.path.splitext(x)[1].lower() in image_format, self.files))
-        elif os.path.isfile(path):
-            self.files = [path]
-
-        self.nF = len(self.files)  # number of image files
         self.height = img_size
+        img_formats = ['.jpg', '.jpeg', '.png', '.tif']
+        vid_formats = ['.mov', '.avi', '.mp4']

-        assert self.nF > 0, 'No images found in ' + path
+        files = []
+        if os.path.isdir(path):
+            files = sorted(glob.glob('%s/*.*' % path))
+        elif os.path.isfile(path):
+            files = [path]

+        # self.files = list(filter(lambda x: os.path.splitext(x)[1].lower() in img_formats, files))
+        images = [x for x in files if os.path.splitext(x)[-1].lower() in img_formats]
+        videos = [x for x in files if os.path.splitext(x)[-1].lower() in vid_formats]
+        self.files = images + videos
+        self.nI, self.nV = len(images), len(videos)
+        self.nF = self.nI + self.nV  # number of files
+        self.video_flag = [False] * self.nI + [True] * self.nV
+        self.mode = 'images'
+        if any(videos):
+            self.new_video(videos[0])  # new video
+        else:
+            self.cap = None
+        assert self.nF > 0, 'No images or videos found in ' + path

     def __iter__(self):
-        self.count = -1
+        self.count = 0
         return self

     def __next__(self):
-        self.count += 1
         if self.count == self.nF:
             raise StopIteration
-        img_path = self.files[self.count]
+        path = self.files[self.count]

-        # Read image
-        img0 = cv2.imread(img_path)  # BGR
-        assert img0 is not None, 'File Not Found ' + img_path
+        if self.video_flag[self.count]:
+            self.mode = 'video'
+            ret_val, img0 = self.cap.read()
+            if not ret_val:
+                self.count += 1
+                self.cap.release()
+                if self.count == self.nF:  # last video
+                    raise StopIteration
+                else:
+                    path = self.files[self.count]
+                    self.new_video(path)
+                    ret_val, img0 = self.cap.read()
+
+            self.frame += 1
+            print('video %g/%g (%g/%g) %s: ' % (self.count + 1, self.nF, self.frame, self.nframes, path), end='')
+        else:
+            # Read image
+            self.count += 1
+            img0 = cv2.imread(path)  # BGR
+            assert img0 is not None, 'File Not Found ' + path
+            print('image %g/%g %s: ' % (self.count, self.nF, path), end='')

         # Padded resize
         img, _, _, _ = letterbox(img0, height=self.height)
@@ -50,8 +80,13 @@ class LoadImages:  # for inference
         img = np.ascontiguousarray(img, dtype=np.float32)  # uint8 to float32
         img /= 255.0  # 0 - 255 to 0.0 - 1.0

-        # cv2.imwrite(img_path + '.letterbox.jpg', 255 * img.transpose((1, 2, 0))[:, :, ::-1])  # save letterbox image
-        return img_path, img, img0
+        # cv2.imwrite(path + '.letterbox.jpg', 255 * img.transpose((1, 2, 0))[:, :, ::-1])  # save letterbox image
+        return path, img, img0, self.cap

+    def new_video(self, path):
+        self.frame = 0
+        self.cap = cv2.VideoCapture(path)
+        self.nframes = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))

     def __len__(self):
         return self.nF  # number of files
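Note: with this change LoadImages iterates still images and video frames through one interface, yielding a 4-tuple whose last element is the underlying cv2.VideoCapture (None for still images). A minimal caller, mirroring the updated detect.py loop (assumes utils/datasets.py is importable and data/samples exists):

from utils.datasets import LoadImages

dataloader = LoadImages('data/samples', img_size=416)
for path, img, im0, vid_cap in dataloader:
    source = 'video frame' if vid_cap is not None else 'image'
    print('%s: %s letterboxed to %s' % (source, im0.shape, img.shape))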

utils/utils.py

@@ -163,15 +163,18 @@ def ap_per_class(tp, conf, pred_cls, target_cls):
             # Recall
             recall_curve = tpc / (n_gt + 1e-16)
-            r.append(tpc[-1] / (n_gt + 1e-16))
+            r.append(recall_curve[-1])

             # Precision
             precision_curve = tpc / (tpc + fpc)
-            p.append(tpc[-1] / (tpc[-1] + fpc[-1]))
+            p.append(precision_curve[-1])

             # AP from recall-precision curve
             ap.append(compute_ap(recall_curve, precision_curve))

             # Plot
             # plt.plot(recall_curve, precision_curve)

     return np.array(ap), unique_classes.astype('int32'), np.array(r), np.array(p)
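Note: both replacements are behavior-preserving simplifications: the last element of each cumulative curve is exactly what the old expressions computed. A worked micro-example with hypothetical numbers:

import numpy as np

# 3 predictions for one class, sorted by descending confidence; 2 ground truths.
tp = np.array([1, 0, 1])            # per-prediction correctness
n_gt = 2
tpc = tp.cumsum()                   # [1, 1, 2] cumulative true positives
fpc = (1 - tp).cumsum()             # [0, 1, 1] cumulative false positives

recall_curve = tpc / (n_gt + 1e-16)     # [0.5, 0.5, 1.0]
precision_curve = tpc / (tpc + fpc)     # [1.0, 0.5, 0.667]

assert recall_curve[-1] == tpc[-1] / (n_gt + 1e-16)          # old vs new recall
assert precision_curve[-1] == tpc[-1] / (tpc[-1] + fpc[-1])  # old vs new precision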