updates

2019-04-02 13:43:18 +02:00 · 2019-04-02 13:43:18 +02:00 · 01569d15e3
parent bd32517528
commit 01569d15e3
5 changed files with 95 additions and 70 deletions
--- a/detect.py
+++ b/detect.py
@@ -9,6 +9,7 @@ from utils.utils import *
 def detect(
        cfg,
        data_cfg,
        weights,
        images,
        output='output',  # output folder
@@ -36,6 +37,7 @@ def detect(
    model.to(device).eval()
    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        save_images = False
        dataloader = LoadWebcam(img_size=img_size)
@@ -43,16 +45,12 @@ def detect(
        dataloader = LoadImages(images, img_size=img_size)
    # Get classes and colors
-    classes = load_classes(parse_data_cfg('cfg/coco.data')['names'])
+    classes = load_classes(parse_data_cfg(data_cfg)['names'])
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(classes))]
-    for i, (path, img, im0) in enumerate(dataloader):
+    for i, (path, img, im0, vid_cap) in enumerate(dataloader):
        t = time.time()
        save_path = str(Path(output) / Path(path).name)
        if webcam:
            print('webcam frame %g: ' % (i + 1), end='')
        else:
            print('image %g/%g %s: ' % (i + 1, len(dataloader), path), end='')
        # Get detections
        img = torch.from_numpy(img).unsqueeze(0).to(device)
@@ -83,12 +81,24 @@ def detect(
        print('Done. (%.3fs)' % (time.time() - t))
        if save_images:  # Save generated image with detections
            cv2.imwrite(save_path, im0)
        if webcam:  # Show live webcam
            cv2.imshow(weights, im0)
        if save_images:  # Save generated image with detections
            if dataloader.mode == 'video':
                if vid_path != save_path:  # new video
                    vid_path = save_path
                    if isinstance(vid_writer, cv2.VideoWriter):
                        vid_writer.release()  # release previous video writer
                    width = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                    height = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                    fps = vid_cap.get(cv2.CAP_PROP_FPS)
                    vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'avc1'), fps, (width, height))
                vid_writer.write(im0)
            else:
                cv2.imwrite(save_path, im0)
    if save_images and platform == 'darwin':  # macos
        os.system('open ' + output + ' ' + save_path)
@@ -96,10 +106,11 @@ def detect(
 if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--cfg', type=str, default='cfg/yolov3.cfg', help='cfg file path')
    parser.add_argument('--data-cfg', type=str, default='cfg/coco.data', help='coco.data file path')
    parser.add_argument('--weights', type=str, default='weights/yolov3.weights', help='path to weights file')
    parser.add_argument('--images', type=str, default='data/samples', help='path to images')
    parser.add_argument('--img-size', type=int, default=32 * 13, help='size of each image dimension')
-    parser.add_argument('--conf-thres', type=float, default=0.3, help='object confidence threshold')
+    parser.add_argument('--conf-thres', type=float, default=0.5, help='object confidence threshold')
    parser.add_argument('--nms-thres', type=float, default=0.5, help='iou threshold for non-maximum suppression')
    opt = parser.parse_args()
    print(opt)
@@ -107,6 +118,7 @@ if __name__ == '__main__':
    with torch.no_grad():
        detect(
            opt.cfg,
            opt.data_cfg,
            opt.weights,
            opt.images,
            img_size=opt.img_size,
--- a/test.py
+++ b/test.py
@@ -35,19 +35,19 @@ def test(
        if torch.cuda.device_count() > 1:
            model = nn.DataParallel(model)
    else:
-        device = next(model.parameters()).device
+        device = next(model.parameters()).device  # get model device
    # Configure run
    data_cfg = parse_data_cfg(data_cfg)
    test_path = data_cfg['valid']
-    # if (os.sep + 'coco' + os.sep) in test_path:  # COCO dataset probable
+    if (os.sep + 'coco' + os.sep) in test_path:  # COCO dataset probable
-    #     save_json = True  # use pycocotools
+        save_json = True  # use pycocotools
    # Dataloader
    dataset = LoadImagesAndLabels(test_path, img_size=img_size)
    dataloader = DataLoader(dataset,
                            batch_size=batch_size,
-                            num_workers=4,
+                            num_workers=0,
                            pin_memory=False,
                            collate_fn=dataset.collate_fn)
@@ -66,16 +66,16 @@ def test(
        # Per image
        for si, pred in enumerate(output):
            image_id = int(Path(paths[si]).stem.split('_')[-1])
            labels = targets[targets[:, 0] == si, 1:]
            correct, detected, tcls = [], [], []
            seen += 1
            if pred is None:
                continue
-            if save_json:
+            if save_json:  # add to json pred dictionary
                # add to json pred dictionary
                # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ...
                image_id = int(Path(paths[si]).stem.split('_')[-1])
                box = pred[:, :4].clone()  # xyxy
                scale_coords(img_size, box, shapes[si])  # to original shape
                box = xyxy2xywh(box)  # xywh
@@ -88,42 +88,21 @@ def test(
                        'score': float(d[4])
                    })
                # if len(labels) > 0:
                #     # add to json targets dictionary
                #     # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], ...
                #     box = labels[:, 1:].clone()
                #     box[:, [0, 2]] *= shapes[si][1]  # scale width
                #     box[:, [1, 3]] *= shapes[si][0]  # scale height
                #     box[:, :2] -= box[:, 2:] / 2  # xy center to top-left corner
                #     for di, d in enumerate(labels):
                #         tdict.append({
                #             'segmentation': [[]],
                #             'iscrowd': 0,
                #             'image_id': image_id,
                #             'category_id': coco91class[int(d[0])],
                #             'id': seen,
                #             'bbox': [float3(x) for x in box[di]],
                #             'area': float3(box[di][2:4].prod())
                #         })
            # If no labels add number of detections as incorrect
            correct = []
            detected = []
            if len(labels) == 0:
-                # correct.extend([0 for _ in range(len(detections))])
+                correct.extend([0] * len(pred))
                continue
            else:
                # Extract target boxes as (x1, y1, x2, y2)
-                target_box = xywh2xyxy(labels[:, 1:5]) * img_size
+                tbox = xywh2xyxy(labels[:, 1:5]) * img_size
-                target_cls = labels[:, 0]
+                tcls = labels[:, 0].cpu()
-                for *pred_box, conf, cls_conf, cls_pred in pred:
+                for *pbox, pconf, pcls_conf, pcls in pred:
-                    if cls_pred not in target_cls:
+                    if pcls not in tcls:
                        correct.append(0)
                        continue
                    # Best iou, index between pred and targets
-                    iou, bi = bbox_iou(pred_box, target_box).max(0)
+                    iou, bi = bbox_iou(pbox, tbox).max(0)
                    # If iou > threshold and class is correct mark as correct
                    if iou > iou_thres and bi not in detected:
@@ -132,12 +111,8 @@ def test(
                    else:
                        correct.append(0)
-            # Convert to Numpy
+            # Append Statistics (correct, conf, pcls, tcls)
-            tp = np.array(correct)
+            stats.append((correct, pred[:, 4].cpu(), pred[:, 6].cpu(), tcls))
            conf = pred[:, 4].cpu().numpy()
            pred_cls = pred[:, 6].cpu().numpy()
            target_cls = target_cls.cpu().numpy()
            stats.append((tp, conf, pred_cls, target_cls))
    # Compute means
    stats_np = [np.concatenate(x, 0) for x in list(zip(*stats))]
--- a/train.py
+++ b/train.py
@@ -119,7 +119,7 @@ def train(
            plot_images = False
            if plot_images:
                fig = plt.figure(figsize=(10, 10))
-                for ip in range(batch_size):
+                for ip in range(len(imgs)):
                    boxes = xywh2xyxy(targets[targets[:, 0] == ip, 2:6]).numpy().T * img_size
                    plt.subplot(4, 4, ip + 1).imshow(imgs[ip].numpy().transpose(1, 2, 0))
                    plt.plot(boxes[[0, 2, 2, 0, 0]], boxes[[1, 1, 3, 3, 1]], '.-')
--- a/utils/datasets.py
+++ b/utils/datasets.py
@@ -16,31 +16,61 @@ from utils.utils import xyxy2xywh
 class LoadImages:  # for inference
    def __init__(self, path, img_size=416):
        if os.path.isdir(path):
            image_format = ['.jpg', '.jpeg', '.png', '.tif']
            self.files = sorted(glob.glob('%s/*.*' % path))
            self.files = list(filter(lambda x: os.path.splitext(x)[1].lower() in image_format, self.files))
        elif os.path.isfile(path):
            self.files = [path]
        self.nF = len(self.files)  # number of image files
        self.height = img_size
        img_formats = ['.jpg', '.jpeg', '.png', '.tif']
        vid_formats = ['.mov', '.avi', '.mp4']
-        assert self.nF > 0, 'No images found in ' + path
+        files = []
        if os.path.isdir(path):
            files = sorted(glob.glob('%s/*.*' % path))
        elif os.path.isfile(path):
            files = [path]
        # self.files = list(filter(lambda x: os.path.splitext(x)[1].lower() in img_formats, files))
        images = [x for x in files if os.path.splitext(x)[-1].lower() in img_formats]
        videos = [x for x in files if os.path.splitext(x)[-1].lower() in vid_formats]
        self.files = images + videos
        self.nI, self.nV = len(images), len(videos)
        self.nF = self.nI + self.nV  # number of files
        self.video_flag = [False] * self.nI + [True] * self.nV
        self.mode = 'images'
        if any(videos):
            self.new_video(videos[0])  # new video
        else:
            self.cap = None
        assert self.nF > 0, 'No images or videos found in ' + path
    def __iter__(self):
-        self.count = -1
+        self.count = 0
        return self
    def __next__(self):
        self.count += 1
        if self.count == self.nF:
            raise StopIteration
-        img_path = self.files[self.count]
+        path = self.files[self.count]
        if self.video_flag[self.count]:
            self.mode = 'video'
            ret_val, img0 = self.cap.read()
            if not ret_val:
                self.count += 1
                self.cap.release()
                if self.count == self.nF:  # last video
                    raise StopIteration
                else:
                    path = self.files[self.count]
                    self.new_video(path)
                    ret_val, img0 = self.cap.read()
            self.frame += 1
            print('video %g/%g (%g/%g) %s: ' % (self.count + 1, self.nF, self.frame, self.nframes, path), end='')
        else:
            # Read image
-        img0 = cv2.imread(img_path)  # BGR
+            self.count += 1
-        assert img0 is not None, 'File Not Found ' + img_path
+            img0 = cv2.imread(path)  # BGR
            assert img0 is not None, 'File Not Found ' + path
            print('image %g/%g %s: ' % (self.count, self.nF, path), end='')
        # Padded resize
        img, _, _, _ = letterbox(img0, height=self.height)
@@ -50,8 +80,13 @@ class LoadImages:  # for inference
        img = np.ascontiguousarray(img, dtype=np.float32)  # uint8 to float32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
-        # cv2.imwrite(img_path + '.letterbox.jpg', 255 * img.transpose((1, 2, 0))[:, :, ::-1])  # save letterbox image
+        # cv2.imwrite(path + '.letterbox.jpg', 255 * img.transpose((1, 2, 0))[:, :, ::-1])  # save letterbox image
-        return img_path, img, img0
+        return path, img, img0, self.cap
    def new_video(self, path):
        self.frame = 0
        self.cap = cv2.VideoCapture(path)
        self.nframes = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))
    def __len__(self):
        return self.nF  # number of files
--- a/utils/utils.py
+++ b/utils/utils.py
@@ -163,15 +163,18 @@ def ap_per_class(tp, conf, pred_cls, target_cls):
            # Recall
            recall_curve = tpc / (n_gt + 1e-16)
-            r.append(tpc[-1] / (n_gt + 1e-16))
+            r.append(recall_curve[-1])
            # Precision
            precision_curve = tpc / (tpc + fpc)
-            p.append(tpc[-1] / (tpc[-1] + fpc[-1]))
+            p.append(precision_curve[-1])
            # AP from recall-precision curve
            ap.append(compute_ap(recall_curve, precision_curve))
            # Plot
            # plt.plot(recall_curve, precision_curve)
    return np.array(ap), unique_classes.astype('int32'), np.array(r), np.array(p)