diff --git a/detect.py b/detect.py index a308cd20..4461af5b 100644 --- a/detect.py +++ b/detect.py @@ -9,6 +9,7 @@ from utils.utils import * def detect( cfg, + data_cfg, weights, images, output='output', # output folder @@ -36,6 +37,7 @@ def detect( model.to(device).eval() # Set Dataloader + vid_path, vid_writer = None, None if webcam: save_images = False dataloader = LoadWebcam(img_size=img_size) @@ -43,16 +45,12 @@ def detect( dataloader = LoadImages(images, img_size=img_size) # Get classes and colors - classes = load_classes(parse_data_cfg('cfg/coco.data')['names']) + classes = load_classes(parse_data_cfg(data_cfg)['names']) colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(classes))] - for i, (path, img, im0) in enumerate(dataloader): + for i, (path, img, im0, vid_cap) in enumerate(dataloader): t = time.time() save_path = str(Path(output) / Path(path).name) - if webcam: - print('webcam frame %g: ' % (i + 1), end='') - else: - print('image %g/%g %s: ' % (i + 1, len(dataloader), path), end='') # Get detections img = torch.from_numpy(img).unsqueeze(0).to(device) @@ -83,12 +81,24 @@ def detect( print('Done. (%.3fs)' % (time.time() - t)) - if save_images: # Save generated image with detections - cv2.imwrite(save_path, im0) - if webcam: # Show live webcam cv2.imshow(weights, im0) + if save_images: # Save generated image with detections + if dataloader.mode == 'video': + if vid_path != save_path: # new video + vid_path = save_path + if isinstance(vid_writer, cv2.VideoWriter): + vid_writer.release() # release previous video writer + width = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH)) + height = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) + fps = vid_cap.get(cv2.CAP_PROP_FPS) + vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'avc1'), fps, (width, height)) + vid_writer.write(im0) + + else: + cv2.imwrite(save_path, im0) + if save_images and platform == 'darwin': # macos os.system('open ' + output + ' ' + save_path) @@ -96,10 +106,11 @@ def detect( if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument('--cfg', type=str, default='cfg/yolov3.cfg', help='cfg file path') + parser.add_argument('--data-cfg', type=str, default='cfg/coco.data', help='coco.data file path') parser.add_argument('--weights', type=str, default='weights/yolov3.weights', help='path to weights file') parser.add_argument('--images', type=str, default='data/samples', help='path to images') parser.add_argument('--img-size', type=int, default=32 * 13, help='size of each image dimension') - parser.add_argument('--conf-thres', type=float, default=0.3, help='object confidence threshold') + parser.add_argument('--conf-thres', type=float, default=0.5, help='object confidence threshold') parser.add_argument('--nms-thres', type=float, default=0.5, help='iou threshold for non-maximum suppression') opt = parser.parse_args() print(opt) @@ -107,6 +118,7 @@ if __name__ == '__main__': with torch.no_grad(): detect( opt.cfg, + opt.data_cfg, opt.weights, opt.images, img_size=opt.img_size, diff --git a/test.py b/test.py index 0a5a877a..4ed93b17 100644 --- a/test.py +++ b/test.py @@ -35,19 +35,19 @@ def test( if torch.cuda.device_count() > 1: model = nn.DataParallel(model) else: - device = next(model.parameters()).device + device = next(model.parameters()).device # get model device # Configure run data_cfg = parse_data_cfg(data_cfg) test_path = data_cfg['valid'] - # if (os.sep + 'coco' + os.sep) in test_path: # COCO dataset probable - # save_json = True # use pycocotools + if (os.sep + 'coco' + os.sep) in test_path: # COCO dataset probable + save_json = True # use pycocotools # Dataloader dataset = LoadImagesAndLabels(test_path, img_size=img_size) dataloader = DataLoader(dataset, batch_size=batch_size, - num_workers=4, + num_workers=0, pin_memory=False, collate_fn=dataset.collate_fn) @@ -66,16 +66,16 @@ def test( # Per image for si, pred in enumerate(output): - image_id = int(Path(paths[si]).stem.split('_')[-1]) labels = targets[targets[:, 0] == si, 1:] + correct, detected, tcls = [], [], [] seen += 1 if pred is None: continue - if save_json: - # add to json pred dictionary + if save_json: # add to json pred dictionary # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ... + image_id = int(Path(paths[si]).stem.split('_')[-1]) box = pred[:, :4].clone() # xyxy scale_coords(img_size, box, shapes[si]) # to original shape box = xyxy2xywh(box) # xywh @@ -88,42 +88,21 @@ def test( 'score': float(d[4]) }) - # if len(labels) > 0: - # # add to json targets dictionary - # # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], ... - # box = labels[:, 1:].clone() - # box[:, [0, 2]] *= shapes[si][1] # scale width - # box[:, [1, 3]] *= shapes[si][0] # scale height - # box[:, :2] -= box[:, 2:] / 2 # xy center to top-left corner - # for di, d in enumerate(labels): - # tdict.append({ - # 'segmentation': [[]], - # 'iscrowd': 0, - # 'image_id': image_id, - # 'category_id': coco91class[int(d[0])], - # 'id': seen, - # 'bbox': [float3(x) for x in box[di]], - # 'area': float3(box[di][2:4].prod()) - # }) - # If no labels add number of detections as incorrect - correct = [] - detected = [] if len(labels) == 0: - # correct.extend([0 for _ in range(len(detections))]) - continue + correct.extend([0] * len(pred)) else: # Extract target boxes as (x1, y1, x2, y2) - target_box = xywh2xyxy(labels[:, 1:5]) * img_size - target_cls = labels[:, 0] + tbox = xywh2xyxy(labels[:, 1:5]) * img_size + tcls = labels[:, 0].cpu() - for *pred_box, conf, cls_conf, cls_pred in pred: - if cls_pred not in target_cls: + for *pbox, pconf, pcls_conf, pcls in pred: + if pcls not in tcls: correct.append(0) continue # Best iou, index between pred and targets - iou, bi = bbox_iou(pred_box, target_box).max(0) + iou, bi = bbox_iou(pbox, tbox).max(0) # If iou > threshold and class is correct mark as correct if iou > iou_thres and bi not in detected: @@ -132,12 +111,8 @@ def test( else: correct.append(0) - # Convert to Numpy - tp = np.array(correct) - conf = pred[:, 4].cpu().numpy() - pred_cls = pred[:, 6].cpu().numpy() - target_cls = target_cls.cpu().numpy() - stats.append((tp, conf, pred_cls, target_cls)) + # Append Statistics (correct, conf, pcls, tcls) + stats.append((correct, pred[:, 4].cpu(), pred[:, 6].cpu(), tcls)) # Compute means stats_np = [np.concatenate(x, 0) for x in list(zip(*stats))] diff --git a/train.py b/train.py index d0fbd837..58196e65 100644 --- a/train.py +++ b/train.py @@ -119,7 +119,7 @@ def train( plot_images = False if plot_images: fig = plt.figure(figsize=(10, 10)) - for ip in range(batch_size): + for ip in range(len(imgs)): boxes = xywh2xyxy(targets[targets[:, 0] == ip, 2:6]).numpy().T * img_size plt.subplot(4, 4, ip + 1).imshow(imgs[ip].numpy().transpose(1, 2, 0)) plt.plot(boxes[[0, 2, 2, 0, 0]], boxes[[1, 1, 3, 3, 1]], '.-') diff --git a/utils/datasets.py b/utils/datasets.py index b6397fbd..9e808e83 100755 --- a/utils/datasets.py +++ b/utils/datasets.py @@ -16,31 +16,61 @@ from utils.utils import xyxy2xywh class LoadImages: # for inference def __init__(self, path, img_size=416): - if os.path.isdir(path): - image_format = ['.jpg', '.jpeg', '.png', '.tif'] - self.files = sorted(glob.glob('%s/*.*' % path)) - self.files = list(filter(lambda x: os.path.splitext(x)[1].lower() in image_format, self.files)) - elif os.path.isfile(path): - self.files = [path] - - self.nF = len(self.files) # number of image files self.height = img_size + img_formats = ['.jpg', '.jpeg', '.png', '.tif'] + vid_formats = ['.mov', '.avi', '.mp4'] - assert self.nF > 0, 'No images found in ' + path + files = [] + if os.path.isdir(path): + files = sorted(glob.glob('%s/*.*' % path)) + elif os.path.isfile(path): + files = [path] + + # self.files = list(filter(lambda x: os.path.splitext(x)[1].lower() in img_formats, files)) + images = [x for x in files if os.path.splitext(x)[-1].lower() in img_formats] + videos = [x for x in files if os.path.splitext(x)[-1].lower() in vid_formats] + self.files = images + videos + self.nI, self.nV = len(images), len(videos) + self.nF = self.nI + self.nV # number of files + self.video_flag = [False] * self.nI + [True] * self.nV + self.mode = 'images' + if any(videos): + self.new_video(videos[0]) # new video + else: + self.cap = None + assert self.nF > 0, 'No images or videos found in ' + path def __iter__(self): - self.count = -1 + self.count = 0 return self def __next__(self): - self.count += 1 if self.count == self.nF: raise StopIteration - img_path = self.files[self.count] + path = self.files[self.count] - # Read image - img0 = cv2.imread(img_path) # BGR - assert img0 is not None, 'File Not Found ' + img_path + if self.video_flag[self.count]: + self.mode = 'video' + ret_val, img0 = self.cap.read() + if not ret_val: + self.count += 1 + self.cap.release() + if self.count == self.nF: # last video + raise StopIteration + else: + path = self.files[self.count] + self.new_video(path) + ret_val, img0 = self.cap.read() + + self.frame += 1 + print('video %g/%g (%g/%g) %s: ' % (self.count + 1, self.nF, self.frame, self.nframes, path), end='') + + else: + # Read image + self.count += 1 + img0 = cv2.imread(path) # BGR + assert img0 is not None, 'File Not Found ' + path + print('image %g/%g %s: ' % (self.count, self.nF, path), end='') # Padded resize img, _, _, _ = letterbox(img0, height=self.height) @@ -50,8 +80,13 @@ class LoadImages: # for inference img = np.ascontiguousarray(img, dtype=np.float32) # uint8 to float32 img /= 255.0 # 0 - 255 to 0.0 - 1.0 - # cv2.imwrite(img_path + '.letterbox.jpg', 255 * img.transpose((1, 2, 0))[:, :, ::-1]) # save letterbox image - return img_path, img, img0 + # cv2.imwrite(path + '.letterbox.jpg', 255 * img.transpose((1, 2, 0))[:, :, ::-1]) # save letterbox image + return path, img, img0, self.cap + + def new_video(self, path): + self.frame = 0 + self.cap = cv2.VideoCapture(path) + self.nframes = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT)) def __len__(self): return self.nF # number of files diff --git a/utils/utils.py b/utils/utils.py index d00b89d1..22561c3c 100755 --- a/utils/utils.py +++ b/utils/utils.py @@ -163,15 +163,18 @@ def ap_per_class(tp, conf, pred_cls, target_cls): # Recall recall_curve = tpc / (n_gt + 1e-16) - r.append(tpc[-1] / (n_gt + 1e-16)) + r.append(recall_curve[-1]) # Precision precision_curve = tpc / (tpc + fpc) - p.append(tpc[-1] / (tpc[-1] + fpc[-1])) + p.append(precision_curve[-1]) # AP from recall-precision curve ap.append(compute_ap(recall_curve, precision_curve)) + # Plot + # plt.plot(recall_curve, precision_curve) + return np.array(ap), unique_classes.astype('int32'), np.array(r), np.array(p)