From 5c288ca970e16a3f7a0a9c3180726ac3cfca6c32 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Thu, 1 Aug 2019 00:08:28 +0200 Subject: [PATCH] updates --- detect.py | 11 +++++++++-- test.py | 5 +++-- utils/datasets.py | 10 ++++++---- 3 files changed, 18 insertions(+), 8 deletions(-) diff --git a/detect.py b/detect.py index 6ef3a00f..05a91089 100644 --- a/detect.py +++ b/detect.py @@ -51,18 +51,24 @@ def detect(cfg, torch.onnx.export(model, img, 'weights/export.onnx', verbose=True) return + # Half precision + opt.half = opt.half and device.type != 'cpu' # half precision only supported on cuda + if opt.half: + model.half() + # Set Dataloader vid_path, vid_writer = None, None if webcam: save_images = False - dataloader = LoadWebcam(img_size=img_size) + dataloader = LoadWebcam(img_size=img_size, half=opt.half) else: - dataloader = LoadImages(images, img_size=img_size) + dataloader = LoadImages(images, img_size=img_size, half=opt.half) # Get classes and colors classes = load_classes(parse_data_cfg(data)['names']) colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(classes))] + # Run inference for i, (path, img, im0, vid_cap) in enumerate(dataloader): t = time.time() save_path = str(Path(output) / Path(path).name) @@ -129,6 +135,7 @@ if __name__ == '__main__': parser.add_argument('--nms-thres', type=float, default=0.5, help='iou threshold for non-maximum suppression') parser.add_argument('--fourcc', type=str, default='mp4v', help='fourcc output video codec (verify ffmpeg support)') parser.add_argument('--output', type=str, default='output', help='specifies the output path for images and videos') + parser.add_argument('--half', action='store_true', help='half precision FP16 inference') opt = parser.parse_args() print(opt) diff --git a/test.py b/test.py index c16790e0..843f077b 100644 --- a/test.py +++ b/test.py @@ -6,6 +6,7 @@ from torch.utils.data import DataLoader from models import * from utils.datasets import * from utils.utils import * +from utils import nms def test(cfg, @@ -75,7 +76,7 @@ def test(cfg, loss += compute_loss(train_out, targets, model)[0].item() # Run NMS - output = non_max_suppression(inf_out, conf_thres=conf_thres, nms_thres=nms_thres) + output = nms.multiprocess_nms(inf_out, conf_thres=conf_thres, nms_thres=nms_thres) # Statistics per image for si, pred in enumerate(output): @@ -191,7 +192,7 @@ def test(cfg, if __name__ == '__main__': parser = argparse.ArgumentParser(prog='test.py') - parser.add_argument('--batch-size', type=int, default=16, help='size of each image batch') + parser.add_argument('--batch-size', type=int, default=4, help='size of each image batch') parser.add_argument('--cfg', type=str, default='cfg/yolov3-spp.cfg', help='cfg file path') parser.add_argument('--data', type=str, default='data/coco.data', help='coco.data file path') parser.add_argument('--weights', type=str, default='weights/yolov3-spp.weights', help='path to weights file') diff --git a/utils/datasets.py b/utils/datasets.py index f35671ba..02525c99 100755 --- a/utils/datasets.py +++ b/utils/datasets.py @@ -39,7 +39,7 @@ def exif_size(img): class LoadImages: # for inference - def __init__(self, path, img_size=416): + def __init__(self, path, img_size=416, half=False): path = str(Path(path)) # os-agnostic files = [] if os.path.isdir(path): @@ -56,6 +56,7 @@ class LoadImages: # for inference self.nF = nI + nV # number of files self.video_flag = [False] * nI + [True] * nV self.mode = 'images' + self.half = half # half precision fp16 images if any(videos): self.new_video(videos[0]) # new video else: @@ -100,7 +101,7 @@ class LoadImages: # for inference # Normalize RGB img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB - img = np.ascontiguousarray(img, dtype=np.float32) # uint8 to float32 + img = np.ascontiguousarray(img, dtype=np.float16 if self.half else np.float32) # uint8 to fp16/fp32 img /= 255.0 # 0 - 255 to 0.0 - 1.0 # cv2.imwrite(path + '.letterbox.jpg', 255 * img.transpose((1, 2, 0))[:, :, ::-1]) # save letterbox image @@ -116,8 +117,9 @@ class LoadImages: # for inference class LoadWebcam: # for inference - def __init__(self, img_size=416): + def __init__(self, img_size=416, half=False): self.img_size = img_size + self.half = half # half precision fp16 images self.cam = cv2.VideoCapture(0) # local camera # self.cam = cv2.VideoCapture('rtsp://192.168.1.64/1') # IP camera # self.cam = cv2.VideoCapture('rtsp://username:password@192.168.1.64/1') # IP camera with login @@ -144,7 +146,7 @@ class LoadWebcam: # for inference # Normalize RGB img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB - img = np.ascontiguousarray(img, dtype=np.float32) # uint8 to float32 + img = np.ascontiguousarray(img, dtype=np.float16 if self.half else np.float32) # uint8 to fp16/fp32 img /= 255.0 # 0 - 255 to 0.0 - 1.0 return img_path, img, img0, None