From 5c288ca970e16a3f7a0a9c3180726ac3cfca6c32 Mon Sep 17 00:00:00 2001
From: Glenn Jocher <glenn.jocher@ultralytics.com>
Date: Thu, 1 Aug 2019 00:08:28 +0200
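Subject: [PATCH] Add --half FP16 inference to detect.py and dataloaders; use multiprocess NMS and batch-size 4 default in test.py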

---
 detect.py         | 11 +++++++++--
 test.py           |  5 +++--
 utils/datasets.py | 10 ++++++----
 3 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/detect.py b/detect.py
index 6ef3a00f..05a91089 100644
--- a/detect.py
+++ b/detect.py
@@ -51,18 +51,24 @@ def detect(cfg,
         torch.onnx.export(model, img, 'weights/export.onnx', verbose=True)
         return
 
+    # Half precision
+    opt.half = opt.half and device.type != 'cpu'  # half precision only supported on cuda
+    if opt.half:
+        model.half()
+
     # Set Dataloader
     vid_path, vid_writer = None, None
     if webcam:
         save_images = False
-        dataloader = LoadWebcam(img_size=img_size)
+        dataloader = LoadWebcam(img_size=img_size, half=opt.half)
     else:
-        dataloader = LoadImages(images, img_size=img_size)
+        dataloader = LoadImages(images, img_size=img_size, half=opt.half)
 
     # Get classes and colors
     classes = load_classes(parse_data_cfg(data)['names'])
     colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(classes))]
 
+    # Run inference
     for i, (path, img, im0, vid_cap) in enumerate(dataloader):
         t = time.time()
         save_path = str(Path(output) / Path(path).name)
@@ -129,6 +135,7 @@ if __name__ == '__main__':
     parser.add_argument('--nms-thres', type=float, default=0.5, help='iou threshold for non-maximum suppression')
     parser.add_argument('--fourcc', type=str, default='mp4v', help='fourcc output video codec (verify ffmpeg support)')
     parser.add_argument('--output', type=str, default='output', help='specifies the output path for images and videos')
+    parser.add_argument('--half', action='store_true', help='half precision FP16 inference')
     opt = parser.parse_args()
     print(opt)
 
diff --git a/test.py b/test.py
index c16790e0..843f077b 100644
--- a/test.py
+++ b/test.py
@@ -6,6 +6,7 @@ from torch.utils.data import DataLoader
 from models import *
 from utils.datasets import *
 from utils.utils import *
+from utils import nms
 
 
 def test(cfg,
@@ -75,7 +76,7 @@ def test(cfg,
             loss += compute_loss(train_out, targets, model)[0].item()
 
         # Run NMS
-        output = non_max_suppression(inf_out, conf_thres=conf_thres, nms_thres=nms_thres)
+        output = nms.multiprocess_nms(inf_out, conf_thres=conf_thres, nms_thres=nms_thres)
 
         # Statistics per image
         for si, pred in enumerate(output):
@@ -191,7 +192,7 @@ def test(cfg,
 
 if __name__ == '__main__':
     parser = argparse.ArgumentParser(prog='test.py')
-    parser.add_argument('--batch-size', type=int, default=16, help='size of each image batch')
+    parser.add_argument('--batch-size', type=int, default=4, help='size of each image batch')
     parser.add_argument('--cfg', type=str, default='cfg/yolov3-spp.cfg', help='cfg file path')
     parser.add_argument('--data', type=str, default='data/coco.data', help='coco.data file path')
     parser.add_argument('--weights', type=str, default='weights/yolov3-spp.weights', help='path to weights file')
diff --git a/utils/datasets.py b/utils/datasets.py
index f35671ba..02525c99 100755
--- a/utils/datasets.py
+++ b/utils/datasets.py
@@ -39,7 +39,7 @@ def exif_size(img):
 
 
 class LoadImages:  # for inference
-    def __init__(self, path, img_size=416):
+    def __init__(self, path, img_size=416, half=False):
         path = str(Path(path))  # os-agnostic
         files = []
         if os.path.isdir(path):
@@ -56,6 +56,7 @@ class LoadImages:  # for inference
         self.nF = nI + nV  # number of files
         self.video_flag = [False] * nI + [True] * nV
         self.mode = 'images'
+        self.half = half  # half precision fp16 images
         if any(videos):
             self.new_video(videos[0])  # new video
         else:
@@ -100,7 +101,7 @@ class LoadImages:  # for inference
 
         # Normalize RGB
         img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB
-        img = np.ascontiguousarray(img, dtype=np.float32)  # uint8 to float32
+        img = np.ascontiguousarray(img, dtype=np.float16 if self.half else np.float32)  # uint8 to fp16/fp32
         img /= 255.0  # 0 - 255 to 0.0 - 1.0
 
         # cv2.imwrite(path + '.letterbox.jpg', 255 * img.transpose((1, 2, 0))[:, :, ::-1])  # save letterbox image
@@ -116,8 +117,9 @@ class LoadImages:  # for inference
 
 
 class LoadWebcam:  # for inference
-    def __init__(self, img_size=416):
+    def __init__(self, img_size=416, half=False):
         self.img_size = img_size
+        self.half = half  # half precision fp16 images
         self.cam = cv2.VideoCapture(0)  # local camera
         # self.cam = cv2.VideoCapture('rtsp://192.168.1.64/1')  # IP camera
         # self.cam = cv2.VideoCapture('rtsp://username:password@192.168.1.64/1')  # IP camera with login
@@ -144,7 +146,7 @@ class LoadWebcam:  # for inference
 
         # Normalize RGB
         img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB
-        img = np.ascontiguousarray(img, dtype=np.float32)  # uint8 to float32
+        img = np.ascontiguousarray(img, dtype=np.float16 if self.half else np.float32)  # uint8 to fp16/fp32
         img /= 255.0  # 0 - 255 to 0.0 - 1.0
 
         return img_path, img, img0, None