updates

2020-02-22 18:18:38 -08:00 · 2020-02-22 18:18:38 -08:00 · bc741f30e8
parent 7608047531
commit bc741f30e8
2 changed files with 13 additions and 20 deletions
--- a/detect.py
+++ b/detect.py
@@ -64,10 +64,10 @@ def detect(save_img=False):
    if webcam:
        view_img = True
        torch.backends.cudnn.benchmark = True  # set True to speed up constant image size inference
-        dataset = LoadStreams(source, img_size=img_size, half=half)
+        dataset = LoadStreams(source, img_size=img_size)
    else:
        save_img = True
-        dataset = LoadImages(source, img_size=img_size, half=half)
+        dataset = LoadImages(source, img_size=img_size)

    # Get names and colors
    names = load_classes(opt.names)
@@ -77,15 +77,14 @@ def detect(save_img=False):
    t0 = time.time()
    for path, img, im0s, vid_cap in dataset:
        t = time.time()
-
-        # Get detections
        img = torch.from_numpy(img).to(device)
+        img = img.half() if half else img.float()  # uint8 to fp16/32
+        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)
-        pred = model(img)[0]

-        if opt.half:
-            pred = pred.float()
+        # Inference
+        pred = model(img)[0].float() if half else model(img)[0]

        # Apply NMS
        pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)
--- a/utils/datasets.py
+++ b/utils/datasets.py
@@ -42,7 +42,7 @@ def exif_size(img):


 class LoadImages:  # for inference
-    def __init__(self, path, img_size=416, half=False):
+    def __init__(self, path, img_size=416):
        path = str(Path(path))  # os-agnostic
        files = []
        if os.path.isdir(path):
@@ -59,7 +59,6 @@ class LoadImages:  # for inference
        self.nF = nI + nV  # number of files
        self.video_flag = [False] * nI + [True] * nV
        self.mode = 'images'
-        self.half = half  # half precision fp16 images
        if any(videos):
            self.new_video(videos[0])  # new video
        else:
@@ -104,8 +103,7 @@ class LoadImages:  # for inference

        # Convert
        img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
-        img = np.ascontiguousarray(img, dtype=np.float16 if self.half else np.float32)  # uint8 to fp16/fp32
-        img /= 255.0  # 0 - 255 to 0.0 - 1.0
+        img = np.ascontiguousarray(img)

        # cv2.imwrite(path + '.letterbox.jpg', 255 * img.transpose((1, 2, 0))[:, :, ::-1])  # save letterbox image
        return path, img, img0, self.cap
@@ -120,9 +118,8 @@ class LoadImages:  # for inference


 class LoadWebcam:  # for inference
-    def __init__(self, pipe=0, img_size=416, half=False):
+    def __init__(self, pipe=0, img_size=416):
        self.img_size = img_size
-        self.half = half  # half precision fp16 images

        if pipe == '0':
            pipe = 0  # local camera
@@ -177,8 +174,7 @@ class LoadWebcam:  # for inference

        # Convert
        img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
-        img = np.ascontiguousarray(img, dtype=np.float16 if self.half else np.float32)  # uint8 to fp16/fp32
-        img /= 255.0  # 0 - 255 to 0.0 - 1.0
+        img = np.ascontiguousarray(img)

        return img_path, img, img0, None

@@ -187,10 +183,9 @@ class LoadWebcam:  # for inference


 class LoadStreams:  # multiple IP or RTSP cameras
-    def __init__(self, sources='streams.txt', img_size=416, half=False):
+    def __init__(self, sources='streams.txt', img_size=416):
        self.mode = 'images'
        self.img_size = img_size
-        self.half = half  # half precision fp16 images

        if os.path.isfile(sources):
            with open(sources, 'r') as f:
@@ -251,9 +246,8 @@ class LoadStreams:  # multiple IP or RTSP cameras
        img = np.stack(img, 0)

        # Convert
-        img = img[:, :, :, ::-1].transpose(0, 3, 1, 2)  # BGR to RGB, to 3x416x416, uint8 to float32
-        img = np.ascontiguousarray(img, dtype=np.float16 if self.half else np.float32)
-        img /= 255.0  # 0 - 255 to 0.0 - 1.0
+        img = img[:, :, :, ::-1].transpose(0, 3, 1, 2)  # BGR to RGB, to bsx3x416x416
+        img = np.ascontiguousarray(img)

        return self.sources, img, img0, None