updates

2019-02-08 22:43:05 +01:00 · 2019-02-08 22:43:05 +01:00 · c2436d8197
parent d6abdaf8d0
commit c2436d8197
7 changed files with 107 additions and 161 deletions
--- a/detect.py
+++ b/detect.py
@@ -9,53 +9,48 @@ from utils import torch_utils


 def detect(
-        net_config_path,
-        data_config_path,
-        weights_path,
+        cfg,
+        weights,
        images_path,
        output='output',
-        batch_size=16,
        img_size=416,
        conf_thres=0.3,
        nms_thres=0.45,
        save_txt=False,
-        save_images=False,
+        save_images=True,
 ):
    device = torch_utils.select_device()
-    print("Using device: \"{}\"".format(device))

    os.system('rm -rf ' + output)
    os.makedirs(output, exist_ok=True)

-    data_config = parse_data_config(data_config_path)
-
    # Load model
-    model = Darknet(net_config_path, img_size)
+    model = Darknet(cfg, img_size)

-    if weights_path.endswith('.pt'):  # pytorch format
-        if weights_path.endswith('weights/yolov3.pt') and not os.path.isfile(weights_path):
-            os.system('wget https://storage.googleapis.com/ultralytics/yolov3.pt -O ' + weights_path)
-        checkpoint = torch.load(weights_path, map_location='cpu')
+    if weights.endswith('.pt'):  # pytorch format
+        if weights.endswith('weights/yolov3.pt') and not os.path.isfile(weights):
+            os.system('wget https://storage.googleapis.com/ultralytics/yolov3.pt -O ' + weights)
+        checkpoint = torch.load(weights, map_location='cpu')
        model.load_state_dict(checkpoint['model'])
        del checkpoint
    else:  # darknet format
-        load_darknet_weights(model, weights_path)
+        load_darknet_weights(model, weights)

    model.to(device).eval()

    # Set Dataloader
-    classes = load_classes(data_config['names'])  # Extracts class labels from file
-    dataloader = load_images(images_path, batch_size=batch_size, img_size=img_size)
+    dataloader = load_images(images_path, img_size=img_size)

-    imgs = []  # Stores image paths
-    img_detections = []  # Stores detections for each image index
-    prev_time = time.time()
-    for i, (img_paths, img) in enumerate(dataloader):
-        print('%g/%g' % (i + 1, len(dataloader)), end=' ')
+    # Classes and colors
+    classes = load_classes(parse_data_cfg('cfg/coco.data')['names'])  # Extracts class labels from file
+    color_list = [[random.randint(0, 255), random.randint(0, 255), random.randint(0, 255)] for _ in range(len(classes))]
+
+    for i, (path, img, img0) in enumerate(dataloader):
+        print('image %g/%g: %s' % (i + 1, len(dataloader), path))
+        t = time.time()

        # Get detections
        with torch.no_grad():
-            # cv2.imwrite('zidane_416.jpg', 255 * img.transpose((1, 2, 0))[:, :, ::-1])  # letterboxed
            img = torch.from_numpy(img).unsqueeze(0).to(device)
            if ONNX_EXPORT:
                pred = torch.onnx._export(model, img, 'weights/model.onnx', verbose=True)
@@ -64,71 +59,58 @@ def detect(
            pred = pred[pred[:, :, 4] > conf_thres]

            if len(pred) > 0:
-                detections = non_max_suppression(pred.unsqueeze(0), conf_thres, nms_thres)
-                img_detections.extend(detections)
-                imgs.extend(img_paths)
+                detections = non_max_suppression(pred.unsqueeze(0), conf_thres, nms_thres)[0]

-        print('Batch %d... Done. (%.3fs)' % (i, time.time() - prev_time))
-        prev_time = time.time()
+            # Draw bounding boxes and labels of detections
+            if detections is not None:
+                img = img0

-    # Bounding-box colors
-    color_list = [[random.randint(0, 255), random.randint(0, 255), random.randint(0, 255)] for _ in range(len(classes))]
+                # The amount of padding that was added
+                pad_x = max(img.shape[0] - img.shape[1], 0) * (img_size / max(img.shape))
+                pad_y = max(img.shape[1] - img.shape[0], 0) * (img_size / max(img.shape))
+                # Image height and width after padding is removed
+                unpad_h = img_size - pad_y
+                unpad_w = img_size - pad_x

-    if len(img_detections) == 0:
-        return
+                unique_classes = detections[:, -1].cpu().unique()
+                bbox_colors = random.sample(color_list, len(unique_classes))

-    # Iterate through images and save plot of detections
-    for img_i, (path, detections) in enumerate(zip(imgs, img_detections)):
-        print("image %g: '%s'" % (img_i, path))
+                # write results to .txt file
+                results_img_path = os.path.join(output, path.split('/')[-1])
+                results_txt_path = results_img_path + '.txt'
+                if os.path.isfile(results_txt_path):
+                    os.remove(results_txt_path)

-        # Draw bounding boxes and labels of detections
-        if detections is not None:
-            img = cv2.imread(path)
+                for i in unique_classes:
+                    n = (detections[:, -1].cpu() == i).sum()
+                    print('%g %ss' % (n, classes[int(i)]))

-            # The amount of padding that was added
-            pad_x = max(img.shape[0] - img.shape[1], 0) * (img_size / max(img.shape))
-            pad_y = max(img.shape[1] - img.shape[0], 0) * (img_size / max(img.shape))
-            # Image height and width after padding is removed
-            unpad_h = img_size - pad_y
-            unpad_w = img_size - pad_x
+                for x1, y1, x2, y2, conf, cls_conf, cls_pred in detections:
+                    # Rescale coordinates to original dimensions
+                    box_h = ((y2 - y1) / unpad_h) * img.shape[0]
+                    box_w = ((x2 - x1) / unpad_w) * img.shape[1]
+                    y1 = (((y1 - pad_y // 2) / unpad_h) * img.shape[0]).round().item()
+                    x1 = (((x1 - pad_x // 2) / unpad_w) * img.shape[1]).round().item()
+                    x2 = (x1 + box_w).round().item()
+                    y2 = (y1 + box_h).round().item()
+                    x1, y1, x2, y2 = max(x1, 0), max(y1, 0), max(x2, 0), max(y2, 0)

-            unique_classes = detections[:, -1].cpu().unique()
-            bbox_colors = random.sample(color_list, len(unique_classes))
+                    # write to file
+                    if save_txt:
+                        with open(results_txt_path, 'a') as file:
+                            file.write(('%g %g %g %g %g %g \n') % (x1, y1, x2, y2, cls_pred, cls_conf * conf))

-            # write results to .txt file
-            results_img_path = os.path.join(output, path.split('/')[-1])
-            results_txt_path = results_img_path + '.txt'
-            if os.path.isfile(results_txt_path):
-                os.remove(results_txt_path)
-
-            for i in unique_classes:
-                n = (detections[:, -1].cpu() == i).sum()
-                print('%g %ss' % (n, classes[int(i)]))
-
-            for x1, y1, x2, y2, conf, cls_conf, cls_pred in detections:
-                # Rescale coordinates to original dimensions
-                box_h = ((y2 - y1) / unpad_h) * img.shape[0]
-                box_w = ((x2 - x1) / unpad_w) * img.shape[1]
-                y1 = (((y1 - pad_y // 2) / unpad_h) * img.shape[0]).round().item()
-                x1 = (((x1 - pad_x // 2) / unpad_w) * img.shape[1]).round().item()
-                x2 = (x1 + box_w).round().item()
-                y2 = (y1 + box_h).round().item()
-                x1, y1, x2, y2 = max(x1, 0), max(y1, 0), max(x2, 0), max(y2, 0)
-
-                # write to file
-                if save_txt:
-                    with open(results_txt_path, 'a') as file:
-                        file.write(('%g %g %g %g %g %g \n') % (x1, y1, x2, y2, cls_pred, cls_conf * conf))
+                    if save_images:
+                        # Add the bbox to the plot
+                        label = '%s %.2f' % (classes[int(cls_pred)], conf)
+                        color = bbox_colors[int(np.where(unique_classes == int(cls_pred))[0])]
+                        plot_one_box([x1, y1, x2, y2], img, label=label, color=color)

                if save_images:
-                    # Add the bbox to the plot
-                    label = '%s %.2f' % (classes[int(cls_pred)], conf)
-                    color = bbox_colors[int(np.where(unique_classes == int(cls_pred))[0])]
-                    plot_one_box([x1, y1, x2, y2], img, label=label, color=color)
+                    # Save generated image with detections
+                    cv2.imwrite(results_img_path.replace('.bmp', '.jpg').replace('.tif', '.jpg'), img)

-            if save_images:
-                # Save generated image with detections
-                cv2.imwrite(results_img_path.replace('.bmp', '.jpg').replace('.tif', '.jpg'), img)
+        print('Done. (%.3fs)\n' % (time.time() - t))

    if platform == 'darwin':  # MacOS (local)
        os.system('open ' + output)
@@ -138,32 +120,20 @@ if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--image-folder', type=str, default='data/samples', help='path to images')
    parser.add_argument('--output-folder', type=str, default='output', help='path to outputs')
-    parser.add_argument('--plot-flag', type=bool, default=True)
-    parser.add_argument('--txt-out', type=bool, default=False)
    parser.add_argument('--cfg', type=str, default='cfg/yolov3.cfg', help='cfg file path')
-    parser.add_argument('--data-config', type=str, default='cfg/coco.data', help='path to data config file')
    parser.add_argument('--weights', type=str, default='weights/yolov3.pt', help='path to weights file')
    parser.add_argument('--conf-thres', type=float, default=0.50, help='object confidence threshold')
    parser.add_argument('--nms-thres', type=float, default=0.45, help='iou threshold for non-maximum suppression')
-    parser.add_argument('--batch-size', type=int, default=1, help='size of the batches')
    parser.add_argument('--img-size', type=int, default=32 * 13, help='size of each image dimension')
    opt = parser.parse_args()
    print(opt)

-    torch.cuda.empty_cache()
-
-    init_seeds()
-
    detect(
        opt.cfg,
-        opt.data_config,
        opt.weights,
        opt.image_folder,
        output=opt.output_folder,
-        batch_size=opt.batch_size,
        img_size=opt.img_size,
        conf_thres=opt.conf_thres,
        nms_thres=opt.nms_thres,
-        save_txt=opt.txt_out,
-        save_images=opt.plot_flag,
    )
--- a/models.py
+++ b/models.py
@@ -334,17 +334,17 @@ class Darknet(nn.Module):
        return sum(output) if is_training else torch.cat(output, 1)


-def load_darknet_weights(self, weights_path, cutoff=-1):
-    # Parses and loads the weights stored in 'weights_path'
+def load_darknet_weights(self, weights, cutoff=-1):
+    # Parses and loads the weights stored in 'weights'
    # cutoff: save layers between 0 and cutoff (if cutoff = -1 all are saved)
-    weights_file = weights_path.split(os.sep)[-1]
+    weights_file = weights.split(os.sep)[-1]

    # Try to download weights if not available locally
-    if not os.path.isfile(weights_path):
+    if not os.path.isfile(weights):
        try:
-            os.system('wget https://pjreddie.com/media/files/' + weights_file + ' -P ' + weights_path)
+            os.system('wget https://pjreddie.com/media/files/' + weights_file + ' -P ' + weights)
        except:
-            assert os.path.isfile(weights_path)
+            assert os.path.isfile(weights)

    # Establish cutoffs
    if weights_file == 'darknet53.conv.74':
@@ -353,7 +353,7 @@ def load_darknet_weights(self, weights_path, cutoff=-1):
        cutoff = 16

    # Open the weights file
-    fp = open(weights_path, 'rb')
+    fp = open(weights, 'rb')
    header = np.fromfile(fp, dtype=np.int32, count=5)  # First five are header values

    # Needed to write header when saving weights
--- a/test.py
+++ b/test.py
@@ -8,34 +8,32 @@ from utils import torch_utils


 def test(
-        net_config_path,
-        data_config_path,
-        weights_path,
+        cfg,
+        data_cfg,
+        weights,
        batch_size=16,
        img_size=416,
        iou_thres=0.5,
        conf_thres=0.3,
        nms_thres=0.45,
-        n_cpus=0,
 ):
    device = torch_utils.select_device()
-    print("Using device: \"{}\"".format(device))

    # Configure run
-    data_config = parse_data_config(data_config_path)
-    nC = int(data_config['classes'])  # number of classes (80 for COCO)
-    test_path = data_config['valid']
+    data_cfg = parse_data_cfg(data_cfg)
+    nC = int(data_cfg['classes'])  # number of classes (80 for COCO)
+    test_path = data_cfg['valid']

    # Initiate model
-    model = Darknet(net_config_path, img_size)
+    model = Darknet(cfg, img_size)

    # Load weights
-    if weights_path.endswith('.pt'):  # pytorch format
-        checkpoint = torch.load(weights_path, map_location='cpu')
+    if weights.endswith('.pt'):  # pytorch format
+        checkpoint = torch.load(weights, map_location='cpu')
        model.load_state_dict(checkpoint['model'])
        del checkpoint
    else:  # darknet format
-        load_darknet_weights(model, weights_path)
+        load_darknet_weights(model, weights)

    model.to(device).eval()

@@ -118,7 +116,7 @@ def test(
    # Print mAP per class
    print('%11s' * 5 % ('Image', 'Total', 'P', 'R', 'mAP') + '\n\nmAP Per Class:')

-    classes = load_classes(data_config['names'])  # Extracts class labels from file
+    classes = load_classes(data_cfg['names'])  # Extracts class labels from file
    for i, c in enumerate(classes):
        print('%15s: %-.4f' % (c, AP_accum[i] / AP_accum_count[i]))

@@ -130,12 +128,11 @@ if __name__ == '__main__':
    parser = argparse.ArgumentParser(prog='test.py')
    parser.add_argument('--batch-size', type=int, default=32, help='size of each image batch')
    parser.add_argument('--cfg', type=str, default='cfg/yolov3.cfg', help='path to model config file')
-    parser.add_argument('--data-config', type=str, default='cfg/coco.data', help='path to data config file')
+    parser.add_argument('--data-cfg', type=str, default='cfg/coco.data', help='path to data config file')
    parser.add_argument('--weights', type=str, default='weights/yolov3.pt', help='path to weights file')
    parser.add_argument('--iou-thres', type=float, default=0.5, help='iou threshold required to qualify as detected')
    parser.add_argument('--conf-thres', type=float, default=0.3, help='object confidence threshold')
    parser.add_argument('--nms-thres', type=float, default=0.45, help='iou threshold for non-maximum suppression')
-    parser.add_argument('--n-cpus', type=int, default=0, help='number of cpu threads to use during batch generation')
    parser.add_argument('--img-size', type=int, default=416, help='size of each image dimension')
    opt = parser.parse_args()
    print(opt, end='\n\n')
@@ -144,12 +141,11 @@ if __name__ == '__main__':

    mAP = test(
        opt.cfg,
-        opt.data_config,
+        opt.data_cfg,
        opt.weights,
        batch_size=opt.batch_size,
        img_size=opt.img_size,
        iou_thres=opt.iou_thres,
        conf_thres=opt.conf_thres,
        nms_thres=opt.nms_thres,
-        n_cpus=opt.n_cpus,
    )
--- a/train.py
+++ b/train.py
@@ -12,38 +12,37 @@ import test


 def train(
-        net_config_path,
-        data_config_path,
+        cfg,
+        data_cfg,
        img_size=416,
        resume=False,
        epochs=100,
        batch_size=16,
        accumulated_batches=1,
-        weights_path='weights',
+        weights='weights',
        report=False,
        multi_scale=False,
        freeze_backbone=True,
        var=0,
 ):
    device = torch_utils.select_device()
-    print("Using device: \"{}\"".format(device))

    if multi_scale:  # pass maximum multi_scale size
        img_size = 608
    else:
        torch.backends.cudnn.benchmark = True

-    os.makedirs(weights_path, exist_ok=True)
-    latest_weights_file = os.path.join(weights_path, 'latest.pt')
-    best_weights_file = os.path.join(weights_path, 'best.pt')
+    os.makedirs(weights, exist_ok=True)
+    latest_weights_file = os.path.join(weights, 'latest.pt')
+    best_weights_file = os.path.join(weights, 'best.pt')

    # Configure run
-    data_config = parse_data_config(data_config_path)
-    num_classes = int(data_config['classes'])
-    train_path = data_config['train']
+    data_cfg = parse_data_cfg(data_cfg)
+    num_classes = int(data_cfg['classes'])
+    train_path = data_cfg['train']

    # Initialize model
-    model = Darknet(net_config_path, img_size)
+    model = Darknet(cfg, img_size)

    # Get dataloader
    dataloader = load_images_and_labels(train_path, batch_size=batch_size, img_size=img_size,
@@ -80,7 +79,7 @@ def train(
        best_loss = float('inf')

        # Initialize model with darknet53 weights (optional)
-        load_darknet_weights(model, os.path.join(weights_path, 'darknet53.conv.74'))
+        load_darknet_weights(model, os.path.join(weights, 'darknet53.conv.74'))

        if torch.cuda.device_count() > 1:
            raise Exception('Multi-GPU not currently supported: https://github.com/ultralytics/yolov3/issues/21')
@@ -191,24 +190,16 @@ def train(

        # Save best checkpoint
        if best_loss == loss_per_target:
-            os.system('cp {} {}'.format(
-                latest_weights_file,
-                best_weights_file,
-            ))
+            os.system('cp ' + latest_weights_file + ' ' + best_weights_file)

        # Save backup weights every 5 epochs
        if (epoch > 0) & (epoch % 5 == 0):
-            backup_file_name = 'backup{}.pt'.format(epoch)
-            backup_file_path = os.path.join(weights_path, backup_file_name)
-            os.system('cp {} {}'.format(
-                latest_weights_file,
-                backup_file_path,
-            ))
+            os.system('cp ' + latest_weights_file + ' ' + os.path.join(weights, 'backup{}.pt'.format(epoch)))

        # Calculate mAP
        mAP, R, P = test.test(
-            net_config_path,
-            data_config_path,
+            cfg,
+            data_cfg,
            latest_weights_file,
            batch_size=batch_size,
            img_size=img_size,
@@ -224,11 +215,11 @@ if __name__ == '__main__':
    parser.add_argument('--epochs', type=int, default=100, help='number of epochs')
    parser.add_argument('--batch-size', type=int, default=16, help='size of each image batch')
    parser.add_argument('--accumulated-batches', type=int, default=1, help='number of batches before optimizer step')
-    parser.add_argument('--data-config', type=str, default='cfg/coco.data', help='path to data config file')
+    parser.add_argument('--data-cfg', type=str, default='cfg/coco.data', help='path to data config file')
    parser.add_argument('--cfg', type=str, default='cfg/yolov3.cfg', help='cfg file path')
    parser.add_argument('--multi-scale', action='store_true', help='random image sizes per batch 320 - 608')
    parser.add_argument('--img-size', type=int, default=32 * 13, help='pixels')
-    parser.add_argument('--weights-path', type=str, default='weights', help='path to store weights')
+    parser.add_argument('--weights', type=str, default='weights', help='path to store weights')
    parser.add_argument('--resume', action='store_true', help='resume training flag')
    parser.add_argument('--report', action='store_true', help='report TP, FP, FN, P and R per batch (slower)')
    parser.add_argument('--freeze', action='store_true', help='freeze darknet53.conv.74 layers for first epoch')
@@ -241,13 +232,13 @@ if __name__ == '__main__':
    torch.cuda.empty_cache()
    train(
        opt.cfg,
-        opt.data_config,
+        opt.data_cfg,
        img_size=opt.img_size,
        resume=opt.resume,
        epochs=opt.epochs,
        batch_size=opt.batch_size,
        accumulated_batches=opt.accumulated_batches,
-        weights_path=opt.weights_path,
+        weights=opt.weights,
        report=opt.report,
        multi_scale=opt.multi_scale,
        freeze_backbone=opt.freeze,
--- a/utils/datasets.py
+++ b/utils/datasets.py
@@ -13,7 +13,7 @@ from utils.utils import xyxy2xywh


 class load_images():  # for inference
-    def __init__(self, path, batch_size=1, img_size=416):
+    def __init__(self, path, img_size=416):
        if os.path.isdir(path):
            image_format = ['.jpg', '.jpeg', '.png', '.tif']
            self.files = sorted(glob.glob('%s/*.*' % path))
@@ -22,43 +22,37 @@ class load_images():  # for inference
            self.files = [path]

        self.nF = len(self.files)  # number of image files
-        self.nB = math.ceil(self.nF / batch_size)  # number of batches
-        self.batch_size = batch_size
        self.height = img_size

        assert self.nF > 0, 'No images found in path %s' % path

-        # RGB normalization values
-        # self.rgb_mean = np.array([60.134, 49.697, 40.746], dtype=np.float32).reshape((3, 1, 1))
-        # self.rgb_std = np.array([29.99, 24.498, 22.046], dtype=np.float32).reshape((3, 1, 1))
-
    def __iter__(self):
        self.count = -1
        return self

    def __next__(self):
        self.count += 1
-        if self.count == self.nB:
+        if self.count == self.nF:
            raise StopIteration
        img_path = self.files[self.count]

        # Read image
-        img = cv2.imread(img_path)  # BGR
+        img0 = cv2.imread(img_path)  # BGR
+        assert img0 is not None, 'Failed to load ' + img_path

        # Padded resize
-        img, _, _, _ = resize_square(img, height=self.height, color=(127.5, 127.5, 127.5))
+        img, _, _, _ = resize_square(img0, height=self.height, color=(127.5, 127.5, 127.5))

        # Normalize RGB
        img = img[:, :, ::-1].transpose(2, 0, 1)
        img = np.ascontiguousarray(img, dtype=np.float32)
-        # img -= self.rgb_mean
-        # img /= self.rgb_std
        img /= 255.0

-        return [img_path], img
+        # cv2.imwrite(img_path + '.letterbox.jpg', 255 * img.transpose((1, 2, 0))[:, :, ::-1])  # save letterbox image
+        return img_path, img, img0

    def __len__(self):
-        return self.nB  # number of batches
+        return self.nF  # number of files


 class load_images_and_labels():  # for training
@@ -81,10 +75,6 @@ class load_images_and_labels():  # for training

        assert self.nB > 0, 'No images found in path %s' % path

-        # RGB normalization values
-        # self.rgb_mean = np.array([60.134, 49.697, 40.746], dtype=np.float32).reshape((1, 3, 1, 1))
-        # self.rgb_std = np.array([29.99, 24.498, 22.046], dtype=np.float32).reshape((1, 3, 1, 1))
-
    def __iter__(self):
        self.count = -1
        self.shuffled_vector = np.random.permutation(self.nF) if self.augment else np.arange(self.nF)
@@ -191,8 +181,6 @@ class load_images_and_labels():  # for training
        # Normalize
        img_all = np.stack(img_all)[:, :, :, ::-1].transpose(0, 3, 1, 2)  # BGR to RGB and cv2 to pytorch
        img_all = np.ascontiguousarray(img_all, dtype=np.float32)
-        # img_all -= self.rgb_mean
-        # img_all /= self.rgb_std
        img_all /= 255.0

        return torch.from_numpy(img_all), labels_all
--- a/utils/parse_config.py
+++ b/utils/parse_config.py
@@ -20,7 +20,7 @@ def parse_model_config(path):

    return module_defs

-def parse_data_config(path):
+def parse_data_cfg(path):
    """Parses the data configuration file"""
    options = dict()
    options['gpus'] = '0,1,2,3'
--- a/utils/torch_utils.py
+++ b/utils/torch_utils.py
@@ -21,4 +21,5 @@ def select_device(force_cpu=False):
        device = torch.device('cpu')
    else:
        device = torch.device('cuda:0' if CUDA_AVAILABLE else 'cpu')
+    print('Using ' + str(device) + '\n')
    return device