webcam updates

2019-02-11 18:15:51 +01:00 · 2019-02-11 18:15:51 +01:00 · e23b1a3d73
parent 585f2e2cc1
commit e23b1a3d73
7 changed files with 29 additions and 233 deletions
--- a/detect.py
+++ b/detect.py
@@ -50,13 +50,16 @@ def detect(

    for i, (path, img, im0) in enumerate(dataloader):
        t = time.time()
-        print("%g/%g '%s': " % (i + 1, len(dataloader), path if not webcam else 'webcam'), end='')
+        if webcam:
+            print('webcam frame %g: ' % (i + 1), end='')
+        else:
+            print('image %g/%g %s: ' % (i + 1, len(dataloader), path), end='')
        save_path = os.path.join(output, path.split('/')[-1])

        # Get detections
        img = torch.from_numpy(img).unsqueeze(0).to(device)
        if ONNX_EXPORT:
-            torch.onnx._export(model, img, 'weights/model.onnx', verbose=True)
+            torch.onnx.export(model, img, 'weights/model.onnx', verbose=True)
            return  # ONNX export
        pred = model(img)
        pred = pred[pred[:, :, 4] > conf_thres]  # remove boxes < threshold
@@ -70,9 +73,9 @@ def detect(

            # Print results to screen
            unique_classes = detections[:, -1].cpu().unique()
-            for i in unique_classes:
-                n = (detections[:, -1].cpu() == i).sum()
-                print('%g %ss' % (n, classes[int(i)]), end=', ')
+            for c in unique_classes:
+                n = (detections[:, -1].cpu() == c).sum()
+                print('%g %ss' % (n, classes[int(c)]), end=', ')

            # Draw bounding boxes and labels of detections
            for x1, y1, x2, y2, conf, cls_conf, cls in detections:
--- a/models.py
+++ b/models.py
@@ -82,6 +82,9 @@ class EmptyLayer(nn.Module):
    def __init__(self):
        super(EmptyLayer, self).__init__()

+    def forward(self, x):
+        return x
+

 class Upsample(nn.Module):
    # Custom Upsample layer (nn.Upsample gives deprecated warning message)
@@ -121,8 +124,8 @@ class YOLOLayer(nn.Module):

        # Build anchor grids
        nG = int(self.img_dim / stride)  # number grid points
-        self.grid_x = torch.arange(nG).repeat(nG, 1).view([1, 1, nG, nG]).float()
-        self.grid_y = torch.arange(nG).repeat(nG, 1).t().view([1, 1, nG, nG]).float()
+        self.grid_x = torch.arange(nG).repeat((nG, 1)).view((1, 1, nG, nG)).float()
+        self.grid_y = torch.arange(nG).repeat((nG, 1)).t().view((1, 1, nG, nG)).float()
        self.anchor_wh = torch.FloatTensor([(a_w / stride, a_h / stride) for a_w, a_h in anchors])  # scale anchors
        self.anchor_w = self.anchor_wh[:, 0].view((1, nA, 1, 1))
        self.anchor_h = self.anchor_wh[:, 1].view((1, nA, 1, 1))
@@ -169,8 +172,8 @@ class YOLOLayer(nn.Module):
            # Width and height (yolo method)
            w = p[..., 2]  # Width
            h = p[..., 3]  # Height
-            width = torch.exp(w.data) * self.anchor_w
-            height = torch.exp(h.data) * self.anchor_h
+            # width = torch.exp(w.data) * self.anchor_w
+            # height = torch.exp(h.data) * self.anchor_h

            # Width and height (power method)
            # w = torch.sigmoid(p[..., 2])  # Width
@@ -217,8 +220,8 @@ class YOLOLayer(nn.Module):

                # Broadcasting only supported on first dimension in CoreML. See onnx-coreml/_operators.py
                # p_cls = F.softmax(p_cls, 2) * p_conf  # SSD-like conf
-                p_cls = torch.exp(p_cls).permute(2, 1, 0)
-                p_cls = p_cls / p_cls.sum(0).unsqueeze(0) * p_conf.permute(2, 1, 0)  # F.softmax() equivalent
+                p_cls = torch.exp(p_cls).permute((2, 1, 0))
+                p_cls = p_cls / p_cls.sum(0).unsqueeze(0) * p_conf.permute((2, 1, 0))  # F.softmax() equivalent
                p_cls = p_cls.permute(2, 1, 0)

                return torch.cat((xy / nG, width_height, p_conf, p_cls), 2).squeeze().t()
@@ -246,6 +249,7 @@ class Darknet(nn.Module):
        self.hyperparams, self.module_list = create_modules(self.module_defs)
        self.img_size = img_size
        self.loss_names = ['loss', 'x', 'y', 'w', 'h', 'conf', 'cls', 'nT']
+        self.losses = []

    def forward(self, x, targets=None, var=0):
        self.losses = defaultdict(float)
@@ -296,8 +300,8 @@ def load_darknet_weights(self, weights, cutoff=-1):
    if not os.path.isfile(weights):
        try:
            os.system('wget https://pjreddie.com/media/files/' + weights_file + ' -P ' + weights)
-        except:
-            assert os.path.isfile(weights)
+        except IOError:
+            print(weights + ' not found')

    # Establish cutoffs
    if weights_file == 'darknet53.conv.74':
--- a/train.py
+++ b/train.py
@@ -36,7 +36,6 @@ def train(

    # Configure run
    data_cfg = parse_data_cfg(data_cfg)
-    num_classes = int(data_cfg['classes'])
    train_path = data_cfg['train']

    # Initialize model
@@ -62,7 +61,7 @@ def train(
        #         p.requires_grad = False

        # Set optimizer
-        optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad, model.parameters()), lr=lr0, momentum=.9)
+        optimizer = torch.optim.SGD(filter(lambda x: x.requires_grad, model.parameters()), lr=lr0, momentum=.9)

        start_epoch = checkpoint['epoch'] + 1
        if checkpoint['optimizer'] is not None:
@@ -85,7 +84,7 @@ def train(
        model.to(device).train()

        # Set optimizer
-        optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad, model.parameters()), lr=lr0, momentum=.9)
+        optimizer = torch.optim.SGD(filter(lambda x: x.requires_grad, model.parameters()), lr=lr0, momentum=.9)

    # Set scheduler
    # scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[54, 61], gamma=0.1)
--- a/utils/datasets.py
+++ b/utils/datasets.py
@@ -58,7 +58,6 @@ class LoadImages:  # for inference
 class LoadWebcam:  # for inference
    def __init__(self, img_size=416):
        self.cam = cv2.VideoCapture(0)
-        self.nF = 9999  # number of image files
        self.height = img_size

    def __iter__(self):
@@ -88,7 +87,7 @@ class LoadWebcam:  # for inference
        return img_path, img, img0

    def __len__(self):
-        return self.nF  # number of files
+        return 0


 class LoadImagesAndLabels:  # for training
--- a/utils/onnx2coreml.py
+++ b/utils/onnx2coreml.py
@@ -1,208 +0,0 @@
-import os
-import onnx
-from onnx import onnx_pb
-from onnx_coreml import convert
-import glob
-
-
-# https://github.com/onnx/onnx-coreml
-# http://machinethink.net/blog/mobilenet-ssdlite-coreml/
-# https://github.com/hollance/YOLO-CoreML-MPSNNGraph
-
-def main():
-    os.system('rm -rf saved_models && mkdir saved_models')
-    files = glob.glob('saved_models/*.onnx') + glob.glob('../yolov3/weights/*.onnx')
-
-    for f in files:
-        # 1. ONNX to CoreML
-        name = 'saved_models/' + f.split('/')[-1].replace('.onnx', '')
-
-        # # Load the ONNX model
-        model = onnx.load(f)
-
-        # Check that the IR is well formed
-        print(onnx.checker.check_model(model))
-
-        # Print a human readable representation of the graph
-        print(onnx.helper.printable_graph(model.graph))
-
-        model_file = open(f, 'rb')
-        model_proto = onnx_pb.ModelProto()
-        model_proto.ParseFromString(model_file.read())
-        yolov3_model = convert(model_proto, image_input_names=['0'], preprocessing_args={'image_scale': 1. / 255})
-
-        # 2. Reduce model to FP16, change outputs to DOUBLE and save
-        import coremltools
-
-        spec = yolov3_model.get_spec()
-        for i in range(2):
-            spec.description.output[i].type.multiArrayType.dataType = \
-                coremltools.proto.FeatureTypes_pb2.ArrayFeatureType.ArrayDataType.Value('DOUBLE')
-
-        spec = coremltools.utils.convert_neural_network_spec_weights_to_fp16(spec)
-        yolov3_model = coremltools.models.MLModel(spec)
-
-        name_out0 = spec.description.output[0].name
-        name_out1 = spec.description.output[1].name
-
-        num_classes = 80
-        num_anchors = 507  # 507 for yolov3-tiny,
-        spec.description.output[0].type.multiArrayType.shape.append(num_anchors)
-        spec.description.output[0].type.multiArrayType.shape.append(num_classes)
-        # spec.description.output[0].type.multiArrayType.shape.append(1)
-
-        spec.description.output[1].type.multiArrayType.shape.append(num_anchors)
-        spec.description.output[1].type.multiArrayType.shape.append(4)
-        # spec.description.output[1].type.multiArrayType.shape.append(1)
-
-        # rename
-        # input_mlmodel = input_tensor.replace(":", "__").replace("/", "__")
-        # class_output_mlmodel = class_output_tensor.replace(":", "__").replace("/", "__")
-        # bbox_output_mlmodel = bbox_output_tensor.replace(":", "__").replace("/", "__")
-        #
-        # for i in range(len(spec.neuralNetwork.layers)):
-        #     if spec.neuralNetwork.layers[i].input[0] == input_mlmodel:
-        #         spec.neuralNetwork.layers[i].input[0] = 'image'
-        #     if spec.neuralNetwork.layers[i].output[0] == class_output_mlmodel:
-        #         spec.neuralNetwork.layers[i].output[0] = 'scores'
-        #     if spec.neuralNetwork.layers[i].output[0] == bbox_output_mlmodel:
-        #         spec.neuralNetwork.layers[i].output[0] = 'boxes'
-
-        spec.neuralNetwork.preprocessing[0].featureName = '0'
-
-        yolov3_model.save(name + '.mlmodel')
-        # yolov3_model.visualize_spec()
-        print(spec.description)
-
-        # 2.5. Try to Predict:
-        from PIL import Image
-        img = Image.open('../yolov3/data/samples/zidane_416.jpg')
-        out = yolov3_model.predict({'0': img}, useCPUOnly=True)
-        print(out[name_out0].shape, out[name_out1].shape)
-
-        # 3. Create NMS protobuf
-        import numpy as np
-
-        nms_spec = coremltools.proto.Model_pb2.Model()
-        nms_spec.specificationVersion = 3
-
-        for i in range(2):
-            decoder_output = yolov3_model._spec.description.output[i].SerializeToString()
-
-            nms_spec.description.input.add()
-            nms_spec.description.input[i].ParseFromString(decoder_output)
-
-            nms_spec.description.output.add()
-            nms_spec.description.output[i].ParseFromString(decoder_output)
-
-        nms_spec.description.output[0].name = 'confidence'
-        nms_spec.description.output[1].name = 'coordinates'
-
-        output_sizes = [num_classes, 4]
-        for i in range(2):
-            ma_type = nms_spec.description.output[i].type.multiArrayType
-            ma_type.shapeRange.sizeRanges.add()
-            ma_type.shapeRange.sizeRanges[0].lowerBound = 0
-            ma_type.shapeRange.sizeRanges[0].upperBound = -1
-            ma_type.shapeRange.sizeRanges.add()
-            ma_type.shapeRange.sizeRanges[1].lowerBound = output_sizes[i]
-            ma_type.shapeRange.sizeRanges[1].upperBound = output_sizes[i]
-            del ma_type.shape[:]
-
-        nms = nms_spec.nonMaximumSuppression
-        nms.confidenceInputFeatureName = name_out0  # 1x507x80
-        nms.coordinatesInputFeatureName = name_out1  # 1x507x4
-        nms.confidenceOutputFeatureName = 'confidence'
-        nms.coordinatesOutputFeatureName = 'coordinates'
-        nms.iouThresholdInputFeatureName = 'iouThreshold'
-        nms.confidenceThresholdInputFeatureName = 'confidenceThreshold'
-
-        nms.iouThreshold = 0.4
-        nms.confidenceThreshold = 0.5
-        nms.pickTop.perClass = True
-
-        labels = np.loadtxt('../yolov3/data/coco.names', dtype=str, delimiter='\n')
-        nms.stringClassLabels.vector.extend(labels)
-
-        nms_model = coremltools.models.MLModel(nms_spec)
-        nms_model.save(name + '_nms.mlmodel')
-
-        # out_nms = nms_model.predict({
-        #     '143': out['143'].squeeze().reshape((80, 507)),
-        #     '144': out['144'].squeeze().reshape((4, 507))
-        # })
-        # print(out_nms['confidence'].shape, out_nms['coordinates'].shape)
-
-        # # # 3.5 Add Softmax model
-        # from coremltools.models import datatypes
-        # from coremltools.models import neural_network
-        #
-        # input_features = [
-        #     ("141", datatypes.Array(num_anchors, num_classes, 1)),
-        #     ("143", datatypes.Array(num_anchors, 4, 1))
-        # ]
-        #
-        # output_features = [
-        #     ("141", datatypes.Array(num_anchors, num_classes, 1)),
-        #     ("143", datatypes.Array(num_anchors, 4, 1))
-        # ]
-        #
-        # builder = neural_network.NeuralNetworkBuilder(input_features, output_features)
-        # builder.add_softmax(name="softmax_pcls",
-        #                     dim=(0, 3, 2, 1),
-        #                     input_name="scores",
-        #                     output_name="permute_scores_output")
-        # softmax_model = coremltools.models.MLModel(builder.spec)
-        # softmax_model.save("softmax.mlmodel")
-
-        # 4. Pipeline models togethor
-        from coremltools.models import datatypes
-        # from coremltools.models import neural_network
-        from coremltools.models.pipeline import Pipeline
-
-        input_features = [('0', datatypes.Array(3, 416, 416)),
-                          ('iouThreshold', datatypes.Double()),
-                          ('confidenceThreshold', datatypes.Double())]
-
-        output_features = ['confidence', 'coordinates']
-
-        pipeline = Pipeline(input_features, output_features)
-
-        # Add 3rd dimension of size 1 (apparently not needed, produces error on compile)
-        yolov3_output = yolov3_model._spec.description.output
-        yolov3_output[0].type.multiArrayType.shape[:] = [num_anchors, num_classes, 1]
-        yolov3_output[1].type.multiArrayType.shape[:] = [num_anchors, 4, 1]
-
-        nms_input = nms_model._spec.description.input
-        for i in range(2):
-            nms_input[i].type.multiArrayType.shape[:] = yolov3_output[i].type.multiArrayType.shape[:]
-
-        # And now we can add the three models, in order:
-        pipeline.add_model(yolov3_model)
-
-        pipeline.add_model(nms_model)
-
-        # Correct datatypes
-        pipeline.spec.description.input[0].ParseFromString(yolov3_model._spec.description.input[0].SerializeToString())
-        pipeline.spec.description.output[0].ParseFromString(nms_model._spec.description.output[0].SerializeToString())
-        pipeline.spec.description.output[1].ParseFromString(nms_model._spec.description.output[1].SerializeToString())
-
-        # Update metadata
-        pipeline.spec.description.metadata.versionString = 'yolov3-tiny.pt imported from PyTorch'
-        pipeline.spec.description.metadata.shortDescription = 'https://github.com/ultralytics/yolov3'
-        pipeline.spec.description.metadata.author = 'glenn.jocher@ultralytics.com'
-        pipeline.spec.description.metadata.license = 'https://github.com/ultralytics/yolov3'
-
-        user_defined_metadata = {'classes': ','.join(labels),
-                                 'iou_threshold': str(nms.iouThreshold),
-                                 'confidence_threshold': str(nms.confidenceThreshold)}
-        pipeline.spec.description.metadata.userDefined.update(user_defined_metadata)
-
-        # Save the model
-        pipeline.spec.specificationVersion = 3
-        final_model = coremltools.models.MLModel(pipeline.spec)
-        final_model.save((name + '_pipelined.mlmodel'))
-
-
-if __name__ == '__main__':
-    main()
--- a/utils/parse_config.py
+++ b/utils/parse_config.py
@@ -1,14 +1,12 @@
-
-
 def parse_model_config(path):
    """Parses the yolo-v3 layer configuration file and returns module definitions"""
    file = open(path, 'r')
    lines = file.read().split('\n')
    lines = [x for x in lines if x and not x.startswith('#')]
-    lines = [x.rstrip().lstrip() for x in lines] # get rid of fringe whitespaces
+    lines = [x.rstrip().lstrip() for x in lines]  # get rid of fringe whitespaces
    module_defs = []
    for line in lines:
-        if line.startswith('['): # This marks the start of a new block
+        if line.startswith('['):  # This marks the start of a new block
            module_defs.append({})
            module_defs[-1]['type'] = line[1:-1].rstrip()
            if module_defs[-1]['type'] == 'convolutional':
@@ -20,6 +18,7 @@ def parse_model_config(path):

    return module_defs

+
 def parse_data_cfg(path):
    """Parses the data configuration file"""
    options = dict()
--- a/utils/utils.py
+++ b/utils/utils.py
@@ -254,7 +254,7 @@ def build_targets(target, anchor_wh, nA, nC, nG):
            iou_order = torch.argsort(-iou_best)  # best to worst

            # Unique anchor selection
-            u = torch.cat((gi, gj, a), 0).view(3, -1)
+            u = torch.cat((gi, gj, a), 0).view((3, -1))
            _, first_unique = np.unique(u[:, iou_order], axis=1, return_index=True)  # first unique indices
            # _, first_unique = torch.unique(u[:, iou_order], dim=1, return_inverse=True)  # different than numpy?

@@ -340,7 +340,8 @@ def non_max_suppression(prediction, conf_thres=0.5, nms_thres=0.4):
        # x = np.concatenate((log_w.reshape(-1, 1), log_h.reshape(-1, 1)), 1)
        # from scipy.stats import multivariate_normal
        # for c in range(60):
-        # shape_likelihood[:, c] = multivariate_normal.pdf(x, mean=mat['class_mu'][c, :2], cov=mat['class_cov'][c, :2, :2])
+        # shape_likelihood[:, c] =
+        #   multivariate_normal.pdf(x, mean=mat['class_mu'][c, :2], cov=mat['class_cov'][c, :2, :2])

        class_prob, class_pred = torch.max(F.softmax(pred[:, 5:], 1), 1)

@@ -436,7 +437,6 @@ def coco_class_count(path='../coco/labels/train2014/'):
 def plot_results():
    # Plot YOLO training results file 'results.txt'
    import glob
-    import numpy as np
    import matplotlib.pyplot as plt
    # import os; os.system('rm -rf results.txt && wget https://storage.googleapis.com/ultralytics/results_v1_0.txt')