From e23b1a3d7336cfa876a241b09a8aadfba421fe07 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Mon, 11 Feb 2019 18:15:51 +0100 Subject: [PATCH] webcam updates --- detect.py | 13 ++- models.py | 20 ++-- train.py | 5 +- utils/datasets.py | 3 +- utils/onnx2coreml.py | 208 ------------------------------------------ utils/parse_config.py | 7 +- utils/utils.py | 6 +- 7 files changed, 29 insertions(+), 233 deletions(-) delete mode 100644 utils/onnx2coreml.py diff --git a/detect.py b/detect.py index 57f149d4..fb4c8672 100755 --- a/detect.py +++ b/detect.py @@ -50,13 +50,16 @@ def detect( for i, (path, img, im0) in enumerate(dataloader): t = time.time() - print("%g/%g '%s': " % (i + 1, len(dataloader), path if not webcam else 'webcam'), end='') + if webcam: + print('webcam frame %g: ' % (i + 1), end='') + else: + print('image %g/%g %s: ' % (i + 1, len(dataloader), path), end='') save_path = os.path.join(output, path.split('/')[-1]) # Get detections img = torch.from_numpy(img).unsqueeze(0).to(device) if ONNX_EXPORT: - torch.onnx._export(model, img, 'weights/model.onnx', verbose=True) + torch.onnx.export(model, img, 'weights/model.onnx', verbose=True) return # ONNX export pred = model(img) pred = pred[pred[:, :, 4] > conf_thres] # remove boxes < threshold @@ -70,9 +73,9 @@ def detect( # Print results to screen unique_classes = detections[:, -1].cpu().unique() - for i in unique_classes: - n = (detections[:, -1].cpu() == i).sum() - print('%g %ss' % (n, classes[int(i)]), end=', ') + for c in unique_classes: + n = (detections[:, -1].cpu() == c).sum() + print('%g %ss' % (n, classes[int(c)]), end=', ') # Draw bounding boxes and labels of detections for x1, y1, x2, y2, conf, cls_conf, cls in detections: diff --git a/models.py b/models.py index b6538fc4..9e4ca010 100755 --- a/models.py +++ b/models.py @@ -82,6 +82,9 @@ class EmptyLayer(nn.Module): def __init__(self): super(EmptyLayer, self).__init__() + def forward(self, x): + return x + class Upsample(nn.Module): # Custom Upsample layer (nn.Upsample gives deprecated warning message) @@ -121,8 +124,8 @@ class YOLOLayer(nn.Module): # Build anchor grids nG = int(self.img_dim / stride) # number grid points - self.grid_x = torch.arange(nG).repeat(nG, 1).view([1, 1, nG, nG]).float() - self.grid_y = torch.arange(nG).repeat(nG, 1).t().view([1, 1, nG, nG]).float() + self.grid_x = torch.arange(nG).repeat((nG, 1)).view((1, 1, nG, nG)).float() + self.grid_y = torch.arange(nG).repeat((nG, 1)).t().view((1, 1, nG, nG)).float() self.anchor_wh = torch.FloatTensor([(a_w / stride, a_h / stride) for a_w, a_h in anchors]) # scale anchors self.anchor_w = self.anchor_wh[:, 0].view((1, nA, 1, 1)) self.anchor_h = self.anchor_wh[:, 1].view((1, nA, 1, 1)) @@ -169,8 +172,8 @@ class YOLOLayer(nn.Module): # Width and height (yolo method) w = p[..., 2] # Width h = p[..., 3] # Height - width = torch.exp(w.data) * self.anchor_w - height = torch.exp(h.data) * self.anchor_h + # width = torch.exp(w.data) * self.anchor_w + # height = torch.exp(h.data) * self.anchor_h # Width and height (power method) # w = torch.sigmoid(p[..., 2]) # Width @@ -217,8 +220,8 @@ class YOLOLayer(nn.Module): # Broadcasting only supported on first dimension in CoreML. See onnx-coreml/_operators.py # p_cls = F.softmax(p_cls, 2) * p_conf # SSD-like conf - p_cls = torch.exp(p_cls).permute(2, 1, 0) - p_cls = p_cls / p_cls.sum(0).unsqueeze(0) * p_conf.permute(2, 1, 0) # F.softmax() equivalent + p_cls = torch.exp(p_cls).permute((2, 1, 0)) + p_cls = p_cls / p_cls.sum(0).unsqueeze(0) * p_conf.permute((2, 1, 0)) # F.softmax() equivalent p_cls = p_cls.permute(2, 1, 0) return torch.cat((xy / nG, width_height, p_conf, p_cls), 2).squeeze().t() @@ -246,6 +249,7 @@ class Darknet(nn.Module): self.hyperparams, self.module_list = create_modules(self.module_defs) self.img_size = img_size self.loss_names = ['loss', 'x', 'y', 'w', 'h', 'conf', 'cls', 'nT'] + self.losses = [] def forward(self, x, targets=None, var=0): self.losses = defaultdict(float) @@ -296,8 +300,8 @@ def load_darknet_weights(self, weights, cutoff=-1): if not os.path.isfile(weights): try: os.system('wget https://pjreddie.com/media/files/' + weights_file + ' -P ' + weights) - except: - assert os.path.isfile(weights) + except IOError: + print(weights + ' not found') # Establish cutoffs if weights_file == 'darknet53.conv.74': diff --git a/train.py b/train.py index e7290b13..030b808f 100644 --- a/train.py +++ b/train.py @@ -36,7 +36,6 @@ def train( # Configure run data_cfg = parse_data_cfg(data_cfg) - num_classes = int(data_cfg['classes']) train_path = data_cfg['train'] # Initialize model @@ -62,7 +61,7 @@ def train( # p.requires_grad = False # Set optimizer - optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad, model.parameters()), lr=lr0, momentum=.9) + optimizer = torch.optim.SGD(filter(lambda x: x.requires_grad, model.parameters()), lr=lr0, momentum=.9) start_epoch = checkpoint['epoch'] + 1 if checkpoint['optimizer'] is not None: @@ -85,7 +84,7 @@ def train( model.to(device).train() # Set optimizer - optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad, model.parameters()), lr=lr0, momentum=.9) + optimizer = torch.optim.SGD(filter(lambda x: x.requires_grad, model.parameters()), lr=lr0, momentum=.9) # Set scheduler # scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[54, 61], gamma=0.1) diff --git a/utils/datasets.py b/utils/datasets.py index 979e07af..fdd30901 100755 --- a/utils/datasets.py +++ b/utils/datasets.py @@ -58,7 +58,6 @@ class LoadImages: # for inference class LoadWebcam: # for inference def __init__(self, img_size=416): self.cam = cv2.VideoCapture(0) - self.nF = 9999 # number of image files self.height = img_size def __iter__(self): @@ -88,7 +87,7 @@ class LoadWebcam: # for inference return img_path, img, img0 def __len__(self): - return self.nF # number of files + return 0 class LoadImagesAndLabels: # for training diff --git a/utils/onnx2coreml.py b/utils/onnx2coreml.py deleted file mode 100644 index 4c00c20a..00000000 --- a/utils/onnx2coreml.py +++ /dev/null @@ -1,208 +0,0 @@ -import os -import onnx -from onnx import onnx_pb -from onnx_coreml import convert -import glob - - -# https://github.com/onnx/onnx-coreml -# http://machinethink.net/blog/mobilenet-ssdlite-coreml/ -# https://github.com/hollance/YOLO-CoreML-MPSNNGraph - -def main(): - os.system('rm -rf saved_models && mkdir saved_models') - files = glob.glob('saved_models/*.onnx') + glob.glob('../yolov3/weights/*.onnx') - - for f in files: - # 1. ONNX to CoreML - name = 'saved_models/' + f.split('/')[-1].replace('.onnx', '') - - # # Load the ONNX model - model = onnx.load(f) - - # Check that the IR is well formed - print(onnx.checker.check_model(model)) - - # Print a human readable representation of the graph - print(onnx.helper.printable_graph(model.graph)) - - model_file = open(f, 'rb') - model_proto = onnx_pb.ModelProto() - model_proto.ParseFromString(model_file.read()) - yolov3_model = convert(model_proto, image_input_names=['0'], preprocessing_args={'image_scale': 1. / 255}) - - # 2. Reduce model to FP16, change outputs to DOUBLE and save - import coremltools - - spec = yolov3_model.get_spec() - for i in range(2): - spec.description.output[i].type.multiArrayType.dataType = \ - coremltools.proto.FeatureTypes_pb2.ArrayFeatureType.ArrayDataType.Value('DOUBLE') - - spec = coremltools.utils.convert_neural_network_spec_weights_to_fp16(spec) - yolov3_model = coremltools.models.MLModel(spec) - - name_out0 = spec.description.output[0].name - name_out1 = spec.description.output[1].name - - num_classes = 80 - num_anchors = 507 # 507 for yolov3-tiny, - spec.description.output[0].type.multiArrayType.shape.append(num_anchors) - spec.description.output[0].type.multiArrayType.shape.append(num_classes) - # spec.description.output[0].type.multiArrayType.shape.append(1) - - spec.description.output[1].type.multiArrayType.shape.append(num_anchors) - spec.description.output[1].type.multiArrayType.shape.append(4) - # spec.description.output[1].type.multiArrayType.shape.append(1) - - # rename - # input_mlmodel = input_tensor.replace(":", "__").replace("/", "__") - # class_output_mlmodel = class_output_tensor.replace(":", "__").replace("/", "__") - # bbox_output_mlmodel = bbox_output_tensor.replace(":", "__").replace("/", "__") - # - # for i in range(len(spec.neuralNetwork.layers)): - # if spec.neuralNetwork.layers[i].input[0] == input_mlmodel: - # spec.neuralNetwork.layers[i].input[0] = 'image' - # if spec.neuralNetwork.layers[i].output[0] == class_output_mlmodel: - # spec.neuralNetwork.layers[i].output[0] = 'scores' - # if spec.neuralNetwork.layers[i].output[0] == bbox_output_mlmodel: - # spec.neuralNetwork.layers[i].output[0] = 'boxes' - - spec.neuralNetwork.preprocessing[0].featureName = '0' - - yolov3_model.save(name + '.mlmodel') - # yolov3_model.visualize_spec() - print(spec.description) - - # 2.5. Try to Predict: - from PIL import Image - img = Image.open('../yolov3/data/samples/zidane_416.jpg') - out = yolov3_model.predict({'0': img}, useCPUOnly=True) - print(out[name_out0].shape, out[name_out1].shape) - - # 3. Create NMS protobuf - import numpy as np - - nms_spec = coremltools.proto.Model_pb2.Model() - nms_spec.specificationVersion = 3 - - for i in range(2): - decoder_output = yolov3_model._spec.description.output[i].SerializeToString() - - nms_spec.description.input.add() - nms_spec.description.input[i].ParseFromString(decoder_output) - - nms_spec.description.output.add() - nms_spec.description.output[i].ParseFromString(decoder_output) - - nms_spec.description.output[0].name = 'confidence' - nms_spec.description.output[1].name = 'coordinates' - - output_sizes = [num_classes, 4] - for i in range(2): - ma_type = nms_spec.description.output[i].type.multiArrayType - ma_type.shapeRange.sizeRanges.add() - ma_type.shapeRange.sizeRanges[0].lowerBound = 0 - ma_type.shapeRange.sizeRanges[0].upperBound = -1 - ma_type.shapeRange.sizeRanges.add() - ma_type.shapeRange.sizeRanges[1].lowerBound = output_sizes[i] - ma_type.shapeRange.sizeRanges[1].upperBound = output_sizes[i] - del ma_type.shape[:] - - nms = nms_spec.nonMaximumSuppression - nms.confidenceInputFeatureName = name_out0 # 1x507x80 - nms.coordinatesInputFeatureName = name_out1 # 1x507x4 - nms.confidenceOutputFeatureName = 'confidence' - nms.coordinatesOutputFeatureName = 'coordinates' - nms.iouThresholdInputFeatureName = 'iouThreshold' - nms.confidenceThresholdInputFeatureName = 'confidenceThreshold' - - nms.iouThreshold = 0.4 - nms.confidenceThreshold = 0.5 - nms.pickTop.perClass = True - - labels = np.loadtxt('../yolov3/data/coco.names', dtype=str, delimiter='\n') - nms.stringClassLabels.vector.extend(labels) - - nms_model = coremltools.models.MLModel(nms_spec) - nms_model.save(name + '_nms.mlmodel') - - # out_nms = nms_model.predict({ - # '143': out['143'].squeeze().reshape((80, 507)), - # '144': out['144'].squeeze().reshape((4, 507)) - # }) - # print(out_nms['confidence'].shape, out_nms['coordinates'].shape) - - # # # 3.5 Add Softmax model - # from coremltools.models import datatypes - # from coremltools.models import neural_network - # - # input_features = [ - # ("141", datatypes.Array(num_anchors, num_classes, 1)), - # ("143", datatypes.Array(num_anchors, 4, 1)) - # ] - # - # output_features = [ - # ("141", datatypes.Array(num_anchors, num_classes, 1)), - # ("143", datatypes.Array(num_anchors, 4, 1)) - # ] - # - # builder = neural_network.NeuralNetworkBuilder(input_features, output_features) - # builder.add_softmax(name="softmax_pcls", - # dim=(0, 3, 2, 1), - # input_name="scores", - # output_name="permute_scores_output") - # softmax_model = coremltools.models.MLModel(builder.spec) - # softmax_model.save("softmax.mlmodel") - - # 4. Pipeline models togethor - from coremltools.models import datatypes - # from coremltools.models import neural_network - from coremltools.models.pipeline import Pipeline - - input_features = [('0', datatypes.Array(3, 416, 416)), - ('iouThreshold', datatypes.Double()), - ('confidenceThreshold', datatypes.Double())] - - output_features = ['confidence', 'coordinates'] - - pipeline = Pipeline(input_features, output_features) - - # Add 3rd dimension of size 1 (apparently not needed, produces error on compile) - yolov3_output = yolov3_model._spec.description.output - yolov3_output[0].type.multiArrayType.shape[:] = [num_anchors, num_classes, 1] - yolov3_output[1].type.multiArrayType.shape[:] = [num_anchors, 4, 1] - - nms_input = nms_model._spec.description.input - for i in range(2): - nms_input[i].type.multiArrayType.shape[:] = yolov3_output[i].type.multiArrayType.shape[:] - - # And now we can add the three models, in order: - pipeline.add_model(yolov3_model) - - pipeline.add_model(nms_model) - - # Correct datatypes - pipeline.spec.description.input[0].ParseFromString(yolov3_model._spec.description.input[0].SerializeToString()) - pipeline.spec.description.output[0].ParseFromString(nms_model._spec.description.output[0].SerializeToString()) - pipeline.spec.description.output[1].ParseFromString(nms_model._spec.description.output[1].SerializeToString()) - - # Update metadata - pipeline.spec.description.metadata.versionString = 'yolov3-tiny.pt imported from PyTorch' - pipeline.spec.description.metadata.shortDescription = 'https://github.com/ultralytics/yolov3' - pipeline.spec.description.metadata.author = 'glenn.jocher@ultralytics.com' - pipeline.spec.description.metadata.license = 'https://github.com/ultralytics/yolov3' - - user_defined_metadata = {'classes': ','.join(labels), - 'iou_threshold': str(nms.iouThreshold), - 'confidence_threshold': str(nms.confidenceThreshold)} - pipeline.spec.description.metadata.userDefined.update(user_defined_metadata) - - # Save the model - pipeline.spec.specificationVersion = 3 - final_model = coremltools.models.MLModel(pipeline.spec) - final_model.save((name + '_pipelined.mlmodel')) - - -if __name__ == '__main__': - main() diff --git a/utils/parse_config.py b/utils/parse_config.py index dae59196..714bae7a 100644 --- a/utils/parse_config.py +++ b/utils/parse_config.py @@ -1,14 +1,12 @@ - - def parse_model_config(path): """Parses the yolo-v3 layer configuration file and returns module definitions""" file = open(path, 'r') lines = file.read().split('\n') lines = [x for x in lines if x and not x.startswith('#')] - lines = [x.rstrip().lstrip() for x in lines] # get rid of fringe whitespaces + lines = [x.rstrip().lstrip() for x in lines] # get rid of fringe whitespaces module_defs = [] for line in lines: - if line.startswith('['): # This marks the start of a new block + if line.startswith('['): # This marks the start of a new block module_defs.append({}) module_defs[-1]['type'] = line[1:-1].rstrip() if module_defs[-1]['type'] == 'convolutional': @@ -20,6 +18,7 @@ def parse_model_config(path): return module_defs + def parse_data_cfg(path): """Parses the data configuration file""" options = dict() diff --git a/utils/utils.py b/utils/utils.py index 01eb6e7f..45060b3f 100755 --- a/utils/utils.py +++ b/utils/utils.py @@ -254,7 +254,7 @@ def build_targets(target, anchor_wh, nA, nC, nG): iou_order = torch.argsort(-iou_best) # best to worst # Unique anchor selection - u = torch.cat((gi, gj, a), 0).view(3, -1) + u = torch.cat((gi, gj, a), 0).view((3, -1)) _, first_unique = np.unique(u[:, iou_order], axis=1, return_index=True) # first unique indices # _, first_unique = torch.unique(u[:, iou_order], dim=1, return_inverse=True) # different than numpy? @@ -340,7 +340,8 @@ def non_max_suppression(prediction, conf_thres=0.5, nms_thres=0.4): # x = np.concatenate((log_w.reshape(-1, 1), log_h.reshape(-1, 1)), 1) # from scipy.stats import multivariate_normal # for c in range(60): - # shape_likelihood[:, c] = multivariate_normal.pdf(x, mean=mat['class_mu'][c, :2], cov=mat['class_cov'][c, :2, :2]) + # shape_likelihood[:, c] = + # multivariate_normal.pdf(x, mean=mat['class_mu'][c, :2], cov=mat['class_cov'][c, :2, :2]) class_prob, class_pred = torch.max(F.softmax(pred[:, 5:], 1), 1) @@ -436,7 +437,6 @@ def coco_class_count(path='../coco/labels/train2014/'): def plot_results(): # Plot YOLO training results file 'results.txt' import glob - import numpy as np import matplotlib.pyplot as plt # import os; os.system('rm -rf results.txt && wget https://storage.googleapis.com/ultralytics/results_v1_0.txt')