webcam updates

This commit is contained in:
Glenn Jocher 2019-02-11 18:15:51 +01:00
parent 585f2e2cc1
commit e23b1a3d73
7 changed files with 29 additions and 233 deletions

View File

@ -50,13 +50,16 @@ def detect(
for i, (path, img, im0) in enumerate(dataloader):
t = time.time()
print("%g/%g '%s': " % (i + 1, len(dataloader), path if not webcam else 'webcam'), end='')
if webcam:
print('webcam frame %g: ' % (i + 1), end='')
print('image %g/%g %s: ' % (i + 1, len(dataloader), path), end='')
save_path = os.path.join(output, path.split('/')[-1])
# Get detections
img = torch.from_numpy(img).unsqueeze(0).to(device)
torch.onnx._export(model, img, 'weights/model.onnx', verbose=True)
torch.onnx.export(model, img, 'weights/model.onnx', verbose=True)
return # ONNX export
pred = model(img)
pred = pred[pred[:, :, 4] > conf_thres] # remove boxes < threshold
@ -70,9 +73,9 @@ def detect(
# Print results to screen
unique_classes = detections[:, -1].cpu().unique()
for i in unique_classes:
n = (detections[:, -1].cpu() == i).sum()
print('%g %ss' % (n, classes[int(i)]), end=', ')
for c in unique_classes:
n = (detections[:, -1].cpu() == c).sum()
print('%g %ss' % (n, classes[int(c)]), end=', ')
# Draw bounding boxes and labels of detections
for x1, y1, x2, y2, conf, cls_conf, cls in detections:

View File

@ -82,6 +82,9 @@ class EmptyLayer(nn.Module):
def __init__(self):
super(EmptyLayer, self).__init__()
def forward(self, x):
return x
class Upsample(nn.Module):
# Custom Upsample layer (nn.Upsample emits a deprecation warning)
@ -121,8 +124,8 @@ class YOLOLayer(nn.Module):
# Build anchor grids
nG = int(self.img_dim / stride) # number grid points
self.grid_x = torch.arange(nG).repeat(nG, 1).view([1, 1, nG, nG]).float()
self.grid_y = torch.arange(nG).repeat(nG, 1).t().view([1, 1, nG, nG]).float()
self.grid_x = torch.arange(nG).repeat((nG, 1)).view((1, 1, nG, nG)).float()
self.grid_y = torch.arange(nG).repeat((nG, 1)).t().view((1, 1, nG, nG)).float()
self.anchor_wh = torch.FloatTensor([(a_w / stride, a_h / stride) for a_w, a_h in anchors]) # scale anchors
self.anchor_w = self.anchor_wh[:, 0].view((1, nA, 1, 1))
self.anchor_h = self.anchor_wh[:, 1].view((1, nA, 1, 1))
@ -169,8 +172,8 @@ class YOLOLayer(nn.Module):
# Width and height (yolo method)
w = p[..., 2] # Width
h = p[..., 3] # Height
width = torch.exp( * self.anchor_w
height = torch.exp( * self.anchor_h
# width = torch.exp( * self.anchor_w
# height = torch.exp( * self.anchor_h
# Width and height (power method)
# w = torch.sigmoid(p[..., 2]) # Width
@ -217,8 +220,8 @@ class YOLOLayer(nn.Module):
# Broadcasting only supported on first dimension in CoreML. See onnx-coreml/
# p_cls = F.softmax(p_cls, 2) * p_conf # SSD-like conf
p_cls = torch.exp(p_cls).permute(2, 1, 0)
p_cls = p_cls / p_cls.sum(0).unsqueeze(0) * p_conf.permute(2, 1, 0) # F.softmax() equivalent
p_cls = torch.exp(p_cls).permute((2, 1, 0))
p_cls = p_cls / p_cls.sum(0).unsqueeze(0) * p_conf.permute((2, 1, 0)) # F.softmax() equivalent
p_cls = p_cls.permute(2, 1, 0)
return / nG, width_height, p_conf, p_cls), 2).squeeze().t()
@ -246,6 +249,7 @@ class Darknet(nn.Module):
self.hyperparams, self.module_list = create_modules(self.module_defs)
self.img_size = img_size
self.loss_names = ['loss', 'x', 'y', 'w', 'h', 'conf', 'cls', 'nT']
self.losses = []
def forward(self, x, targets=None, var=0):
self.losses = defaultdict(float)
@ -296,8 +300,8 @@ def load_darknet_weights(self, weights, cutoff=-1):
if not os.path.isfile(weights):
os.system('wget' + weights_file + ' -P ' + weights)
assert os.path.isfile(weights)
except IOError:
print(weights + ' not found')
# Establish cutoffs
if weights_file == 'darknet53.conv.74':

View File

@ -36,7 +36,6 @@ def train(
# Configure run
data_cfg = parse_data_cfg(data_cfg)
num_classes = int(data_cfg['classes'])
train_path = data_cfg['train']
# Initialize model
@ -62,7 +61,7 @@ def train(
# p.requires_grad = False
# Set optimizer
optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad, model.parameters()), lr=lr0, momentum=.9)
optimizer = torch.optim.SGD(filter(lambda x: x.requires_grad, model.parameters()), lr=lr0, momentum=.9)
start_epoch = checkpoint['epoch'] + 1
if checkpoint['optimizer'] is not None:
@ -85,7 +84,7 @@ def train(
# Set optimizer
optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad, model.parameters()), lr=lr0, momentum=.9)
optimizer = torch.optim.SGD(filter(lambda x: x.requires_grad, model.parameters()), lr=lr0, momentum=.9)
# Set scheduler
# scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[54, 61], gamma=0.1)

View File

@ -58,7 +58,6 @@ class LoadImages: # for inference
class LoadWebcam: # for inference
def __init__(self, img_size=416): = cv2.VideoCapture(0)
self.nF = 9999 # number of image files
self.height = img_size
def __iter__(self):
@ -88,7 +87,7 @@ class LoadWebcam: # for inference
return img_path, img, img0
def __len__(self):
return self.nF # number of files
return 0
class LoadImagesAndLabels: # for training

View File

@ -1,208 +0,0 @@
import os
import onnx
from onnx import onnx_pb
from onnx_coreml import convert
import glob
def main():
os.system('rm -rf saved_models && mkdir saved_models')
files = glob.glob('saved_models/*.onnx') + glob.glob('../yolov3/weights/*.onnx')
for f in files:
# 1. ONNX to CoreML
name = 'saved_models/' + f.split('/')[-1].replace('.onnx', '')
# # Load the ONNX model
model = onnx.load(f)
# Check that the IR is well formed
# Print a human readable representation of the graph
model_file = open(f, 'rb')
model_proto = onnx_pb.ModelProto()
yolov3_model = convert(model_proto, image_input_names=['0'], preprocessing_args={'image_scale': 1. / 255})
# 2. Reduce model to FP16, change outputs to DOUBLE and save
import coremltools
spec = yolov3_model.get_spec()
for i in range(2):
spec.description.output[i].type.multiArrayType.dataType = \
spec = coremltools.utils.convert_neural_network_spec_weights_to_fp16(spec)
yolov3_model = coremltools.models.MLModel(spec)
name_out0 = spec.description.output[0].name
name_out1 = spec.description.output[1].name
num_classes = 80
num_anchors = 507 # 507 for yolov3-tiny,
# spec.description.output[0].type.multiArrayType.shape.append(1)
# spec.description.output[1].type.multiArrayType.shape.append(1)
# rename
# input_mlmodel = input_tensor.replace(":", "__").replace("/", "__")
# class_output_mlmodel = class_output_tensor.replace(":", "__").replace("/", "__")
# bbox_output_mlmodel = bbox_output_tensor.replace(":", "__").replace("/", "__")
# for i in range(len(spec.neuralNetwork.layers)):
# if spec.neuralNetwork.layers[i].input[0] == input_mlmodel:
# spec.neuralNetwork.layers[i].input[0] = 'image'
# if spec.neuralNetwork.layers[i].output[0] == class_output_mlmodel:
# spec.neuralNetwork.layers[i].output[0] = 'scores'
# if spec.neuralNetwork.layers[i].output[0] == bbox_output_mlmodel:
# spec.neuralNetwork.layers[i].output[0] = 'boxes'
spec.neuralNetwork.preprocessing[0].featureName = '0' + '.mlmodel')
# yolov3_model.visualize_spec()
# 2.5. Try to Predict:
from PIL import Image
img ='../yolov3/data/samples/zidane_416.jpg')
out = yolov3_model.predict({'0': img}, useCPUOnly=True)
print(out[name_out0].shape, out[name_out1].shape)
# 3. Create NMS protobuf
import numpy as np
nms_spec = coremltools.proto.Model_pb2.Model()
nms_spec.specificationVersion = 3
for i in range(2):
decoder_output = yolov3_model._spec.description.output[i].SerializeToString()
nms_spec.description.output[0].name = 'confidence'
nms_spec.description.output[1].name = 'coordinates'
output_sizes = [num_classes, 4]
for i in range(2):
ma_type = nms_spec.description.output[i].type.multiArrayType
ma_type.shapeRange.sizeRanges[0].lowerBound = 0
ma_type.shapeRange.sizeRanges[0].upperBound = -1
ma_type.shapeRange.sizeRanges[1].lowerBound = output_sizes[i]
ma_type.shapeRange.sizeRanges[1].upperBound = output_sizes[i]
del ma_type.shape[:]
nms = nms_spec.nonMaximumSuppression
nms.confidenceInputFeatureName = name_out0 # 1x507x80
nms.coordinatesInputFeatureName = name_out1 # 1x507x4
nms.confidenceOutputFeatureName = 'confidence'
nms.coordinatesOutputFeatureName = 'coordinates'
nms.iouThresholdInputFeatureName = 'iouThreshold'
nms.confidenceThresholdInputFeatureName = 'confidenceThreshold'
nms.iouThreshold = 0.4
nms.confidenceThreshold = 0.5
nms.pickTop.perClass = True
labels = np.loadtxt('../yolov3/data/coco.names', dtype=str, delimiter='\n')
nms_model = coremltools.models.MLModel(nms_spec) + '_nms.mlmodel')
# out_nms = nms_model.predict({
# '143': out['143'].squeeze().reshape((80, 507)),
# '144': out['144'].squeeze().reshape((4, 507))
# })
# print(out_nms['confidence'].shape, out_nms['coordinates'].shape)
# # # 3.5 Add Softmax model
# from coremltools.models import datatypes
# from coremltools.models import neural_network
# input_features = [
# ("141", datatypes.Array(num_anchors, num_classes, 1)),
# ("143", datatypes.Array(num_anchors, 4, 1))
# ]
# output_features = [
# ("141", datatypes.Array(num_anchors, num_classes, 1)),
# ("143", datatypes.Array(num_anchors, 4, 1))
# ]
# builder = neural_network.NeuralNetworkBuilder(input_features, output_features)
# builder.add_softmax(name="softmax_pcls",
# dim=(0, 3, 2, 1),
# input_name="scores",
# output_name="permute_scores_output")
# softmax_model = coremltools.models.MLModel(builder.spec)
# 4. Pipeline models together
from coremltools.models import datatypes
# from coremltools.models import neural_network
from coremltools.models.pipeline import Pipeline
input_features = [('0', datatypes.Array(3, 416, 416)),
('iouThreshold', datatypes.Double()),
('confidenceThreshold', datatypes.Double())]
output_features = ['confidence', 'coordinates']
pipeline = Pipeline(input_features, output_features)
# Add 3rd dimension of size 1 (apparently not needed, produces error on compile)
yolov3_output = yolov3_model._spec.description.output
yolov3_output[0].type.multiArrayType.shape[:] = [num_anchors, num_classes, 1]
yolov3_output[1].type.multiArrayType.shape[:] = [num_anchors, 4, 1]
nms_input = nms_model._spec.description.input
for i in range(2):
nms_input[i].type.multiArrayType.shape[:] = yolov3_output[i].type.multiArrayType.shape[:]
# And now we can add the three models, in order:
# Correct datatypes
# Update metadata
pipeline.spec.description.metadata.versionString = ' imported from PyTorch'
pipeline.spec.description.metadata.shortDescription = '' = ''
pipeline.spec.description.metadata.license = ''
user_defined_metadata = {'classes': ','.join(labels),
'iou_threshold': str(nms.iouThreshold),
'confidence_threshold': str(nms.confidenceThreshold)}
# Save the model
pipeline.spec.specificationVersion = 3
final_model = coremltools.models.MLModel(pipeline.spec) + '_pipelined.mlmodel'))
if __name__ == '__main__':

View File

@ -1,5 +1,3 @@
def parse_model_config(path):
"""Parses the yolo-v3 layer configuration file and returns module definitions"""
file = open(path, 'r')
@ -20,6 +18,7 @@ def parse_model_config(path):
return module_defs
def parse_data_cfg(path):
"""Parses the data configuration file"""
options = dict()

View File

@ -254,7 +254,7 @@ def build_targets(target, anchor_wh, nA, nC, nG):
iou_order = torch.argsort(-iou_best) # best to worst
# Unique anchor selection
u =, gj, a), 0).view(3, -1)
u =, gj, a), 0).view((3, -1))
_, first_unique = np.unique(u[:, iou_order], axis=1, return_index=True) # first unique indices
# _, first_unique = torch.unique(u[:, iou_order], dim=1, return_inverse=True) # different than numpy?
@ -340,7 +340,8 @@ def non_max_suppression(prediction, conf_thres=0.5, nms_thres=0.4):
# x = np.concatenate((log_w.reshape(-1, 1), log_h.reshape(-1, 1)), 1)
# from scipy.stats import multivariate_normal
# for c in range(60):
# shape_likelihood[:, c] = multivariate_normal.pdf(x, mean=mat['class_mu'][c, :2], cov=mat['class_cov'][c, :2, :2])
# shape_likelihood[:, c] =
# multivariate_normal.pdf(x, mean=mat['class_mu'][c, :2], cov=mat['class_cov'][c, :2, :2])
class_prob, class_pred = torch.max(F.softmax(pred[:, 5:], 1), 1)
@ -436,7 +437,6 @@ def coco_class_count(path='../coco/labels/train2014/'):
def plot_results():
# Plot YOLO training results file 'results.txt'
import glob
import numpy as np
import matplotlib.pyplot as plt
# import os; os.system('rm -rf results.txt && wget')