This commit is contained in:
Glenn Jocher 2019-04-02 13:43:18 +02:00
parent bd32517528
commit 01569d15e3
5 changed files with 95 additions and 70 deletions

View File

@ -9,6 +9,7 @@ from utils.utils import *
def detect( def detect(
cfg, cfg,
data_cfg,
weights, weights,
images, images,
output='output', # output folder output='output', # output folder
@ -36,6 +37,7 @@ def detect(
model.to(device).eval() model.to(device).eval()
# Set Dataloader # Set Dataloader
vid_path, vid_writer = None, None
if webcam: if webcam:
save_images = False save_images = False
dataloader = LoadWebcam(img_size=img_size) dataloader = LoadWebcam(img_size=img_size)
@ -43,16 +45,12 @@ def detect(
dataloader = LoadImages(images, img_size=img_size) dataloader = LoadImages(images, img_size=img_size)
# Get classes and colors # Get classes and colors
classes = load_classes(parse_data_cfg('cfg/coco.data')['names']) classes = load_classes(parse_data_cfg(data_cfg)['names'])
colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(classes))] colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(classes))]
for i, (path, img, im0) in enumerate(dataloader): for i, (path, img, im0, vid_cap) in enumerate(dataloader):
t = time.time() t = time.time()
save_path = str(Path(output) / Path(path).name) save_path = str(Path(output) / Path(path).name)
if webcam:
print('webcam frame %g: ' % (i + 1), end='')
else:
print('image %g/%g %s: ' % (i + 1, len(dataloader), path), end='')
# Get detections # Get detections
img = torch.from_numpy(img).unsqueeze(0).to(device) img = torch.from_numpy(img).unsqueeze(0).to(device)
@ -83,12 +81,24 @@ def detect(
print('Done. (%.3fs)' % (time.time() - t)) print('Done. (%.3fs)' % (time.time() - t))
if save_images: # Save generated image with detections
cv2.imwrite(save_path, im0)
if webcam: # Show live webcam if webcam: # Show live webcam
cv2.imshow(weights, im0) cv2.imshow(weights, im0)
if save_images: # Save generated image with detections
if dataloader.mode == 'video':
if vid_path != save_path: # new video
vid_path = save_path
if isinstance(vid_writer, cv2.VideoWriter):
vid_writer.release() # release previous video writer
width = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = vid_cap.get(cv2.CAP_PROP_FPS)
vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'avc1'), fps, (width, height))
vid_writer.write(im0)
else:
cv2.imwrite(save_path, im0)
if save_images and platform == 'darwin': # macos if save_images and platform == 'darwin': # macos
os.system('open ' + output + ' ' + save_path) os.system('open ' + output + ' ' + save_path)
@ -96,10 +106,11 @@ def detect(
if __name__ == '__main__': if __name__ == '__main__':
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument('--cfg', type=str, default='cfg/yolov3.cfg', help='cfg file path') parser.add_argument('--cfg', type=str, default='cfg/yolov3.cfg', help='cfg file path')
parser.add_argument('--data-cfg', type=str, default='cfg/coco.data', help='coco.data file path')
parser.add_argument('--weights', type=str, default='weights/yolov3.weights', help='path to weights file') parser.add_argument('--weights', type=str, default='weights/yolov3.weights', help='path to weights file')
parser.add_argument('--images', type=str, default='data/samples', help='path to images') parser.add_argument('--images', type=str, default='data/samples', help='path to images')
parser.add_argument('--img-size', type=int, default=32 * 13, help='size of each image dimension') parser.add_argument('--img-size', type=int, default=32 * 13, help='size of each image dimension')
parser.add_argument('--conf-thres', type=float, default=0.3, help='object confidence threshold') parser.add_argument('--conf-thres', type=float, default=0.5, help='object confidence threshold')
parser.add_argument('--nms-thres', type=float, default=0.5, help='iou threshold for non-maximum suppression') parser.add_argument('--nms-thres', type=float, default=0.5, help='iou threshold for non-maximum suppression')
opt = parser.parse_args() opt = parser.parse_args()
print(opt) print(opt)
@ -107,6 +118,7 @@ if __name__ == '__main__':
with torch.no_grad(): with torch.no_grad():
detect( detect(
opt.cfg, opt.cfg,
opt.data_cfg,
opt.weights, opt.weights,
opt.images, opt.images,
img_size=opt.img_size, img_size=opt.img_size,

55
test.py
View File

@ -35,19 +35,19 @@ def test(
if torch.cuda.device_count() > 1: if torch.cuda.device_count() > 1:
model = nn.DataParallel(model) model = nn.DataParallel(model)
else: else:
device = next(model.parameters()).device device = next(model.parameters()).device # get model device
# Configure run # Configure run
data_cfg = parse_data_cfg(data_cfg) data_cfg = parse_data_cfg(data_cfg)
test_path = data_cfg['valid'] test_path = data_cfg['valid']
# if (os.sep + 'coco' + os.sep) in test_path: # COCO dataset probable if (os.sep + 'coco' + os.sep) in test_path: # COCO dataset probable
# save_json = True # use pycocotools save_json = True # use pycocotools
# Dataloader # Dataloader
dataset = LoadImagesAndLabels(test_path, img_size=img_size) dataset = LoadImagesAndLabels(test_path, img_size=img_size)
dataloader = DataLoader(dataset, dataloader = DataLoader(dataset,
batch_size=batch_size, batch_size=batch_size,
num_workers=4, num_workers=0,
pin_memory=False, pin_memory=False,
collate_fn=dataset.collate_fn) collate_fn=dataset.collate_fn)
@ -66,16 +66,16 @@ def test(
# Per image # Per image
for si, pred in enumerate(output): for si, pred in enumerate(output):
image_id = int(Path(paths[si]).stem.split('_')[-1])
labels = targets[targets[:, 0] == si, 1:] labels = targets[targets[:, 0] == si, 1:]
correct, detected, tcls = [], [], []
seen += 1 seen += 1
if pred is None: if pred is None:
continue continue
if save_json: if save_json: # add to json pred dictionary
# add to json pred dictionary
# [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ... # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ...
image_id = int(Path(paths[si]).stem.split('_')[-1])
box = pred[:, :4].clone() # xyxy box = pred[:, :4].clone() # xyxy
scale_coords(img_size, box, shapes[si]) # to original shape scale_coords(img_size, box, shapes[si]) # to original shape
box = xyxy2xywh(box) # xywh box = xyxy2xywh(box) # xywh
@ -88,42 +88,21 @@ def test(
'score': float(d[4]) 'score': float(d[4])
}) })
# if len(labels) > 0:
# # add to json targets dictionary
# # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], ...
# box = labels[:, 1:].clone()
# box[:, [0, 2]] *= shapes[si][1] # scale width
# box[:, [1, 3]] *= shapes[si][0] # scale height
# box[:, :2] -= box[:, 2:] / 2 # xy center to top-left corner
# for di, d in enumerate(labels):
# tdict.append({
# 'segmentation': [[]],
# 'iscrowd': 0,
# 'image_id': image_id,
# 'category_id': coco91class[int(d[0])],
# 'id': seen,
# 'bbox': [float3(x) for x in box[di]],
# 'area': float3(box[di][2:4].prod())
# })
# If no labels add number of detections as incorrect # If no labels add number of detections as incorrect
correct = []
detected = []
if len(labels) == 0: if len(labels) == 0:
# correct.extend([0 for _ in range(len(detections))]) correct.extend([0] * len(pred))
continue
else: else:
# Extract target boxes as (x1, y1, x2, y2) # Extract target boxes as (x1, y1, x2, y2)
target_box = xywh2xyxy(labels[:, 1:5]) * img_size tbox = xywh2xyxy(labels[:, 1:5]) * img_size
target_cls = labels[:, 0] tcls = labels[:, 0].cpu()
for *pred_box, conf, cls_conf, cls_pred in pred: for *pbox, pconf, pcls_conf, pcls in pred:
if cls_pred not in target_cls: if pcls not in tcls:
correct.append(0) correct.append(0)
continue continue
# Best iou, index between pred and targets # Best iou, index between pred and targets
iou, bi = bbox_iou(pred_box, target_box).max(0) iou, bi = bbox_iou(pbox, tbox).max(0)
# If iou > threshold and class is correct mark as correct # If iou > threshold and class is correct mark as correct
if iou > iou_thres and bi not in detected: if iou > iou_thres and bi not in detected:
@ -132,12 +111,8 @@ def test(
else: else:
correct.append(0) correct.append(0)
# Convert to Numpy # Append Statistics (correct, conf, pcls, tcls)
tp = np.array(correct) stats.append((correct, pred[:, 4].cpu(), pred[:, 6].cpu(), tcls))
conf = pred[:, 4].cpu().numpy()
pred_cls = pred[:, 6].cpu().numpy()
target_cls = target_cls.cpu().numpy()
stats.append((tp, conf, pred_cls, target_cls))
# Compute means # Compute means
stats_np = [np.concatenate(x, 0) for x in list(zip(*stats))] stats_np = [np.concatenate(x, 0) for x in list(zip(*stats))]

View File

@ -119,7 +119,7 @@ def train(
plot_images = False plot_images = False
if plot_images: if plot_images:
fig = plt.figure(figsize=(10, 10)) fig = plt.figure(figsize=(10, 10))
for ip in range(batch_size): for ip in range(len(imgs)):
boxes = xywh2xyxy(targets[targets[:, 0] == ip, 2:6]).numpy().T * img_size boxes = xywh2xyxy(targets[targets[:, 0] == ip, 2:6]).numpy().T * img_size
plt.subplot(4, 4, ip + 1).imshow(imgs[ip].numpy().transpose(1, 2, 0)) plt.subplot(4, 4, ip + 1).imshow(imgs[ip].numpy().transpose(1, 2, 0))
plt.plot(boxes[[0, 2, 2, 0, 0]], boxes[[1, 1, 3, 3, 1]], '.-') plt.plot(boxes[[0, 2, 2, 0, 0]], boxes[[1, 1, 3, 3, 1]], '.-')

View File

@ -16,31 +16,61 @@ from utils.utils import xyxy2xywh
class LoadImages: # for inference class LoadImages: # for inference
def __init__(self, path, img_size=416): def __init__(self, path, img_size=416):
if os.path.isdir(path):
image_format = ['.jpg', '.jpeg', '.png', '.tif']
self.files = sorted(glob.glob('%s/*.*' % path))
self.files = list(filter(lambda x: os.path.splitext(x)[1].lower() in image_format, self.files))
elif os.path.isfile(path):
self.files = [path]
self.nF = len(self.files) # number of image files
self.height = img_size self.height = img_size
img_formats = ['.jpg', '.jpeg', '.png', '.tif']
vid_formats = ['.mov', '.avi', '.mp4']
assert self.nF > 0, 'No images found in ' + path files = []
if os.path.isdir(path):
files = sorted(glob.glob('%s/*.*' % path))
elif os.path.isfile(path):
files = [path]
# self.files = list(filter(lambda x: os.path.splitext(x)[1].lower() in img_formats, files))
images = [x for x in files if os.path.splitext(x)[-1].lower() in img_formats]
videos = [x for x in files if os.path.splitext(x)[-1].lower() in vid_formats]
self.files = images + videos
self.nI, self.nV = len(images), len(videos)
self.nF = self.nI + self.nV # number of files
self.video_flag = [False] * self.nI + [True] * self.nV
self.mode = 'images'
if any(videos):
self.new_video(videos[0]) # new video
else:
self.cap = None
assert self.nF > 0, 'No images or videos found in ' + path
def __iter__(self): def __iter__(self):
self.count = -1 self.count = 0
return self return self
def __next__(self): def __next__(self):
self.count += 1
if self.count == self.nF: if self.count == self.nF:
raise StopIteration raise StopIteration
img_path = self.files[self.count] path = self.files[self.count]
if self.video_flag[self.count]:
self.mode = 'video'
ret_val, img0 = self.cap.read()
if not ret_val:
self.count += 1
self.cap.release()
if self.count == self.nF: # last video
raise StopIteration
else:
path = self.files[self.count]
self.new_video(path)
ret_val, img0 = self.cap.read()
self.frame += 1
print('video %g/%g (%g/%g) %s: ' % (self.count + 1, self.nF, self.frame, self.nframes, path), end='')
else:
# Read image # Read image
img0 = cv2.imread(img_path) # BGR self.count += 1
assert img0 is not None, 'File Not Found ' + img_path img0 = cv2.imread(path) # BGR
assert img0 is not None, 'File Not Found ' + path
print('image %g/%g %s: ' % (self.count, self.nF, path), end='')
# Padded resize # Padded resize
img, _, _, _ = letterbox(img0, height=self.height) img, _, _, _ = letterbox(img0, height=self.height)
@ -50,8 +80,13 @@ class LoadImages: # for inference
img = np.ascontiguousarray(img, dtype=np.float32) # uint8 to float32 img = np.ascontiguousarray(img, dtype=np.float32) # uint8 to float32
img /= 255.0 # 0 - 255 to 0.0 - 1.0 img /= 255.0 # 0 - 255 to 0.0 - 1.0
# cv2.imwrite(img_path + '.letterbox.jpg', 255 * img.transpose((1, 2, 0))[:, :, ::-1]) # save letterbox image # cv2.imwrite(path + '.letterbox.jpg', 255 * img.transpose((1, 2, 0))[:, :, ::-1]) # save letterbox image
return img_path, img, img0 return path, img, img0, self.cap
def new_video(self, path):
    """Point the loader at the video file `path` and reset per-video state.

    Three attributes are (re)assigned:
        frame:   frames consumed from the current video, restarted at 0.
        cap:     a fresh cv2.VideoCapture opened on `path`.
        nframes: the frame count reported through CAP_PROP_FRAME_COUNT,
                 converted to int.

    A capture already stored in self.cap is overwritten here, not released.
    """
    self.frame = 0
    capture = cv2.VideoCapture(path)
    self.cap = capture
    self.nframes = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
def __len__(self): def __len__(self):
return self.nF # number of files return self.nF # number of files

View File

@ -163,15 +163,18 @@ def ap_per_class(tp, conf, pred_cls, target_cls):
# Recall # Recall
recall_curve = tpc / (n_gt + 1e-16) recall_curve = tpc / (n_gt + 1e-16)
r.append(tpc[-1] / (n_gt + 1e-16)) r.append(recall_curve[-1])
# Precision # Precision
precision_curve = tpc / (tpc + fpc) precision_curve = tpc / (tpc + fpc)
p.append(tpc[-1] / (tpc[-1] + fpc[-1])) p.append(precision_curve[-1])
# AP from recall-precision curve # AP from recall-precision curve
ap.append(compute_ap(recall_curve, precision_curve)) ap.append(compute_ap(recall_curve, precision_curve))
# Plot
# plt.plot(recall_curve, precision_curve)
return np.array(ap), unique_classes.astype('int32'), np.array(r), np.array(p) return np.array(ap), unique_classes.astype('int32'), np.array(r), np.array(p)