From 88eea8f14788452ddd38b8ef6918d3cf7ed6771a Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Thu, 25 Jul 2019 18:18:40 +0200 Subject: [PATCH] updates --- models.py | 1 + utils/datasets.py | 13 +++++++------ utils/utils.py | 6 ++++-- 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/models.py b/models.py index a951a1a3..692199cd 100755 --- a/models.py +++ b/models.py @@ -139,6 +139,7 @@ class YOLOLayer(nn.Module): return torch.cat((xy / ngu, wh, p_conf, p_cls), 2).squeeze().t() else: # inference + # s = 1.5 # scale_xy (pxy = pxy * s - (s - 1) / 2) io = p.clone() # inference output io[..., 0:2] = torch.sigmoid(io[..., 0:2]) + self.grid_xy # xy io[..., 2:4] = torch.exp(io[..., 2:4]) * self.anchor_wh # wh yolo method diff --git a/utils/datasets.py b/utils/datasets.py index b4161b6b..b147d25d 100755 --- a/utils/datasets.py +++ b/utils/datasets.py @@ -40,8 +40,6 @@ def exif_size(img): class LoadImages: # for inference def __init__(self, path, img_size=416): - self.height = img_size - files = [] if os.path.isdir(path): files = sorted(glob.glob('%s/*.*' % path)) @@ -52,6 +50,7 @@ class LoadImages: # for inference videos = [x for x in files if os.path.splitext(x)[-1].lower() in vid_formats] nI, nV = len(images), len(videos) + self.img_size = img_size self.files = images + videos self.nF = nI + nV # number of files self.video_flag = [False] * nI + [True] * nV @@ -96,7 +95,7 @@ class LoadImages: # for inference print('image %g/%g %s: ' % (self.count, self.nF, path), end='') # Padded resize - img, *_ = letterbox(img0, new_shape=self.height) + img, *_ = letterbox(img0, new_shape=self.img_size) # Normalize RGB img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB @@ -117,8 +116,10 @@ class LoadImages: # for inference class LoadWebcam: # for inference def __init__(self, img_size=416): - self.cam = cv2.VideoCapture(0) - self.height = img_size + self.img_size = img_size + self.cam = cv2.VideoCapture(0) # local camera + # self.cam = cv2.VideoCapture('rtsp://192.168.1.64/1') # IP camera + # self.cam = cv2.VideoCapture('rtsp://username:password@192.168.1.64/1') # IP camera with login def __iter__(self): self.count = -1 @@ -138,7 +139,7 @@ class LoadWebcam: # for inference print('webcam %g: ' % self.count, end='') # Padded resize - img, *_ = letterbox(img0, new_shape=self.height) + img, *_ = letterbox(img0, new_shape=self.img_size) # Normalize RGB img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB diff --git a/utils/utils.py b/utils/utils.py index dd7018d9..6ec36a50 100755 --- a/utils/utils.py +++ b/utils/utils.py @@ -304,12 +304,14 @@ def compute_loss(p, targets, model, giou_loss=True): # predictions, targets, mo tobj[b, a, gj, gi] = 1.0 # obj # pi[..., 2:4] = torch.sigmoid(pi[..., 2:4]) # wh power loss (uncomment) + # s = 1.5 # scale_xy + pxy = torch.sigmoid(pi[..., 0:2]) # * s - (s - 1) / 2 if giou_loss: - pbox = torch.cat((torch.sigmoid(pi[..., 0:2]), torch.exp(pi[..., 2:4]) * anchor_vec[i]), 1) # predicted + pbox = torch.cat((pxy, torch.exp(pi[..., 2:4]) * anchor_vec[i]), 1) # predicted giou = bbox_iou(pbox.t(), tbox[i], x1y1x2y2=False, GIoU=True) # giou computation lxy += (k * h['giou']) * (1.0 - giou).mean() # giou loss else: - lxy += (k * h['xy']) * MSE(torch.sigmoid(pi[..., 0:2]), txy[i]) # xy loss + lxy += (k * h['xy']) * MSE(pxy, txy[i]) # xy loss lwh += (k * h['wh']) * MSE(pi[..., 2:4], twh[i]) # wh yolo loss tclsm = torch.zeros_like(pi[..., 5:])