diff --git a/models.py b/models.py index bc9c7d0e..87b02cc5 100755 --- a/models.py +++ b/models.py @@ -64,7 +64,7 @@ def create_modules(module_defs): anchors = [(anchors[i], anchors[i + 1]) for i in range(0, len(anchors), 2)] anchors = [anchors[i] for i in anchor_idxs] nc = int(module_def['classes']) # number of classes - img_size = int(hyperparams['height']) + img_size = hyperparams['height'] # Define detection layer yolo_layer = YOLOLayer(anchors, nc, img_size, yolo_layer_count, cfg=hyperparams['cfg']) modules.add_module('yolo_%d' % i, yolo_layer) @@ -103,38 +103,37 @@ class YOLOLayer(nn.Module): def __init__(self, anchors, nc, img_size, yolo_layer, cfg): super(YOLOLayer, self).__init__() - self.anchors = torch.FloatTensor(anchors) + self.anchors = torch.Tensor(anchors) self.na = len(anchors) # number of anchors (3) self.nc = nc # number of classes (80) self.img_size = 0 - device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') - create_grids(self, 32, 1, device=device) if ONNX_EXPORT: # grids must be computed in __init__ stride = [32, 16, 8][yolo_layer] # stride of this layer if cfg.endswith('yolov3-tiny.cfg'): stride *= 2 - ng = int(img_size / stride) # number grid points - create_grids(self, img_size, ng) + ng = (int(img_size[0] / stride), int(img_size[1] / stride)) # number grid points + create_grids(self, max(img_size), ng) def forward(self, p, img_size, var=None): if ONNX_EXPORT: - bs, ng = 1, self.ng # batch size, grid size + bs = 1 # batch size else: - bs, ng = p.shape[0], p.shape[-1] + bs, nx, ny = p.shape[0], p.shape[-2], p.shape[-1] if self.img_size != img_size: - create_grids(self, img_size, ng, p.device) + create_grids(self, img_size, (nx, ny), p.device) # p.view(bs, 255, 13, 13) -- > (bs, 3, 13, 13, 85) # (bs, anchors, grid, grid, classes + xywh) - p = p.view(bs, self.na, self.nc + 5, ng, ng).permute(0, 1, 3, 4, 2).contiguous() # prediction + p = p.view(bs, self.na, self.nc + 5, self.nx, self.ny).permute(0, 1, 3, 4, 2).contiguous() # prediction if self.training: return p elif ONNX_EXPORT: + ngu = self.ng.view((1, 1, 2)) grid_xy = self.grid_xy.repeat((1, self.na, 1, 1, 1)).view((1, -1, 2)) - anchor_wh = self.anchor_wh.repeat((1, 1, ng, ng, 1)).view((1, -1, 2)) / ng + anchor_wh = self.anchor_wh.repeat((1, 1, self.nx, self.ny, 1)).view((1, -1, 2)) / self.nx # p = p.view(-1, 5 + self.nc) # xy = torch.sigmoid(p[..., 0:2]) + grid_xy[0] # x, y @@ -153,7 +152,7 @@ class YOLOLayer(nn.Module): p_cls = torch.exp(p_cls).permute((2, 1, 0)) p_cls = p_cls / p_cls.sum(0).unsqueeze(0) * p_conf.permute((2, 1, 0)) # F.softmax() equivalent p_cls = p_cls.permute(2, 1, 0) - return torch.cat((xy / ng, wh, p_conf, p_cls), 2).squeeze().t() + return torch.cat((xy / self.nx, wh, p_conf, p_cls), 2).squeeze().t() else: # inference io = p.clone() # inference output @@ -234,9 +233,9 @@ def get_yolo_layers(model): def create_grids(self, img_size, ng, device='cpu'): - nx, ny = ng, ng # x and y grid size + nx, ny = ng # x and y grid size self.img_size = img_size - self.stride = img_size / nx + self.stride = img_size / max(ng) # build xy offsets yv, xv = torch.meshgrid([torch.arange(nx), torch.arange(ny)]) @@ -245,7 +244,7 @@ def create_grids(self, img_size, ng, device='cpu'): # build wh gains self.anchor_vec = self.anchors.to(device) / self.stride self.anchor_wh = self.anchor_vec.view(1, self.na, 1, 1, 2).to(device) - self.ng = torch.Tensor([ng]).to(device) + self.ng = torch.Tensor(ng).to(device) self.nx = nx self.ny = ny diff --git a/test.py b/test.py index f218e7e9..e78d6be1 100644 --- a/test.py +++ b/test.py @@ -93,7 +93,7 @@ def test( # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ... image_id = int(Path(paths[si]).stem.split('_')[-1]) box = pred[:, :4].clone() # xyxy - scale_coords(imgs[si].shape, box, shapes[si]) # to original shape + scale_coords(img_size, box, shapes[si]) # to original shape box = xyxy2xywh(box) # xywh box[:, :2] -= box[:, 2:] / 2 # xy center to top-left corner for di, d in enumerate(pred): diff --git a/utils/utils.py b/utils/utils.py index bd348b9a..e25f1983 100755 --- a/utils/utils.py +++ b/utils/utils.py @@ -102,13 +102,13 @@ def xywh2xyxy(x): def scale_coords(img1_shape, coords, img0_shape): # Rescale coords1 (xyxy) from img1_shape to img0_shape - gain = max(img1_shape[1:3]) / max(img0_shape[:2]) # gain = old / new - pad_x = (img1_shape[2] - img0_shape[1] * gain) / 2 # width padding - pad_y = (img1_shape[1] - img0_shape[0] * gain) / 2 # height padding + gain = img1_shape / max(img0_shape[:2]) # gain = old / new + pad_x = np.mod(img1_shape - img0_shape[1] * gain, 32) / 2 # width padding + pad_y = np.mod(img1_shape - img0_shape[0] * gain, 32) / 2 # height padding coords[:, [0, 2]] -= pad_x coords[:, [1, 3]] -= pad_y coords[:, :4] /= gain - coords[:, :4] = torch.clamp(coords[:, :4], min=0) + coords[:, :4] = coords[:, :4].clamp(min=0) return coords