diff --git a/detect.py b/detect.py
index 7423d654..dc2a3dd3 100644
--- a/detect.py
+++ b/detect.py
@@ -34,6 +34,22 @@ def detect(
     else:  # darknet format
         _ = load_darknet_weights(model, weights)
 
+    # Fuse batchnorm
+    fuse = True
+    if fuse:
+        fused_list = nn.ModuleList()
+        for a in list(model.children())[0]:
+            for i, b in enumerate(a):
+                if isinstance(b, nn.modules.batchnorm.BatchNorm2d):
+                    # fuse this bn layer with the previous conv2d layer
+                    conv = a[i - 1]
+                    fused = torch_utils.fuse_conv_and_bn(conv, b)
+                    a = nn.Sequential(fused, *list(a.children())[i + 1:])
+                    break
+            fused_list.append(a)
+        model.module_list = fused_list
+        # model_info(model)  # yolov3-spp reduced from 225 to 152 layers
+
     model.to(device).eval()
 
     # Set Dataloader
diff --git a/models.py b/models.py
index ce4fad46..77abdd9b 100755
--- a/models.py
+++ b/models.py
@@ -63,10 +63,10 @@ def create_modules(module_defs):
             anchors = [float(x) for x in module_def['anchors'].split(',')]
             anchors = [(anchors[i], anchors[i + 1]) for i in range(0, len(anchors), 2)]
             anchors = [anchors[i] for i in anchor_idxs]
-            nC = int(module_def['classes'])  # number of classes
+            nc = int(module_def['classes'])  # number of classes
             img_size = int(hyperparams['height'])
             # Define detection layer
-            yolo_layer = YOLOLayer(anchors, nC, img_size, yolo_layer_count, cfg=hyperparams['cfg'])
+            yolo_layer = YOLOLayer(anchors, nc, img_size, yolo_layer_count, cfg=hyperparams['cfg'])
             modules.add_module('yolo_%d' % i, yolo_layer)
             yolo_layer_count += 1
 
@@ -100,12 +100,12 @@ class Upsample(nn.Module):
 
 
 class YOLOLayer(nn.Module):
-    def __init__(self, anchors, nC, img_size, yolo_layer, cfg):
+    def __init__(self, anchors, nc, img_size, yolo_layer, cfg):
         super(YOLOLayer, self).__init__()
 
         self.anchors = torch.FloatTensor(anchors)
-        self.nA = len(anchors)  # number of anchors (3)
-        self.nC = nC  # number of classes (80)
+        self.na = len(anchors)  # number of anchors (3)
+        self.nc = nc  # number of classes (80)
         self.img_size = 0
 
         device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
         create_grids(self, 32, 1, device=device)
@@ -115,35 +115,35 @@ class YOLOLayer(nn.Module):
             if cfg.endswith('yolov3-tiny.cfg'):
                 stride *= 2
 
-            nG = int(img_size / stride)  # number grid points
-            create_grids(self, img_size, nG)
+            ng = int(img_size / stride)  # number of grid points
+            create_grids(self, img_size, ng)
 
     def forward(self, p, img_size, var=None):
         if ONNX_EXPORT:
-            bs, nG = 1, self.nG  # batch size, grid size
+            bs, ng = 1, self.ng  # batch size, grid size
         else:
-            bs, nG = p.shape[0], p.shape[-1]
+            bs, ng = p.shape[0], p.shape[-1]
 
             if self.img_size != img_size:
-                create_grids(self, img_size, nG, p.device)
+                create_grids(self, img_size, ng, p.device)
 
         # p.view(bs, 255, 13, 13) -- > (bs, 3, 13, 13, 85)  # (bs, anchors, grid, grid, classes + xywh)
-        p = p.view(bs, self.nA, self.nC + 5, nG, nG).permute(0, 1, 3, 4, 2).contiguous()  # prediction
+        p = p.view(bs, self.na, self.nc + 5, ng, ng).permute(0, 1, 3, 4, 2).contiguous()  # prediction
 
         if self.training:
             return p
 
         elif ONNX_EXPORT:
-            grid_xy = self.grid_xy.repeat((1, self.nA, 1, 1, 1)).view((1, -1, 2))
-            anchor_wh = self.anchor_wh.repeat((1, 1, nG, nG, 1)).view((1, -1, 2)) / nG
+            grid_xy = self.grid_xy.repeat((1, self.na, 1, 1, 1)).view((1, -1, 2))
+            anchor_wh = self.anchor_wh.repeat((1, 1, ng, ng, 1)).view((1, -1, 2)) / ng
 
-            # p = p.view(-1, 5 + self.nC)
+            # p = p.view(-1, 5 + self.nc)
             # xy = torch.sigmoid(p[..., 0:2]) + grid_xy[0]  # x, y
             # wh = torch.exp(p[..., 2:4]) * anchor_wh[0]  # width, height
             # p_conf = torch.sigmoid(p[:, 4:5])  # Conf
             # p_cls = F.softmax(p[:, 5:85], 1) * p_conf  # SSD-like conf
-            # return torch.cat((xy / nG, wh, p_conf, p_cls), 1).t()
+            # return torch.cat((xy / ng, wh, p_conf, p_cls), 1).t()
 
-            p = p.view(1, -1, 5 + self.nC)
+            p = p.view(1, -1, 5 + self.nc)
             xy = torch.sigmoid(p[..., 0:2]) + grid_xy  # x, y
             wh = torch.exp(p[..., 2:4]) * anchor_wh  # width, height
             p_conf = torch.sigmoid(p[..., 4:5])  # Conf
@@ -153,7 +153,7 @@ class YOLOLayer(nn.Module):
             p_cls = torch.exp(p_cls).permute((2, 1, 0))
             p_cls = p_cls / p_cls.sum(0).unsqueeze(0) * p_conf.permute((2, 1, 0))  # F.softmax() equivalent
             p_cls = p_cls.permute(2, 1, 0)
-            return torch.cat((xy / nG, wh, p_conf, p_cls), 2).squeeze().t()
+            return torch.cat((xy / ng, wh, p_conf, p_cls), 2).squeeze().t()
 
         else:  # inference
             io = p.clone()  # inference output
@@ -165,7 +165,7 @@ class YOLOLayer(nn.Module):
             io[..., :4] *= self.stride
 
             # reshape from [1, 3, 13, 13, 85] to [1, 507, 85]
-            return io.view(bs, -1, 5 + self.nC), p
+            return io.view(bs, -1, 5 + self.nc), p
 
 
 class Darknet(nn.Module):
@@ -218,19 +218,19 @@ def get_yolo_layers(model):
     return [i for i, x in enumerate(a) if x]  # [82, 94, 106] for yolov3
 
 
-def create_grids(self, img_size, nG, device='cpu'):
+def create_grids(self, img_size, ng, device='cpu'):
     self.img_size = img_size
-    self.stride = img_size / nG
+    self.stride = img_size / ng
 
     # build xy offsets
-    grid_x = torch.arange(nG).repeat((nG, 1)).view((1, 1, nG, nG)).float()
+    grid_x = torch.arange(ng).repeat((ng, 1)).view((1, 1, ng, ng)).float()
     grid_y = grid_x.permute(0, 1, 3, 2)
     self.grid_xy = torch.stack((grid_x, grid_y), 4).to(device)
 
     # build wh gains
     self.anchor_vec = self.anchors.to(device) / self.stride
-    self.anchor_wh = self.anchor_vec.view(1, self.nA, 1, 1, 2).to(device)
-    self.nG = torch.FloatTensor([nG]).to(device)
+    self.anchor_wh = self.anchor_vec.view(1, self.na, 1, 1, 2).to(device)
+    self.ng = torch.FloatTensor([ng]).to(device)
 
 
 def load_darknet_weights(self, weights, cutoff=-1):
diff --git a/utils/torch_utils.py b/utils/torch_utils.py
index 52c64261..406d5ac7 100644
--- a/utils/torch_utils.py
+++ b/utils/torch_utils.py
@@ -26,3 +26,32 @@ def select_device(force_cpu=False):
                   (i, x[i].name, x[i].total_memory / c))
 
     return device
+
+
+def fuse_conv_and_bn(conv, bn):
+    # https://tehnokv.com/posts/fusing-batchnorm-and-conv/
+    with torch.no_grad():
+        # init the fused conv layer
+        fusedconv = torch.nn.Conv2d(
+            conv.in_channels,
+            conv.out_channels,
+            kernel_size=conv.kernel_size,
+            stride=conv.stride,
+            padding=conv.padding,
+            bias=True
+        )
+
+        # prepare filters: W' = diag(bn.weight / sqrt(bn.running_var + bn.eps)) @ W
+        w_conv = conv.weight.clone().view(conv.out_channels, -1)
+        w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var)))
+        fusedconv.weight.copy_(torch.mm(w_bn, w_conv).view(fusedconv.weight.size()))
+
+        # prepare spatial bias: b' = w_bn @ b_conv + b_bn
+        if conv.bias is not None:
+            b_conv = torch.matmul(w_bn, conv.bias)  # bn scales any existing conv bias too
+        else:
+            b_conv = torch.zeros(conv.weight.size(0))
+        b_bn = bn.bias - bn.weight.mul(bn.running_mean).div(torch.sqrt(bn.running_var + bn.eps))
+        fusedconv.bias.copy_(b_conv + b_bn)
+
+    return fusedconv
diff --git a/utils/utils.py b/utils/utils.py
index 183b9562..c78dcc50 100755
--- a/utils/utils.py
+++ b/utils/utils.py
@@ -291,7 +291,7 @@ def build_targets(model, targets):
 
         # iou of targets-anchors
         t, a = targets, []
-        gwh = targets[:, 4:6] * layer.nG
+        gwh = targets[:, 4:6] * layer.ng
         if nt:
             iou = [wh_iou(x, gwh) for x in layer.anchor_vec]
             iou, a = torch.stack(iou, 0).max(0)  # best iou and anchor
@@ -304,7 +304,7 @@ def build_targets(model, targets):
 
         # Indices
         b, c = t[:, :2].long().t()  # target image, class
-        gxy = t[:, 2:4] * layer.nG
+        gxy = t[:, 2:4] * layer.ng
         gi, gj = gxy.long().t()  # grid_i, grid_j
         indices.append((b, a, gj, gi))
 
@@ -318,7 +318,7 @@ def build_targets(model, targets):
         # Class
         tcls.append(c)
         if c.shape[0]:
-            assert c.max() <= layer.nC, 'Target classes exceed model classes'
+            assert c.max() < layer.nc, 'Target classes exceed model classes'
 
     return txy, twh, tcls, indices
 
@@ -442,12 +442,12 @@ def strip_optimizer_from_checkpoint(filename='weights/best.pt'):
 
 
 def coco_class_count(path='../coco/labels/train2014/'):
     # Histogram of occurrences per class
-    nC = 80  # number classes
-    x = np.zeros(nC, dtype='int32')
+    nc = 80  # number of classes
+    x = np.zeros(nc, dtype='int32')
     files = sorted(glob.glob('%s/*.*' % path))
     for i, file in enumerate(files):
         labels = np.loadtxt(file, dtype=np.float32).reshape(-1, 5)
-        x += np.bincount(labels[:, 0].astype('int32'), minlength=nC)
+        x += np.bincount(labels[:, 0].astype('int32'), minlength=nc)
         print(i, len(files))
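
Note on the fusion math: at inference time BatchNorm2d applies the per-channel affine map y = bn.weight * (x - running_mean) / sqrt(running_var + eps) + bn.bias, so it folds into the preceding convolution as W' = diag(bn.weight / sqrt(running_var + eps)) @ W with bias b' = b_bn + w_bn @ b_conv, which is what fuse_conv_and_bn computes. A minimal sketch to check that a fused layer reproduces the original conv + bn pair; the layer shapes and the randomized BN statistics are illustrative only, and it assumes the repo root is on PYTHONPATH:

    import torch
    import torch.nn as nn
    from utils.torch_utils import fuse_conv_and_bn

    conv = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False)
    bn = nn.BatchNorm2d(16)

    # randomize the bn buffers/parameters so the check is non-trivial
    bn.running_mean.uniform_(-1.0, 1.0)
    bn.running_var.uniform_(0.5, 2.0)
    bn.weight.data.uniform_(0.5, 2.0)
    bn.bias.data.uniform_(-1.0, 1.0)

    original = nn.Sequential(conv, bn).eval()  # eval() so bn uses running statistics
    fused = fuse_conv_and_bn(conv, bn).eval()

    x = torch.randn(1, 3, 64, 64)
    with torch.no_grad():
        print((original(x) - fused(x)).abs().max())  # expect ~1e-6, float32 rounding only

Because the identity only holds with fixed running statistics, fusion is an inference-time optimization, which is why the change lives in detect.py and the fused model goes straight into eval() mode.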