diff --git a/Dockerfile b/Dockerfile
index f21cf49e..da91e035 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -2,7 +2,7 @@
 FROM nvcr.io/nvidia/pytorch:20.03-py3
 
 # Install dependencies (pip or conda)
-RUN pip install -U gsutil
+RUN pip install --no-cache-dir -U gsutil thop
 # RUN pip install -U -r requirements.txt
 # RUN conda update -n base -c defaults conda
 # RUN conda install -y -c anaconda future numpy opencv matplotlib tqdm pillow
diff --git a/models.py b/models.py
index 88107179..fdde9dd7 100755
--- a/models.py
+++ b/models.py
@@ -23,13 +23,21 @@ def create_modules(module_defs, img_size):
             filters = mdef['filters']
             size = mdef['size']
             stride = mdef['stride'] if 'stride' in mdef else (mdef['stride_y'], mdef['stride_x'])
-            modules.add_module('Conv2d', nn.Conv2d(in_channels=output_filters[-1],
-                                                   out_channels=filters,
-                                                   kernel_size=size,
-                                                   stride=stride,
-                                                   padding=(size - 1) // 2 if mdef['pad'] else 0,
-                                                   groups=mdef['groups'] if 'groups' in mdef else 1,
-                                                   bias=not bn))
+            if isinstance(size, int):  # single-size conv
+                modules.add_module('Conv2d', nn.Conv2d(in_channels=output_filters[-1],
+                                                       out_channels=filters,
+                                                       kernel_size=size,
+                                                       stride=stride,
+                                                       padding=(size - 1) // 2 if mdef['pad'] else 0,
+                                                       groups=mdef['groups'] if 'groups' in mdef else 1,
+                                                       bias=not bn))
+            else:  # multiple-size conv
+                modules.add_module('MixConv2d', MixConv2d(in_ch=output_filters[-1],
+                                                          out_ch=filters,
+                                                          k=size,
+                                                          stride=stride,
+                                                          bias=not bn))
+
             if bn:
                 modules.add_module('BatchNorm2d', nn.BatchNorm2d(filters, momentum=0.03, eps=1E-4))
             else:
diff --git a/utils/layers.py b/utils/layers.py
index cb72d67b..1f19279d 100644
--- a/utils/layers.py
+++ b/utils/layers.py
@@ -35,6 +35,35 @@ class WeightedFeatureFusion(nn.Module):  # weighted sum of 2 or more layers http
         return x
 
 
+class MixConv2d(nn.Module):  # MixConv: Mixed Depthwise Convolutional Kernels https://arxiv.org/abs/1907.09595
+    def __init__(self, in_ch, out_ch, k=(3, 5, 7), stride=1, dilation=1, bias=True, method='equal_params'):
+        # Builds one Conv2d per kernel size in k; every branch sees the full input and out_ch is divided among
+        # the branches by method: 'equal_ch' (equal channels) or anything else (equal parameter count)
+        super(MixConv2d, self).__init__()
+        groups = len(k)
+        if method == 'equal_ch':  # equal channels per group
+            i = torch.linspace(0, groups - 1E-6, out_ch).floor()  # out_ch indices
+            ch = [int((i == g).sum()) for g in range(groups)]  # python ints rather than 0-dim tensors
+        else:  # 'equal_params': equal parameter count per group
+            b = [out_ch] + [0] * groups
+            a = np.eye(groups + 1, groups, k=-1)
+            a -= np.roll(a, 1, axis=1)
+            a *= np.array(k) ** 2
+            a[0] = 1
+            ch = np.linalg.lstsq(a, b, rcond=None)[0].round().astype(int)  # solve for equal weight indices, ax = b
+            ch[ch.argmax()] += out_ch - ch.sum()  # rounding can drift, so the largest group absorbs the remainder
+            ch = ch.tolist()  # python ints for nn.Conv2d
+        d = dilation if isinstance(dilation, (tuple, list)) else (dilation, dilation)  # per-axis dilation
+        self.m = nn.ModuleList([torch.nn.Conv2d(in_channels=in_ch, out_channels=c, kernel_size=ks, stride=stride,
+                                                padding=tuple(x * (ks - 1) // 2 for x in d),  # dilated 'same' pad
+                                                dilation=dilation, bias=bias) for ks, c in zip(k, ch)])
+
+    def forward(self, x):
+        # run every branch on the same input and concatenate the results along dim 1
+        return torch.cat([m(x) for m in self.m], 1)
+
+
+# Activation functions below -------------------------------------------------------------------------------------------
 class SwishImplementation(torch.autograd.Function):
     @staticmethod
     def forward(ctx, i):
diff --git a/utils/parse_config.py b/utils/parse_config.py
index 36ea42d7..4208748e 100644
--- a/utils/parse_config.py
+++ b/utils/parse_config.py
@@ -27,7 +27,7 @@ def parse_model_cfg(path):
 
             if key == 'anchors':  # return nparray
                 mdefs[-1][key] = np.array([float(x) for x in val.split(',')]).reshape((-1, 2))  # np anchors
-            elif key in ['from', 'layers', 'mask']:  # return array
+            elif (key in ['from', 'layers', 'mask']) or (key == 'size' and ',' in val):  # return array
                 mdefs[-1][key] = [int(x) for x in val.split(',')]
             else:
                 val = val.strip()