diff --git a/Dockerfile b/Dockerfile
index f21cf49e..da91e035 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -2,7 +2,7 @@
 FROM nvcr.io/nvidia/pytorch:20.03-py3
 
 # Install dependencies (pip or conda)
-RUN pip install -U gsutil
+RUN pip install -U gsutil thop
 # RUN pip install -U -r requirements.txt
 # RUN conda update -n base -c defaults conda
 # RUN conda install -y -c anaconda future numpy opencv matplotlib tqdm pillow
diff --git a/models.py b/models.py
index 88107179..fdde9dd7 100755
--- a/models.py
+++ b/models.py
@@ -23,13 +23,21 @@ def create_modules(module_defs, img_size):
             filters = mdef['filters']
             size = mdef['size']
             stride = mdef['stride'] if 'stride' in mdef else (mdef['stride_y'], mdef['stride_x'])
-            modules.add_module('Conv2d', nn.Conv2d(in_channels=output_filters[-1],
-                                                   out_channels=filters,
-                                                   kernel_size=size,
-                                                   stride=stride,
-                                                   padding=(size - 1) // 2 if mdef['pad'] else 0,
-                                                   groups=mdef['groups'] if 'groups' in mdef else 1,
-                                                   bias=not bn))
+            if isinstance(size, int):  # single-size conv
+                modules.add_module('Conv2d', nn.Conv2d(in_channels=output_filters[-1],
+                                                       out_channels=filters,
+                                                       kernel_size=size,
+                                                       stride=stride,
+                                                       padding=(size - 1) // 2 if mdef['pad'] else 0,
+                                                       groups=mdef['groups'] if 'groups' in mdef else 1,
+                                                       bias=not bn))
+            else:  # multiple-size conv
+                modules.add_module('MixConv2d', MixConv2d(in_ch=output_filters[-1],
+                                                          out_ch=filters,
+                                                          k=size,
+                                                          stride=stride,
+                                                          bias=not bn))
+
             if bn:
                 modules.add_module('BatchNorm2d', nn.BatchNorm2d(filters, momentum=0.03, eps=1E-4))
             else:
diff --git a/utils/layers.py b/utils/layers.py
index cb72d67b..1f19279d 100644
--- a/utils/layers.py
+++ b/utils/layers.py
@@ -35,6 +35,39 @@ class WeightedFeatureFusion(nn.Module):  # weighted sum of 2 or more layers http
         return x
 
 
+class MixConv2d(nn.Module):  # MixConv: Mixed Depthwise Convolutional Kernels https://arxiv.org/abs/1907.09595
+    def __init__(self, in_ch, out_ch, k=(3, 5, 7), stride=1, dilation=1, bias=True, method='equal_params'):
+        # One Conv2d branch per kernel size in k; every branch sees the full input and outputs are concatenated.
+        # method='equal_ch' splits out_ch evenly; any other value balances the weight count per branch.
+        super(MixConv2d, self).__init__()
+
+        groups = len(k)
+        if method == 'equal_ch':  # equal channels per group
+            i = torch.linspace(0, groups - 1E-6, out_ch).floor()  # out_ch indices
+            ch = [int((i == g).sum()) for g in range(groups)]  # plain ints, not 0-dim tensors
+        else:  # 'equal_params': equal parameter count per group
+            b = [out_ch] + [0] * groups
+            a = np.eye(groups + 1, groups, k=-1)
+            a -= np.roll(a, 1, axis=1)
+            a *= np.array(k) ** 2
+            a[0] = 1
+            ch = np.linalg.lstsq(a, b, rcond=None)[0].round().astype(int)  # solve for equal weight indices, ax = b
+            ch[ch.argmax()] += out_ch - ch.sum()  # per-group rounding can drift the total (out_ch=10 gave 6+2+1=9)
+            ch = ch.tolist()  # plain ints, not np.int64
+
+        self.m = nn.ModuleList([torch.nn.Conv2d(in_channels=in_ch,
+                                                out_channels=ch[g],
+                                                kernel_size=k[g],
+                                                stride=stride,
+                                                padding=(k[g] - 1) // 2,  # 'same' pad
+                                                dilation=dilation,
+                                                bias=bias) for g in range(groups)])
+
+    def forward(self, x):
+        return torch.cat([m(x) for m in self.m], 1)
+
+
+# Activation functions below -------------------------------------------------------------------------------------------
 class SwishImplementation(torch.autograd.Function):
     @staticmethod
     def forward(ctx, i):
diff --git a/utils/parse_config.py b/utils/parse_config.py
index 36ea42d7..4208748e 100644
--- a/utils/parse_config.py
+++ b/utils/parse_config.py
@@ -27,7 +27,7 @@ def parse_model_cfg(path):
 
             if key == 'anchors':  # return nparray
                 mdefs[-1][key] = np.array([float(x) for x in val.split(',')]).reshape((-1, 2))  # np anchors
-            elif key in ['from', 'layers', 'mask']:  # return array
+            elif (key in ['from', 'layers', 'mask']) or (key == 'size' and ',' in val):  # return array
                 mdefs[-1][key] = [int(x) for x in val.split(',')]
             else:
                 val = val.strip()