multi_thread dataloader

This commit is contained in:
Glenn Jocher 2019-03-21 14:48:40 +02:00
parent be38caf284
commit 70fe2204b4
4 changed files with 21 additions and 14 deletions

View File

@ -174,9 +174,6 @@ class Darknet(nn.Module):
self.module_defs[0]['cfg'] = cfg_path self.module_defs[0]['cfg'] = cfg_path
self.module_defs[0]['height'] = img_size self.module_defs[0]['height'] = img_size
self.hyperparams, self.module_list = create_modules(self.module_defs) self.hyperparams, self.module_list = create_modules(self.module_defs)
self.img_size = img_size
self.loss_names = ['loss', 'xy', 'wh', 'conf', 'cls', 'nT']
self.losses = []
def forward(self, x, var=None): def forward(self, x, var=None):
img_size = x.shape[-1] img_size = x.shape[-1]

View File

@ -1,6 +1,8 @@
import argparse import argparse
import time import time
from torch.utils.data import DataLoader
import test # Import test.py to get mAP after each epoch import test # Import test.py to get mAP after each epoch
from models import * from models import *
from utils.datasets import * from utils.datasets import *
@ -17,6 +19,7 @@ def train(
accumulate=1, accumulate=1,
multi_scale=False, multi_scale=False,
freeze_backbone=False, freeze_backbone=False,
num_workers=0
): ):
weights = 'weights' + os.sep weights = 'weights' + os.sep
latest = weights + 'latest.pt' latest = weights + 'latest.pt'
@ -38,10 +41,11 @@ def train(
lr0 = 0.001 # initial learning rate lr0 = 0.001 # initial learning rate
optimizer = torch.optim.SGD(model.parameters(), lr=lr0, momentum=.9) optimizer = torch.optim.SGD(model.parameters(), lr=lr0, momentum=.9)
# Get dataloader # Dataloader
dataloader = LoadImagesAndLabels(train_path, batch_size, img_size, augment=True) if num_workers > 0:
# from torch.utils.data import DataLoader cv2.setNumThreads(0) # to prevent OpenCV from multithreading
# dataloader = DataLoader(dataloader, batch_size=batch_size, num_workers=1) dataset = LoadImagesAndLabels(train_path, img_size=img_size, augment=True)
dataloader = DataLoader(dataset, batch_size=batch_size, num_workers=num_workers)
cutoff = -1 # backbone reaches to cutoff layer cutoff = -1 # backbone reaches to cutoff layer
start_epoch = 0 start_epoch = 0
@ -102,7 +106,6 @@ def train(
ui = -1 ui = -1
rloss = defaultdict(float) rloss = defaultdict(float)
for i, (imgs, targets, _, _) in enumerate(dataloader): for i, (imgs, targets, _, _) in enumerate(dataloader):
if targets.shape[1] == 100: # multithreaded 100-size block if targets.shape[1] == 100: # multithreaded 100-size block
targets = targets.view((-1, 6)) targets = targets.view((-1, 6))
targets = targets[targets[:, 5].nonzero().squeeze()] targets = targets[targets[:, 5].nonzero().squeeze()]
@ -150,8 +153,8 @@ def train(
# Multi-Scale training (320 - 608 pixels) every 10 batches # Multi-Scale training (320 - 608 pixels) every 10 batches
if multi_scale and (i + 1) % 10 == 0: if multi_scale and (i + 1) % 10 == 0:
dataloader.img_size = random.choice(range(10, 20)) * 32 dataset.img_size = random.choice(range(10, 20)) * 32
print('multi_scale img_size = %g' % dataloader.img_size) print('multi_scale img_size = %g' % dataset.img_size)
# Update best loss # Update best loss
if rloss['total'] < best_loss: if rloss['total'] < best_loss:
@ -194,6 +197,7 @@ if __name__ == '__main__':
parser.add_argument('--multi-scale', action='store_true', help='random image sizes per batch 320 - 608') parser.add_argument('--multi-scale', action='store_true', help='random image sizes per batch 320 - 608')
parser.add_argument('--img-size', type=int, default=32 * 13, help='pixels') parser.add_argument('--img-size', type=int, default=32 * 13, help='pixels')
parser.add_argument('--resume', action='store_true', help='resume training flag') parser.add_argument('--resume', action='store_true', help='resume training flag')
parser.add_argument('--num_workers', type=int, default=0, help='number of Pytorch DataLoader workers')
opt = parser.parse_args() opt = parser.parse_args()
print(opt, end='\n\n') print(opt, end='\n\n')
@ -208,4 +212,5 @@ if __name__ == '__main__':
batch_size=opt.batch_size, batch_size=opt.batch_size,
accumulate=opt.accumulate, accumulate=opt.accumulate,
multi_scale=opt.multi_scale, multi_scale=opt.multi_scale,
num_workers=opt.num_workers
) )

View File

@ -7,7 +7,6 @@ import cv2
import numpy as np import numpy as np
import torch import torch
# from torch.utils.data import Dataset
from utils.utils import xyxy2xywh from utils.utils import xyxy2xywh
@ -114,10 +113,11 @@ class LoadImagesAndLabels: # for training
def __getitem__(self, index): def __getitem__(self, index):
imgs, labels0, img_paths, img_shapes = self.load_images(index, index + 1) imgs, labels0, img_paths, img_shapes = self.load_images(index, index + 1)
labels0[:,0] = index % self.batch_size
labels0[:, 0] = index % self.batch_size
labels = torch.zeros(100, 6) labels = torch.zeros(100, 6)
labels[:min(len(labels0), 100)] = labels0 # max 100 labels per image labels[:min(len(labels0), 100)] = labels0 # max 100 labels per image
return imgs.squeeze(0), labels, img_paths, img_shapes return imgs.squeeze(0), labels, img_paths, img_shapes
def __next__(self): def __next__(self):
@ -225,7 +225,12 @@ class LoadImagesAndLabels: # for training
img_all = np.ascontiguousarray(img_all, dtype=np.float32) # uint8 to float32 img_all = np.ascontiguousarray(img_all, dtype=np.float32) # uint8 to float32
img_all /= 255.0 # 0 - 255 to 0.0 - 1.0 img_all /= 255.0 # 0 - 255 to 0.0 - 1.0
labels_all = torch.from_numpy(np.concatenate(labels_all, 0)) if len(labels_all) > 0:
labels_all = np.concatenate(labels_all, 0)
else:
labels_all = np.zeros((1, 6), dtype='float32')
labels_all = torch.from_numpy(labels_all)
return torch.from_numpy(img_all), labels_all, img_paths, img_shapes return torch.from_numpy(img_all), labels_all, img_paths, img_shapes
def __len__(self): def __len__(self):

View File

@ -40,7 +40,7 @@ def model_info(model):
print('\n%5s %38s %9s %12s %20s %12s %12s' % ('layer', 'name', 'gradient', 'parameters', 'shape', 'mu', 'sigma')) print('\n%5s %38s %9s %12s %20s %12s %12s' % ('layer', 'name', 'gradient', 'parameters', 'shape', 'mu', 'sigma'))
for i, (name, p) in enumerate(model.named_parameters()): for i, (name, p) in enumerate(model.named_parameters()):
name = name.replace('module_list.', '') name = name.replace('module_list.', '')
print('%5g %38s %9s %12g %20s %12.3g %12.3g' % ( print('%5g %40s %9s %12g %20s %10.3g %10.3g' % (
i, name, p.requires_grad, p.numel(), list(p.shape), p.mean(), p.std())) i, name, p.requires_grad, p.numel(), list(p.shape), p.mean(), p.std()))
print('Model Summary: %g layers, %g parameters, %g gradients' % (i + 1, n_p, n_g)) print('Model Summary: %g layers, %g parameters, %g gradients' % (i + 1, n_p, n_g))