commit 83793ffb2b (parent 9c0cde69d5)
Author: Glenn Jocher
Date:   2019-04-24 21:23:54 +02:00

3 changed files with 52 additions and 22 deletions

--- a/test.py
+++ b/test.py

@@ -44,7 +44,7 @@ def test(
     names = load_classes(data_cfg['names'])  # class names

     # Dataloader
-    dataset = LoadImagesAndLabels(test_path, img_size=img_size)
+    dataset = LoadImagesAndLabels(test_path, img_size, batch_size)
     dataloader = DataLoader(dataset,
                             batch_size=batch_size,
                             num_workers=4,

--- a/train.py
+++ b/train.py

@@ -119,7 +119,7 @@ def train(
     # plt.savefig('LR.png', dpi=300)

     # Dataset
-    dataset = LoadImagesAndLabels(train_path, img_size=img_size, augment=True)
+    dataset = LoadImagesAndLabels(train_path, img_size, batch_size, augment=True)

     # Initialize distributed training
     if torch.cuda.device_count() > 1:
@@ -131,7 +131,7 @@ def train(
     dataloader = DataLoader(dataset,
                             batch_size=batch_size,
                             num_workers=opt.num_workers,
-                            shuffle=True,
+                            shuffle=False,
                             pin_memory=True,
                             collate_fn=dataset.collate_fn)
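
The shuffle flip above pairs with the rectangular-training change below: the dataset sorts images by aspect ratio and precomputes one shape per batch, so the DataLoader must serve images in dataset order for each batch to contain similarly shaped images. A minimal sketch of the idea (synthetic aspect ratios; the numbers are illustrative, not from the commit):

import numpy as np

ar = np.array([1.8, 0.4, 1.0, 0.6])  # aspect ratios (height / width) in file order
order = ar.argsort()                 # sorted dataset order: [1, 3, 2, 0]
print(ar[order])                     # [0.4 0.6 1.  1.8] -> neighbours share similar shapes
# Sequential batches now group similar aspect ratios; shuffle=True would undo the grouping.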

--- a/utils/datasets.py
+++ b/utils/datasets.py

@@ -74,7 +74,7 @@ class LoadImages:  # for inference
         print('image %g/%g %s: ' % (self.count, self.nF, path), end='')

         # Padded resize
-        img, _, _, _ = letterbox(img0, height=self.height)
+        img, _, _, _ = letterbox(img0, new_shape=self.height)
         print('%gx%g ' % img.shape[:2], end='')  # print image size

         # Normalize RGB
@@ -116,7 +116,7 @@ class LoadWebcam:  # for inference
             img0 = cv2.flip(img0, 1)  # flip left-right

         # Padded resize
-        img, _, _, _ = letterbox(img0, height=self.height)
+        img, _, _, _ = letterbox(img0, new_shape=self.height)

         # Normalize RGB
         img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB
@@ -130,7 +130,7 @@ class LoadWebcam:  # for inference
 class LoadImagesAndLabels(Dataset):  # for training/testing
-    def __init__(self, path, img_size=416, augment=False):
+    def __init__(self, path, img_size=416, batch_size=16, augment=False):
         with open(path, 'r') as f:
             img_files = f.read().splitlines()
             self.img_files = list(filter(lambda x: len(x) > 0, img_files))
@@ -143,17 +143,35 @@ class LoadImagesAndLabels(Dataset):  # for training/testing
             x.replace('images', 'labels').replace('.bmp', '.txt').replace('.jpg', '.txt').replace('.png', '.txt')
             for x in self.img_files]

-        # sort dataset by aspect ratio for rectangular training
-        self.rectangle = False
-        if self.rectangle:
+        # Rectangular Training  https://github.com/ultralytics/yolov3/issues/232
+        self.train_rectangular = True
+        if self.train_rectangular:
+            bi = np.floor(np.arange(n) / batch_size).astype(np.int)  # batch index
+            nb = bi[-1]  # number of batches
+
             from PIL import Image

             # Read image aspect ratios
             s = np.array([Image.open(f).size for f in tqdm(self.img_files, desc='Reading image shapes')])
             ar = s[:, 1] / s[:, 0]  # aspect ratio

             # Sort by aspect ratio
             i = ar.argsort()
             ar = ar[i]
             self.img_files = [self.img_files[i] for i in i]
             self.label_files = [self.label_files[i] for i in i]
             self.ar = ar[i]
+
+            # Set training image shapes
+            shapes = [[1, 1]] * nb
+            for i in range(nb):
+                ari = ar[bi == i]
+                mini, maxi = ari.min(), ari.max()
+                if maxi < 1:
+                    shapes[i] = [maxi, 1]
+                elif mini > 1:
+                    shapes[i] = [1, 1 / mini]
+            self.batch_shapes = np.ceil(np.array(shapes) * img_size / 32.).astype(np.int) * 32
+            self.batch = bi  # batch index of image

         # if n < 200:  # preload all images into memory if possible
         #     self.imgs = [cv2.imread(img_files[i]) for i in range(n)]
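
For context, a self-contained sketch of the batch-shape computation introduced above, under illustrative inputs (img_size=416, batch_size=2, synthetic already-sorted aspect ratios). Note one hedge: the commit computes nb = bi[-1], while the sketch uses bi[-1] + 1 so the final batch also receives a shape:

import numpy as np

img_size, batch_size = 416, 2
ar = np.array([0.5, 0.7, 1.0, 1.2, 1.5, 2.0])  # sorted aspect ratios (height / width)

bi = np.floor(np.arange(len(ar)) / batch_size).astype(int)  # batch index per image
nb = bi[-1] + 1  # number of batches

shapes = [[1, 1]] * nb  # default: square [height, width] multipliers
for i in range(nb):
    ari = ar[bi == i]
    mini, maxi = ari.min(), ari.max()
    if maxi < 1:  # whole batch is wide: shrink height
        shapes[i] = [maxi, 1]
    elif mini > 1:  # whole batch is tall: shrink width
        shapes[i] = [1, 1 / mini]

# Scale by img_size and round up to the next multiple of 32 (the network stride)
batch_shapes = np.ceil(np.array(shapes) * img_size / 32.).astype(int) * 32
print(batch_shapes)  # [[320 416] [416 416] [416 288]]
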
@@ -187,8 +205,13 @@ class LoadImagesAndLabels(Dataset):  # for training/testing
             img_hsv[:, :, 2] = V if b < 1 else V.clip(None, 255)
             cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img)

         # Letterbox
         h, w, _ = img.shape
-        img, ratio, padw, padh = letterbox(img, height=self.img_size, mode='square')
+        if self.train_rectangular:
+            new_shape = self.batch_shapes[self.batch[index]]
+            img, ratio, padw, padh = letterbox(img, new_shape=new_shape, mode='rect')
+        else:
+            img, ratio, padw, padh = letterbox(img, new_shape=self.img_size, mode='square')

         # Load labels
         labels = []
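
Because every image in batch b is letterboxed to the same batch_shapes[b], the existing collate_fn can still stack a batch into one tensor. A small illustration of the lookup (numbers are hypothetical):

import numpy as np

batch_shapes = np.array([[320, 416], [416, 416], [416, 288]])  # one [height, width] per batch
batch = np.array([0, 0, 1, 1, 2, 2])                           # image index -> batch index
index = 4
print(batch_shapes[batch[index]])  # [416 288], shared by images 4 and 5
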
@@ -248,23 +271,30 @@ class LoadImagesAndLabels(Dataset):  # for training/testing
     return torch.stack(img, 0), torch.cat(label, 0), path, hw


-def letterbox(img, height=416, color=(127.5, 127.5, 127.5), mode='rect'):
+def letterbox(img, new_shape=416, color=(127.5, 127.5, 127.5), mode='auto'):
     # Resize a rectangular image to a 32 pixel multiple rectangle
-    shape = img.shape[:2]  # shape = [height, width]
-    ratio = float(height) / max(shape)  # ratio = old / new
-    new_shape = (round(shape[1] * ratio), round(shape[0] * ratio))  # new_shape = [width, height]
+    # https://github.com/ultralytics/yolov3/issues/232
+    shape = img.shape[:2]  # current shape [height, width]
+    if isinstance(new_shape, int):
+        ratio = float(new_shape) / max(shape)
+    else:
+        ratio = max(new_shape) / max(shape)  # ratio = new / old
+    new_unpad = (int(round(shape[1] * ratio)), int(round(shape[0] * ratio)))

-    # Select padding https://github.com/ultralytics/yolov3/issues/232
-    if mode is 'rect':  # rectangle
-        dw = np.mod(height - new_shape[0], 32) / 2  # width padding
-        dh = np.mod(height - new_shape[1], 32) / 2  # height padding
+    # Compute padding https://github.com/ultralytics/yolov3/issues/232
+    if mode is 'auto':  # minimum rectangle
+        dw = np.mod(new_shape - new_unpad[0], 32) / 2  # width padding
+        dh = np.mod(new_shape - new_unpad[1], 32) / 2  # height padding
     elif mode is 'square':  # square
-        dw = (height - new_shape[0]) / 2  # width padding
-        dh = (height - new_shape[1]) / 2  # height padding
+        dw = (new_shape - new_unpad[0]) / 2  # width padding
+        dh = (new_shape - new_unpad[1]) / 2  # height padding
+    elif mode is 'rect':  # rectangle
+        dw = (new_shape[1] - new_unpad[0]) / 2  # width padding
+        dh = (new_shape[0] - new_unpad[1]) / 2  # height padding

     top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
     left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
-    img = cv2.resize(img, new_shape, interpolation=cv2.INTER_AREA)  # resized, no border
+    img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_AREA)  # resized, no border
     img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # padded square
     return img, ratio, dw, dh
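
To make the three padding modes concrete, a minimal sketch of the resize-and-pad arithmetic, mirroring the function above on a hypothetical 720x1280 frame (the helper name pad_for is illustrative, not from the commit). 'auto' pads the short side only up to the next 32-multiple, 'square' pads out to a full new_shape x new_shape canvas, and 'rect' pads to an explicit [height, width]:

import numpy as np

def pad_for(new_shape, new_unpad, mode):
    # Per-side padding (dw, dh) after resizing to new_unpad = (width, height)
    if mode == 'auto':  # minimum rectangle
        dw = np.mod(new_shape - new_unpad[0], 32) / 2
        dh = np.mod(new_shape - new_unpad[1], 32) / 2
    elif mode == 'square':  # full square canvas
        dw = (new_shape - new_unpad[0]) / 2
        dh = (new_shape - new_unpad[1]) / 2
    elif mode == 'rect':  # explicit [height, width] target
        dw = (new_shape[1] - new_unpad[0]) / 2
        dh = (new_shape[0] - new_unpad[1]) / 2
    return dw, dh

shape = (720, 1280)       # source [height, width]
ratio = 416 / max(shape)  # scale the long side to 416
new_unpad = (round(shape[1] * ratio), round(shape[0] * ratio))  # (416, 234) as (width, height)

print(pad_for(416, new_unpad, 'auto'))                   # (0.0, 11.0): 234 -> 256 high
print(pad_for(416, new_unpad, 'square'))                 # (0.0, 91.0): 234 -> 416 high
print(pad_for(np.array([256, 416]), new_unpad, 'rect'))  # (0.0, 11.0): same as 'auto' here

At train time mode='rect' receives the per-batch shape computed in __init__, so padding stays minimal while every image in a batch lands on an identical canvas.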