From 4816969933728afe44f05d4bfb95fdb2cb193b95 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Sat, 20 Jul 2019 14:54:37 +0200 Subject: [PATCH] updates --- train.py | 21 ++++++++++++++------- utils/datasets.py | 32 ++++++++++++++++++-------------- 2 files changed, 32 insertions(+), 21 deletions(-) diff --git a/train.py b/train.py index 87946dc6..b8f98a53 100644 --- a/train.py +++ b/train.py @@ -33,11 +33,17 @@ hyp = {'giou': 1.153, # giou loss gain 'cls_pw': 3.05, # cls BCELoss positive_weight 'obj': 20.93, # obj loss gain 'obj_pw': 2.842, # obj BCELoss positive_weight - 'iou_t': 0.2759, # iou target-anchor training threshold + 'iou_t': 0.2759, # iou training threshold 'lr0': 0.001357, # initial learning rate - 'lrf': -4., # final learning rate = lr0 * (10 ** lrf) + 'lrf': -4., # final LambdaLR learning rate = lr0 * (10 ** lrf) 'momentum': 0.916, # SGD momentum - 'weight_decay': 0.000572} # optimizer weight decay + 'weight_decay': 0.000572, # optimizer weight decay + 'hsv_s': 0.5, # image HSV-Saturation augmentation (fraction) + 'hsv_v': 0.5, # image HSV-Value augmentation (fraction) + 'degrees': 10, # image rotation (+/- deg) + 'translate': 0.1, # image translation (+/- fraction) + 'scale': 0.1, # image scale (+/- gain) + 'shear': 2} # image shear (+/- deg) # # Training hyperparameters e @@ -50,7 +56,7 @@ hyp = {'giou': 1.153, # giou loss gain # 'obj_pw': 2.634, # obj BCELoss positive_weight # 'iou_t': 0.273, # iou target-anchor training threshold # 'lr0': 0.001542, # initial learning rate -# 'lrf': -4., # final learning rate = lr0 * (10 ** lrf) +# 'lrf': -4., # final LambdaLR learning rate = lr0 * (10 ** lrf) # 'momentum': 0.8364, # SGD momentum # 'weight_decay': 0.0008393} # optimizer weight decay @@ -149,6 +155,7 @@ def train(cfg, img_size, batch_size, augment=True, + hyp=hyp, # augmentation hyperparameters rect=opt.rect) # rectangular training # Initialize distributed training @@ -375,14 +382,14 @@ if __name__ == '__main__': # Mutate init_seeds(seed=int(time.time())) - s = [.15, .15, .15, .15, .15, .15, .15, .15, .15, .00, .05, .10] # fractional sigmas + s = [.15, .15, .15, .15, .15, .15, .15, .15, .15, .00, .05, .10, .15, .15, .15, .15, .15, .15] # sigmas for i, k in enumerate(hyp.keys()): x = (np.random.randn(1) * s[i] + 1) ** 2.0 # plt.hist(x.ravel(), 300) hyp[k] *= float(x) # vary by 20% 1sigma # Clip to limits - keys = ['lr0', 'iou_t', 'momentum', 'weight_decay'] - limits = [(1e-4, 1e-2), (0.00, 0.70), (0.60, 0.95), (0, 0.01)] + keys = ['lr0', 'iou_t', 'momentum', 'weight_decay', 'hsv_s', 'hsv_v', 'translate', 'scale'] + limits = [(1e-4, 1e-2), (0.00, 0.70), (0.60, 0.95), (0, 0.001), (0, .8), (0, .8), (0, .8), (0, .8)] for k, v in zip(keys, limits): hyp[k] = np.clip(hyp[k], v[0], v[1]) diff --git a/utils/datasets.py b/utils/datasets.py index 12545a8a..305271b0 100755 --- a/utils/datasets.py +++ b/utils/datasets.py @@ -152,7 +152,7 @@ class LoadWebcam: # for inference class LoadImagesAndLabels(Dataset): # for training/testing - def __init__(self, path, img_size=416, batch_size=16, augment=False, rect=True, image_weights=False): + def __init__(self, path, img_size=416, batch_size=16, augment=False, hyp=None, rect=True, image_weights=False): with open(path, 'r') as f: img_files = f.read().splitlines() self.img_files = [x for x in img_files if os.path.splitext(x)[-1].lower() in img_formats] @@ -166,6 +166,7 @@ class LoadImagesAndLabels(Dataset): # for training/testing self.batch = bi # batch index of image self.img_size = img_size self.augment = augment + self.hyp = hyp self.image_weights = image_weights self.rect = False if image_weights else rect @@ -271,6 +272,7 @@ class LoadImagesAndLabels(Dataset): # for training/testing img_path = self.img_files[index] label_path = self.label_files[index] + hyp = self.hyp # Load image img = self.imgs[index] @@ -289,13 +291,12 @@ class LoadImagesAndLabels(Dataset): # for training/testing augment_hsv = True if self.augment and augment_hsv: # SV augmentation by 50% - fraction = 0.50 # must be < 1.0 img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV) # hue, sat, val S = img_hsv[:, :, 1].astype(np.float32) # saturation V = img_hsv[:, :, 2].astype(np.float32) # value - a = random.uniform(-1, 1) * fraction + 1 - b = random.uniform(-1, 1) * fraction + 1 + a = random.uniform(-1, 1) * hyp['hsv_s'] + 1 + b = random.uniform(-1, 1) * hyp['hsv_v'] + 1 S *= a V *= b @@ -331,7 +332,11 @@ class LoadImagesAndLabels(Dataset): # for training/testing # Augment image and labels if self.augment: - img, labels = random_affine(img, labels, degrees=(-3, 3), translate=(0.05, 0.05), scale=(0.90, 1.10)) + img, labels = random_affine(img, labels, + degrees=hyp['degrees'], + translate=hyp['translate'], + scale=hyp['scale'], + shear=hyp['shear']) nL = len(labels) # number of labels if nL: @@ -410,8 +415,7 @@ def letterbox(img, new_shape=416, color=(128, 128, 128), mode='auto'): return img, ratiow, ratioh, dw, dh -def random_affine(img, targets=(), degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-2, 2), - borderValue=(128, 128, 128)): +def random_affine(img, targets=(), degrees=10, translate=.1, scale=.1, shear=10): # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10)) # https://medium.com/uruvideo/dataset-augmentation-with-random-homographies-a8f4b44830d4 @@ -423,24 +427,24 @@ def random_affine(img, targets=(), degrees=(-10, 10), translate=(.1, .1), scale= # Rotation and Scale R = np.eye(3) - a = random.uniform(degrees[0], degrees[1]) + a = random.uniform(-degrees, degrees) # a += random.choice([-180, -90, 0, 90]) # add 90deg rotations to small rotations - s = random.uniform(scale[0], scale[1]) + s = random.uniform(1 - scale, 1 + scale) R[:2] = cv2.getRotationMatrix2D(angle=a, center=(img.shape[1] / 2, img.shape[0] / 2), scale=s) # Translation T = np.eye(3) - T[0, 2] = random.uniform(-1, 1) * translate[0] * img.shape[0] + border # x translation (pixels) - T[1, 2] = random.uniform(-1, 1) * translate[1] * img.shape[1] + border # y translation (pixels) + T[0, 2] = random.uniform(-translate, translate) * img.shape[0] + border # x translation (pixels) + T[1, 2] = random.uniform(-translate, translate) * img.shape[1] + border # y translation (pixels) # Shear S = np.eye(3) - S[0, 1] = math.tan(random.uniform(shear[0], shear[1]) * math.pi / 180) # x shear (deg) - S[1, 0] = math.tan(random.uniform(shear[0], shear[1]) * math.pi / 180) # y shear (deg) + S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # x shear (deg) + S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # y shear (deg) M = S @ T @ R # Combined rotation matrix. ORDER IS IMPORTANT HERE!! imw = cv2.warpAffine(img, M[:2], dsize=(width, height), flags=cv2.INTER_AREA, - borderValue=borderValue) # BGR order borderValue + borderValue=(128, 128, 128)) # BGR order borderValue # Return warped points also if len(targets) > 0: