car-detection-bayes/utils/datasets.py

304 lines
11 KiB
Python
Raw Normal View History

2018-08-26 08:51:39 +00:00
import glob
import math
import os
import random
import shutil
from pathlib import Path
2018-08-26 08:51:39 +00:00
import cv2
import numpy as np
import torch
2019-03-21 20:41:12 +00:00
from torch.utils.data import Dataset
from tqdm import tqdm
2018-08-26 08:51:39 +00:00
from utils.utils import xyxy2xywh
2019-02-11 11:44:12 +00:00
class LoadImages:  # for inference
    """Iterate over image files on disk, yielding network-ready arrays.

    ``path`` may be a directory (every file directly inside it whose
    extension is .jpg, .jpeg, .png or .tif, case-insensitive, is used, in
    sorted order) or a single file (used as-is, without an extension check).

    Each iteration step yields ``(img_path, img, img0)`` where ``img0`` is the
    image as returned by ``cv2.imread`` (BGR) and ``img`` is the letterboxed
    copy converted to RGB, channel-first, float32, scaled to 0.0 - 1.0.

    Raises:
        AssertionError: if no image is found at ``path`` (including when
            ``path`` does not exist), or if an image cannot be read.
    """

    def __init__(self, path, img_size=416):
        image_format = ['.jpg', '.jpeg', '.png', '.tif']
        if os.path.isdir(path):
            candidates = sorted(glob.glob('%s/*.*' % path))
            self.files = [f for f in candidates if os.path.splitext(f)[1].lower() in image_format]
        elif os.path.isfile(path):
            self.files = [path]
        else:
            # Neither a directory nor a file: leave the list empty so the
            # assertion below reports the problem instead of an AttributeError.
            self.files = []

        self.nF = len(self.files)  # number of image files
        self.height = img_size

        assert self.nF > 0, 'No images found in %s' % path

    def __iter__(self):
        self.count = -1
        return self

    def __next__(self):
        self.count += 1
        # '>=' keeps the iterator exhausted if __next__ is called again after
        # StopIteration, rather than indexing past the end of the file list.
        if self.count >= self.nF:
            raise StopIteration
        img_path = self.files[self.count]

        # Read image
        img0 = cv2.imread(img_path)  # BGR
        assert img0 is not None, 'File Not Found ' + img_path

        # Padded resize
        img, _, _, _ = letterbox(img0, height=self.height)

        # Normalize RGB
        img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, HWC to CHW
        img = np.ascontiguousarray(img, dtype=np.float32)  # uint8 to float32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0

        # cv2.imwrite(img_path + '.letterbox.jpg', 255 * img.transpose((1, 2, 0))[:, :, ::-1])  # save letterbox image
        return img_path, img, img0

    def __len__(self):
        return self.nF  # number of files
2018-08-26 08:51:39 +00:00
2019-02-11 12:45:04 +00:00
class LoadWebcam:  # for inference
    """Endless iterator over frames grabbed from capture device 0.

    Every step yields ``(img_path, img, img0)``: a synthetic file name built
    from the frame counter, the letterboxed RGB channel-first float32 frame
    scaled to 0.0 - 1.0, and the horizontally flipped BGR frame it was made
    from. Iteration stops when ``cv2.waitKey`` reports key code 27 (esc).
    ``len()`` is always 0 because the number of frames is not known.
    """

    def __init__(self, img_size=416):
        self.height = img_size
        self.cam = cv2.VideoCapture(0)

    def __iter__(self):
        self.count = -1
        return self

    def __next__(self):
        self.count += 1

        esc_key = 27
        if cv2.waitKey(1) == esc_key:  # esc to quit
            cv2.destroyAllWindows()
            raise StopIteration

        # Grab the next frame from the camera
        grabbed, frame = self.cam.read()
        assert grabbed, 'Webcam Error'
        frame_name = 'webcam_%g.jpg' % self.count
        mirrored = cv2.flip(frame, 1)  # flip left-right

        # Padded resize; only the image of the returned tuple is needed here
        boxed = letterbox(mirrored, height=self.height)[0]

        # BGR to RGB and HWC to CHW, then uint8 to float32 in 0.0 - 1.0
        chw = np.ascontiguousarray(boxed[:, :, ::-1].transpose(2, 0, 1), dtype=np.float32)
        chw /= 255.0

        return frame_name, chw, mirrored

    def __len__(self):
        return 0
2019-02-11 12:45:04 +00:00
2019-03-21 20:41:12 +00:00
class LoadImagesAndLabels(Dataset):  # for training/testing
    """Dataset of images plus their label files, for training and testing.

    ``path`` is a text file listing one image path per line (empty lines are
    ignored). The label file for an image is found by replacing ``images``
    with ``labels`` in its path and swapping the file extension for ``.txt``.

    Args:
        path: text file listing the image paths.
        img_size: side length of the square letterboxed image.
        augment: when True, apply random HSV, affine and left-right flip
            augmentation in ``__getitem__``.

    Raises:
        AssertionError: if the list file contains no image paths, or an image
            cannot be read in ``__getitem__``.
    """

    def __init__(self, path, img_size=416, augment=False):
        with open(path, 'r') as file:
            self.img_files = [line for line in file.read().splitlines() if len(line) > 0]

        assert len(self.img_files) > 0, 'No images found in %s' % path
        self.img_size = img_size
        self.augment = augment
        # Swap only the real extension for '.txt'. Chained str.replace() calls
        # would rewrite '.jpg'/'.png'/'.bmp' anywhere in the path and would
        # leave other extensions (.jpeg, .tif, upper-case) untouched, so the
        # "label" path could end up pointing at the image itself.
        self.label_files = [os.path.splitext(x.replace('images', 'labels'))[0] + '.txt'
                            for x in self.img_files]

    def __len__(self):
        return len(self.img_files)

    def __getitem__(self, index):
        """Load one sample.

        Returns:
            Tuple ``(img, labels_out, img_path, (h, w))``: the image as a
            float32 tensor (RGB, channel-first, 0.0 - 1.0), an ``(nL, 6)``
            tensor whose column 0 is left at zero (filled in by
            ``collate_fn``) and whose columns 1: hold the label rows, the
            image path, and the image height/width before letterboxing.
        """
        img_path = self.img_files[index]
        label_path = self.label_files[index]

        img = cv2.imread(img_path)  # BGR
        assert img is not None, 'File Not Found ' + img_path

        augment_hsv = True
        if self.augment and augment_hsv:
            # SV augmentation by 50%
            fraction = 0.50  # must be < 1.0
            img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
            S = img_hsv[:, :, 1].astype(np.float32)
            V = img_hsv[:, :, 2].astype(np.float32)

            # Gain lies in [1 - fraction, 1 + fraction]; only gains above 1
            # can push values past 255, so clipping is needed only then.
            a = (random.random() * 2 - 1) * fraction + 1
            S *= a
            if a > 1:
                np.clip(S, None, 255, out=S)

            a = (random.random() * 2 - 1) * fraction + 1
            V *= a
            if a > 1:
                np.clip(V, None, 255, out=V)

            img_hsv[:, :, 1] = S  # .astype(np.uint8)
            img_hsv[:, :, 2] = V  # .astype(np.uint8)
            cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img)

        h, w, _ = img.shape
        img, ratio, padw, padh = letterbox(img, height=self.img_size)

        # Load labels
        labels = []
        if os.path.isfile(label_path):
            with open(label_path, 'r') as file:
                lines = file.read().splitlines()

            # Skip blank lines: an empty row would make the list ragged and
            # np.array() would raise instead of building an (n, 5) array.
            x = np.array([line.split() for line in lines if line.strip()], dtype=np.float32)
            if x.size > 0:
                # Normalized xywh to pixel xyxy format (in the letterboxed image)
                labels = x.copy()
                labels[:, 1] = ratio * w * (x[:, 1] - x[:, 3] / 2) + padw
                labels[:, 2] = ratio * h * (x[:, 2] - x[:, 4] / 2) + padh
                labels[:, 3] = ratio * w * (x[:, 1] + x[:, 3] / 2) + padw
                labels[:, 4] = ratio * h * (x[:, 2] + x[:, 4] / 2) + padh

        # Augment image and labels
        if self.augment:
            img, labels = random_affine(img, labels, degrees=(-5, 5), translate=(0.10, 0.10), scale=(0.90, 1.10))

        nL = len(labels)  # number of labels
        if nL:
            # convert xyxy to xywh, normalized by the letterboxed image size
            labels[:, 1:5] = xyxy2xywh(labels[:, 1:5]) / self.img_size

        if self.augment:
            # random left-right flip
            lr_flip = True
            if lr_flip and random.random() > 0.5:
                img = np.fliplr(img)
                if nL:
                    labels[:, 1] = 1 - labels[:, 1]

            # random up-down flip (disabled)
            ud_flip = False
            if ud_flip and random.random() > 0.5:
                img = np.flipud(img)
                if nL:
                    labels[:, 2] = 1 - labels[:, 2]

        labels_out = torch.zeros((nL, 6))
        if nL:
            labels_out[:, 1:] = torch.from_numpy(labels)

        # Normalize
        img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
        img = np.ascontiguousarray(img, dtype=np.float32)  # uint8 to float32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0

        return torch.from_numpy(img), labels_out, img_path, (h, w)

    @staticmethod
    def collate_fn(batch):
        """Merge samples into a batch, writing each sample's batch index into label column 0."""
        img, label, path, hw = list(zip(*batch))  # transposed
        for i, l in enumerate(label):
            l[:, 0] = i  # add target image index for build_targets()
        return torch.stack(img, 0), torch.cat(label, 0), path, hw
2018-08-26 08:51:39 +00:00
2019-03-31 17:57:44 +00:00
def letterbox(img, height=416, color=(127.5, 127.5, 127.5)):
    """Resize a rectangular image so its longer side equals ``height`` and pad it to a square.

    Args:
        img: image array whose first two dimensions are (rows, columns).
        height: side length of the output square.
        color: constant value used for the padding border.

    Returns:
        Tuple ``(padded, ratio, dw, dh)``: the padded image, the resize factor
        applied to the source, and the (unrounded) padding added on each side
        horizontally and vertically.
    """
    src_h, src_w = img.shape[:2]
    ratio = float(height) / max(src_h, src_w)  # scale factor, new size / old size

    # Size after resizing, before any border is added
    resized_w = round(src_w * ratio)
    resized_h = round(src_h * ratio)

    dw = (height - resized_w) / 2  # width padding
    dh = (height - resized_h) / 2  # height padding

    # The -0.1 / +0.1 offsets split an odd total padding into two whole-pixel
    # borders that together still add up to the full amount.
    pad_top, pad_bottom = round(dh - 0.1), round(dh + 0.1)
    pad_left, pad_right = round(dw - 0.1), round(dw + 0.1)

    resized = cv2.resize(img, (resized_w, resized_h), interpolation=cv2.INTER_AREA)  # resized, no border
    padded = cv2.copyMakeBorder(resized, pad_top, pad_bottom, pad_left, pad_right,
                                cv2.BORDER_CONSTANT, value=color)  # padded square
    return padded, ratio, dw, dh
2019-02-10 19:32:04 +00:00
2018-08-26 08:51:39 +00:00
def random_affine(img, targets=(), degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-2, 2),
                  borderValue=(127.5, 127.5, 127.5)):
    """Apply a random rotation, scale, translation and shear to an image and its boxes.

    Args:
        img: image array whose first two dimensions are (rows, columns).
        targets: label rows with pixel ``x1, y1, x2, y2`` in columns 1:5;
            ``None`` or an empty sequence means there are no boxes.
        degrees: (min, max) rotation angle in degrees.
        translate: maximum (x, y) shift as a fraction of image width / height.
        scale: (min, max) scale factor.
        shear: (min, max) shear angle in degrees, drawn separately for x and y.
        borderValue: fill value for pixels exposed by the warp (BGR order).

    Returns:
        Tuple ``(imw, targets)``: the warped square image, whose side is the
        longer side of ``img``, and the targets with warped boxes. Boxes that
        end up too small, too thin, or that lose most of their area are
        dropped.
    """
    # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10))
    # https://medium.com/uruvideo/dataset-augmentation-with-random-homographies-a8f4b44830d4

    if targets is None:
        targets = []
    border = 0  # width of added border (optional)
    height = max(img.shape[0], img.shape[1]) + border * 2

    # Rotation and Scale
    R = np.eye(3)
    a = random.random() * (degrees[1] - degrees[0]) + degrees[0]
    # a += random.choice([-180, -90, 0, 90])  # 90deg rotations added to small rotations
    s = random.random() * (scale[1] - scale[0]) + scale[0]
    R[:2] = cv2.getRotationMatrix2D(angle=a, center=(img.shape[1] / 2, img.shape[0] / 2), scale=s)

    # Translation. img.shape is (rows, cols): the x shift scales with the
    # width (shape[1]) and the y shift with the height (shape[0]). Using them
    # the other way round only works for square images.
    T = np.eye(3)
    T[0, 2] = (random.random() * 2 - 1) * translate[0] * img.shape[1] + border  # x translation (pixels)
    T[1, 2] = (random.random() * 2 - 1) * translate[1] * img.shape[0] + border  # y translation (pixels)

    # Shear
    S = np.eye(3)
    S[0, 1] = math.tan((random.random() * (shear[1] - shear[0]) + shear[0]) * math.pi / 180)  # x shear (deg)
    S[1, 0] = math.tan((random.random() * (shear[1] - shear[0]) + shear[0]) * math.pi / 180)  # y shear (deg)

    M = S @ T @ R  # Combined rotation matrix. ORDER IS IMPORTANT HERE!!
    imw = cv2.warpPerspective(img, M, dsize=(height, height), flags=cv2.INTER_LINEAR,
                              borderValue=borderValue)  # BGR order borderValue

    # Return warped points also
    if len(targets) > 0:
        n = targets.shape[0]
        points = targets[:, 1:5].copy()
        area0 = (points[:, 2] - points[:, 0]) * (points[:, 3] - points[:, 1])

        # warp the four corners of every box
        xy = np.ones((n * 4, 3))
        xy[:, :2] = points[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(n * 4, 2)  # x1y1, x2y2, x1y2, x2y1
        xy = (xy @ M.T)[:, :2].reshape(n, 8)

        # create new axis-aligned boxes around the warped corners
        x = xy[:, [0, 2, 4, 6]]
        y = xy[:, [1, 3, 5, 7]]
        xy = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T

        # apply angle-based reduction of bounding boxes (shrink about the box centre)
        radians = a * math.pi / 180
        reduction = max(abs(math.sin(radians)), abs(math.cos(radians))) ** 0.5
        x = (xy[:, 2] + xy[:, 0]) / 2
        y = (xy[:, 3] + xy[:, 1]) / 2
        w = (xy[:, 2] - xy[:, 0]) * reduction
        h = (xy[:, 3] - xy[:, 1]) * reduction
        xy = np.concatenate((x - w / 2, y - h / 2, x + w / 2, y + h / 2)).reshape(4, n).T

        # reject warped points outside of image
        np.clip(xy, 0, height, out=xy)
        w = xy[:, 2] - xy[:, 0]
        h = xy[:, 3] - xy[:, 1]
        area = w * h
        ar = np.maximum(w / (h + 1e-16), h / (w + 1e-16))
        # keep boxes wider and taller than 4 px, with more than 10% of their
        # original area left and an aspect ratio below 10
        i = (w > 4) & (h > 4) & (area / (area0 + 1e-16) > 0.1) & (ar < 10)

        targets = targets[i]
        targets[:, 1:5] = xy[i]

    return imw, targets
def convert_images2bmp():
    """Write *.bmp copies of the COCO 2014 jpg images and matching image-list files.

    For each hard-coded image directory, a sibling ``<name>bmp`` directory is
    recreated from scratch and every jpg in the source directory is re-saved
    there as bmp. The two hard-coded list files are then rewritten to
    ``*5k_bmp.txt`` copies that point at the bmp images.
    """
    # cv2.imread() jpg at 230 img/s, *.bmp at 400 img/s
    image_dirs = ('../coco/images/val2014/', '../coco/images/train2014/')
    for src_dir in image_dirs:
        folder = os.sep + Path(src_dir).name
        bmp_folder = folder + 'bmp'
        dst_dir = src_dir.replace(folder, bmp_folder)

        # Start from an empty output folder
        if os.path.exists(dst_dir):
            shutil.rmtree(dst_dir)  # delete output folder
        os.makedirs(dst_dir)  # make new output folder

        jpg_files = glob.glob('%s*.jpg' % src_dir)
        for jpg in tqdm(jpg_files):
            bmp = jpg.replace('.jpg', '.bmp').replace(folder, bmp_folder)
            cv2.imwrite(bmp, cv2.imread(jpg))

    # Rewrite the image lists so they reference the bmp copies
    substitutions = (
        ('2014/', '2014bmp/'),
        ('.jpg', '.bmp'),
        ('/Users/glennjocher/PycharmProjects/', '../'),
    )
    for list_path in ('../coco/trainvalno5k.txt', '../coco/5k.txt'):
        with open(list_path, 'r') as file:
            text = file.read()
        for old, new in substitutions:
            text = text.replace(old, new)
        with open(list_path.replace('5k', '5k_bmp'), 'w') as file:
            file.write(text)