car-detection-bayes/utils/datasets.py

import glob
import math
import os
import random
import shutil
from pathlib import Path

import cv2
import numpy as np
import torch
from torch.utils.data import Dataset
from tqdm import tqdm

from utils.utils import xyxy2xywh


class LoadImages:  # for inference
    """Iterate over image and video files for inference.

    ``path`` may be a directory (every ``*.*`` entry directly inside it is
    considered) or a single file. A file is kept only when its lower-cased
    extension is a supported image or video format. Images are served first,
    followed by the frames of each video in turn.

    Each iteration returns ``(path, img, img0, cap)``:
        path: path of the file the frame came from
        img:  letterboxed frame, contiguous float32, channel-first with the
              channel order reversed (BGR to RGB), scaled to 0.0 - 1.0
        img0: the frame exactly as read by OpenCV
        cap:  the current ``cv2.VideoCapture`` (``None`` if no video was found)

    Raises:
        AssertionError: if no supported file is found, or an image cannot be read.
    """

    def __init__(self, path, img_size=416):
        self.height = img_size
        img_formats = ['.jpg', '.jpeg', '.png', '.tif']
        vid_formats = ['.mov', '.avi', '.mp4']

        files = []
        if os.path.isdir(path):
            files = sorted(glob.glob('%s/*.*' % path))
        elif os.path.isfile(path):
            files = [path]

        images = [x for x in files if os.path.splitext(x)[-1].lower() in img_formats]
        videos = [x for x in files if os.path.splitext(x)[-1].lower() in vid_formats]
        nI, nV = len(images), len(videos)

        self.files = images + videos
        self.nF = nI + nV  # number of files
        self.video_flag = [False] * nI + [True] * nV
        self.mode = 'images'
        self._cap_used = False  # True once a frame has been requested from self.cap
        if videos:
            self.new_video(videos[0])  # new video
        else:
            self.cap = None
        assert self.nF > 0, 'No images or videos found in ' + path

    def __iter__(self):
        self.count = 0
        # A previous pass may have advanced or released the capture; reopen the
        # first video so that every pass starts from the same state.
        if self.cap is not None and self._cap_used:
            self.cap.release()
            self.new_video(self.files[self.video_flag.index(True)])
        return self

    def __next__(self):
        if self.count == self.nF:
            raise StopIteration
        path = self.files[self.count]

        if self.video_flag[self.count]:
            # Read video
            self.mode = 'video'
            self._cap_used = True
            ret_val, img0 = self.cap.read()
            # Loop (rather than a single retry) so that a video which yields no
            # frame at all is skipped instead of passing None to letterbox().
            while not ret_val:
                self.count += 1
                self.cap.release()
                if self.count == self.nF:  # last video
                    raise StopIteration
                path = self.files[self.count]
                self.new_video(path)
                self._cap_used = True
                ret_val, img0 = self.cap.read()

            self.frame += 1
            print('video %g/%g (%g/%g) %s: ' % (self.count + 1, self.nF, self.frame, self.nframes, path), end='')

        else:
            # Read image
            self.mode = 'images'
            self.count += 1
            img0 = cv2.imread(path)  # BGR
            assert img0 is not None, 'File Not Found ' + path
            print('image %g/%g %s: ' % (self.count, self.nF, path), end='')

        # Padded resize
        img, _, _, _ = letterbox(img0, height=self.height)

        # Normalize RGB
        img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB
        img = np.ascontiguousarray(img, dtype=np.float32)  # uint8 to float32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0

        # cv2.imwrite(path + '.letterbox.jpg', 255 * img.transpose((1, 2, 0))[:, :, ::-1])  # save letterbox image
        return path, img, img0, self.cap

    def new_video(self, path):
        """Open ``path`` as the current video and reset the per-video frame counter."""
        self.frame = 0
        self._cap_used = False
        self.cap = cv2.VideoCapture(path)
        self.nframes = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))

    def __len__(self):
        return self.nF  # number of files


class LoadWebcam:  # for inference
    """Endless frame source backed by capture device 0.

    Every step grabs one frame, mirrors it left-right, letterboxes it to
    ``img_size`` and returns ``(img_path, img, img0)`` where ``img_path`` is a
    synthetic ``webcam_<n>.jpg`` name, ``img`` is the contiguous float32
    channel-first frame scaled to 0.0 - 1.0 and ``img0`` is the mirrored frame.
    Iteration stops when ``cv2.waitKey`` reports the Esc key (code 27).
    """

    def __init__(self, img_size=416):
        self.cam = cv2.VideoCapture(0)
        self.height = img_size

    def __iter__(self):
        # Starts at -1 because __next__ increments before using the counter.
        self.count = -1
        return self

    def __next__(self):
        self.count += 1
        pressed = cv2.waitKey(1)
        if pressed == 27:  # esc to quit
            cv2.destroyAllWindows()
            raise StopIteration

        # Grab one frame from the device
        ok, frame = self.cam.read()
        assert ok, 'Webcam Error'
        frame_name = 'webcam_%g.jpg' % self.count
        frame = cv2.flip(frame, 1)  # mirror left-right

        # Padded resize; only the image part of letterbox()'s result is needed
        boxed, _, _, _ = letterbox(frame, height=self.height)

        # Reverse channel order (BGR to RGB), go channel-first, scale to 0.0 - 1.0
        tensor_like = boxed[:, :, ::-1].transpose(2, 0, 1)
        tensor_like = np.ascontiguousarray(tensor_like, dtype=np.float32)
        tensor_like /= 255.0

        return frame_name, tensor_like, frame

    def __len__(self):
        # The stream has no predetermined length.
        return 0


class LoadImagesAndLabels(Dataset):  # for training/testing
    """Dataset of images with box labels, read from a text file of image paths.

    Args:
        path: text file listing one image path per line (blank lines ignored).
        img_size: side length of the square letterboxed output image.
        augment: when true, apply HSV jitter, a random affine warp and a random
            left-right flip to each sample.

    The label file of an image is found by replacing ``images`` with ``labels``
    in its path and swapping the file extension for ``.txt``. Each non-blank
    label line holds whitespace-separated numbers; columns 1-4 are read as a
    normalized centre-x, centre-y, width, height box, and column 0 is passed
    through unchanged (presumably the class id - confirm against the caller).

    Raises:
        AssertionError: if the list file names no images, or an image cannot be read.
    """

    def __init__(self, path, img_size=416, augment=False):
        with open(path, 'r') as file:
            self.img_files = [line for line in file.read().splitlines() if len(line) > 0]
        assert len(self.img_files) > 0, 'No images found in %s' % path
        self.img_size = img_size
        self.augment = augment
        # splitext() swaps whatever extension the image has (.jpeg, .tif, .JPG, ...)
        # instead of only the three spellings a chained str.replace() would catch.
        self.label_files = [os.path.splitext(x.replace('images', 'labels'))[0] + '.txt'
                            for x in self.img_files]

    def __len__(self):
        return len(self.img_files)

    def __getitem__(self, index):
        """Return ``(img, labels_out, img_path, (h, w))`` for one sample.

        ``img`` is a float32 channel-first tensor scaled to 0.0 - 1.0,
        ``labels_out`` is an ``(nL, 6)`` tensor whose column 0 is left at zero
        for ``collate_fn`` to fill and whose remaining columns are the label row
        with the box as xywh divided by ``img_size``; ``(h, w)`` is the image
        size before letterboxing.
        """
        img_path = self.img_files[index]
        label_path = self.label_files[index]

        img = cv2.imread(img_path)  # BGR
        assert img is not None, 'File Not Found ' + img_path

        augment_hsv = True
        if self.augment and augment_hsv:
            # SV augmentation by 50%
            fraction = 0.50  # must be < 1.0
            img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
            for channel in (1, 2):  # saturation, then value (one random gain each)
                plane = img_hsv[:, :, channel].astype(np.float32)
                gain = (random.random() * 2 - 1) * fraction + 1
                plane *= gain
                if gain > 1:  # only a gain above 1 can push values past 255
                    np.clip(plane, None, 255, out=plane)
                img_hsv[:, :, channel] = plane
            cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img)  # written back into img

        h, w, _ = img.shape
        img, ratio, padw, padh = letterbox(img, height=self.img_size)

        # Load labels
        labels = []
        if os.path.isfile(label_path):
            with open(label_path, 'r') as file:
                # Skip blank lines: they would make the rows ragged and break np.array()
                rows = [line.split() for line in file.read().splitlines() if line.strip()]

            x = np.array(rows, dtype=np.float32)
            if x.size > 0:
                # Normalized xywh to pixel xyxy format (in the letterboxed image)
                labels = x.copy()
                labels[:, 1] = ratio * w * (x[:, 1] - x[:, 3] / 2) + padw
                labels[:, 2] = ratio * h * (x[:, 2] - x[:, 4] / 2) + padh
                labels[:, 3] = ratio * w * (x[:, 1] + x[:, 3] / 2) + padw
                labels[:, 4] = ratio * h * (x[:, 2] + x[:, 4] / 2) + padh

        # Augment image and labels
        if self.augment:
            img, labels = random_affine(img, labels, degrees=(-5, 5), translate=(0.10, 0.10), scale=(0.90, 1.10))

        nL = len(labels)  # number of labels
        if nL:
            # convert xyxy to xywh, normalized by the output image size
            labels[:, 1:5] = xyxy2xywh(labels[:, 1:5]) / self.img_size

        if self.augment:
            # random left-right flip
            lr_flip = True
            if lr_flip and random.random() > 0.5:
                img = np.fliplr(img)
                if nL:
                    labels[:, 1] = 1 - labels[:, 1]

            # random up-down flip
            ud_flip = False
            if ud_flip and random.random() > 0.5:
                img = np.flipud(img)
                if nL:
                    labels[:, 2] = 1 - labels[:, 2]

        labels_out = torch.zeros((nL, 6))
        if nL:
            labels_out[:, 1:] = torch.from_numpy(labels)

        # Normalize
        img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
        img = np.ascontiguousarray(img, dtype=np.float32)  # uint8 to float32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0

        return torch.from_numpy(img), labels_out, img_path, (h, w)

    @staticmethod
    def collate_fn(batch):
        """Stack images and concatenate labels, tagging each label row with its image index.

        Column 0 of every label tensor is overwritten in place with the
        position of its image in the batch.
        """
        img, label, path, hw = list(zip(*batch))  # transposed
        for i, l in enumerate(label):
            l[:, 0] = i  # add target image index for build_targets()
        return torch.stack(img, 0), torch.cat(label, 0), path, hw


def letterbox(img, height=416, color=(127.5, 127.5, 127.5)):
    """Resize a rectangular image to fit a ``height`` x ``height`` square and pad the rest.

    The longer side is scaled to ``height`` (aspect ratio kept) and the shorter
    side is padded on both ends with ``color``.

    Returns:
        (img, ratio, dw, dh): the padded image, the scale factor applied
        (``height`` divided by the longer source side, i.e. new / old) and the
        possibly fractional padding per side along the width and the height.
    """
    src_h, src_w = img.shape[:2]  # shape = [height, width]
    ratio = float(height) / max(src_h, src_w)
    new_w, new_h = round(src_w * ratio), round(src_h * ratio)

    dw = (height - new_w) / 2  # width padding
    dh = (height - new_h) / 2  # height padding

    # The +/-0.1 nudge splits an odd total padding as floor/ceil between the two sides.
    top = round(dh - 0.1)
    bottom = round(dh + 0.1)
    left = round(dw - 0.1)
    right = round(dw + 0.1)

    resized = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_AREA)  # resized, no border
    padded = cv2.copyMakeBorder(resized, top, bottom, left, right, cv2.BORDER_CONSTANT,
                                value=color)  # padded square
    return padded, ratio, dw, dh


def random_affine(img, targets=(), degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-2, 2),
                  borderValue=(127.5, 127.5, 127.5)):
    """Apply one random rotation/scale/translation/shear to an image and its boxes.

    Args:
        img: image array; the output canvas is square with side ``max(rows, cols)``.
        targets: empty sequence / ``None``, or an array whose columns 1-4 are
            pixel ``x1, y1, x2, y2`` boxes (other columns are carried through).
        degrees: (min, max) rotation angle in degrees.
        translate: (x, y) maximum shift as a fraction of image width / height.
        scale: (min, max) isotropic scale factor.
        shear: (min, max) shear angle in degrees, drawn separately for x and y.
        borderValue: fill colour for areas uncovered by the warp (BGR order).

    Returns:
        (imw, targets): the warped image and the surviving targets with their
        boxes replaced by the warped, clipped boxes. Boxes that end up too
        small, too thin or mostly outside the image are dropped.
    """
    # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10))
    # https://medium.com/uruvideo/dataset-augmentation-with-random-homographies-a8f4b44830d4

    if targets is None:
        targets = []
    border = 0  # width of added border (optional)
    img_h, img_w = img.shape[0], img.shape[1]
    height = max(img_h, img_w) + border * 2

    # Rotation and Scale
    R = np.eye(3)
    a = random.random() * (degrees[1] - degrees[0]) + degrees[0]
    # a += random.choice([-180, -90, 0, 90])  # 90deg rotations added to small rotations
    s = random.random() * (scale[1] - scale[0]) + scale[0]
    R[:2] = cv2.getRotationMatrix2D(angle=a, center=(img_w / 2, img_h / 2), scale=s)

    # Translation: the x shift scales with the width (columns) and the y shift
    # with the height (rows); identical for square input, correct for any other.
    T = np.eye(3)
    T[0, 2] = (random.random() * 2 - 1) * translate[0] * img_w + border  # x translation (pixels)
    T[1, 2] = (random.random() * 2 - 1) * translate[1] * img_h + border  # y translation (pixels)

    # Shear
    S = np.eye(3)
    S[0, 1] = math.tan((random.random() * (shear[1] - shear[0]) + shear[0]) * math.pi / 180)  # x shear (deg)
    S[1, 0] = math.tan((random.random() * (shear[1] - shear[0]) + shear[0]) * math.pi / 180)  # y shear (deg)

    M = S @ T @ R  # Combined rotation matrix. ORDER IS IMPORTANT HERE!!
    imw = cv2.warpPerspective(img, M, dsize=(height, height), flags=cv2.INTER_LINEAR,
                              borderValue=borderValue)  # BGR order borderValue

    # Return warped points also
    if len(targets) > 0:
        n = targets.shape[0]
        points = targets[:, 1:5].copy()
        area0 = (points[:, 2] - points[:, 0]) * (points[:, 3] - points[:, 1])

        # warp the four corners of every box with the same matrix as the image
        xy = np.ones((n * 4, 3))
        xy[:, :2] = points[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(n * 4, 2)  # x1y1, x2y2, x1y2, x2y1
        xy = (xy @ M.T)[:, :2].reshape(n, 8)

        # create new axis-aligned boxes enclosing the warped corners
        x = xy[:, [0, 2, 4, 6]]
        y = xy[:, [1, 3, 5, 7]]
        xy = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T

        # apply angle-based reduction of bounding boxes (shrinks them about their centre)
        radians = a * math.pi / 180
        reduction = max(abs(math.sin(radians)), abs(math.cos(radians))) ** 0.5
        x = (xy[:, 2] + xy[:, 0]) / 2
        y = (xy[:, 3] + xy[:, 1]) / 2
        w = (xy[:, 2] - xy[:, 0]) * reduction
        h = (xy[:, 3] - xy[:, 1]) * reduction
        xy = np.concatenate((x - w / 2, y - h / 2, x + w / 2, y + h / 2)).reshape(4, n).T

        # reject warped points outside of image
        np.clip(xy, 0, height, out=xy)
        w = xy[:, 2] - xy[:, 0]
        h = xy[:, 3] - xy[:, 1]
        area = w * h
        ar = np.maximum(w / (h + 1e-16), h / (w + 1e-16))
        # keep boxes wider and taller than 4 px, retaining > 10% of their area, aspect ratio < 10
        i = (w > 4) & (h > 4) & (area / (area0 + 1e-16) > 0.1) & (ar < 10)

        targets = targets[i]
        targets[:, 1:5] = xy[i]

    return imw, targets


def convert_images2bmp():
    """Convert the COCO 2014 jpg folders to bmp copies and write matching image lists.

    For each source folder a sibling ``<name>bmp`` folder is recreated from
    scratch (any existing one is deleted) and filled with a ``.bmp`` copy of
    every ``*.jpg``. The two image-list files are then rewritten as
    ``*5k_bmp.txt`` pointing at the new folders and extensions.
    """
    # cv2.imread() jpg at 230 img/s, *.bmp at 400 img/s
    for path in ['../coco/images/val2014/', '../coco/images/train2014/']:
        # Build the output folder from the path components rather than via
        # str.replace(os.sep + name): where os.sep is not '/', that replace is a
        # no-op, output equals path, and rmtree() below would delete the source.
        src = Path(path)
        output = str(src.with_name(src.name + 'bmp'))
        if os.path.exists(output):
            shutil.rmtree(output)  # delete output folder
        os.makedirs(output)  # make new output folder

        for f in tqdm(glob.glob('%s*.jpg' % path)):
            save_name = os.path.join(output, Path(f).stem + '.bmp')
            cv2.imwrite(save_name, cv2.imread(f))

    for label_path in ['../coco/trainvalno5k.txt', '../coco/5k.txt']:
        with open(label_path, 'r') as file:
            lines = file.read()
        lines = lines.replace('2014/', '2014bmp/').replace('.jpg', '.bmp').replace(
            '/Users/glennjocher/PycharmProjects/', '../')
        with open(label_path.replace('5k', '5k_bmp'), 'w') as file:
            file.write(lines)
Initial commit 2018-08-26 08:51:39 +00:00			`import glob`
			`import math`
			`import os`
			`import random`
Add collate_fn() to DataLoader (#163) Multi-GPU update with custom collate function to allow variable size target vector per image without needing to pad targets. 2019-03-25 13:59:38 +00:00			`import shutil`
			`from pathlib import Path`
Initial commit 2018-08-26 08:51:39 +00:00
			`import cv2`
			`import numpy as np`
			`import torch`
updates 2019-03-21 20:41:12 +00:00			`from torch.utils.data import Dataset`
Add collate_fn() to DataLoader (#163) Multi-GPU update with custom collate function to allow variable size target vector per image without needing to pad targets. 2019-03-25 13:59:38 +00:00			`from tqdm import tqdm`
Initial commit 2018-08-26 08:51:39 +00:00
			`from utils.utils import xyxy2xywh`


class labeling corrections 2019-02-11 11:44:12 +00:00			`class LoadImages: # for inference`
updates 2019-02-08 21:43:05 +00:00			`def __init__(self, path, img_size=416):`
Initial commit 2018-08-26 08:51:39 +00:00			`self.height = img_size`
updates 2019-04-02 11:43:18 +00:00			`img_formats = ['.jpg', '.jpeg', '.png', '.tif']`
			`vid_formats = ['.mov', '.avi', '.mp4']`
updates 2018-09-02 09:26:56 +00:00
updates 2019-04-02 11:43:18 +00:00			`files = []`
			`if os.path.isdir(path):`
			`files = sorted(glob.glob('%s/.' % path))`
			`elif os.path.isfile(path):`
			`files = [path]`

			`images = [x for x in files if os.path.splitext(x)[-1].lower() in img_formats]`
			`videos = [x for x in files if os.path.splitext(x)[-1].lower() in vid_formats]`
updates 2019-04-02 13:09:13 +00:00			`nI, nV = len(images), len(videos)`

updates 2019-04-02 11:43:18 +00:00			`self.files = images + videos`
updates 2019-04-02 13:09:13 +00:00			`self.nF = nI + nV # number of files`
			`self.video_flag = [False] * nI + [True] * nV`
updates 2019-04-02 11:43:18 +00:00			`self.mode = 'images'`
			`if any(videos):`
			`self.new_video(videos[0]) # new video`
			`else:`
			`self.cap = None`
			`assert self.nF > 0, 'No images or videos found in ' + path`
Initial commit 2018-08-26 08:51:39 +00:00
			`def __iter__(self):`
updates 2019-04-02 11:43:18 +00:00			`self.count = 0`
Initial commit 2018-08-26 08:51:39 +00:00			`return self`

			`def __next__(self):`
updates 2019-02-08 21:43:05 +00:00			`if self.count == self.nF:`
Initial commit 2018-08-26 08:51:39 +00:00			`raise StopIteration`
updates 2019-04-02 11:43:18 +00:00			`path = self.files[self.count]`

			`if self.video_flag[self.count]:`
updates 2019-04-02 13:09:13 +00:00			`# Read video`
updates 2019-04-02 11:43:18 +00:00			`self.mode = 'video'`
			`ret_val, img0 = self.cap.read()`
			`if not ret_val:`
			`self.count += 1`
			`self.cap.release()`
			`if self.count == self.nF: # last video`
			`raise StopIteration`
			`else:`
			`path = self.files[self.count]`
			`self.new_video(path)`
			`ret_val, img0 = self.cap.read()`

			`self.frame += 1`
			`print('video %g/%g (%g/%g) %s: ' % (self.count + 1, self.nF, self.frame, self.nframes, path), end='')`

			`else:`
			`# Read image`
			`self.count += 1`
			`img0 = cv2.imread(path) # BGR`
			`assert img0 is not None, 'File Not Found ' + path`
			`print('image %g/%g %s: ' % (self.count, self.nF, path), end='')`
Initial commit 2018-08-26 08:51:39 +00:00
			`# Padded resize`
updates 2019-02-11 11:40:14 +00:00			`img, _, _, _ = letterbox(img0, height=self.height)`
Initial commit 2018-08-26 08:51:39 +00:00
			`# Normalize RGB`
updates 2019-03-20 17:30:10 +00:00			`img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB`
			`img = np.ascontiguousarray(img, dtype=np.float32) # uint8 to float32`
			`img /= 255.0 # 0 - 255 to 0.0 - 1.0`
Initial commit 2018-08-26 08:51:39 +00:00
updates 2019-04-02 11:43:18 +00:00			`# cv2.imwrite(path + '.letterbox.jpg', 255 * img.transpose((1, 2, 0))[:, :, ::-1]) # save letterbox image`
			`return path, img, img0, self.cap`

			`def new_video(self, path):`
			`self.frame = 0`
			`self.cap = cv2.VideoCapture(path)`
			`self.nframes = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))`
Initial commit 2018-08-26 08:51:39 +00:00
			`def __len__(self):`
updates 2019-02-08 21:43:05 +00:00			`return self.nF # number of files`
Initial commit 2018-08-26 08:51:39 +00:00

class labeling corrections 2019-02-11 12:45:04 +00:00			`class LoadWebcam: # for inference`
updates 2019-02-11 16:25:32 +00:00			`def __init__(self, img_size=416):`
class labeling corrections 2019-02-11 12:45:04 +00:00			`self.cam = cv2.VideoCapture(0)`
			`self.height = img_size`

			`def __iter__(self):`
			`self.count = -1`
			`return self`

			`def __next__(self):`
			`self.count += 1`
			`if cv2.waitKey(1) == 27: # esc to quit`
			`cv2.destroyAllWindows()`
			`raise StopIteration`

			`# Read image`
			`ret_val, img0 = self.cam.read()`
			`assert ret_val, 'Webcam Error'`
			`img_path = 'webcam_%g.jpg' % self.count`
updates 2019-03-20 18:31:09 +00:00			`img0 = cv2.flip(img0, 1) # flip left-right`
class labeling corrections 2019-02-11 12:45:04 +00:00
			`# Padded resize`
			`img, _, _, _ = letterbox(img0, height=self.height)`

			`# Normalize RGB`
updates 2019-03-20 17:30:10 +00:00			`img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB`
			`img = np.ascontiguousarray(img, dtype=np.float32) # uint8 to float32`
			`img /= 255.0 # 0 - 255 to 0.0 - 1.0`
class labeling corrections 2019-02-11 12:45:04 +00:00
			`return img_path, img, img0`

			`def __len__(self):`
webcam updates 2019-02-11 17:15:51 +00:00			`return 0`
class labeling corrections 2019-02-11 12:45:04 +00:00

updates 2019-03-21 20:41:12 +00:00			`class LoadImagesAndLabels(Dataset): # for training/testing`
			`def __init__(self, path, img_size=416, augment=False):`
Initial commit 2018-08-26 08:51:39 +00:00			`with open(path, 'r') as file:`
updates 2019-03-20 23:57:16 +00:00			`self.img_files = file.read().splitlines()`
updates 2019-02-18 13:03:39 +00:00			`self.img_files = list(filter(lambda x: len(x) > 0, self.img_files))`
updates 2019-03-21 20:41:12 +00:00			`assert len(self.img_files) > 0, 'No images found in %s' % path`
multi_gpu multi_scale 2019-03-19 08:38:32 +00:00			`self.img_size = img_size`
updates 2018-09-02 09:38:39 +00:00			`self.augment = augment`
updates 2019-03-31 17:57:44 +00:00			`self.label_files = [`
			`x.replace('images', 'labels').replace('.bmp', '.txt').replace('.jpg', '.txt').replace('.png', '.txt')`
			`for x in self.img_files]`
updates 2018-09-02 09:26:56 +00:00
updates 2019-03-21 20:41:12 +00:00			`def __len__(self):`
			`return len(self.img_files)`
Initial commit 2018-08-26 08:51:39 +00:00
updates 2019-03-20 23:57:16 +00:00			`def __getitem__(self, index):`
updates 2019-03-21 20:41:12 +00:00			`img_path = self.img_files[index]`
			`label_path = self.label_files[index]`

			`img = cv2.imread(img_path) # BGR`
			`assert img is not None, 'File Not Found ' + img_path`

			`augment_hsv = True`
			`if self.augment and augment_hsv:`
			`# SV augmentation by 50%`
updates 2019-03-31 17:57:44 +00:00			`fraction = 0.50 # must be < 1.0`
updates 2019-03-21 20:41:12 +00:00			`img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)`
			`S = img_hsv[:, :, 1].astype(np.float32)`
			`V = img_hsv[:, :, 2].astype(np.float32)`

			`a = (random.random() * 2 - 1) * fraction + 1`
			`S *= a`
			`if a > 1:`
updates 2019-03-31 17:57:44 +00:00			`np.clip(S, None, 255, out=S)`
updates 2019-03-21 20:41:12 +00:00
			`a = (random.random() * 2 - 1) * fraction + 1`
			`V *= a`
			`if a > 1:`
updates 2019-03-31 17:57:44 +00:00			`np.clip(V, None, 255, out=V)`
updates 2019-03-21 20:41:12 +00:00
updates 2019-03-31 17:57:44 +00:00			`img_hsv[:, :, 1] = S # .astype(np.uint8)`
			`img_hsv[:, :, 2] = V # .astype(np.uint8)`
updates 2019-03-21 20:41:12 +00:00			`cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img)`

			`h, w, _ = img.shape`
			`img, ratio, padw, padh = letterbox(img, height=self.img_size)`

			`# Load labels`
Add collate_fn() to DataLoader (#163) Multi-GPU update with custom collate function to allow variable size target vector per image without needing to pad targets. 2019-03-25 13:59:38 +00:00			`labels = []`
updates 2019-03-21 20:41:12 +00:00			`if os.path.isfile(label_path):`
			`with open(label_path, 'r') as file:`
			`lines = file.read().splitlines()`
Update datasets.py 2019-03-22 12:52:58 +00:00
			`x = np.array([x.split() for x in lines], dtype=np.float32)`
Add collate_fn() to DataLoader (#163) Multi-GPU update with custom collate function to allow variable size target vector per image without needing to pad targets. 2019-03-25 13:59:38 +00:00			`if x.size > 0:`
Empty label file may cause index error 2019-03-22 10:59:09 +00:00			`# Normalized xywh to pixel xyxy format`
Update datasets.py 2019-03-22 12:52:58 +00:00			`labels = x.copy()`
			`labels[:, 1] = ratio * w * (x[:, 1] - x[:, 3] / 2) + padw`
			`labels[:, 2] = ratio * h * (x[:, 2] - x[:, 4] / 2) + padh`
			`labels[:, 3] = ratio * w * (x[:, 1] + x[:, 3] / 2) + padw`
			`labels[:, 4] = ratio * h * (x[:, 2] + x[:, 4] / 2) + padh`
updates 2019-03-21 20:41:12 +00:00
			`# Augment image and labels`
			`if self.augment:`
Add collate_fn() to DataLoader (#163) Multi-GPU update with custom collate function to allow variable size target vector per image without needing to pad targets. 2019-03-25 13:59:38 +00:00			`img, labels = random_affine(img, labels, degrees=(-5, 5), translate=(0.10, 0.10), scale=(0.90, 1.10))`
updates 2019-03-21 20:41:12 +00:00
Add collate_fn() to DataLoader (#163) Multi-GPU update with custom collate function to allow variable size target vector per image without needing to pad targets. 2019-03-25 13:59:38 +00:00			`nL = len(labels) # number of labels`
			`if nL:`
updates 2019-03-21 20:41:12 +00:00			`# convert xyxy to xywh`
updates 2019-03-22 12:56:43 +00:00			`labels[:, 1:5] = xyxy2xywh(labels[:, 1:5]) / self.img_size`
updates 2019-03-21 20:41:12 +00:00
			`if self.augment:`
			`# random left-right flip`
			`lr_flip = True`
Add collate_fn() to DataLoader (#163) Multi-GPU update with custom collate function to allow variable size target vector per image without needing to pad targets. 2019-03-25 13:59:38 +00:00			`if lr_flip and random.random() > 0.5:`
updates 2019-03-21 20:41:12 +00:00			`img = np.fliplr(img)`
Add collate_fn() to DataLoader (#163) Multi-GPU update with custom collate function to allow variable size target vector per image without needing to pad targets. 2019-03-25 13:59:38 +00:00			`if nL:`
updates 2019-03-21 20:41:12 +00:00			`labels[:, 1] = 1 - labels[:, 1]`

			`# random up-down flip`
			`ud_flip = False`
Add collate_fn() to DataLoader (#163) Multi-GPU update with custom collate function to allow variable size target vector per image without needing to pad targets. 2019-03-25 13:59:38 +00:00			`if ud_flip and random.random() > 0.5:`
updates 2019-03-21 20:41:12 +00:00			`img = np.flipud(img)`
Add collate_fn() to DataLoader (#163) Multi-GPU update with custom collate function to allow variable size target vector per image without needing to pad targets. 2019-03-25 13:59:38 +00:00			`if nL:`
updates 2019-03-21 20:41:12 +00:00			`labels[:, 2] = 1 - labels[:, 2]`

Add collate_fn() to DataLoader (#163) Multi-GPU update with custom collate function to allow variable size target vector per image without needing to pad targets. 2019-03-25 13:59:38 +00:00			`labels_out = torch.zeros((nL, 6))`
			`if nL:`
			`labels_out[:, 1:] = torch.from_numpy(labels)`
multi_thread dataloader 2019-03-21 12:48:40 +00:00
updates 2019-03-21 20:41:12 +00:00			`# Normalize`
Add collate_fn() to DataLoader (#163) Multi-GPU update with custom collate function to allow variable size target vector per image without needing to pad targets. 2019-03-25 13:59:38 +00:00			`img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416`
updates 2019-03-21 20:41:12 +00:00			`img = np.ascontiguousarray(img, dtype=np.float32) # uint8 to float32`
			`img /= 255.0 # 0 - 255 to 0.0 - 1.0`
Initial commit 2018-08-26 08:51:39 +00:00
Add collate_fn() to DataLoader (#163) Multi-GPU update with custom collate function to allow variable size target vector per image without needing to pad targets. 2019-03-25 13:59:38 +00:00			`return torch.from_numpy(img), labels_out, img_path, (h, w)`

			`@staticmethod`
			`def collate_fn(batch):`
			`img, label, path, hw = list(zip(*batch)) # transposed`
			`for i, l in enumerate(label):`
			`l[:, 0] = i # add target image index for build_targets()`
			`return torch.stack(img, 0), torch.cat(label, 0), path, hw`
Initial commit 2018-08-26 08:51:39 +00:00

updates 2019-03-31 17:57:44 +00:00			`def letterbox(img, height=416, color=(127.5, 127.5, 127.5)):`
			`# Resize a rectangular image to a padded square`
Initial commit 2018-08-26 08:51:39 +00:00			`shape = img.shape[:2] # shape = [height, width]`
updates 2018-09-02 09:38:39 +00:00			`ratio = float(height) / max(shape) # ratio = old / new`
updates 2019-02-10 19:32:04 +00:00			`new_shape = (round(shape[1] * ratio), round(shape[0] * ratio))`
updates 2019-02-10 20:23:58 +00:00			`dw = (height - new_shape[0]) / 2 # width padding`
			`dh = (height - new_shape[1]) / 2 # height padding`
			`top, bottom = round(dh - 0.1), round(dh + 0.1)`
			`left, right = round(dw - 0.1), round(dw + 0.1)`
updates 2019-02-10 19:32:04 +00:00			`img = cv2.resize(img, new_shape, interpolation=cv2.INTER_AREA) # resized, no border`
updates 2019-02-10 20:23:58 +00:00			`img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # padded square`
			`return img, ratio, dw, dh`
updates 2019-02-10 19:32:04 +00:00
Initial commit 2018-08-26 08:51:39 +00:00
Add collate_fn() to DataLoader (#163) Multi-GPU update with custom collate function to allow variable size target vector per image without needing to pad targets. 2019-03-25 13:59:38 +00:00			`def random_affine(img, targets=(), degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-2, 2),`
updates 2018-08-26 15:09:10 +00:00			`borderValue=(127.5, 127.5, 127.5)):`
Initial commit 2018-08-26 08:51:39 +00:00			`# torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10))`
			`# https://medium.com/uruvideo/dataset-augmentation-with-random-homographies-a8f4b44830d4`

Add collate_fn() to DataLoader (#163) Multi-GPU update with custom collate function to allow variable size target vector per image without needing to pad targets. 2019-03-25 13:59:38 +00:00			`if targets is None:`
			`targets = []`
Initial commit 2018-08-26 08:51:39 +00:00			`border = 0 # width of added border (optional)`
			`height = max(img.shape[0], img.shape[1]) + border * 2`

			`# Rotation and Scale`
			`R = np.eye(3)`
			`a = random.random() * (degrees[1] - degrees[0]) + degrees[0]`
updates 2018-09-02 09:38:39 +00:00			`# a += random.choice([-180, -90, 0, 90]) # 90deg rotations added to small rotations`
Initial commit 2018-08-26 08:51:39 +00:00			`s = random.random() * (scale[1] - scale[0]) + scale[0]`
			`R[:2] = cv2.getRotationMatrix2D(angle=a, center=(img.shape[1] / 2, img.shape[0] / 2), scale=s)`

			`# Translation`
			`T = np.eye(3)`
			`T[0, 2] = (random.random() * 2 - 1) * translate[0] * img.shape[0] + border # x translation (pixels)`
			`T[1, 2] = (random.random() * 2 - 1) * translate[1] * img.shape[1] + border # y translation (pixels)`

			`# Shear`
			`S = np.eye(3)`
			`S[0, 1] = math.tan((random.random() * (shear[1] - shear[0]) + shear[0]) * math.pi / 180) # x shear (deg)`
			`S[1, 0] = math.tan((random.random() * (shear[1] - shear[0]) + shear[0]) * math.pi / 180) # y shear (deg)`

updates 2018-09-02 09:38:39 +00:00			`M = S @ T @ R # Combined rotation matrix. ORDER IS IMPORTANT HERE!!`
Initial commit 2018-08-26 08:51:39 +00:00			`imw = cv2.warpPerspective(img, M, dsize=(height, height), flags=cv2.INTER_LINEAR,`
updates 2018-09-02 09:38:39 +00:00			`borderValue=borderValue) # BGR order borderValue`
Initial commit 2018-08-26 08:51:39 +00:00
			`# Return warped points also`
Add collate_fn() to DataLoader (#163) Multi-GPU update with custom collate function to allow variable size target vector per image without needing to pad targets. 2019-03-25 13:59:38 +00:00			`if len(targets) > 0:`
			`n = targets.shape[0]`
			`points = targets[:, 1:5].copy()`
			`area0 = (points[:, 2] - points[:, 0]) * (points[:, 3] - points[:, 1])`

			`# warp points`
			`xy = np.ones((n * 4, 3))`
			`xy[:, :2] = points[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(n * 4, 2) # x1y1, x2y2, x1y2, x2y1`
			`xy = (xy @ M.T)[:, :2].reshape(n, 8)`

			`# create new boxes`
			`x = xy[:, [0, 2, 4, 6]]`
			`y = xy[:, [1, 3, 5, 7]]`
			`xy = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T`

updates 2019-03-31 17:57:44 +00:00			`# apply angle-based reduction of bounding boxes`
Add collate_fn() to DataLoader (#163) Multi-GPU update with custom collate function to allow variable size target vector per image without needing to pad targets. 2019-03-25 13:59:38 +00:00			`radians = a * math.pi / 180`
			`reduction = max(abs(math.sin(radians)), abs(math.cos(radians))) ** 0.5`
			`x = (xy[:, 2] + xy[:, 0]) / 2`
			`y = (xy[:, 3] + xy[:, 1]) / 2`
			`w = (xy[:, 2] - xy[:, 0]) * reduction`
			`h = (xy[:, 3] - xy[:, 1]) * reduction`
			`xy = np.concatenate((x - w / 2, y - h / 2, x + w / 2, y + h / 2)).reshape(4, n).T`

			`# reject warped points outside of image`
			`np.clip(xy, 0, height, out=xy)`
			`w = xy[:, 2] - xy[:, 0]`
			`h = xy[:, 3] - xy[:, 1]`
			`area = w * h`
			`ar = np.maximum(w / (h + 1e-16), h / (w + 1e-16))`
			`i = (w > 4) & (h > 4) & (area / (area0 + 1e-16) > 0.1) & (ar < 10)`

			`targets = targets[i]`
			`targets[:, 1:5] = xy[i]`

			`return imw, targets`


			`def convert_images2bmp():`
			`# cv2.imread() jpg at 230 img/s, *.bmp at 400 img/s`
			`for path in ['../coco/images/val2014/', '../coco/images/train2014/']:`
			`folder = os.sep + Path(path).name`
			`output = path.replace(folder, folder + 'bmp')`
			`if os.path.exists(output):`
			`shutil.rmtree(output) # delete output folder`
			`os.makedirs(output) # make new output folder`

			`for f in tqdm(glob.glob('%s*.jpg' % path)):`
			`save_name = f.replace('.jpg', '.bmp').replace(folder, folder + 'bmp')`
			`cv2.imwrite(save_name, cv2.imread(f))`

			`for label_path in ['../coco/trainvalno5k.txt', '../coco/5k.txt']:`
			`with open(label_path, 'r') as file:`
			`lines = file.read()`
			`lines = lines.replace('2014/', '2014bmp/').replace('.jpg', '.bmp').replace(`
			`'/Users/glennjocher/PycharmProjects/', '../')`
			`with open(label_path.replace('5k', '5k_bmp'), 'w') as file:`
			`file.write(lines)`