diff --git a/utils/datasets.py b/utils/datasets.py index 430d34e6..d0ee7660 100755 --- a/utils/datasets.py +++ b/utils/datasets.py @@ -379,12 +379,13 @@ class LoadImagesAndLabels(Dataset): # for training/testing nf, nm, ne, nd, n) assert nf > 0, 'No labels found. See %s' % help_url - # Cache images into memory for faster training (WARNING: Large datasets may exceed system RAM) + # Cache images into memory for faster training (WARNING: large datasets may exceed system RAM) if cache_images: # if training gb = 0 # Gigabytes of cached images pbar = tqdm(range(len(self.img_files)), desc='Caching images') + self.img_hw0, self.img_hw = [None] * n, [None] * n for i in pbar: # max 10k images - self.imgs[i] = load_image(self, i) + self.imgs[i], self.img_hw0[i], self.img_hw[i] = load_image(self, i) # img, hw_original, hw_resized gb += self.imgs[i].nbytes pbar.desc = 'Caching images (%.1fGB)' % (gb / 1E9) @@ -419,15 +420,13 @@ class LoadImagesAndLabels(Dataset): # for training/testing if mosaic: # Load mosaic img, labels = load_mosaic(self, index) - h, w = img.shape[:2] - ratio, pad = None, None + h0, w0, ratio, pad = None, None, None, None else: # Load image - img = load_image(self, index) + img, (h0, w0), (h, w) = load_image(self, index) # Letterbox - h, w = img.shape[:2] shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size # final letterboxed shape img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment) @@ -495,7 +494,7 @@ class LoadImagesAndLabels(Dataset): # for training/testing img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416 img = np.ascontiguousarray(img) - return torch.from_numpy(img), labels_out, img_path, ((h, w), (ratio, pad)) + return torch.from_numpy(img), labels_out, img_path, ((h0, w0), (ratio, pad)) @staticmethod def collate_fn(batch): @@ -506,17 +505,18 @@ class LoadImagesAndLabels(Dataset): # for training/testing def load_image(self, index): - # loads 1 image from dataset - img = self.imgs[index] - if img is None: + # loads 1 image from dataset, returns img, original hw, resized hw + if self.imgs[index] is None: # not cached img_path = self.img_files[index] img = cv2.imread(img_path) # BGR assert img is not None, 'Image Not Found ' + img_path - r = self.img_size / max(img.shape) # resize image to img_size + h0, w0 = img.shape[:2] # orig hw + r = self.img_size / max(h0, w0) # resize image to img_size if self.augment and (r != 1): # always resize down, only resize up if training with augmentation - h, w = img.shape[:2] - return cv2.resize(img, (int(w * r), int(h * r)), interpolation=cv2.INTER_LINEAR) # _LINEAR fastest - return img + h, w = int(h0 * r), int(w0 * r) + return cv2.resize(img, (w, h), interpolation=cv2.INTER_LINEAR), (h0, w0), (h, w) # _LINEAR fastest + else: # cached + return self.imgs[index], self.img_hw0[index], self.img_hw[index] # img, hw def augment_hsv(img, hgain=0.5, sgain=0.5, vgain=0.5): @@ -535,8 +535,7 @@ def load_mosaic(self, index): indices = [index] + [random.randint(0, len(self.labels) - 1) for _ in range(3)] # 3 additional image indices for i, index in enumerate(indices): # Load image - img = load_image(self, index) - h, w, _ = img.shape + img, _, (h, w) = load_image(self, index) # place img in img4 if i == 0: # top left