diff --git a/test.py b/test.py
index 92c6c948..9ffa82e2 100644
--- a/test.py
+++ b/test.py
@@ -17,7 +17,8 @@ def test(cfg,
          conf_thres=0.001,
          nms_thres=0.5,
          save_json=False,
-         model=None):
+         model=None,
+         dataloader=None):
     # Initialize/load model and set device
     if model is None:
         device = torch_utils.select_device(opt.device, batch_size=batch_size)
@@ -46,13 +47,14 @@ def test(cfg,
     names = load_classes(data['names'])  # class names
 
     # Dataloader
-    dataset = LoadImagesAndLabels(test_path, img_size, batch_size)
-    batch_size = min(batch_size, len(dataset))
-    dataloader = DataLoader(dataset,
-                            batch_size=batch_size,
-                            num_workers=min([os.cpu_count(), batch_size if batch_size > 1 else 0, 16]),
-                            pin_memory=True,
-                            collate_fn=dataset.collate_fn)
+    if dataloader is None:
+        dataset = LoadImagesAndLabels(test_path, img_size, batch_size, rect=True)
+        batch_size = min(batch_size, len(dataset))
+        dataloader = DataLoader(dataset,
+                                batch_size=batch_size,
+                                num_workers=min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8]),
+                                pin_memory=True,
+                                collate_fn=dataset.collate_fn)
 
     seen = 0
     model.eval()
@@ -167,7 +169,7 @@ def test(cfg,
 
     # Save JSON
     if save_json and map and len(jdict):
-        imgIds = [int(Path(x).stem.split('_')[-1]) for x in dataset.img_files]
+        imgIds = [int(Path(x).stem.split('_')[-1]) for x in dataloader.dataset.img_files]
         with open('results.json', 'w') as file:
             json.dump(jdict, file)
 
diff --git a/train.py b/train.py
index 9aa6f683..b7d87f6f 100644
--- a/train.py
+++ b/train.py
@@ -72,6 +72,7 @@ def train():
     # Configure run
     data_dict = parse_data_cfg(data)
     train_path = data_dict['train']
+    test_path = data_dict['valid']
     nc = int(data_dict['classes'])  # number of classes
 
     # Remove previous results
@@ -187,19 +188,17 @@ def train():
         model.yolo_layers = model.module.yolo_layers  # move yolo layer indices to top level
 
     # Dataset
-    dataset = LoadImagesAndLabels(train_path,
-                                  img_size,
-                                  batch_size,
+    dataset = LoadImagesAndLabels(train_path, img_size, batch_size,
                                   augment=True,
                                   hyp=hyp,  # augmentation hyperparameters
                                   rect=opt.rect,  # rectangular training
                                   image_weights=opt.img_weights,
-                                  cache_labels=True if epochs > 10 else False,
-                                  cache_images=False if opt.prebias else opt.cache_images)
+                                  cache_labels=epochs > 10,
+                                  cache_images=opt.cache_images and not opt.prebias)
 
     # Dataloader
     batch_size = min(batch_size, len(dataset))
-    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 16])  # number of workers
+    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers
     print('Using %g dataloader workers' % nw)
     dataloader = torch.utils.data.DataLoader(dataset,
                                              batch_size=batch_size,
@@ -208,13 +207,23 @@ def train():
                                              pin_memory=True,
                                              collate_fn=dataset.collate_fn)
 
+    # Test Dataloader
+    if not opt.prebias:
+        testloader = torch.utils.data.DataLoader(LoadImagesAndLabels(test_path, img_size, batch_size, hyp=hyp,
+                                                                     cache_labels=True,
+                                                                     cache_images=opt.cache_images),
+                                                 batch_size=batch_size,
+                                                 num_workers=nw,
+                                                 pin_memory=True,
+                                                 collate_fn=dataset.collate_fn)
+
     # Start training
+    nb = len(dataloader)
     model.nc = nc  # attach number of classes to model
     model.arc = opt.arc  # attach yolo architecture
     model.hyp = hyp  # attach hyperparameters to model
     model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device)  # attach class weights
     torch_utils.model_info(model, report='summary')  # 'full' or 'summary'
-    nb = len(dataloader)
     maps = np.zeros(nc)  # mAP per class
     # torch.autograd.set_detect_anomaly(True)
     results = (0, 0, 0, 0, 0, 0, 0)  # 'P', 'R', 'mAP', 'F1', 'val GIoU', 'val Objectness', 'val Classification'
@@ -321,7 +330,8 @@ def train():
                                               img_size=opt.img_size,
                                               model=model,
                                               conf_thres=0.001 if final_epoch and epoch > 0 else 0.1,  # 0.1 for speed
-                                              save_json=final_epoch and epoch > 0 and 'coco.data' in data)
+                                              save_json=final_epoch and epoch > 0 and 'coco.data' in data,
+                                              dataloader=testloader)
 
         # Write epoch results
         with open(results_file, 'a') as f:
diff --git a/utils/datasets.py b/utils/datasets.py
index 97bf1234..0a4ad7c7 100755
--- a/utils/datasets.py
+++ b/utils/datasets.py
@@ -255,7 +255,7 @@ class LoadStreams:  # multiple IP or RTSP cameras
 
 
 class LoadImagesAndLabels(Dataset):  # for training/testing
-    def __init__(self, path, img_size=416, batch_size=16, augment=False, hyp=None, rect=True, image_weights=False,
+    def __init__(self, path, img_size=416, batch_size=16, augment=False, hyp=None, rect=False, image_weights=False,
                  cache_labels=False, cache_images=False):
         path = str(Path(path))  # os-agnostic
         with open(path, 'r') as f:
@@ -319,7 +319,7 @@ class LoadImagesAndLabels(Dataset):  # for training/testing
             self.labels = [np.zeros((0, 5))] * n
             extract_bounding_boxes = False
             create_datasubset = False
-            pbar = tqdm(self.label_files, desc='Reading labels')
+            pbar = tqdm(self.label_files, desc='Caching labels')
             nm, nf, ne, ns = 0, 0, 0, 0  # number missing, number found, number empty, number datasubset
             for i, file in enumerate(pbar):
                 try:
@@ -370,13 +370,17 @@ class LoadImagesAndLabels(Dataset):  # for training/testing
                     ne += 1  # print('empty labels for image %s' % self.img_files[i])  # file empty
                     # os.system("rm '%s' '%s'" % (self.img_files[i], self.label_files[i]))  # remove
 
-                pbar.desc = 'Reading labels (%g found, %g missing, %g empty for %g images)' % (nf, nm, ne, n)
+                pbar.desc = 'Caching labels (%g found, %g missing, %g empty for %g images)' % (nf, nm, ne, n)
             assert nf > 0, 'No labels found. Recommend correcting image and label paths.'
 
-        # Cache images into memory for faster training (~5GB)
-        if cache_images and augment:  # if training
-            for i in tqdm(range(min(len(self.img_files), 10000)), desc='Reading images'):  # max 10k images
+        # Cache images into memory for faster training (WARNING: Large datasets may exceed system RAM)
+        if cache_images:  # any dataset, no longer gated on augment
+            gb = 0  # bytes of cached images so far (reported in GB below)
+            pbar = tqdm(range(len(self.img_files)), desc='Caching images')
+            for i in pbar:  # every image file, no 10k cap
                 self.imgs[i] = load_image(self, i)
+                gb += self.imgs[i].nbytes
+                pbar.desc = 'Caching images (%.1fGB)' % (gb / 1E9)
 
         # Detect corrupted images https://medium.com/joelthchao/programmatically-detect-corrupted-image-8c1b2006c3d3
         detect_corrupted_images = False
@@ -503,10 +507,10 @@ def load_image(self, index):
         img_path = self.img_files[index]
         img = cv2.imread(img_path)  # BGR
         assert img is not None, 'Image Not Found ' + img_path
-        r = self.img_size / max(img.shape)  # size ratio
-        if self.augment:  # if training (NOT testing), downsize to inference shape
+        r = self.img_size / max(img.shape)  # resize ratio: img_size over the largest image dimension
+        if (r < 1) or ((r > 1) and self.augment):  # always resize down, only resize up if training with augmentation
             h, w = img.shape[:2]
-            img = cv2.resize(img, (int(w * r), int(h * r)), interpolation=cv2.INTER_LINEAR)  # _LINEAR fastest
+            return cv2.resize(img, (int(w * r), int(h * r)), interpolation=cv2.INTER_LINEAR)  # _LINEAR fastest
     return img
 
 
@@ -569,13 +573,11 @@ def load_mosaic(self, index):
     # Concat/clip labels
     if len(labels4):
         labels4 = np.concatenate(labels4, 0)
-        np.clip(labels4[:, 1:], 0, 2 * s, out=labels4[:, 1:])  # use before random_affine
-        # np.clip(labels4[:, 1:], s / 2, 1.5 * s, out=labels4[:, 1:])
-        # labels4[:, 1:] -= s / 2
-
-    # img4 = img4[s // 2: int(s * 1.5), s // 2:int(s * 1.5)]
+        # np.clip(labels4[:, 1:] - s / 2, 0, s, out=labels4[:, 1:])  # use with center crop
+        np.clip(labels4[:, 1:], 0, 2 * s, out=labels4[:, 1:])  # use with random_affine
 
     # Augment
+    # img4 = img4[s // 2: int(s * 1.5), s // 2:int(s * 1.5)]  # center crop (WARNING, requires box pruning)
     img4, labels4 = random_affine(img4, labels4,
                                   degrees=self.hyp['degrees'],
                                   translate=self.hyp['translate'],