diff --git a/.gitignore b/.gitignore
index ae46812c..997fba97 100755
--- a/.gitignore
+++ b/.gitignore
@@ -30,6 +30,7 @@ data/*
 !data/trainvalno5k.shapes
 !data/5k.shapes
 !data/5k.txt
+!data/*.sh
 
 pycocotools/*
 results*.txt
diff --git a/README.md b/README.md
index dee86399..001668ee 100755
--- a/README.md
+++ b/README.md
@@ -211,7 +211,6 @@ Computing mAP: 100%|████████████████████
 Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.331
 Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.517
 Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.618
-
 ```
 
 # Citation
diff --git a/data/get_coco_dataset_gdrive.sh b/data/get_coco_dataset_gdrive.sh
new file mode 100755
index 00000000..fabaad2c
--- /dev/null
+++ b/data/get_coco_dataset_gdrive.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+# https://stackoverflow.com/questions/48133080/how-to-download-a-google-drive-url-via-curl-or-wget/48133859
+
+# Download COCO dataset
+fileid="1HaXkef9z6y5l4vUnCYgdmEAj61c6bfWO"
+filename="coco_gdrive.zip"
+curl -c ./cookie -s -L "https://drive.google.com/uc?export=download&id=${fileid}" > /dev/null
+curl -Lb ./cookie "https://drive.google.com/uc?export=download&confirm=`awk '/download/ {print $NF}' ./cookie`&id=${fileid}" -o ${filename}
+
+# Unzip
+unzip -q coco_gdrive.zip
\ No newline at end of file
diff --git a/detect.py b/detect.py
index 192ef310..f33d39f5 100644
--- a/detect.py
+++ b/detect.py
@@ -127,20 +127,18 @@ if __name__ == '__main__':
     parser.add_argument('--img-size', type=int, default=416, help='inference size (pixels)')
     parser.add_argument('--conf-thres', type=float, default=0.5, help='object confidence threshold')
     parser.add_argument('--nms-thres', type=float, default=0.5, help='iou threshold for non-maximum suppression')
-    parser.add_argument('--fourcc', type=str, default='mp4v', help='specifies the fourcc code for output video encoding (make sure ffmpeg supports specified fourcc codec)')
-    parser.add_argument('--output', type=str, default='output',help='specifies the output path for images and videos')
+    parser.add_argument('--fourcc', type=str, default='mp4v', help='fourcc output video codec (verify ffmpeg support)')
+    parser.add_argument('--output', type=str, default='output', help='specifies the output path for images and videos')
     opt = parser.parse_args()
     print(opt)
 
     with torch.no_grad():
-        detect(
-            opt.cfg,
-            opt.data_cfg,
-            opt.weights,
-            images=opt.images,
-            img_size=opt.img_size,
-            conf_thres=opt.conf_thres,
-            nms_thres=opt.nms_thres,
-            fourcc=opt.fourcc,
-            output=opt.output
-        )
+        detect(opt.cfg,
+               opt.data_cfg,
+               opt.weights,
+               images=opt.images,
+               img_size=opt.img_size,
+               conf_thres=opt.conf_thres,
+               nms_thres=opt.nms_thres,
+               fourcc=opt.fourcc,
+               output=opt.output)
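Note on the new download script: Google Drive interposes a virus-scan confirmation page for files too large to scan, so `get_coco_dataset_gdrive.sh` makes two `curl` calls: the first stashes Drive's confirmation token in a cookie jar, the second replays it to receive the actual zip. A rough Python equivalent of the same flow (a sketch assuming the `requests` package; not part of this diff):

```python
# Sketch of the Google Drive confirm-token flow used by get_coco_dataset_gdrive.sh
import requests

def gdrive_download(file_id, destination):
    url = 'https://drive.google.com/uc?export=download'
    session = requests.Session()

    # First request: for large files Drive replies with a confirmation cookie
    response = session.get(url, params={'id': file_id}, stream=True)
    token = next((v for k, v in response.cookies.items()
                  if k.startswith('download_warning')), None)

    # Second request: replay the token to get the real payload
    if token:
        response = session.get(url, params={'id': file_id, 'confirm': token}, stream=True)
    with open(destination, 'wb') as f:
        for chunk in response.iter_content(chunk_size=32768):
            f.write(chunk)

gdrive_download('1HaXkef9z6y5l4vUnCYgdmEAj61c6bfWO', 'coco_gdrive.zip')
```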
diff --git a/models.py b/models.py
index 347264fd..b1717d99 100755
--- a/models.py
+++ b/models.py
@@ -15,6 +15,7 @@ def create_modules(module_defs):
     hyperparams = module_defs.pop(0)
     output_filters = [int(hyperparams['channels'])]
     module_list = nn.ModuleList()
+    yolo_index = -1
     for i, module_def in enumerate(module_defs):
         modules = nn.Sequential()
 
@@ -44,8 +45,7 @@ def create_modules(module_defs):
             modules.add_module('maxpool_%d' % i, maxpool)
 
         elif module_def['type'] == 'upsample':
-            # upsample = nn.Upsample(scale_factor=int(module_def['stride']), mode='nearest')  # WARNING: deprecated
-            upsample = Upsample(scale_factor=int(module_def['stride']))
+            upsample = nn.Upsample(scale_factor=int(module_def['stride']), mode='nearest')
             modules.add_module('upsample_%d' % i, upsample)
 
         elif module_def['type'] == 'route':
@@ -58,6 +58,7 @@ def create_modules(module_defs):
             modules.add_module('shortcut_%d' % i, EmptyLayer())
 
         elif module_def['type'] == 'yolo':
+            yolo_index += 1
            anchor_idxs = [int(x) for x in module_def['mask'].split(',')]
             # Extract anchors
             anchors = [float(x) for x in module_def['anchors'].split(',')]
@@ -66,8 +67,7 @@ def create_modules(module_defs):
             nc = int(module_def['classes'])  # number of classes
             img_size = hyperparams['height']
             # Define detection layer
-            yolo_layer = YOLOLayer(anchors, nc, img_size, cfg=hyperparams['cfg'])
-            modules.add_module('yolo_%d' % i, yolo_layer)
+            modules.add_module('yolo_%d' % i, YOLOLayer(anchors, nc, img_size, yolo_index))
 
         # Register module list and number of output filters
         module_list.append(modules)
@@ -86,20 +86,8 @@ class EmptyLayer(nn.Module):
         return x
 
 
-class Upsample(nn.Module):
-    # Custom Upsample layer (nn.Upsample gives deprecated warning message)
-
-    def __init__(self, scale_factor=1, mode='nearest'):
-        super(Upsample, self).__init__()
-        self.scale_factor = scale_factor
-        self.mode = mode
-
-    def forward(self, x):
-        return F.interpolate(x, scale_factor=self.scale_factor, mode=self.mode)
-
-
 class YOLOLayer(nn.Module):
-    def __init__(self, anchors, nc, img_size, cfg):
+    def __init__(self, anchors, nc, img_size, yolo_index):
         super(YOLOLayer, self).__init__()
 
         self.anchors = torch.Tensor(anchors)
@@ -109,7 +97,7 @@ class YOLOLayer(nn.Module):
         self.ny = 0  # initialize number of y gridpoints
 
         if ONNX_EXPORT:  # grids must be computed in __init__
-            stride = [32, 16, 8][yolo_layer]  # stride of this layer
+            stride = [32, 16, 8][yolo_index]  # stride of this layer
             nx = int(img_size[1] / stride)  # number x grid points
             ny = int(img_size[0] / stride)  # number y grid points
             create_grids(self, max(img_size), (nx, ny))
diff --git a/test.py b/test.py
index eb902c5f..eeaadf6f 100644
--- a/test.py
+++ b/test.py
@@ -201,14 +201,12 @@ if __name__ == '__main__':
     print(opt)
 
     with torch.no_grad():
-        mAP = test(
-            opt.cfg,
-            opt.data_cfg,
-            opt.weights,
-            opt.batch_size,
-            opt.img_size,
-            opt.iou_thres,
-            opt.conf_thres,
-            opt.nms_thres,
-            opt.save_json
-        )
+        mAP = test(opt.cfg,
+                   opt.data_cfg,
+                   opt.weights,
+                   opt.batch_size,
+                   opt.img_size,
+                   opt.iou_thres,
+                   opt.conf_thres,
+                   opt.nms_thres,
+                   opt.save_json)
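Two notes on the models.py hunks above: the custom `Upsample` wrapper is deleted and `nn.Upsample` is restored (the deprecation warning it was dodging no longer fires in nearest mode on current PyTorch), and each `YOLOLayer` now receives its ordinal `yolo_index`, which fixes the previously undefined `yolo_layer` name in the ONNX-export branch where `stride = [32, 16, 8][yolo_index]` is looked up. A quick standalone check (illustrative, not repo code) that the two upsample forms agree:

```python
import torch
import torch.nn as nn
import torch.nn.functional as F

# nn.Upsample and the removed F.interpolate wrapper produce identical output
x = torch.randn(1, 256, 13, 13)  # e.g. a 13x13 YOLOv3 feature map
up = nn.Upsample(scale_factor=2, mode='nearest')
assert torch.equal(up(x), F.interpolate(x, scale_factor=2, mode='nearest'))
print(up(x).shape)  # torch.Size([1, 256, 26, 26])
```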
diff --git a/train.py b/train.py
index bb6f7763..ea00fc68 100644
--- a/train.py
+++ b/train.py
@@ -11,27 +11,31 @@ from models import *
 from utils.datasets import *
 from utils.utils import *
 
-# Hyperparameters: train.py --data data/coco.data --img-size 320 --single-scale --batch-size 64 --accumulate 1 --epochs 1 --evolve 0.087 0.281 0.109 0.121
-hyp = {'giou': .035,  # giou loss gain
-       'xy': 0.20,  # xy loss gain
-       'wh': 0.10,  # wh loss gain
-       'cls': 0.035,  # cls loss gain
-       'cls_pw': 79.0,  # cls BCELoss positive_weight
-       'conf': 1.61,  # conf loss gain
-       'conf_pw': 3.53,  # conf BCELoss positive_weight
-       'iou_t': 0.29,  # iou target-anchor training threshold
+# 0.149 0.241 0.126 0.156 6.85 1.008 1.421 0.07989 16.94 6.215 10.61 4.272 0.251 0.001 -4 0.9 0.0005 320 64-1 giou
+# 0.111 0.27 0.132 0.131 3.96 1.276 0.3156 0.1425 21.21 6.224 11.59 8.83 0.376 0.001 -4 0.9 0.0005
+hyp = {'giou': 1.008,  # giou loss gain
+       'xy': 1.421,  # xy loss gain
+       'wh': 0.07989,  # wh loss gain
+       'cls': 16.94,  # cls loss gain
+       'cls_pw': 6.215,  # cls BCELoss positive_weight
+       'conf': 10.61,  # conf loss gain
+       'conf_pw': 4.272,  # conf BCELoss positive_weight
+       'iou_t': 0.251,  # iou target-anchor training threshold
        'lr0': 0.001,  # initial learning rate
        'lrf': -4.,  # final learning rate = lr0 * (10 ** lrf)
        'momentum': 0.90,  # SGD momentum
        'weight_decay': 0.0005}  # optimizer weight decay
 
-# hyp = {'giou': 1.0,  # giou loss gain
-#        'xy': 1.0,  # xy loss gain
-#        'wh': 1.0,  # wh loss gain
-#        'cls': 1.0,  # cls loss gain
+
+# 0.0945 0.279 0.114 0.131 25 0.035 0.2 0.1 0.035 79 1.61 3.53 0.29 0.001 -4 0.9 0.0005 320 64-1
+# 0.112 0.265 0.111 0.144 12.6 0.035 0.2 0.1 0.035 79 1.61 3.53 0.29 0.001 -4 0.9 0.0005 320 32-2
+# hyp = {'giou': .035,  # giou loss gain
+#        'xy': 0.20,  # xy loss gain
+#        'wh': 0.10,  # wh loss gain
+#        'cls': 0.035,  # cls loss gain
 #        'cls_pw': 79.0,  # cls BCELoss positive_weight
-#        'conf': 1.0,  # conf loss gain
-#        'conf_pw': 6.0,  # conf BCELoss positive_weight
+#        'conf': 1.61,  # conf loss gain
+#        'conf_pw': 3.53,  # conf BCELoss positive_weight
 #        'iou_t': 0.29,  # iou target-anchor training threshold
 #        'lr0': 0.001,  # initial learning rate
 #        'lrf': -4.,  # final learning rate = lr0 * (10 ** lrf)
@@ -43,12 +47,10 @@ def train(
         cfg,
         data_cfg,
         img_size=416,
-        resume=False,
-        epochs=100,  # 500200 batches at bs 4, 117263 images = 68 epochs
-        batch_size=16,
-        accumulate=4,  # effective bs = 64 = batch_size * accumulate
+        epochs=100,  # 500200 batches at bs 16, 117263 images = 273 epochs
+        batch_size=8,
+        accumulate=8,  # effective bs = batch_size * accumulate = 8 * 8 = 64
         freeze_backbone=False,
-        transfer=False  # Transfer learning (train only YOLO layers)
 ):
     init_seeds()
     weights = 'weights' + os.sep
@@ -76,10 +78,10 @@ def train(
     cutoff = -1  # backbone reaches to cutoff layer
     start_epoch = 0
-    best_loss = float('inf')
+    best_fitness = 0.0
     nf = int(model.module_defs[model.yolo_layers[0] - 1]['filters'])  # yolo layer size (i.e. 255)
 
-    if resume:  # Load previously saved model
-        if transfer:  # Transfer learning
+    if opt.resume or opt.transfer:  # Load previously saved model
+        if opt.transfer:  # Transfer learning
             chkpt = torch.load(weights + 'yolov3-spp.pt', map_location=device)
             model.load_state_dict({k: v for k, v in chkpt['model'].items() if v.numel() > 1 and v.shape[0] != 255},
                                   strict=False)
@@ -93,7 +95,7 @@ def train(
             start_epoch = chkpt['epoch'] + 1
             if chkpt['optimizer'] is not None:
                 optimizer.load_state_dict(chkpt['optimizer'])
-            best_loss = chkpt['best_loss']
+            best_fitness = chkpt['best_fitness']
             del chkpt
 
     else:  # Initialize model with backbone (optional)
@@ -135,7 +137,11 @@ def train(
 
     # Initialize distributed training
     if torch.cuda.device_count() > 1:
-        dist.init_process_group(backend=opt.backend, init_method=opt.dist_url, world_size=opt.world_size, rank=opt.rank)
+        dist.init_process_group(backend='nccl',  # 'distributed backend'
+                                init_method='tcp://127.0.0.1:9999',  # distributed training init method
+                                world_size=1,  # number of nodes for distributed training
+                                rank=0)  # distributed training node rank
+
         model = torch.nn.parallel.DistributedDataParallel(model)
         # sampler = torch.utils.data.distributed.DistributedSampler(dataset)
 
@@ -148,12 +154,13 @@ def train(
                             collate_fn=dataset.collate_fn)
 
     # Mixed precision training https://github.com/NVIDIA/apex
-    try:
-        from apex import amp
-        model, optimizer = amp.initialize(model, optimizer, opt_level='O1')
-        mixed_precision = True
-    except:  # not installed: install help: https://github.com/NVIDIA/apex/issues/259
-        mixed_precision = False
+    mixed_precision = True
+    if mixed_precision:
+        try:
+            from apex import amp
+            model, optimizer = amp.initialize(model, optimizer, opt_level='O1')
+        except:  # not installed: install help: https://github.com/NVIDIA/apex/issues/259
+            mixed_precision = False
 
     # Start training
     model.hyp = hyp  # attach hyperparameters to model
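The new defaults swap `batch_size=16, accumulate=4` for `batch_size=8, accumulate=8`, so the effective batch size stays at 64 while halving per-step GPU memory. The accumulation pattern itself, as a self-contained sketch (dummy model and data, not repo code):

```python
import torch
import torch.nn as nn

model = nn.Linear(10, 1)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
accumulate = 8  # effective batch = batch_size * accumulate = 8 * 8 = 64
batches = [(torch.randn(8, 10), torch.randn(8, 1)) for _ in range(16)]

optimizer.zero_grad()
for i, (x, y) in enumerate(batches):
    loss = nn.functional.mse_loss(model(x), y)
    loss.backward()                # gradients sum across mini-batches
    if (i + 1) % accumulate == 0:  # step once per `accumulate` batches
        optimizer.step()           # i.e. one update per 64 samples
        optimizer.zero_grad()
```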
@@ -166,7 +173,8 @@ def train(
     t, t0 = time.time(), time.time()
     for epoch in range(start_epoch, epochs):
         model.train()
-        print(('\n%8s%12s' + '%10s' * 7) % ('Epoch', 'Batch', 'xy', 'wh', 'conf', 'cls', 'total', 'targets', 'time'))
+        print(('\n%8s%12s' + '%10s' * 7) %
+              ('Epoch', 'Batch', 'xy', 'wh', 'conf', 'cls', 'total', 'targets', 'img_size'))
 
         # Update scheduler
         scheduler.step()
@@ -183,15 +191,16 @@ def train(
         #     dataset.indices = random.choices(range(dataset.n), weights=image_weights, k=dataset.n)  # random weighted index
 
         mloss = torch.zeros(5).to(device)  # mean losses
-        for i, (imgs, targets, _, _) in enumerate(dataloader):
+        pbar = tqdm(enumerate(dataloader), total=nb)  # progress bar
+        for i, (imgs, targets, _, _) in pbar:
             imgs = imgs.to(device)
             targets = targets.to(device)
 
-            # Multi-Scale training
+            # Multi-Scale training TODO: short-side to 32-multiple https://github.com/ultralytics/yolov3/issues/358
             if multi_scale:
-                if (i + 1 + nb * epoch) / accumulate % 10 == 0:  #  adjust (67% - 150%) every 10 batches
+                if (i + nb * epoch) / accumulate % 10 == 0:  #  adjust (67% - 150%) every 10 batches
                     img_size = random.choice(range(img_size_min, img_size_max + 1)) * 32
-                    print('img_size = %g' % img_size)
+                    # print('img_size = %g' % img_size)
                     scale_factor = img_size / max(imgs.shape[-2:])
                     imgs = F.interpolate(imgs, scale_factor=scale_factor, mode='bilinear', align_corners=False)
 
@@ -228,11 +237,11 @@ def train(
 
             # Print batch results
             mloss = (mloss * i + loss_items) / (i + 1)  # update mean losses
+            # s = ('%8s%12s' + '%10.3g' * 7) % ('%g/%g' % (epoch, epochs - 1), '%g/%g' % (i, nb - 1), *mloss, len(targets), time.time() - t)
             s = ('%8s%12s' + '%10.3g' * 7) % (
-                '%g/%g' % (epoch, epochs - 1),
-                '%g/%g' % (i, nb - 1), *mloss, len(targets), time.time() - t)
+                '%g/%g' % (epoch, epochs - 1), '%g/%g' % (i, nb - 1), *mloss, len(targets), img_size)
             t = time.time()
-            print(s)
+            pbar.set_description(s)  # print(s)
 
         # Report time
         dt = (time.time() - t0) / 3600
@@ -248,17 +257,17 @@ def train(
             with open('results.txt', 'a') as file:
                 file.write(s + '%11.3g' * 5 % results + '\n')  # P, R, mAP, F1, test_loss
 
-        # Update best loss
-        test_loss = results[4]
-        if test_loss < best_loss:
-            best_loss = test_loss
+        # Update best map
+        fitness = results[2]
+        if fitness > best_fitness:
+            best_fitness = fitness
 
         # Save training results
         save = (not opt.nosave) or (epoch == epochs - 1)
         if save:
             # Create checkpoint
             chkpt = {'epoch': epoch,
-                     'best_loss': best_loss,
+                     'best_fitness': best_fitness,
                      'model': model.module.state_dict() if type(
                          model) is nn.parallel.DistributedDataParallel else model.state_dict(),
                      'optimizer': optimizer.state_dict()}
@@ -267,7 +276,7 @@ def train(
             torch.save(chkpt, latest)
 
             # Save best checkpoint
-            if best_loss == test_loss:
+            if best_fitness == fitness:
                 torch.save(chkpt, best)
 
             # Save backup every 10 epochs (optional)
@@ -286,8 +295,15 @@ def print_mutation(hyp, results):
     b = '%11.4g' * len(hyp) % tuple(hyp.values())  # hyperparam values
     c = '%11.3g' * len(results) % results  # results (P, R, mAP, F1, test_loss)
     print('\n%s\n%s\nEvolved fitness: %s\n' % (a, b, c))
-    with open('evolve.txt', 'a') as f:
-        f.write(c + b + '\n')
+
+    if opt.cloud_evolve:
+        os.system('gsutil cp gs://yolov4/evolve.txt .')  # download evolve.txt
+        with open('evolve.txt', 'a') as f:  # append result to evolve.txt
+            f.write(c + b + '\n')
+        os.system('gsutil cp evolve.txt gs://yolov4')  # upload evolve.txt
+    else:
+        with open('evolve.txt', 'a') as f:
+            f.write(c + b + '\n')
 
 
 if __name__ == '__main__':
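In the multi-scale branch above, dropping the `+ 1` means the resize check now also fires on the very first batch, and per-batch output moves into the tqdm description (the table's last column changes from 'time' to 'img_size' accordingly). The resize step in isolation (a sketch; the 416 base and the 9-19 bounds are illustrative stand-ins for the repo's `img_size_min`/`img_size_max`):

```python
import random
import torch
import torch.nn.functional as F

img_size_min, img_size_max = 9, 19  # * 32 -> 288..608, roughly the 67%-150% band around 416
img_size = random.choice(range(img_size_min, img_size_max + 1)) * 32
imgs = torch.randn(8, 3, 416, 416)  # dummy batch
scale_factor = img_size / max(imgs.shape[-2:])
imgs = F.interpolate(imgs, scale_factor=scale_factor, mode='bilinear', align_corners=False)
print(imgs.shape)  # e.g. torch.Size([8, 3, 512, 512])
```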
@@ -302,51 +318,46 @@ if __name__ == '__main__':
     parser.add_argument('--resume', action='store_true', help='resume training flag')
     parser.add_argument('--transfer', action='store_true', help='transfer learning flag')
     parser.add_argument('--num-workers', type=int, default=4, help='number of Pytorch DataLoader workers')
-    parser.add_argument('--dist-url', default='tcp://127.0.0.1:9999', type=str, help='distributed training init method')
-    parser.add_argument('--rank', default=0, type=int, help='distributed training node rank')
-    parser.add_argument('--world-size', default=1, type=int, help='number of nodes for distributed training')
-    parser.add_argument('--backend', default='nccl', type=str, help='distributed backend')
     parser.add_argument('--nosave', action='store_true', help='only save final checkpoint')
     parser.add_argument('--notest', action='store_true', help='only test final epoch')
     parser.add_argument('--giou', action='store_true', help='use GIoU loss instead of xy, wh loss')
-    parser.add_argument('--evolve', action='store_true', help='run hyperparameter evolution')
+    parser.add_argument('--evolve', action='store_true', help='evolve hyperparameters')
+    parser.add_argument('--cloud-evolve', action='store_true', help='evolve hyperparameters from a cloud source')
     parser.add_argument('--var', default=0, type=int, help='debug variable')
     opt = parser.parse_args()
     print(opt)
 
+    opt.evolve = opt.cloud_evolve or opt.evolve
     if opt.evolve:
         opt.notest = True  # only test final epoch
         opt.nosave = True  # only save final checkpoint
 
     # Train
-    results = train(
-        opt.cfg,
-        opt.data_cfg,
-        img_size=opt.img_size,
-        resume=opt.resume or opt.transfer,
-        transfer=opt.transfer,
-        epochs=opt.epochs,
-        batch_size=opt.batch_size,
-        accumulate=opt.accumulate,
-    )
+    results = train(opt.cfg,
+                    opt.data_cfg,
+                    img_size=opt.img_size,
+                    epochs=opt.epochs,
+                    batch_size=opt.batch_size,
+                    accumulate=opt.accumulate)
 
     # Evolve hyperparameters (optional)
     if opt.evolve:
-        best_fitness = results[2]  # use mAP for fitness
-
-        # Write mutation results
-        print_mutation(hyp, results)
+        print_mutation(hyp, results)  # Write mutation results
 
         gen = 1000  # generations to evolve
-        for _ in range(gen):
-
-            # Mutate hyperparameters
-            old_hyp = hyp.copy()
-            init_seeds(seed=int(time.time()))
-            s = [.4, .4, .4, .4, .4, .4, .4, .4, .4 * 0, .4 * 0, .04 * 0, .4 * 0]  # fractional sigmas
+        for _ in range(gen):
+            # Get best hyperparameters
+            x = np.loadtxt('evolve.txt', ndmin=2)
+            x = x[x[:, 2].argmax()]  # select best mAP as genetic fitness (col 2)
             for i, k in enumerate(hyp.keys()):
-                x = (np.random.randn(1) * s[i] + 1) ** 1.1  # plt.hist(x.ravel(), 100)
-                hyp[k] = hyp[k] * float(x)  # vary by about 30% 1sigma
+                hyp[k] = x[i + 5]
+
+            # Mutate
+            init_seeds(seed=int(time.time()))
+            s = [.2, .2, .2, .2, .2, .2, .2, .2, .2 * 0, .2 * 0, .05 * 0, .2 * 0]  # fractional sigmas
+            for i, k in enumerate(hyp.keys()):
+                x = (np.random.randn(1) * s[i] + 1) ** 2.0  # plt.hist(x.ravel(), 300)
+                hyp[k] *= float(x)  # vary by 20% 1sigma
 
             # Clip to limits
             keys = ['lr0', 'iou_t', 'momentum', 'weight_decay']
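Each generation now restarts from the best row of evolve.txt rather than from the last accepted mutation: `print_mutation` writes the five result columns (P, R, mAP, F1, test_loss) before the hyperparameter values, which is why fitness is read from column 2 and the hyperparameters begin at column 5 (`x[i + 5]`). The mutation step in isolation (a sketch with a shortened `hyp`, mirroring the sigmas in the diff; the subsequent clipping of `lr0`, `iou_t`, `momentum` and `weight_decay` is omitted):

```python
import numpy as np

hyp = {'giou': 1.008, 'xy': 1.421, 'wh': 0.07989, 'cls': 16.94}
s = [.2, .2, .2, .2]  # fractional sigma per key; a zeroed sigma freezes a key
for i, k in enumerate(hyp.keys()):
    x = (np.random.randn(1) * s[i] + 1) ** 2.0  # multiplicative noise, ~20% 1sigma
    hyp[k] *= float(x)
print(hyp)
```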
@@ -354,30 +365,17 @@ if __name__ == '__main__':
             for k, v in zip(keys, limits):
                 hyp[k] = np.clip(hyp[k], v[0], v[1])
 
-            # Determine mutation fitness
-            results = train(
-                opt.cfg,
-                opt.data_cfg,
-                img_size=opt.img_size,
-                resume=opt.resume or opt.transfer,
-                transfer=opt.transfer,
-                epochs=opt.epochs,
-                batch_size=opt.batch_size,
-                accumulate=opt.accumulate,
-            )
-            mutation_fitness = results[2]
+            # Train mutation
+            results = train(opt.cfg,
+                            opt.data_cfg,
+                            img_size=opt.img_size,
+                            epochs=opt.epochs,
+                            batch_size=opt.batch_size,
+                            accumulate=opt.accumulate)
 
             # Write mutation results
             print_mutation(hyp, results)
 
-            # Update hyperparameters if fitness improved
-            if mutation_fitness > best_fitness:
-                # Fitness improved!
-                print('Fitness improved!')
-                best_fitness = mutation_fitness
-            else:
-                hyp = old_hyp.copy()  # reset hyp to
-
     # # Plot results
     # import numpy as np
     # import matplotlib.pyplot as plt
diff --git a/utils/google_utils.py b/utils/google_utils.py
new file mode 100644
index 00000000..ee939443
--- /dev/null
+++ b/utils/google_utils.py
@@ -0,0 +1,32 @@
+# This file contains google utils: https://cloud.google.com/storage/docs/reference/libraries
+# pip install --upgrade google-cloud-storage
+
+from google.cloud import storage
+
+
+def upload_blob(bucket_name, source_file_name, destination_blob_name):
+    # Uploads a file to a bucket
+    # https://cloud.google.com/storage/docs/uploading-objects#storage-upload-object-python
+
+    storage_client = storage.Client()
+    bucket = storage_client.get_bucket(bucket_name)
+    blob = bucket.blob(destination_blob_name)
+
+    blob.upload_from_filename(source_file_name)
+
+    print('File {} uploaded to {}.'.format(
+        source_file_name,
+        destination_blob_name))
+
+
+def download_blob(bucket_name, source_blob_name, destination_file_name):
+    # Downloads a blob from a bucket
+    storage_client = storage.Client()
+    bucket = storage_client.get_bucket(bucket_name)
+    blob = bucket.blob(source_blob_name)
+
+    blob.download_to_filename(destination_file_name)
+
+    print('Blob {} downloaded to {}.'.format(
+        source_blob_name,
+        destination_file_name))
diff --git a/utils/utils.py b/utils/utils.py
index 4c0b179c..4f8fe604 100755
--- a/utils/utils.py
+++ b/utils/utils.py
@@ -11,6 +11,7 @@ from PIL import Image
 from tqdm import tqdm
 
 from . import torch_utils
+from . import google_utils
 
 matplotlib.rc('font', **{'size': 11})
 
@@ -284,7 +285,7 @@ def compute_loss(p, targets, model, giou_loss=False):  # predictions, targets, m
 
     # Compute losses
     bs = p[0].shape[0]  # batch size
-    k = bs  # loss gain
+    k = bs / 64  # loss gain
     for i, pi0 in enumerate(p):  # layer i predictions, i
         b, a, gj, gi = indices[i]  # image, anchor, gridy, gridx
         tconf = torch.zeros_like(pi0[..., 0])  # conf
@@ -303,12 +304,12 @@ def compute_loss(p, targets, model, giou_loss=False):  # predictions, targets, m
                 lxy += (k * h['xy']) * MSE(torch.sigmoid(pi[..., 0:2]), txy[i])  # xy loss
                 lwh += (k * h['wh']) * MSE(pi[..., 2:4], twh[i])  # wh yolo loss
 
-            # tclsm = torch.zeros_like(pi[..., 5:])
-            # tclsm[range(len(b)), tcls[i]] = 1.0
-            # lcls += (k * h['cls']) * BCEcls(pi[..., 5:], tclsm)  # class_conf loss
-            lcls += (k * h['cls']) * CE(pi[..., 5:], tcls[i])  # class_conf loss
+            tclsm = torch.zeros_like(pi[..., 5:])
+            tclsm[range(len(b)), tcls[i]] = 1.0
+            lcls += (k * h['cls']) * BCEcls(pi[..., 5:], tclsm)  # cls loss (BCE)
+            # lcls += (k * h['cls']) * CE(pi[..., 5:], tcls[i])  # cls loss (CE)
 
-            # # Append to text file
+            # Append targets to text file
             # with open('targets.txt', 'a') as file:
             #     [file.write('%11.5g ' * 4 % tuple(x) + '\n') for x in torch.cat((txy[i], twh[i]), 1)]
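Finally, two loss changes in utils/utils.py: the gain `k` is normalized by the nominal effective batch size of 64, so loss magnitude no longer scales with `batch_size`, and the classification term switches from CrossEntropy over class indices back to BCE over a one-hot matrix, treating classes as independent and pairing with the `cls_pw` positive weight. The target construction in isolation (illustrative shapes, not repo code):

```python
import torch
import torch.nn as nn

n, nc = 6, 80                      # 6 matched anchors, 80 COCO classes
pcls = torch.randn(n, nc)          # class logits
tcls = torch.randint(0, nc, (n,))  # target class indices

tclsm = torch.zeros_like(pcls)     # one-hot target matrix
tclsm[range(n), tcls] = 1.0

BCEcls = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([6.215]))  # cls_pw from hyp
print(float(BCEcls(pcls, tclsm)))
```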