diff --git a/test.py b/test.py
index 686a8f5d..6e4d4654 100644
--- a/test.py
+++ b/test.py
@@ -68,11 +68,8 @@ def test(
         # Run model
         inf_out, train_out = model(imgs)  # inference and training outputs
 
-        # Build targets
-        target_list = build_targets(model, targets)
-
         # Compute loss
-        loss_i, _ = compute_loss(train_out, target_list)
+        loss_i, _ = compute_loss(train_out, targets, model)
         loss += loss_i.item()
 
         # Run NMS
diff --git a/train.py b/train.py
index 99f3f840..ebe257ac 100644
--- a/train.py
+++ b/train.py
@@ -2,6 +2,7 @@ import argparse
 import time
 
 import torch.distributed as dist
+import torch.optim as optim
 from torch.utils.data import DataLoader
 
 import test  # Import test.py to get mAP after each epoch
@@ -41,9 +42,21 @@ def train(
     # Initialize model
     model = Darknet(cfg, img_size).to(device)
 
+    # Initialize hyperparameters
+    hyp = {'k': 8.4875,  # loss multiple
+           'xy': 0.079756,  # xy loss fraction
+           'wh': 0.010461,  # wh loss fraction
+           'cls': 0.02105,  # cls loss fraction
+           'conf': 0.88873,  # conf loss fraction
+           'iou_t': 0.1,  # iou target-anchor training threshold
+           'lr0': 0.001,  # initial learning rate
+           'lrf': -2.,  # final learning rate = lr0 * (10 ** lrf)
+           'momentum': 0.9,  # SGD momentum
+           'weight_decay': 0.0005,  # optimizer weight decay
+           }
+
     # Optimizer
-    lr0 = 0.001  # initial learning rate
-    optimizer = torch.optim.SGD(model.parameters(), lr=lr0, momentum=0.9, weight_decay=0.0005)
+    optimizer = optim.SGD(model.parameters(), lr=hyp['lr0'], momentum=hyp['momentum'], weight_decay=hyp['weight_decay'])
 
     cutoff = -1  # backbone reaches to cutoff layer
     start_epoch = 0
@@ -74,8 +87,11 @@ def train(
         cutoff = load_darknet_weights(model, weights + 'darknet53.conv.74')
 
     # Scheduler (reduce lr at epochs 218, 245, i.e. batches 400k, 450k)
-    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[218, 245], gamma=0.1,
-                                                     last_epoch=start_epoch - 1)
+    # lf = lambda x: 1 - x / epochs  # linear ramp to zero
+    # lf = lambda x: 10 ** (-2 * x / epochs)  # exp ramp to lr0 * 1e-2
+    # lf = lambda x: 1 - 10 ** (-2 * (1 - x / epochs))  # inv exp ramp to lr0 * 1e-2
+    # scheduler = optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lf, last_epoch=start_epoch - 1)
+    scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[218, 245], gamma=0.1, last_epoch=start_epoch - 1)
 
     # Dataset
     dataset = LoadImagesAndLabels(train_path, img_size=img_size, augment=True)
@@ -105,9 +121,10 @@ def train(
 
     # Start training
     t = time.time()
+    model.hyp = hyp  # attach hyperparameters to model
     model_info(model)
-    nB = len(dataloader)
-    n_burnin = min(round(nB / 5 + 1), 1000)  # burn-in batches
+    nb = len(dataloader)
+    n_burnin = min(round(nb / 5 + 1), 1000)  # burn-in batches
     os.remove('train_batch0.jpg') if os.path.exists('train_batch0.jpg') else None
     os.remove('test_batch0.jpg') if os.path.exists('test_batch0.jpg') else None
     for epoch in range(start_epoch, epochs):
@@ -123,7 +140,7 @@ def train(
                 if int(name.split('.')[1]) < cutoff:  # if layer < 75
                     p.requires_grad = False if epoch == 0 else True
 
-        mloss = torch.zeros(5).to(device)  # mean losses
+        mloss = torch.zeros(5).to(device)  # mean losses
         for i, (imgs, targets, _, _) in enumerate(dataloader):
             imgs = imgs.to(device)
             targets = targets.to(device)
@@ -137,18 +154,15 @@ def train(
 
             # SGD burn-in
             if epoch == 0 and i <= n_burnin:
-                lr = lr0 * (i / n_burnin) ** 4
+                lr = hyp['lr0'] * (i / n_burnin) ** 4
                 for x in optimizer.param_groups:
                     x['lr'] = lr
 
             # Run model
             pred = model(imgs)
 
-            # Build targets
-            target_list = build_targets(model, targets)
-
             # Compute loss
-            loss, loss_items = compute_loss(pred, target_list)
+            loss, loss_items = compute_loss(pred, targets, model)
 
             # Compute gradient
             if mixed_precision:
@@ -158,7 +172,7 @@ def train(
                 loss.backward()
 
             # Accumulate gradient for x batches before optimizing
-            if (i + 1) % accumulate == 0 or (i + 1) == nB:
+            if (i + 1) % accumulate == 0 or (i + 1) == nb:
                 optimizer.step()
                 optimizer.zero_grad()
 
@@ -168,7 +182,7 @@ def train(
             # Print batch results
             s = ('%8s%12s' + '%10.3g' * 7) % (
                 '%g/%g' % (epoch, epochs - 1),
-                '%g/%g' % (i, nB - 1), *mloss, nt, time.time() - t)
+                '%g/%g' % (i, nb - 1), *mloss, nt, time.time() - t)
             t = time.time()
             print(s)
 
@@ -182,7 +196,8 @@ def train(
             results = (0, 0, 0, 0, 0)
         else:
             with torch.no_grad():
-                results = test.test(cfg, data_cfg, batch_size=batch_size, img_size=img_size, model=model, conf_thres=0.1)
+                results = test.test(cfg, data_cfg, batch_size=batch_size, img_size=img_size, model=model,
+                                    conf_thres=0.1)
 
         # Write epoch results
         with open('results.txt', 'a') as file:
@@ -235,6 +250,7 @@ if __name__ == '__main__':
     parser.add_argument('--world-size', default=1, type=int, help='number of nodes for distributed training')
     parser.add_argument('--backend', default='nccl', type=str, help='distributed backend')
     parser.add_argument('--nosave', action='store_true', help='do not save training results')
+    parser.add_argument('--var', default=0, type=int, help='debug variable')
     opt = parser.parse_args()
     print(opt, end='\n\n')
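For reference, the learning-rate behavior touched in train.py above can be sketched standalone: the quartic burn-in ramps the LR from 0 to `hyp['lr0']` over the first `n_burnin` batches of epoch 0, and the commented lines are candidate `LambdaLR` full-run schedules. A minimal sketch, assuming illustrative values for `epochs` and the per-epoch batch count `nb` (neither is fixed by this diff):

```python
# Standalone sketch of the LR logic in train.py above; epochs and nb are
# illustrative assumptions, not values taken from this diff.
lr0 = 0.001  # hyp['lr0']
epochs, nb = 273, 1833  # assumed run length and batches per epoch
n_burnin = min(round(nb / 5 + 1), 1000)  # burn-in batches, as in the diff

# Quartic SGD burn-in during epoch 0: lr ramps from 0 up to lr0.
for i in (0, n_burnin // 2, n_burnin):
    print('burn-in batch %4d: lr = %.2e' % (i, lr0 * (i / n_burnin) ** 4))

# The three commented LambdaLR candidates, evaluated at start/middle/end.
candidates = {
    'linear': lambda x: 1 - x / epochs,                      # linear ramp to zero
    'exp': lambda x: 10 ** (-2 * x / epochs),                # exp ramp to lr0 * 1e-2
    'inv_exp': lambda x: 1 - 10 ** (-2 * (1 - x / epochs)),  # inv exp ramp
}
for name, lf in candidates.items():
    print(name, ['%.2e' % (lr0 * lf(x)) for x in (0, epochs // 2, epochs - 1)])
```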
diff --git a/utils/gcp.sh b/utils/gcp.sh
index 054ad228..88a76cf5 100755
--- a/utils/gcp.sh
+++ b/utils/gcp.sh
@@ -50,14 +50,19 @@
 git clone https://github.com/ultralytics/yolov3  # master
 cp -r weights yolov3
 cp -r cocoapi/PythonAPI/pycocotools yolov3
 cd yolov3
-python3 train.py --nosave --data data/coco_100val.data
+python3 train.py --nosave --data data/coco_32img.data --var 4 && mv results.txt results_t2.txt
+python3 train.py --nosave --data data/coco_32img.data --var 5 && mv results.txt results_t3.txt
+python3 -c "from utils import utils; utils.plot_results()"
+gsutil cp results*.txt gs://ultralytics
+gsutil cp results.png gs://ultralytics
+sudo shutdown
 
-#mv ../utils.py utils
+mv ../train.py .
 rm results*.txt  # WARNING: removes existing results
 python3 train.py --nosave --data data/coco_1img.data && mv results.txt results3_1img.txt
 python3 train.py --nosave --data data/coco_10img.data && mv results.txt results3_10img.txt
-python3 train.py --nosave --data data/coco_100img.data && mv results.txt results3_100img.txt
+python3 train.py --nosave --data data/coco_100img.data && mv results.txt results4_100img.txt
 python3 train.py --nosave --data data/coco_100img.data --transfer && mv results.txt results3_100imgTL.txt
 # python3 train.py --nosave --data data/coco_1000img.data && mv results.txt results_1000img.txt
 python3 -c "from utils import utils; utils.plot_results()"
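The aggregation step in the experiment scripts above uses the repo's own `plot_results()` helper, invoked the same way the script invokes it. Run locally from the `yolov3` root (assuming the renamed `results*.txt` files from the trials are present), the equivalent is:

```python
# Local equivalent of the plotting step in gcp.sh above. Assumes the working
# directory is the yolov3 repo root and that the results*.txt files written
# by the train.py runs are present; the script uploads results.png afterwards,
# so the helper is expected to produce that figure.
from utils import utils

utils.plot_results()
```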
diff --git a/utils/utils.py b/utils/utils.py
index 26a450f5..ecccf7cc 100755
--- a/utils/utils.py
+++ b/utils/utils.py
@@ -242,35 +242,37 @@ def wh_iou(box1, box2):
     return inter_area / union_area  # iou
 
 
-def compute_loss(p, targets):  # predictions, targets
+def compute_loss(p, targets, model):  # predictions, targets, model
     ft = torch.cuda.FloatTensor if p[0].is_cuda else torch.Tensor
     lxy, lwh, lcls, lconf = ft([0]), ft([0]), ft([0]), ft([0])
-    txy, twh, tcls, indices = targets
+    txy, twh, tcls, indices = build_targets(model, targets)
+
+    # Define criteria
     MSE = nn.MSELoss()
     CE = nn.CrossEntropyLoss()
     BCE = nn.BCEWithLogitsLoss()
 
     # Compute losses
+    h = model.hyp  # hyperparameters
     bs = p[0].shape[0]  # batch size
-    # gp = [x.numel() for x in tconf]  # grid points
+    k = h['k'] * bs  # loss gain
     for i, pi0 in enumerate(p):  # layer i predictions, i
         b, a, gj, gi = indices[i]  # image, anchor, gridx, gridy
         tconf = torch.zeros_like(pi0[..., 0])  # conf
 
         # Compute losses
-        k = 8.4875 * bs
         if len(b):  # number of targets
             pi = pi0[b, a, gj, gi]  # predictions closest to anchors
             tconf[b, a, gj, gi] = 1  # conf
+            # pi[..., 2:4] = torch.sigmoid(pi[..., 2:4])  # wh power loss (uncomment)
 
-            lxy += (k * 0.079756) * MSE(torch.sigmoid(pi[..., 0:2]), txy[i])  # xy loss
-            lwh += (k * 0.010461) * MSE(pi[..., 2:4], twh[i])  # wh yolo loss
-            # lwh += (k * 0.010461) * MSE(torch.sigmoid(pi[..., 2:4]), twh[i])  # wh power loss
-            lcls += (k * 0.02105) * CE(pi[..., 5:], tcls[i])  # class_conf loss
+            lxy += (k * h['xy']) * MSE(torch.sigmoid(pi[..., 0:2]), txy[i])  # xy loss
+            lwh += (k * h['wh']) * MSE(pi[..., 2:4], twh[i])  # wh yolo loss
+            lcls += (k * h['cls']) * CE(pi[..., 5:], tcls[i])  # class_conf loss
 
         # pos_weight = ft([gp[i] / min(gp) * 4.])
         # BCE = nn.BCEWithLogitsLoss(pos_weight=pos_weight)
-        lconf += (k * 0.88873) * BCE(pi0[..., 4], tconf)  # obj_conf loss
+        lconf += (k * h['conf']) * BCE(pi0[..., 4], tconf)  # obj_conf loss
     loss = lxy + lwh + lconf + lcls
 
     return loss, torch.cat((lxy, lwh, lconf, lcls, loss)).detach()
@@ -296,7 +298,7 @@ def build_targets(model, targets):
         # reject below threshold ious (OPTIONAL, increases P, lowers R)
         reject = True
         if reject:
-            j = iou > 0.10
+            j = iou > model.hyp['iou_t']  # hyperparameter
             t, a, gwh = targets[j], a[j], gwh[j]
 
         # Indices
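To make the new weighting concrete: every term in `compute_loss` is scaled by the gain `k = hyp['k'] * bs` and then by its fraction, so the `hyp` fractions set the relative contribution of each component. A minimal sketch with placeholder component values; the four raw numbers below stand in for the MSE/CE/BCE outputs and are assumptions, not real measurements:

```python
import torch

# Placeholder walk-through of the loss composition in compute_loss above.
hyp = {'k': 8.4875, 'xy': 0.079756, 'wh': 0.010461, 'cls': 0.02105, 'conf': 0.88873}
bs = 16            # batch size, p[0].shape[0] in the real code (assumed here)
k = hyp['k'] * bs  # overall loss gain, as in the diff

raw = {'xy': torch.tensor(0.02),    # stands in for MSE(sigmoid(xy), txy)
       'wh': torch.tensor(0.01),    # stands in for MSE(wh, twh)
       'cls': torch.tensor(0.50),   # stands in for CE(class logits, tcls)
       'conf': torch.tensor(0.30)}  # stands in for BCE(objectness, tconf)

terms = {name: k * hyp[name] * v for name, v in raw.items()}
loss = sum(terms.values())
print({n: round(float(v), 3) for n, v in terms.items()}, '-> loss %.3f' % float(loss))
```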