commit 0b8a28e3dd
parent 9c5524ba82

updates

--- a/test.py
+++ b/test.py
@@ -68,11 +68,8 @@ def test(
         # Run model
         inf_out, train_out = model(imgs)  # inference and training outputs
 
-        # Build targets
-        target_list = build_targets(model, targets)
-
         # Compute loss
-        loss_i, _ = compute_loss(train_out, target_list)
+        loss_i, _ = compute_loss(train_out, targets, model)
         loss += loss_i.item()
 
         # Run NMS
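Note: the substantive change in test.py is the compute_loss signature. Callers now pass the raw targets plus the model, and target construction happens inside the loss function (see the utils/utils.py hunk below). A minimal self-contained sketch of the pattern, using dummy stand-ins rather than the repo's real implementations:

# Sketch of the new calling convention (dummy stand-ins, illustrative only)
def build_targets(model, targets):
    return targets  # the real helper matches targets to anchors per layer

def compute_loss(pred, targets, model):  # new signature from this commit
    t = build_targets(model, targets)    # target construction moved inside
    loss = sum((p - x) ** 2 for p, x in zip(pred, t))
    return loss, None

loss_i, _ = compute_loss([1.0, 2.0], [0.5, 1.5], model=None)
print(loss_i)  # 0.5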
--- a/train.py
+++ b/train.py
@@ -2,6 +2,7 @@ import argparse
 import time
 
 import torch.distributed as dist
+import torch.optim as optim
 from torch.utils.data import DataLoader
 
 import test  # Import test.py to get mAP after each epoch
@@ -41,9 +42,21 @@ def train(
     # Initialize model
     model = Darknet(cfg, img_size).to(device)
 
+    # Initialize hyperparameters
+    hyp = {'k': 8.4875,  # loss multiple
+           'xy': 0.079756,  # xy loss fraction
+           'wh': 0.010461,  # wh loss fraction
+           'cls': 0.02105,  # cls loss fraction
+           'conf': 0.88873,  # conf loss fraction
+           'iou_t': 0.1,  # iou target-anchor training threshold
+           'lr0': 0.001,  # initial learning rate
+           'lrf': -2.,  # final learning rate = lr0 * (10 ** lrf)
+           'momentum': 0.9,  # SGD momentum
+           'weight_decay': 0.0005,  # optimizer weight decay
+           }
+
     # Optimizer
-    lr0 = 0.001  # initial learning rate
-    optimizer = torch.optim.SGD(model.parameters(), lr=lr0, momentum=0.9, weight_decay=0.0005)
+    optimizer = optim.SGD(model.parameters(), lr=hyp['lr0'], momentum=hyp['momentum'], weight_decay=hyp['weight_decay'])
 
     cutoff = -1  # backbone reaches to cutoff layer
     start_epoch = 0
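Note: 'lrf' encodes a final learning rate of lr0 * (10 ** lrf), i.e. 0.001 * 10**-2 = 1e-5 with the values above. At this point the -2 exponent appears only hardcoded in the commented-out LambdaLR ramps below, so 'lrf' looks forward-looking rather than actually consumed yet. A quick check of the arithmetic:

hyp = {'lr0': 0.001, 'lrf': -2.}      # values from the hunk above
print(hyp['lr0'] * 10 ** hyp['lrf'])  # 1e-05, the implied final learning rate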
@@ -74,8 +87,11 @@ def train(
         cutoff = load_darknet_weights(model, weights + 'darknet53.conv.74')
 
     # Scheduler (reduce lr at epochs 218, 245, i.e. batches 400k, 450k)
-    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[218, 245], gamma=0.1,
-                                                     last_epoch=start_epoch - 1)
+    # lf = lambda x: 1 - x / epochs  # linear ramp to zero
+    # lf = lambda x: 10 ** (-2 * x / epochs)  # exp ramp to lr0 * 1e-2
+    # lf = lambda x: 1 - 10 ** (-2 * (1 - x / epochs))  # inv exp ramp to lr0 * 1e-2
+    # scheduler = optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lf, last_epoch=start_epoch - 1)
+    scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[218, 245], gamma=0.1, last_epoch=start_epoch - 1)
 
     # Dataset
     dataset = LoadImagesAndLabels(train_path, img_size=img_size, augment=True)
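Note: MultiStepLR multiplies the learning rate by gamma at each milestone epoch, and last_epoch=start_epoch - 1 lets a resumed run rejoin the schedule at the right point. A small sketch of the resulting schedule (only torch assumed; milestones and gamma taken from the hunk above):

import torch

params = [torch.zeros(1, requires_grad=True)]
optimizer = torch.optim.SGD(params, lr=0.001)
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[218, 245], gamma=0.1)
lrs = {}
for epoch in range(246):
    lrs[epoch] = optimizer.param_groups[0]['lr']  # lr in effect this epoch
    optimizer.step()    # step the optimizer first to avoid a scheduler warning
    scheduler.step()
print(lrs[0], lrs[217], lrs[218], lrs[245])  # 0.001 0.001 0.0001 1e-05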
@@ -105,9 +121,10 @@ def train(
 
     # Start training
     t = time.time()
+    model.hyp = hyp  # attach hyperparameters to model
     model_info(model)
-    nB = len(dataloader)
-    n_burnin = min(round(nB / 5 + 1), 1000)  # burn-in batches
+    nb = len(dataloader)
+    n_burnin = min(round(nb / 5 + 1), 1000)  # burn-in batches
     os.remove('train_batch0.jpg') if os.path.exists('train_batch0.jpg') else None
     os.remove('test_batch0.jpg') if os.path.exists('test_batch0.jpg') else None
     for epoch in range(start_epoch, epochs):
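Note: attaching hyp to the model is what lets the refactored compute_loss(pred, targets, model) and build_targets read hyperparameters without threading them through every call. Also, n_burnin caps burn-in at 1000 batches: full COCO at batch size 16 gives roughly 7,300 batches per epoch (an illustrative figure, not from this diff), so min(round(7300 / 5 + 1), 1000) = 1000 and the cap binds, while a 100-batch debug set burns in for only min(round(100 / 5 + 1), 1000) = 21 batches.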
@@ -123,7 +140,7 @@ def train(
                 if int(name.split('.')[1]) < cutoff:  # if layer < 75
                     p.requires_grad = False if epoch == 0 else True
 
-        mloss = torch.zeros(5).to(device)  # mean losses
+        mloss = torch.zeros(5).to(device)  # mean losses
         for i, (imgs, targets, _, _) in enumerate(dataloader):
             imgs = imgs.to(device)
             targets = targets.to(device)
@@ -137,18 +154,15 @@ def train(
 
             # SGD burn-in
             if epoch == 0 and i <= n_burnin:
-                lr = lr0 * (i / n_burnin) ** 4
+                lr = hyp['lr0'] * (i / n_burnin) ** 4
                 for x in optimizer.param_groups:
                     x['lr'] = lr
 
             # Run model
             pred = model(imgs)
 
-            # Build targets
-            target_list = build_targets(model, targets)
-
             # Compute loss
-            loss, loss_items = compute_loss(pred, target_list)
+            loss, loss_items = compute_loss(pred, targets, model)
 
             # Compute gradient
             if mixed_precision:
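Note: the burn-in ramp is quartic, lr = lr0 * (i / n_burnin) ** 4, so the learning rate stays tiny through most of burn-in and only sweeps up to lr0 near the end. Sample values (assuming lr0 = 0.001 and n_burnin = 1000, per the hunks above):

lr0, n_burnin = 0.001, 1000
for i in (250, 500, 750, 1000):
    print(i, lr0 * (i / n_burnin) ** 4)  # ~3.9e-06, 6.25e-05, ~3.2e-04, 1e-03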
@@ -158,7 +172,7 @@ def train(
                 loss.backward()
 
             # Accumulate gradient for x batches before optimizing
-            if (i + 1) % accumulate == 0 or (i + 1) == nB:
+            if (i + 1) % accumulate == 0 or (i + 1) == nb:
                 optimizer.step()
                 optimizer.zero_grad()
@@ -168,7 +182,7 @@ def train(
             # Print batch results
             s = ('%8s%12s' + '%10.3g' * 7) % (
                 '%g/%g' % (epoch, epochs - 1),
-                '%g/%g' % (i, nB - 1), *mloss, nt, time.time() - t)
+                '%g/%g' % (i, nb - 1), *mloss, nt, time.time() - t)
             t = time.time()
             print(s)
@@ -182,7 +196,8 @@ def train(
             results = (0, 0, 0, 0, 0)
         else:
             with torch.no_grad():
-                results = test.test(cfg, data_cfg, batch_size=batch_size, img_size=img_size, model=model, conf_thres=0.1)
+                results = test.test(cfg, data_cfg, batch_size=batch_size, img_size=img_size, model=model,
+                                    conf_thres=0.1)
 
         # Write epoch results
         with open('results.txt', 'a') as file:
@@ -235,6 +250,7 @@ if __name__ == '__main__':
     parser.add_argument('--world-size', default=1, type=int, help='number of nodes for distributed training')
     parser.add_argument('--backend', default='nccl', type=str, help='distributed backend')
     parser.add_argument('--nosave', action='store_true', help='do not save training results')
+    parser.add_argument('--var', default=0, type=int, help='debug variable')
     opt = parser.parse_args()
     print(opt, end='\n\n')
--- a/utils/gcp.sh
+++ b/utils/gcp.sh
@@ -50,14 +50,19 @@ git clone https://github.com/ultralytics/yolov3 # master
 cp -r weights yolov3
 cp -r cocoapi/PythonAPI/pycocotools yolov3
 cd yolov3
-python3 train.py --nosave --data data/coco_100val.data
+python3 train.py --nosave --data data/coco_32img.data --var 4 && mv results.txt results_t2.txt
+python3 train.py --nosave --data data/coco_32img.data --var 5 && mv results.txt results_t3.txt
+python3 -c "from utils import utils; utils.plot_results()"
+gsutil cp results*.txt gs://ultralytics
+gsutil cp results.png gs://ultralytics
 sudo shutdown
 
 #mv ../utils.py utils
 mv ../train.py .
 
 rm results*.txt  # WARNING: removes existing results
 python3 train.py --nosave --data data/coco_1img.data && mv results.txt results3_1img.txt
 python3 train.py --nosave --data data/coco_10img.data && mv results.txt results3_10img.txt
-python3 train.py --nosave --data data/coco_100img.data && mv results.txt results3_100img.txt
+python3 train.py --nosave --data data/coco_100img.data && mv results.txt results4_100img.txt
 python3 train.py --nosave --data data/coco_100img.data --transfer && mv results.txt results3_100imgTL.txt
 # python3 train.py --nosave --data data/coco_1000img.data && mv results.txt results_1000img.txt
 python3 -c "from utils import utils; utils.plot_results()"
--- a/utils/utils.py
+++ b/utils/utils.py
@@ -242,35 +242,37 @@ def wh_iou(box1, box2):
     return inter_area / union_area  # iou
 
 
-def compute_loss(p, targets):  # predictions, targets
+def compute_loss(p, targets, model):  # predictions, targets, model
     ft = torch.cuda.FloatTensor if p[0].is_cuda else torch.Tensor
     lxy, lwh, lcls, lconf = ft([0]), ft([0]), ft([0]), ft([0])
-    txy, twh, tcls, indices = targets
+    txy, twh, tcls, indices = build_targets(model, targets)
 
     # Define criteria
     MSE = nn.MSELoss()
     CE = nn.CrossEntropyLoss()
     BCE = nn.BCEWithLogitsLoss()
 
+    # Compute losses
+    h = model.hyp  # hyperparameters
     bs = p[0].shape[0]  # batch size
     # gp = [x.numel() for x in tconf]  # grid points
+    k = h['k'] * bs  # loss gain
     for i, pi0 in enumerate(p):  # layer i predictions, i
         b, a, gj, gi = indices[i]  # image, anchor, gridx, gridy
         tconf = torch.zeros_like(pi0[..., 0])  # conf
 
-        # Compute losses
-        k = 8.4875 * bs
         if len(b):  # number of targets
             pi = pi0[b, a, gj, gi]  # predictions closest to anchors
             tconf[b, a, gj, gi] = 1  # conf
             # pi[..., 2:4] = torch.sigmoid(pi[..., 2:4])  # wh power loss (uncomment)
 
-            lxy += (k * 0.079756) * MSE(torch.sigmoid(pi[..., 0:2]), txy[i])  # xy loss
-            lwh += (k * 0.010461) * MSE(pi[..., 2:4], twh[i])  # wh yolo loss
-            # lwh += (k * 0.010461) * MSE(torch.sigmoid(pi[..., 2:4]), twh[i])  # wh power loss
-            lcls += (k * 0.02105) * CE(pi[..., 5:], tcls[i])  # class_conf loss
+            lxy += (k * h['xy']) * MSE(torch.sigmoid(pi[..., 0:2]), txy[i])  # xy loss
+            lwh += (k * h['wh']) * MSE(pi[..., 2:4], twh[i])  # wh yolo loss
+            lcls += (k * h['cls']) * CE(pi[..., 5:], tcls[i])  # class_conf loss
 
         # pos_weight = ft([gp[i] / min(gp) * 4.])
         # BCE = nn.BCEWithLogitsLoss(pos_weight=pos_weight)
-        lconf += (k * 0.88873) * BCE(pi0[..., 4], tconf)  # obj_conf loss
+        lconf += (k * h['conf']) * BCE(pi0[..., 4], tconf)  # obj_conf loss
     loss = lxy + lwh + lconf + lcls
 
     return loss, torch.cat((lxy, lwh, lconf, lcls, loss)).detach()
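Note: the refactor is numerically a no-op, since hyp reproduces the old hardcoded constants; what changes is that the gain k and the per-term fractions now come from model.hyp. Because k scales with batch size, at an assumed bs = 16 (illustrative, not from this diff) the effective weights are:

hyp = {'k': 8.4875, 'xy': 0.079756, 'wh': 0.010461, 'cls': 0.02105, 'conf': 0.88873}
k = hyp['k'] * 16  # loss gain at an assumed batch size of 16
for name in ('xy', 'wh', 'cls', 'conf'):
    print(name, k * hyp[name])  # xy ~10.8, wh ~1.42, cls ~2.86, conf ~120.7

so the objectness term dominates the total loss by an order of magnitude.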
@@ -296,7 +298,7 @@ def build_targets(model, targets):
         # reject below threshold ious (OPTIONAL, increases P, lowers R)
         reject = True
         if reject:
-            j = iou > 0.10
+            j = iou > model.hyp['iou_t']  # hyperparameter
             t, a, gwh = targets[j], a[j], gwh[j]
 
         # Indices
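Note: as with the loss fractions, the rejection threshold moves into the hyperparameter dict unchanged (hyp['iou_t'] = 0.1 matches the old hardcoded 0.10), so anchor-target pairs with IoU of 0.1 or less are still discarded before the indices are built.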