weight_decay fix
parent 883ddcc682
commit ff82e4d488

train.py (30 changed lines)
@@ -71,10 +71,18 @@ def train():
     model = Darknet(cfg, arc=opt.arc).to(device)
 
     # Optimizer
-    # optimizer = optim.Adam(model.parameters(), lr=hyp['lr0'], weight_decay=hyp['weight_decay'])
-    # optimizer = AdaBound(model.parameters(), lr=hyp['lr0'], final_lr=0.1)
-    optimizer = optim.SGD(model.parameters(), lr=hyp['lr0'], momentum=hyp['momentum'], weight_decay=hyp['weight_decay'],
-                          nesterov=True)
+    pg0, pg1 = [], []  # optimizer parameter groups
+    for k, v in dict(model.named_parameters()).items():
+        if 'Conv2d.weight' in k:
+            pg1 += [v]  # parameter group 1 (apply weight_decay)
+        else:
+            pg0 += [v]  # parameter group 0
+
+    # optimizer = optim.Adam(pg0, lr=hyp['lr0'])
+    # optimizer = AdaBound(pg0, lr=hyp['lr0'], final_lr=0.1)
+    optimizer = optim.SGD(pg0, lr=hyp['lr0'], momentum=hyp['momentum'], nesterov=True)
+    optimizer.add_param_group({'params': pg1, 'weight_decay': hyp['weight_decay']})  # add pg1 with weight_decay
+    del pg0, pg1
 
     cutoff = -1  # backbone reaches to cutoff layer
     start_epoch = 0
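The hunk above is the actual fix: the old single SGD call applied weight_decay to every parameter, including BatchNorm scales and all biases, which is generally undesirable; the new code decays only Conv2d weights. A minimal runnable sketch of the same two-group pattern, assuming PyTorch and illustrative hyperparameter values (not the repo's actual hyp settings):

import torch.nn as nn
import torch.optim as optim

hyp = {'lr0': 0.01, 'momentum': 0.9, 'weight_decay': 5e-4}  # illustrative values

# Toy model whose parameter names mirror Darknet's ('Conv2d.weight', etc.)
model = nn.Sequential()
model.add_module('Conv2d', nn.Conv2d(3, 16, 3))
model.add_module('BatchNorm2d', nn.BatchNorm2d(16))

pg0, pg1 = [], []  # pg0: no decay (biases, BatchNorm); pg1: decayed conv weights
for k, v in dict(model.named_parameters()).items():
    (pg1 if 'Conv2d.weight' in k else pg0).append(v)

# Build the optimizer on pg0 (weight_decay defaults to 0), then add pg1 with
# decay; add_param_group() fills momentum/nesterov for pg1 from the defaults
# given to the constructor.
optimizer = optim.SGD(pg0, lr=hyp['lr0'], momentum=hyp['momentum'], nesterov=True)
optimizer.add_param_group({'params': pg1, 'weight_decay': hyp['weight_decay']})

for i, g in enumerate(optimizer.param_groups):
    print(i, len(g['params']), g['weight_decay'])
# 0 3 0       <- Conv2d.bias + BatchNorm2d.weight/.bias, no decay
# 1 1 0.0005  <- Conv2d.weight, decayed
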
@@ -112,17 +120,17 @@ def train():
     if opt.transfer or opt.prebias:  # transfer learning edge (yolo) layers
         nf = int(model.module_defs[model.yolo_layers[0] - 1]['filters'])  # yolo layer size (i.e. 255)
 
-        for x in optimizer.param_groups:
-            # lower param count allows more aggressive training settings: ~0.1 lr0, ~0.9 momentum
-            x['lr'] *= 100
-            x['momentum'] *= 0.9
+        for p in optimizer.param_groups:
+            # lower param count allows more aggressive training settings: i.e. SGD ~0.1 lr0, ~0.9 momentum
+            p['lr'] *= 100
+            p['momentum'] *= 0.9
 
         for p in model.parameters():
-            if opt.prebias and p.numel() == nf:  # train yolo biases only
+            if opt.prebias and p.numel() == nf:  # train (yolo biases)
                 p.requires_grad = True
-            elif opt.transfer and p.shape[0] == nf:  # train yolo biases+weights only
+            elif opt.transfer and p.shape[0] == nf:  # train (yolo biases+weights)
                 p.requires_grad = True
-            else:
+            else:  # freeze layer
                 p.requires_grad = False
 
     # Scheduler https://github.com/ultralytics/yolov3/issues/238
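This second hunk only renames the loop variable and tightens the comments; the behavior it documents is the standard requires_grad freeze for transfer learning, where only parameters matching the final YOLO layer size stay trainable. A minimal sketch of that freeze pattern, assuming PyTorch and a toy two-layer model standing in for Darknet:

import torch.nn as nn

model = nn.Sequential(nn.Linear(10, 20), nn.Linear(20, 255))  # stand-in for Darknet
nf = 255  # yolo layer size: (80 COCO classes + 5) * 3 anchors = 255

for p in model.parameters():
    # transfer-style rule: train only params whose leading dim matches nf, freeze the rest
    p.requires_grad = (p.shape[0] == nf)

print([p.requires_grad for p in model.parameters()])
# [False, False, True, True] <- only the last layer's weight and bias still train

Note that train.py flips requires_grad after the optimizer is built, so frozen parameters remain in the param groups; SGD skips any parameter whose .grad is None, so they stay fixed.
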
(second changed file; name not captured)

@@ -18,6 +18,7 @@ sudo shutdown
 rm -rf yolov3  # Warning: remove existing
 git clone https://github.com/ultralytics/yolov3 && cd yolov3 # master
 # git clone -b test --depth 1 https://github.com/ultralytics/yolov3 test  # branch
+python3 train.py --img-size 320 --weights weights/darknet53.conv.74 --epochs 27 --batch-size 64 --accumulate 1
 
 # Train
 python3 train.py