This commit is contained in:
Glenn Jocher 2019-07-15 17:54:31 +02:00
parent 96e25462e8
commit 8501aed49f
4 changed files with 24 additions and 19 deletions

View File

@ -50,7 +50,7 @@ https://colab.research.google.com/drive/1G8T-VFxQkjDe4idzN8F-hbIBqkkkQnxw
**Start Training:** `python3 train.py` to begin training after downloading COCO data with `data/get_coco_dataset.sh`.
**Resume Training:** `python3 train.py --resume` to resume training from `weights/latest.pt`.
**Resume Training:** `python3 train.py --resume` to resume training from `weights/last.pt`.
Each epoch trains on 117,263 images from the COCO train and validation sets, and tests on 5,000 images from the COCO validation set. Default training settings produce the loss plots below, with a **training speed of 0.25 s/batch on a V100 GPU (almost 50 COCO epochs/day)**.
@ -136,8 +136,9 @@ Success: converted 'weights/yolov3-spp.pt' to 'converted.weights'
# mAP
- Use `test.py --weights weights/yolov3.weights` to test the official YOLOv3 weights.
- Use `test.py --weights weights/latest.pt` to test the latest training results.
- `test.py --weights weights/yolov3.weights` to test the official YOLOv3 weights.
- `test.py --weights weights/last.pt` to test the most recent checkpoint.
- `test.py --weights weights/best.pt` to test the best checkpoint.
- Compare to the published Darknet results: https://arxiv.org/abs/1804.02767
<!---

View File

@ -45,7 +45,7 @@ def train(cfg,
# Initialize
init_seeds()
weights = 'weights' + os.sep
latest = weights + 'latest.pt'
last = weights + 'last.pt'
best = weights + 'best.pt'
device = torch_utils.select_device()
multi_scale = opt.multi_scale
@ -79,10 +79,10 @@ def train(cfg,
for p in model.parameters():
p.requires_grad = True if p.shape[0] == nf else False
else: # resume from latest.pt
else: # resume from last.pt
if opt.bucket:
os.system('gsutil cp gs://%s/latest.pt %s' % (opt.bucket, latest)) # download from bucket
chkpt = torch.load(latest, map_location=device) # load checkpoint
os.system('gsutil cp gs://%s/last.pt %s' % (opt.bucket, last)) # download from bucket
chkpt = torch.load(last, map_location=device) # load checkpoint
model.load_state_dict(chkpt['model'])
if chkpt['optimizer'] is not None:
@ -273,10 +273,10 @@ def train(cfg,
model) is nn.parallel.DistributedDataParallel else model.state_dict(),
'optimizer': optimizer.state_dict()}
# Save latest checkpoint
torch.save(chkpt, latest)
# Save last checkpoint
torch.save(chkpt, last)
if opt.bucket:
os.system('gsutil cp %s gs://%s' % (latest, opt.bucket)) # upload to bucket
os.system('gsutil cp %s gs://%s' % (last, opt.bucket)) # upload to bucket
# Save best checkpoint
if best_fitness == fitness:

View File

@ -181,9 +181,13 @@ class LoadImagesAndLabels(Dataset): # for training/testing
s = [exif_size(Image.open(f)) for f in tqdm(self.img_files, desc='Reading image shapes')]
np.savetxt(sp, s, fmt='%g')
try:
with open(sp, 'r') as f: # read existing shapefile
s = np.array([x.split() for x in f.read().splitlines()], dtype=np.float64)
assert len(s) == n, 'Shapefile error. Please delete %s and rerun' % sp # TODO: auto-delete shapefile
assert len(s) == n, 'Shapefile out of sync'
except:
os.remove(sp)
print('Shapefile deleted: %s. Please rerun again.' % sp)
# Sort by aspect ratio
ar = s[:, 1] / s[:, 0] # aspect ratio

View File

@ -35,12 +35,12 @@ git pull https://github.com/ultralytics/yolov3 test # branch
# Test Darknet training
python3 test.py --weights ../darknet/backup/yolov3.backup
# Copy latest.pt TO bucket
gsutil cp yolov3/weights/latest1gpu.pt gs://ultralytics
# Copy last.pt TO bucket
gsutil cp yolov3/weights/last1gpu.pt gs://ultralytics
# Copy latest.pt FROM bucket
gsutil cp gs://ultralytics/latest.pt yolov3/weights/latest.pt
wget https://storage.googleapis.com/ultralytics/yolov3/latest_v1_0.pt -O weights/latest_v1_0.pt
# Copy last.pt FROM bucket
gsutil cp gs://ultralytics/last.pt yolov3/weights/last.pt
wget https://storage.googleapis.com/ultralytics/yolov3/last_v1_0.pt -O weights/last_v1_0.pt
wget https://storage.googleapis.com/ultralytics/yolov3/best_v1_0.pt -O weights/best_v1_0.pt
# Reproduce tutorials
@ -94,7 +94,7 @@ python3 test.py --data ../supermarket2/supermarket2.data --weights ../darknet/ba
# Debug/Development
python3 train.py --data data/coco.data --img-size 320 --single-scale --batch-size 64 --accumulate 1 --epochs 1 --evolve --giou
python3 test.py --weights weights/latest.pt --cfg cfg/yolov3-spp.cfg --img-size 320
python3 test.py --weights weights/last.pt --cfg cfg/yolov3-spp.cfg --img-size 320
gsutil cp evolve.txt gs://ultralytics
sudo shutdown