From a1151c04a735e90605dbbacc4b30113c646cef66 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Sun, 17 Nov 2019 18:48:50 -0800 Subject: [PATCH] updates --- train.py | 21 ++++++++-------- utils/gcp.sh | 71 +++++----------------------------------------------- 2 files changed, 16 insertions(+), 76 deletions(-) diff --git a/train.py b/train.py index c96969fb..832ce0c2 100644 --- a/train.py +++ b/train.py @@ -104,8 +104,6 @@ def train(): attempt_download(weights) if weights.endswith('.pt'): # pytorch format # possible weights are '*.pt', 'yolov3-spp.pt', 'yolov3-tiny.pt' etc. - if opt.bucket: - os.system('gsutil cp gs://%s/last.pt %s' % (opt.bucket, last)) # download from bucket chkpt = torch.load(weights, map_location=device) # load model @@ -347,8 +345,6 @@ def train(): # Save last checkpoint torch.save(chkpt, last) - if opt.bucket and not opt.prebias: - os.system('gsutil cp %s gs://%s' % (last, opt.bucket)) # upload to bucket # Save best checkpoint if best_fitness == fitness: @@ -365,18 +361,21 @@ def train(): # end training if len(opt.name) and not opt.prebias: - os.rename('results.txt', 'results_%s.txt' % opt.name) - os.rename(wdir + 'last.pt', wdir + 'last_%s.pt' % opt.name) if os.path.exists(wdir + 'last.pt') else None - os.rename(wdir + 'best.pt', wdir + 'best_%s.pt' % opt.name) if os.path.exists(wdir + 'best.pt') else None + fresults, flast, fbest = 'results%s.txt' % opt.name, 'last%s.pt' % opt.name, 'best%s.pt' % opt.name + os.rename('results.txt', fresults) + os.rename(wdir + 'last.pt', wdir + flast) if os.path.exists(wdir + 'last.pt') else None + os.rename(wdir + 'best.pt', wdir + fbest) if os.path.exists(wdir + 'best.pt') else None + + # save to cloud + if opt.bucket: + os.system('gsutil cp %s gs://%s' % (fresults, opt.bucket)) + os.system('gsutil cp %s gs://%s' % (wdir + flast, opt.bucket)) + plot_results() # save as results.png print('%g epochs completed in %.3f hours.\n' % (epoch - start_epoch + 1, (time.time() - t0) / 3600)) dist.destroy_process_group() if torch.cuda.device_count() > 1 else None torch.cuda.empty_cache() - # save to cloud - # os.system('gsutil cp results.txt gs://...') - # os.system('gsutil cp weights/best.pt gs://...') - return results diff --git a/utils/gcp.sh b/utils/gcp.sh index 2fef2b35..028ab47b 100755 --- a/utils/gcp.sh +++ b/utils/gcp.sh @@ -87,32 +87,6 @@ rm -rf darknet && git clone https://github.com/AlexeyAB/darknet && cd darknet && ./darknet detector train ../supermarket2/supermarket2.data ../yolo_v3_spp_pan_scale.cfg darknet53.conv.74 -map -dont_show # train spp ./darknet detector train ../yolov3/data/coco.data ../yolov3-spp.cfg darknet53.conv.74 -map -dont_show # train spp coco -./darknet detector train data/coco.data ../yolov3-spp.cfg darknet53.conv.74 -map -dont_show # train spp -gsutil cp -r backup/*5000.weights gs://sm6/weights -sudo shutdown - - -./darknet detector train ../supermarket2/supermarket2.data ../yolov3-tiny-sm2-1cls.cfg yolov3-tiny.conv.15 -map -dont_show # train tiny -./darknet detector train ../supermarket2/supermarket2.data cfg/yolov3-spp-sm2-1cls.cfg backup/yolov3-spp-sm2-1cls_last.weights # resume -python3 train.py --data ../supermarket2/supermarket2.data --cfg ../yolov3-spp-sm2-1cls.cfg --epochs 100 --num-workers 8 --img-size 320 --nosave # train ultralytics -python3 test.py --data ../supermarket2/supermarket2.data --weights ../darknet/backup/yolov3-spp-sm2-1cls_5000.weights --cfg cfg/yolov3-spp-sm2-1cls.cfg # test -gsutil cp -r backup/*.weights gs://sm6/weights # weights to bucket - -python3 test.py --data ../supermarket2/supermarket2.data --weights weights/yolov3-spp-sm2-1cls_5000.weights --cfg ../yolov3-spp-sm2-1cls.cfg --img-size 320 --conf-thres 0.2 # test -python3 test.py --data ../supermarket2/supermarket2.data --weights weights/yolov3-spp-sm2-1cls-scalexy_125_5000.weights --cfg ../yolov3-spp-sm2-1cls-scalexy_125.cfg --img-size 320 --conf-thres 0.2 # test -python3 test.py --data ../supermarket2/supermarket2.data --weights weights/yolov3-spp-sm2-1cls-scalexy_150_5000.weights --cfg ../yolov3-spp-sm2-1cls-scalexy_150.cfg --img-size 320 --conf-thres 0.2 # test -python3 test.py --data ../supermarket2/supermarket2.data --weights weights/yolov3-spp-sm2-1cls-scalexy_200_5000.weights --cfg ../yolov3-spp-sm2-1cls-scalexy_200.cfg --img-size 320 --conf-thres 0.2 # test -python3 test.py --data ../supermarket2/supermarket2.data --weights ../darknet/backup/yolov3-spp-sm2-1cls-scalexy_variable_5000.weights --cfg ../yolov3-spp-sm2-1cls-scalexy_variable.cfg --img-size 320 --conf-thres 0.2 # test - -python3 train.py --img-size 320 --epochs 27 --batch-size 64 --accumulate 1 --nosave --notest && python3 test.py --weights weights/last.pt --img-size 320 --save-json && sudo shutdown - -# Debug/Development -python3 train.py --data data/coco.data --img-size 320 --single-scale --batch-size 64 --accumulate 1 --epochs 1 --evolve --giou -python3 test.py --weights weights/last.pt --cfg cfg/yolov3-spp.cfg --img-size 320 - -gsutil cp evolve.txt gs://ultralytics -sudo shutdown - #Docker sudo docker kill $(sudo docker ps -q) sudo docker pull ultralytics/yolov3:v0 @@ -124,49 +98,16 @@ do python3 train.py --weights '' --prebias --img-size 512 --batch-size 32 --accumulate 2 --evolve --epochs 27 --bucket yolov4/512_coco_27e --device 0 done -python3 train.py --weights '' --prebias --img-size 512 --batch-size 16 --accumulate 4 --epochs 27 --device 0 + +export tag=ultralytics/yolov3:v1 && sudo docker pull $tag && sudo nvidia-docker run -it --ipc=host --mount type=bind,source="$(pwd)"/coco,target=/usr/src/coco $tag python3 train.py --weights '' --epochs 27 --batch-size 32 --accumulate 2 --prebias --bucket yolov4 --name 63 --device 0 +export tag=ultralytics/yolov3:v2 && sudo docker pull $tag && sudo nvidia-docker run -it --ipc=host --mount type=bind,source="$(pwd)"/coco,target=/usr/src/coco $tag python3 train.py --weights '' --epochs 27 --batch-size 32 --accumulate 2 --prebias --bucket yolov4 --name 64 --device 1 + + + while true; do python3 train.py --data data/coco.data --img-size 320 --batch-size 64 --accumulate 1 --evolve --epochs 1 --adam --bucket yolov4/adamdefaultpw_coco_1e; done -rm -rf yolov3 # Warning: remove existing -git clone https://github.com/ultralytics/yolov3 && cd yolov3 # master -python3 train.py --img-size 320 --data ../data/sm3/out.data --weights weights/yolov3-spp.weights --cfg cfg/yolov3-spp.cfg --prebias --epochs 300 --batch-size 32 --accumulate 2 --multi --name sm3b_yolov3_spp -python3 train.py --img-size 320 --data ../data/sm3/out.data --weights weights/yolov3-tiny.weights --cfg cfg/yolov3-tiny.cfg --prebias --epochs 300 --batch-size 32 --accumulate 2 --multi --name sm3b_yolov3_tiny -sudo shutdown - - -rm -rf yolov3 # Warning: remove existing -git clone https://github.com/ultralytics/yolov3 && cd yolov3 # master -python3 train.py --data data/coco_64img.data --batch-size 16 --accumulate 1 --nosave --weights weights/yolov3-spp.weights --transfer --name yolov3-spp_transfer -python3 train.py --data data/coco_64img.data --batch-size 16 --accumulate 1 --nosave --name from_scratch -python3 train.py --data data/coco_64img.data --batch-size 16 --accumulate 1 --nosave --weights weights/darknet53.conv.74 --name darknet53_backbone -python3 train.py --data data/coco_64img.data --batch-size 16 --accumulate 1 --nosave --weights weights/yolov3-spp.weights --name yolov3-spp_backbone -sudo shutdown - - -rm -rf yolov3 # Warning: remove existing -git clone https://github.com/ultralytics/yolov3 && cd yolov3 # clone -# bash yolov3/data/get_coco_dataset_gdrive.sh # copy COCO2014 dataset (20GB) -python3 train.py --data data/coco_1cls.data --batch-size 5 --accumulate 1 --weights weights/darknet53.conv.74 --nosave --cfg cfg/yolov3-spp.cfg --name 1cls -python3 train.py --data data/coco_1cls.data --batch-size 5 --accumulate 1 --weights weights/darknet53.conv.74 --nosave --cfg cfg/yolov3-spp-1cls.cfg --name 1cls_1clscfg -python3 -c "from utils import utils; utils.plot_results()" # plot as 'results.png' - - -clear -python3 test.py --img-size 320 --save-json --weights weights/last.pt -python3 test.py --img-size 416 --save-json --weights weights/last.pt -python3 test.py --img-size 608 --save-json --weights weights/last.pt -python3 test.py --img-size 640 --save-json --weights weights/last.pt --batch-size 8 -python3 test.py --img-size 800 --save-json --weights weights/last.pt --batch-size 8 -sudo shutdown - - -clear -rm -rf yolov3 # Warning: remove existing -git clone https://github.com/ultralytics/yolov3 && cd yolov3 # clone -python3 train.py --weights '' --img-size 512 --batch-size 32 --accumulate 2 --epochs 27 --prebias --nosave --notest --name 512default -sudo shutdown