diff --git a/our_scripts/config_bayes.yml b/our_scripts/config_bayes.yml index 8d14a578..05ae609b 100644 --- a/our_scripts/config_bayes.yml +++ b/our_scripts/config_bayes.yml @@ -1,12 +1,11 @@ bayes: - iterations: 2 + iterations: 10 train: epochs: type: discrete - values: [10] + values: [30] batch-size: type: discrete - #values: [128] min: 1 max: 5 step: 1 @@ -18,12 +17,12 @@ train: img-size-start: type: discrete min: 512 - max: 576 + max: 1088 step: 64 img-size-end: type: discrete min: 512 - max: 576 + max: 1088 step: 64 rect: type: discrete @@ -122,16 +121,16 @@ detect: test-img-size: type: discrete min: 512 - max: 576 + max: 1088 step: 64 conf-thres: type: continuous - min: 0.0 - max: 1.0 + min: 0.3 + max: 0.6 iou-thres: type: continuous - min: 0.0 - max: 1.0 + min: 0.3 + max: 0.6 classes: agnostic-nms: augment: diff --git a/our_scripts/out_logs_new.txt b/our_scripts/out_logs_new.txt new file mode 100644 index 00000000..a581d799 --- /dev/null +++ b/our_scripts/out_logs_new.txt @@ -0,0 +1,651066 @@ +_______ CALLING TRAINING SCRIPT _______ +python -u train.py --epochs 30 --batch-size 3 --cfg ./cfg/yolov3-spp-21cls.cfg --data ./data/widok_01_21.data --multi-scale --img-size 1024 704 --weights ./weights/yolov3-spp-ultralytics.pt --device 1 --adam --freeze-layers --experiment-dir ./experiments --hyp "{'giou': 8.72497790514975, 'cls': 49.37844321472914, 'cls_pw': 3.6578895439524595, 'obj': 63.42480951252861, 'obj_pw': 9.578792456250868, 'iou_t': 0.5669387805548604, 'lr0': 0.06478404735808212, 'lrf': 0.018463276617118017, 'momentum': 0.24776714263681232, 'weight_decay': 0.4899225685313834, 'fl_gamma': 9.52623979196223, 'hsv_h': 0.7020795421755219, 'hsv_s': 0.7504070976604654, 'hsv_v': 0.8091596229110347, 'degrees': 23.621344196662363, 'translate': 0.021648372813146022, 'scale': 0.5519108532117785, 'shear': 0.9877987507753335}" +Using CUDA Apex device0 _CudaDeviceProperties(name='GeForce RTX 2080 Ti', total_memory=11019MB) + + + +Start Tensorboard with "tensorboard --logdir=runs", view at http://localhost:6006/ + +Model Summary: 225 layers, 6.2681e+07 parameters, 6.2681e+07 gradients + +Optimizer groups: 76 .bias, 76 Conv2d.weight, 73 other + + + + 0%| | 0/4788 [00:00 + + train(hyp) # train normally + + File "train.py", line 275, in train + + img_size = random.randrange(grid_min, grid_max + 1) * gs + + File "/home/tomekb/miniconda3/envs/conda3.7/lib/python3.7/random.py", line 200, in randrange + + raise ValueError("empty range for randrange() (%d,%d, %d)" % (istart, istop, width)) + +ValueError: empty range for randrange() (32,23, -9) + +An error occured during running training-detect-confussion process + Traceback (most recent call last): + File "run_yolov3_process_bayes.py", line 144, in yolov3 + call_training_script(bayes_hyps) + File "run_yolov3_process_bayes.py", line 46, in call_training_script + call_subprocess(cmd) + File "/home/tomekb/yolov3/our_scripts/utils.py", line 14, in call_subprocess + raise RuntimeError("An error occured during calling subprocess") +RuntimeError: An error occured during calling subprocess + +Returning 1 from current bayessian iteration +_______ CALLING TRAINING SCRIPT _______ +python -u train.py --epochs 30 --batch-size 1 --cfg ./cfg/yolov3-spp-21cls.cfg --data ./data/widok_01_21.data --img-size 1024 896 --weights ./weights/yolov3-spp-ultralytics.pt --device 1 --adam --freeze-layers --experiment-dir ./experiments --hyp "{'giou': 6.010451423165159, 'cls': 88.55288021287316, 'cls_pw': 2.210308239731712, 'obj': 95.47184660879452, 'obj_pw': 7.6436506785692675, 'iou_t': 0.6101966321643838, 'lr0': 0.007448522026722432, 'lrf': 0.019946569357479824, 'momentum': 0.886207951831397, 'weight_decay': 0.7187447196646981, 'fl_gamma': 0.146082232009781, 'hsv_h': 0.331832818918261, 'hsv_s': 0.3171862014440885, 'hsv_v': 0.10800886594971348, 'degrees': 15.499103662707274, 'translate': 0.23669469812333876, 'scale': 0.9388032247289597, 'shear': 0.8438005813149901}" +Using CUDA Apex device0 _CudaDeviceProperties(name='GeForce RTX 2080 Ti', total_memory=11019MB) + + + +Start Tensorboard with "tensorboard --logdir=runs", view at http://localhost:6006/ + +Model Summary: 225 layers, 6.2681e+07 parameters, 6.2681e+07 gradients + +Optimizer groups: 76 .bias, 76 Conv2d.weight, 73 other + + + + 0%| | 0/4788 [00:00 + + train(hyp) # train normally + + File "train.py", line 275, in train + + img_size = random.randrange(grid_min, grid_max + 1) * gs + + File "/home/tomekb/miniconda3/envs/conda3.7/lib/python3.7/random.py", line 200, in randrange + + raise ValueError("empty range for randrange() (%d,%d, %d)" % (istart, istop, width)) + +ValueError: empty range for randrange() (32,29, -3) + +An error occured during running training-detect-confussion process + Traceback (most recent call last): + File "run_yolov3_process_bayes.py", line 144, in yolov3 + call_training_script(bayes_hyps) + File "run_yolov3_process_bayes.py", line 46, in call_training_script + call_subprocess(cmd) + File "/home/tomekb/yolov3/our_scripts/utils.py", line 14, in call_subprocess + raise RuntimeError("An error occured during calling subprocess") +RuntimeError: An error occured during calling subprocess + +Returning 1 from current bayessian iteration +_______ CALLING TRAINING SCRIPT _______ +python -u train.py --epochs 30 --batch-size 3 --cfg ./cfg/yolov3-spp-21cls.cfg --data ./data/widok_01_21.data --img-size 704 896 --weights ./weights/yolov3-spp-ultralytics.pt --device 1 --adam --freeze-layers --experiment-dir ./experiments --hyp "{'giou': 1.7038973686987091, 'cls': 79.80453825073883, 'cls_pw': 7.641403990897682, 'obj': 98.0896723462599, 'obj_pw': 3.195427785893025, 'iou_t': 0.7615362710464499, 'lr0': 0.006154266042305081, 'lrf': 0.06391302389198193, 'momentum': 0.03989930189437496, 'weight_decay': 0.6970892851738646, 'fl_gamma': 5.000595598041961, 'hsv_h': 0.03033946882347094, 'hsv_s': 0.24492934923224274, 'hsv_v': 0.46951538847468355, 'degrees': 3.438444080389489, 'translate': 0.6732897446397307, 'scale': 0.2543620463416282, 'shear': 0.6069596451995198}" +Using CUDA Apex device0 _CudaDeviceProperties(name='GeForce RTX 2080 Ti', total_memory=11019MB) + + + +Start Tensorboard with "tensorboard --logdir=runs", view at http://localhost:6006/ + +Model Summary: 225 layers, 6.2681e+07 parameters, 6.2681e+07 gradients + +Optimizer groups: 76 .bias, 76 Conv2d.weight, 73 other + + + + 0%| | 0/4788 [00:00 + + train(hyp) # train normally + + File "train.py", line 336, in train + + multi_label=ni > n_burn) + + File "/home/tomekb/yolov3/test.py", line 99, in test + + output = non_max_suppression(inf_out, conf_thres=conf_thres, iou_thres=iou_thres, multi_label=multi_label) + + File "/home/tomekb/yolov3/utils/utils.py", line 537, in non_max_suppression + + i = torchvision.ops.boxes.nms(boxes, scores, iou_thres) + + File "/home/tomekb/miniconda3/envs/conda3.7/lib/python3.7/site-packages/torchvision/ops/boxes.py", line 35, in nms + + return torch.ops.torchvision.nms(boxes, scores, iou_threshold) + +RuntimeError: CUDA out of memory. Tried to allocate 5.17 GiB (GPU 0; 10.76 GiB total capacity; 702.80 MiB already allocated; 5.11 GiB free; 4.73 GiB reserved in total by PyTorch) + +An error occured during running training-detect-confussion process + Traceback (most recent call last): + File "run_yolov3_process_bayes.py", line 144, in yolov3 + call_training_script(bayes_hyps) + File "run_yolov3_process_bayes.py", line 46, in call_training_script + call_subprocess(cmd) + File "/home/tomekb/yolov3/our_scripts/utils.py", line 14, in call_subprocess + raise RuntimeError("An error occured during calling subprocess") +RuntimeError: An error occured during calling subprocess + +Returning 1 from current bayessian iteration +num acquisition: 1, time elapsed: 895.96s +_______ CALLING TRAINING SCRIPT _______ +python -u train.py --epochs 30 --batch-size 3 --cfg ./cfg/yolov3-spp-21cls.cfg --data ./data/widok_01_21.data --multi-scale --img-size 1024 768 --weights ./weights/yolov3-spp-ultralytics.pt --device 1 --adam --freeze-layers --experiment-dir ./experiments --hyp "{'giou': 7.146189460666151, 'cls': 94.09412817288651, 'cls_pw': 8.78976713129892, 'obj': 26.191629845057463, 'obj_pw': 4.481612001146913, 'iou_t': 0.09901495330879251, 'lr0': 0.09714478226489481, 'lrf': 0.02319169037720873, 'momentum': 0.5978813251597963, 'weight_decay': 0.6603599231844964, 'fl_gamma': 4.943248509206138, 'hsv_h': 0.9630045019883604, 'hsv_s': 0.2617348232472283, 'hsv_v': 0.07201256895762376, 'degrees': 5.438995944523822, 'translate': 0.2806287538051211, 'scale': 0.4841087901678365, 'shear': 0.37700241840983606}" +Using CUDA Apex device0 _CudaDeviceProperties(name='GeForce RTX 2080 Ti', total_memory=11019MB) + + + +Start Tensorboard with "tensorboard --logdir=runs", view at http://localhost:6006/ + +Model Summary: 225 layers, 6.2681e+07 parameters, 6.2681e+07 gradients + +Optimizer groups: 76 .bias, 76 Conv2d.weight, 73 other + + + + 0%| | 0/4788 [00:00 + + train(hyp) # train normally + + File "train.py", line 275, in train + + img_size = random.randrange(grid_min, grid_max + 1) * gs + + File "/home/tomekb/miniconda3/envs/conda3.7/lib/python3.7/random.py", line 200, in randrange + + raise ValueError("empty range for randrange() (%d,%d, %d)" % (istart, istop, width)) + +ValueError: empty range for randrange() (32,25, -7) + +An error occured during running training-detect-confussion process + Traceback (most recent call last): + File "run_yolov3_process_bayes.py", line 144, in yolov3 + call_training_script(bayes_hyps) + File "run_yolov3_process_bayes.py", line 46, in call_training_script + call_subprocess(cmd) + File "/home/tomekb/yolov3/our_scripts/utils.py", line 14, in call_subprocess + raise RuntimeError("An error occured during calling subprocess") +RuntimeError: An error occured during calling subprocess + +Returning 1 from current bayessian iteration +num acquisition: 2, time elapsed: 900.92s +_______ CALLING TRAINING SCRIPT _______ +python -u train.py --epochs 30 --batch-size 3 --cfg ./cfg/yolov3-spp-21cls.cfg --data ./data/widok_01_21.data --multi-scale --img-size 640 704 --weights ./weights/yolov3-spp-ultralytics.pt --device 1 --adam --freeze-layers --experiment-dir ./experiments --hyp "{'giou': 6.313256877452881, 'cls': 34.999218580331444, 'cls_pw': 5.438396324649241, 'obj': 89.4862224157707, 'obj_pw': 6.724640386020911, 'iou_t': 0.71919193845806, 'lr0': 0.034745444052488327, 'lrf': 0.08571450911867988, 'momentum': 0.026499561286118967, 'weight_decay': 0.3677084109953611, 'fl_gamma': 1.7202371853925624, 'hsv_h': 0.030018797801517505, 'hsv_s': 0.10262883913858056, 'hsv_v': 0.6249211507435959, 'degrees': 9.00362513941426, 'translate': 0.9943085918713593, 'scale': 0.9754826652751983, 'shear': 0.9662288908007186}" +Using CUDA Apex device0 _CudaDeviceProperties(name='GeForce RTX 2080 Ti', total_memory=11019MB) + + + +Start Tensorboard with "tensorboard --logdir=runs", view at http://localhost:6006/ + +Model Summary: 225 layers, 6.2681e+07 parameters, 6.2681e+07 gradients + +Optimizer groups: 76 .bias, 76 Conv2d.weight, 73 other + + + + 0%| | 0/4788 [00:00 ./experiments/2020-07-27_20-05-32/confussion-matrix.tsv +An error occured during running training-detect-confussion process + Traceback (most recent call last): + File "run_yolov3_process_bayes.py", line 152, in yolov3 + y_val = 1 - ((y_dict['match'] * 10 - y_dict['false positives'] * 3) / y_dict['mistakes']) +ZeroDivisionError: division by zero + +Returning 1 from current bayessian iteration +num acquisition: 3, time elapsed: 9178.96s +_______ CALLING TRAINING SCRIPT _______ +python -u train.py --epochs 30 --batch-size 2 --cfg ./cfg/yolov3-spp-21cls.cfg --data ./data/widok_01_21.data --img-size 832 960 --weights ./weights/yolov3-spp-ultralytics.pt --device 1 --adam --freeze-layers --experiment-dir ./experiments --hyp "{'giou': 4.623461586044252, 'cls': 61.383559105749896, 'cls_pw': 2.6476226228621047, 'obj': 75.34258597434437, 'obj_pw': 3.0962823299661677, 'iou_t': 0.5381671587676429, 'lr0': 0.08110329920097356, 'lrf': 0.06054484048151723, 'momentum': 0.6365909459313417, 'weight_decay': 0.7623395511207529, 'fl_gamma': 8.526778403592587, 'hsv_h': 0.6947812504859171, 'hsv_s': 0.28654207778660146, 'hsv_v': 0.15309066407904748, 'degrees': 29.215210977715575, 'translate': 0.8357464975850669, 'scale': 0.5978816730945402, 'shear': 0.2317535796063369}" +Using CUDA Apex device0 _CudaDeviceProperties(name='GeForce RTX 2080 Ti', total_memory=11019MB) + + + +Start Tensorboard with "tensorboard --logdir=runs", view at http://localhost:6006/ + +Model Summary: 225 layers, 6.2681e+07 parameters, 6.2681e+07 gradients + +Optimizer groups: 76 .bias, 76 Conv2d.weight, 73 other + + + + 0%| | 0/4788 [00:00 + + train(hyp) # train normally + + File "train.py", line 336, in train + + multi_label=ni > n_burn) + + File "/home/tomekb/yolov3/test.py", line 99, in test + + output = non_max_suppression(inf_out, conf_thres=conf_thres, iou_thres=iou_thres, multi_label=multi_label) + + File "/home/tomekb/yolov3/utils/utils.py", line 537, in non_max_suppression + + i = torchvision.ops.boxes.nms(boxes, scores, iou_thres) + + File "/home/tomekb/miniconda3/envs/conda3.7/lib/python3.7/site-packages/torchvision/ops/boxes.py", line 35, in nms + + return torch.ops.torchvision.nms(boxes, scores, iou_threshold) + +RuntimeError: CUDA out of memory. Tried to allocate 6.04 GiB (GPU 0; 10.76 GiB total capacity; 677.92 MiB already allocated; 5.17 GiB free; 4.67 GiB reserved in total by PyTorch) + +An error occured during running training-detect-confussion process + Traceback (most recent call last): + File "run_yolov3_process_bayes.py", line 144, in yolov3 + call_training_script(bayes_hyps) + File "run_yolov3_process_bayes.py", line 46, in call_training_script + call_subprocess(cmd) + File "/home/tomekb/yolov3/our_scripts/utils.py", line 14, in call_subprocess + raise RuntimeError("An error occured during calling subprocess") +RuntimeError: An error occured during calling subprocess + +Returning 1 from current bayessian iteration +num acquisition: 4, time elapsed: 10609.60s +_______ CALLING TRAINING SCRIPT _______ +python -u train.py --epochs 30 --batch-size 3 --cfg ./cfg/yolov3-spp-21cls.cfg --data ./data/widok_01_21.data --multi-scale --img-size 640 832 --weights ./weights/yolov3-spp-ultralytics.pt --device 1 --adam --freeze-layers --experiment-dir ./experiments --hyp "{'giou': 9.549235370223307, 'cls': 56.2268071833936, 'cls_pw': 5.90919060262285, 'obj': 25.976376558768102, 'obj_pw': 5.141310140955158, 'iou_t': 0.7155329731241602, 'lr0': 0.046525720369811376, 'lrf': 0.06913493953291242, 'momentum': 0.6465608682422157, 'weight_decay': 0.5006409265110087, 'fl_gamma': 7.405891238589323, 'hsv_h': 0.961351275406733, 'hsv_s': 0.8038414263264594, 'hsv_v': 0.45859659526460983, 'degrees': 15.293788395057776, 'translate': 0.14029533080714618, 'scale': 0.34277550069222773, 'shear': 0.012166569537150451}" +Using CUDA Apex device0 _CudaDeviceProperties(name='GeForce RTX 2080 Ti', total_memory=11019MB) + + + +Start Tensorboard with "tensorboard --logdir=runs", view at http://localhost:6006/ + +Model Summary: 225 layers, 6.2681e+07 parameters, 6.2681e+07 gradients + +Optimizer groups: 76 .bias, 76 Conv2d.weight, 73 other + + + + 0%| | 0/4788 [00:00 + + train(hyp) # train normally + + File "train.py", line 336, in train + + multi_label=ni > n_burn) + + File "/home/tomekb/yolov3/test.py", line 99, in test + + output = non_max_suppression(inf_out, conf_thres=conf_thres, iou_thres=iou_thres, multi_label=multi_label) + + File "/home/tomekb/yolov3/utils/utils.py", line 537, in non_max_suppression + + i = torchvision.ops.boxes.nms(boxes, scores, iou_thres) + + File "/home/tomekb/miniconda3/envs/conda3.7/lib/python3.7/site-packages/torchvision/ops/boxes.py", line 35, in nms + + return torch.ops.torchvision.nms(boxes, scores, iou_threshold) + +RuntimeError: CUDA out of memory. Tried to allocate 5.37 GiB (GPU 0; 10.76 GiB total capacity; 670.48 MiB already allocated; 5.11 GiB free; 4.73 GiB reserved in total by PyTorch) + +An error occured during running training-detect-confussion process + Traceback (most recent call last): + File "run_yolov3_process_bayes.py", line 144, in yolov3 + call_training_script(bayes_hyps) + File "run_yolov3_process_bayes.py", line 46, in call_training_script + call_subprocess(cmd) + File "/home/tomekb/yolov3/our_scripts/utils.py", line 14, in call_subprocess + raise RuntimeError("An error occured during calling subprocess") +RuntimeError: An error occured during calling subprocess + +Returning 1 from current bayessian iteration +num acquisition: 5, time elapsed: 11401.26s +_______ CALLING TRAINING SCRIPT _______ +python -u train.py --epochs 30 --batch-size 4 --cfg ./cfg/yolov3-spp-21cls.cfg --data ./data/widok_01_21.data --img-size 960 1024 --weights ./weights/yolov3-spp-ultralytics.pt --device 1 --adam --freeze-layers --experiment-dir ./experiments --hyp "{'giou': 4.246756725292756, 'cls': 11.198057186315507, 'cls_pw': 3.098304619053107, 'obj': 62.61296660237411, 'obj_pw': 8.198351971243412, 'iou_t': 0.9968222134402118, 'lr0': 0.08781751025223361, 'lrf': 0.0721137300843447, 'momentum': 0.4849862271081299, 'weight_decay': 0.4682616773016629, 'fl_gamma': 1.0393521599184086, 'hsv_h': 0.10854949672981251, 'hsv_s': 0.6303870219739742, 'hsv_v': 0.5257176961368889, 'degrees': 22.418402657971637, 'translate': 0.31082264570582063, 'scale': 0.29282786854129794, 'shear': 0.01941008975448877}" +Using CUDA Apex device0 _CudaDeviceProperties(name='GeForce RTX 2080 Ti', total_memory=11019MB) + + + +Start Tensorboard with "tensorboard --logdir=runs", view at http://localhost:6006/ + +Model Summary: 225 layers, 6.2681e+07 parameters, 6.2681e+07 gradients + +Optimizer groups: 76 .bias, 76 Conv2d.weight, 73 other + + + + 0%| | 0/4788 [00:00 ./experiments/2020-07-27_21-53-25/confussion-matrix.tsv +An error occured during running training-detect-confussion process + Traceback (most recent call last): + File "run_yolov3_process_bayes.py", line 152, in yolov3 + y_val = 1 - ((y_dict['match'] * 10 - y_dict['false positives'] * 3) / y_dict['mistakes']) +ZeroDivisionError: division by zero + +Returning 1 from current bayessian iteration +num acquisition: 6, time elapsed: 15663.69s +_______ CALLING TRAINING SCRIPT _______ +python -u train.py --epochs 30 --batch-size 3 --cfg ./cfg/yolov3-spp-21cls.cfg --data ./data/widok_01_21.data --img-size 768 1024 --weights ./weights/yolov3-spp-ultralytics.pt --device 1 --adam --freeze-layers --experiment-dir ./experiments --hyp "{'giou': 2.7624106325181996, 'cls': 61.50183843060771, 'cls_pw': 8.469222909088703, 'obj': 56.22721762230748, 'obj_pw': 5.8281454817980505, 'iou_t': 0.34606360862484564, 'lr0': 0.06079478524038113, 'lrf': 0.06826768010503577, 'momentum': 0.9208798498843362, 'weight_decay': 0.5892609404206524, 'fl_gamma': 3.6350123658220346, 'hsv_h': 0.7056722377887225, 'hsv_s': 0.12329600034224986, 'hsv_v': 0.9382549731758573, 'degrees': 27.59828302534629, 'translate': 0.5629202322998729, 'scale': 0.5874782553728248, 'shear': 0.4745581471925713}" +Using CUDA Apex device0 _CudaDeviceProperties(name='GeForce RTX 2080 Ti', total_memory=11019MB) + + + +Start Tensorboard with "tensorboard --logdir=runs", view at http://localhost:6006/ + +Model Summary: 225 layers, 6.2681e+07 parameters, 6.2681e+07 gradients + +Optimizer groups: 76 .bias, 76 Conv2d.weight, 73 other + + + + 0%| | 0/4788 [00:00 + + train(hyp) # train normally + + File "train.py", line 336, in train + + multi_label=ni > n_burn) + + File "/home/tomekb/yolov3/test.py", line 99, in test + + output = non_max_suppression(inf_out, conf_thres=conf_thres, iou_thres=iou_thres, multi_label=multi_label) + + File "/home/tomekb/yolov3/utils/utils.py", line 537, in non_max_suppression + + i = torchvision.ops.boxes.nms(boxes, scores, iou_thres) + + File "/home/tomekb/miniconda3/envs/conda3.7/lib/python3.7/site-packages/torchvision/ops/boxes.py", line 35, in nms + + return torch.ops.torchvision.nms(boxes, scores, iou_threshold) + +RuntimeError: CUDA out of memory. Tried to allocate 5.07 GiB (GPU 0; 10.76 GiB total capacity; 722.74 MiB already allocated; 5.01 GiB free; 4.83 GiB reserved in total by PyTorch) + +An error occured during running training-detect-confussion process + Traceback (most recent call last): + File "run_yolov3_process_bayes.py", line 144, in yolov3 + call_training_script(bayes_hyps) + File "run_yolov3_process_bayes.py", line 46, in call_training_script + call_subprocess(cmd) + File "/home/tomekb/yolov3/our_scripts/utils.py", line 14, in call_subprocess + raise RuntimeError("An error occured during calling subprocess") +RuntimeError: An error occured during calling subprocess + +Returning 1 from current bayessian iteration +num acquisition: 7, time elapsed: 19288.29s +_______ CALLING TRAINING SCRIPT _______ +python -u train.py --epochs 30 --batch-size 1 --cfg ./cfg/yolov3-spp-21cls.cfg --data ./data/widok_01_21.data --multi-scale --img-size 960 1024 --weights ./weights/yolov3-spp-ultralytics.pt --device 1 --adam --freeze-layers --experiment-dir ./experiments --hyp "{'giou': 3.601286789106768, 'cls': 40.41479520993333, 'cls_pw': 8.072263351594946, 'obj': 93.43525212209255, 'obj_pw': 4.204996875240019, 'iou_t': 0.818581613633346, 'lr0': 0.05698088288204337, 'lrf': 0.05554575943768989, 'momentum': 0.034077353761861495, 'weight_decay': 0.32287984681247117, 'fl_gamma': 2.709306802989163, 'hsv_h': 0.8866403080515353, 'hsv_s': 0.6657477432898976, 'hsv_v': 0.6638833778874473, 'degrees': 19.993680608611307, 'translate': 0.6467890255815546, 'scale': 0.9327764385409818, 'shear': 0.4710194872536998}" +Using CUDA Apex device0 _CudaDeviceProperties(name='GeForce RTX 2080 Ti', total_memory=11019MB) + + + +Start Tensorboard with "tensorboard --logdir=runs", view at http://localhost:6006/ + +Model Summary: 225 layers, 6.2681e+07 parameters, 6.2681e+07 gradients + +Optimizer groups: 76 .bias, 76 Conv2d.weight, 73 other + + + + 0%| | 0/4788 [00:00 + + train(hyp) # train normally + + File "train.py", line 336, in train + + multi_label=ni > n_burn) + + File "/home/tomekb/yolov3/test.py", line 99, in test + + output = non_max_suppression(inf_out, conf_thres=conf_thres, iou_thres=iou_thres, multi_label=multi_label) + + File "/home/tomekb/yolov3/utils/utils.py", line 537, in non_max_suppression + + i = torchvision.ops.boxes.nms(boxes, scores, iou_thres) + + File "/home/tomekb/miniconda3/envs/conda3.7/lib/python3.7/site-packages/torchvision/ops/boxes.py", line 35, in nms + + return torch.ops.torchvision.nms(boxes, scores, iou_threshold) + +RuntimeError: CUDA out of memory. Tried to allocate 6.42 GiB (GPU 0; 10.76 GiB total capacity; 652.40 MiB already allocated; 5.01 GiB free; 4.83 GiB reserved in total by PyTorch) + +An error occured during running training-detect-confussion process + Traceback (most recent call last): + File "run_yolov3_process_bayes.py", line 144, in yolov3 + call_training_script(bayes_hyps) + File "run_yolov3_process_bayes.py", line 46, in call_training_script + call_subprocess(cmd) + File "/home/tomekb/yolov3/our_scripts/utils.py", line 14, in call_subprocess + raise RuntimeError("An error occured during calling subprocess") +RuntimeError: An error occured during calling subprocess + +Returning 1 from current bayessian iteration +num acquisition: 8, time elapsed: 31753.94s +_______ CALLING TRAINING SCRIPT _______ +python -u train.py --epochs 30 --batch-size 1 --cfg ./cfg/yolov3-spp-21cls.cfg --data ./data/widok_01_21.data --multi-scale --img-size 576 960 --weights ./weights/yolov3-spp-ultralytics.pt --device 1 --adam --freeze-layers --experiment-dir ./experiments --hyp "{'giou': 6.048138505765027, 'cls': 76.50129773787151, 'cls_pw': 4.123507076365048, 'obj': 48.12720999798328, 'obj_pw': 0.8552486525937786, 'iou_t': 0.15098540146956585, 'lr0': 0.023869581379601934, 'lrf': 0.03686949827098312, 'momentum': 0.8343698795150166, 'weight_decay': 0.1880353498217422, 'fl_gamma': 8.949542488478707, 'hsv_h': 0.2870057333868685, 'hsv_s': 0.19637192220212063, 'hsv_v': 0.2169847895418775, 'degrees': 10.262968760603645, 'translate': 0.03690325584184029, 'scale': 0.4667914088315841, 'shear': 0.4144343366176697}" +Using CUDA Apex device0 _CudaDeviceProperties(name='GeForce RTX 2080 Ti', total_memory=11019MB) + + + +Start Tensorboard with "tensorboard --logdir=runs", view at http://localhost:6006/ + +Model Summary: 225 layers, 6.2681e+07 parameters, 6.2681e+07 gradients + +Optimizer groups: 76 .bias, 76 Conv2d.weight, 73 other + + + + 0%| | 0/4788 [00:00 + + train(hyp) # train normally + + File "train.py", line 336, in train + + multi_label=ni > n_burn) + + File "/home/tomekb/yolov3/test.py", line 99, in test + + output = non_max_suppression(inf_out, conf_thres=conf_thres, iou_thres=iou_thres, multi_label=multi_label) + + File "/home/tomekb/yolov3/utils/utils.py", line 537, in non_max_suppression + + i = torchvision.ops.boxes.nms(boxes, scores, iou_thres) + + File "/home/tomekb/miniconda3/envs/conda3.7/lib/python3.7/site-packages/torchvision/ops/boxes.py", line 35, in nms + + return torch.ops.torchvision.nms(boxes, scores, iou_threshold) + +RuntimeError: Trying to create tensor with negative dimension -961626254: [-961626254] + +An error occured during running training-detect-confussion process + Traceback (most recent call last): + File "run_yolov3_process_bayes.py", line 144, in yolov3 + call_training_script(bayes_hyps) + File "run_yolov3_process_bayes.py", line 46, in call_training_script + call_subprocess(cmd) + File "/home/tomekb/yolov3/our_scripts/utils.py", line 14, in call_subprocess + raise RuntimeError("An error occured during calling subprocess") +RuntimeError: An error occured during calling subprocess + +Returning 1 from current bayessian iteration +num acquisition: 9, time elapsed: 34651.69s +_______ CALLING TRAINING SCRIPT _______ +python -u train.py --epochs 30 --batch-size 1 --cfg ./cfg/yolov3-spp-21cls.cfg --data ./data/widok_01_21.data --img-size 640 1024 --weights ./weights/yolov3-spp-ultralytics.pt --device 1 --adam --freeze-layers --experiment-dir ./experiments --hyp "{'giou': 9.78331902147645, 'cls': 67.92686466296935, 'cls_pw': 7.563445361920521, 'obj': 70.04003004697609, 'obj_pw': 9.24027457767048, 'iou_t': 0.4717054234555471, 'lr0': 0.023266497240161114, 'lrf': 0.01976363376216637, 'momentum': 0.3952390441220761, 'weight_decay': 0.8272285942424523, 'fl_gamma': 0.25479835194478273, 'hsv_h': 0.08047814066126147, 'hsv_s': 0.38728346932389535, 'hsv_v': 0.4606630937623931, 'degrees': 4.579264609112537, 'translate': 0.15988655515121253, 'scale': 0.2503882837081164, 'shear': 0.5698078076854681}" +Using CUDA Apex device0 _CudaDeviceProperties(name='GeForce RTX 2080 Ti', total_memory=11019MB) + + + +Start Tensorboard with "tensorboard --logdir=runs", view at http://localhost:6006/ + +Model Summary: 225 layers, 6.2681e+07 parameters, 6.2681e+07 gradients + +Optimizer groups: 76 .bias, 76 Conv2d.weight, 73 other + + + + 0%| | 0/4788 [00:00 + + train(hyp) # train normally + + File "train.py", line 294, in train + + scaled_loss.backward() + + File "/home/tomekb/miniconda3/envs/conda3.7/lib/python3.7/contextlib.py", line 119, in __exit__ + + next(self.gen) + + File "/home/tomekb/miniconda3/envs/conda3.7/lib/python3.7/site-packages/apex/amp/handle.py", line 123, in scale_loss + + optimizer._post_amp_backward(loss_scaler) + + File "/home/tomekb/miniconda3/envs/conda3.7/lib/python3.7/site-packages/apex/amp/_process_optimizer.py", line 249, in post_backward_no_master_weights + + post_backward_models_are_masters(scaler, params, stashed_grads) + + File "/home/tomekb/miniconda3/envs/conda3.7/lib/python3.7/site-packages/apex/amp/_process_optimizer.py", line 135, in post_backward_models_are_masters + + scale_override=(grads_have_scale, stashed_have_scale, out_scale)) + + File "/home/tomekb/miniconda3/envs/conda3.7/lib/python3.7/site-packages/apex/amp/scaler.py", line 183, in unscale_with_stashed + + out_scale/grads_have_scale, + +ZeroDivisionError: float division by zero + +An error occured during running training-detect-confussion process + Traceback (most recent call last): + File "run_yolov3_process_bayes.py", line 144, in yolov3 + call_training_script(bayes_hyps) + File "run_yolov3_process_bayes.py", line 46, in call_training_script + call_subprocess(cmd) + File "/home/tomekb/yolov3/our_scripts/utils.py", line 14, in call_subprocess + raise RuntimeError("An error occured during calling subprocess") +RuntimeError: An error occured during calling subprocess + +Returning 1 from current bayessian iteration +num acquisition: 10, time elapsed: 34833.43s diff --git a/our_scripts/run_yolov3_process_bayes.py b/our_scripts/run_yolov3_process_bayes.py index 7e424a6b..7d598ceb 100644 --- a/our_scripts/run_yolov3_process_bayes.py +++ b/our_scripts/run_yolov3_process_bayes.py @@ -8,7 +8,8 @@ import traceback import GPyOpt from config_bayes import Configuration -from utils import call_subprocess, get_values_from_conff_matrix +from utils import call_subprocess, get_values_from_conff_matrix, load_previous_bayes_experiments + dir_path = os.path.dirname(os.path.realpath(__file__)) PROJECT_ROOT = os.path.join(dir_path, '..') @@ -19,7 +20,6 @@ config = Configuration(bayes_config_yaml) date_string = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S') bayes_params_file = open(os.path.join(PROJECT_ROOT, config.experiments.dir, f"{date_string}_bayes_params.txt"), 'a+') - def call_training_script(gaussian_hyps): cmd = 'python -u train.py' cmd += ' --epochs ' + gaussian_hyps['epochs'].__str__() @@ -149,7 +149,7 @@ def yolov3(x): y_dict = get_values_from_conff_matrix(conf_matrix_path) # tutaj wzór na wyliczanie funkcji - y_val = 1 - ((y_dict['match'] * 10 - y_dict['false positives'] * 3) / y_dict['mistakes']) + y_val = 1 - (y_dict['match'] * 10 - y_dict['false positives'] * 3 - y_dict['mistakes']) / y_dict['all labels'] # zapisywanie do pliku zadeklarowanego globalnie line = "\t".join([bayes_hyps.__str__(), str(y_val)]) @@ -168,14 +168,9 @@ def yolov3(x): if __name__ == '__main__': bounds = config.get_bayes_bounds() - # for b in bounds: - # print(b) - - # tutaj będzie wczytywanie z poprzednich eksperymentów plik bayes_params - X = None - Y = None - os.chdir(PROJECT_ROOT) # change to project root directory + # wczytywanie z poprzednich eksperymentów plik bayes_params + X, Y = load_previous_bayes_experiments(config.experiments.dir) bayes_optimizer = GPyOpt.methods.BayesianOptimization(f=yolov3, domain=bounds, X=X, Y=Y, verbosity=True, initial_design_numdata=2) diff --git a/our_scripts/utils.py b/our_scripts/utils.py index f9f9a9af..2732f806 100644 --- a/our_scripts/utils.py +++ b/our_scripts/utils.py @@ -1,5 +1,9 @@ +import ast import io +import os import subprocess +import numpy as np +from glob import glob def call_subprocess(cmd): @@ -53,3 +57,37 @@ def get_bayes_params_as_dict(x): 'conf-thres': float(x[:, 26]), 'iou-thres': float(x[:, 27]) } + + +def load_previous_bayes_experiments(experiments_dir): + paths = list(glob(os.path.join(experiments_dir, '*bayes_params.txt'))) + y_values = [] + x_values = [] + + for p in paths: + file = open(p, 'r') + lines = file.readlines() + for line in lines: + try: + dict_str, y_val = line.split('\t') + bayes_dict = ast.literal_eval(dict_str) + bayes_values = dict_to_numpy(bayes_dict) + x_values.append(bayes_values) + y_values.append(float(y_val)) + except: + raise Exception(f"Cannot parse line {line} from file {p}") + return np.array(x_values), np.array(y_values).reshape((len(y_values), 1)) + + +def dict_to_numpy(d): + x = [] + for key, value in d.items(): + if type(value) == bool: + x.append(int(value)) + elif key == 'img-size': + start_img_size, end_img_size = value.split(" ") + x.append(int(start_img_size)) + x.append(int(end_img_size)) + else: + x.append(float(value)) + return x