diff --git a/our_scripts/out_logs_new.txt b/our_scripts/out_logs_new.txt deleted file mode 100644 index a581d799..00000000 --- a/our_scripts/out_logs_new.txt +++ /dev/null @@ -1,651066 +0,0 @@ -_______ CALLING TRAINING SCRIPT _______ -python -u train.py --epochs 30 --batch-size 3 --cfg ./cfg/yolov3-spp-21cls.cfg --data ./data/widok_01_21.data --multi-scale --img-size 1024 704 --weights ./weights/yolov3-spp-ultralytics.pt --device 1 --adam --freeze-layers --experiment-dir ./experiments --hyp "{'giou': 8.72497790514975, 'cls': 49.37844321472914, 'cls_pw': 3.6578895439524595, 'obj': 63.42480951252861, 'obj_pw': 9.578792456250868, 'iou_t': 0.5669387805548604, 'lr0': 0.06478404735808212, 'lrf': 0.018463276617118017, 'momentum': 0.24776714263681232, 'weight_decay': 0.4899225685313834, 'fl_gamma': 9.52623979196223, 'hsv_h': 0.7020795421755219, 'hsv_s': 0.7504070976604654, 'hsv_v': 0.8091596229110347, 'degrees': 23.621344196662363, 'translate': 0.021648372813146022, 'scale': 0.5519108532117785, 'shear': 0.9877987507753335}" -Using CUDA Apex device0 _CudaDeviceProperties(name='GeForce RTX 2080 Ti', total_memory=11019MB) - - - -Start Tensorboard with "tensorboard --logdir=runs", view at http://localhost:6006/ - -Model Summary: 225 layers, 6.2681e+07 parameters, 6.2681e+07 gradients - -Optimizer groups: 76 .bias, 76 Conv2d.weight, 73 other - - - - 0%| | 0/4788 [00:00 - - train(hyp) # train normally - - File "train.py", line 275, in train - - img_size = random.randrange(grid_min, grid_max + 1) * gs - - File "/home/tomekb/miniconda3/envs/conda3.7/lib/python3.7/random.py", line 200, in randrange - - raise ValueError("empty range for randrange() (%d,%d, %d)" % (istart, istop, width)) - -ValueError: empty range for randrange() (32,23, -9) - -An error occured during running training-detect-confussion process - Traceback (most recent call last): - File "run_yolov3_process_bayes.py", line 144, in yolov3 - call_training_script(bayes_hyps) - File "run_yolov3_process_bayes.py", line 46, in call_training_script - call_subprocess(cmd) - File "/home/tomekb/yolov3/our_scripts/utils.py", line 14, in call_subprocess - raise RuntimeError("An error occured during calling subprocess") -RuntimeError: An error occured during calling subprocess - -Returning 1 from current bayessian iteration -_______ CALLING TRAINING SCRIPT _______ -python -u train.py --epochs 30 --batch-size 1 --cfg ./cfg/yolov3-spp-21cls.cfg --data ./data/widok_01_21.data --img-size 1024 896 --weights ./weights/yolov3-spp-ultralytics.pt --device 1 --adam --freeze-layers --experiment-dir ./experiments --hyp "{'giou': 6.010451423165159, 'cls': 88.55288021287316, 'cls_pw': 2.210308239731712, 'obj': 95.47184660879452, 'obj_pw': 7.6436506785692675, 'iou_t': 0.6101966321643838, 'lr0': 0.007448522026722432, 'lrf': 0.019946569357479824, 'momentum': 0.886207951831397, 'weight_decay': 0.7187447196646981, 'fl_gamma': 0.146082232009781, 'hsv_h': 0.331832818918261, 'hsv_s': 0.3171862014440885, 'hsv_v': 0.10800886594971348, 'degrees': 15.499103662707274, 'translate': 0.23669469812333876, 'scale': 0.9388032247289597, 'shear': 0.8438005813149901}" -Using CUDA Apex device0 _CudaDeviceProperties(name='GeForce RTX 2080 Ti', total_memory=11019MB) - - - -Start Tensorboard with "tensorboard --logdir=runs", view at http://localhost:6006/ - -Model Summary: 225 layers, 6.2681e+07 parameters, 6.2681e+07 gradients - -Optimizer groups: 76 .bias, 76 Conv2d.weight, 73 other - - - - 0%| | 0/4788 [00:00 - - train(hyp) # train normally - - File "train.py", line 275, in train - - img_size = random.randrange(grid_min, grid_max + 1) * gs - - File "/home/tomekb/miniconda3/envs/conda3.7/lib/python3.7/random.py", line 200, in randrange - - raise ValueError("empty range for randrange() (%d,%d, %d)" % (istart, istop, width)) - -ValueError: empty range for randrange() (32,29, -3) - -An error occured during running training-detect-confussion process - Traceback (most recent call last): - File "run_yolov3_process_bayes.py", line 144, in yolov3 - call_training_script(bayes_hyps) - File "run_yolov3_process_bayes.py", line 46, in call_training_script - call_subprocess(cmd) - File "/home/tomekb/yolov3/our_scripts/utils.py", line 14, in call_subprocess - raise RuntimeError("An error occured during calling subprocess") -RuntimeError: An error occured during calling subprocess - -Returning 1 from current bayessian iteration -_______ CALLING TRAINING SCRIPT _______ -python -u train.py --epochs 30 --batch-size 3 --cfg ./cfg/yolov3-spp-21cls.cfg --data ./data/widok_01_21.data --img-size 704 896 --weights ./weights/yolov3-spp-ultralytics.pt --device 1 --adam --freeze-layers --experiment-dir ./experiments --hyp "{'giou': 1.7038973686987091, 'cls': 79.80453825073883, 'cls_pw': 7.641403990897682, 'obj': 98.0896723462599, 'obj_pw': 3.195427785893025, 'iou_t': 0.7615362710464499, 'lr0': 0.006154266042305081, 'lrf': 0.06391302389198193, 'momentum': 0.03989930189437496, 'weight_decay': 0.6970892851738646, 'fl_gamma': 5.000595598041961, 'hsv_h': 0.03033946882347094, 'hsv_s': 0.24492934923224274, 'hsv_v': 0.46951538847468355, 'degrees': 3.438444080389489, 'translate': 0.6732897446397307, 'scale': 0.2543620463416282, 'shear': 0.6069596451995198}" -Using CUDA Apex device0 _CudaDeviceProperties(name='GeForce RTX 2080 Ti', total_memory=11019MB) - - - -Start Tensorboard with "tensorboard --logdir=runs", view at http://localhost:6006/ - -Model Summary: 225 layers, 6.2681e+07 parameters, 6.2681e+07 gradients - -Optimizer groups: 76 .bias, 76 Conv2d.weight, 73 other - - - - 0%| | 0/4788 [00:00 - - train(hyp) # train normally - - File "train.py", line 336, in train - - multi_label=ni > n_burn) - - File "/home/tomekb/yolov3/test.py", line 99, in test - - output = non_max_suppression(inf_out, conf_thres=conf_thres, iou_thres=iou_thres, multi_label=multi_label) - - File "/home/tomekb/yolov3/utils/utils.py", line 537, in non_max_suppression - - i = torchvision.ops.boxes.nms(boxes, scores, iou_thres) - - File "/home/tomekb/miniconda3/envs/conda3.7/lib/python3.7/site-packages/torchvision/ops/boxes.py", line 35, in nms - - return torch.ops.torchvision.nms(boxes, scores, iou_threshold) - -RuntimeError: CUDA out of memory. Tried to allocate 5.17 GiB (GPU 0; 10.76 GiB total capacity; 702.80 MiB already allocated; 5.11 GiB free; 4.73 GiB reserved in total by PyTorch) - -An error occured during running training-detect-confussion process - Traceback (most recent call last): - File "run_yolov3_process_bayes.py", line 144, in yolov3 - call_training_script(bayes_hyps) - File "run_yolov3_process_bayes.py", line 46, in call_training_script - call_subprocess(cmd) - File "/home/tomekb/yolov3/our_scripts/utils.py", line 14, in call_subprocess - raise RuntimeError("An error occured during calling subprocess") -RuntimeError: An error occured during calling subprocess - -Returning 1 from current bayessian iteration -num acquisition: 1, time elapsed: 895.96s -_______ CALLING TRAINING SCRIPT _______ -python -u train.py --epochs 30 --batch-size 3 --cfg ./cfg/yolov3-spp-21cls.cfg --data ./data/widok_01_21.data --multi-scale --img-size 1024 768 --weights ./weights/yolov3-spp-ultralytics.pt --device 1 --adam --freeze-layers --experiment-dir ./experiments --hyp "{'giou': 7.146189460666151, 'cls': 94.09412817288651, 'cls_pw': 8.78976713129892, 'obj': 26.191629845057463, 'obj_pw': 4.481612001146913, 'iou_t': 0.09901495330879251, 'lr0': 0.09714478226489481, 'lrf': 0.02319169037720873, 'momentum': 0.5978813251597963, 'weight_decay': 0.6603599231844964, 'fl_gamma': 4.943248509206138, 'hsv_h': 0.9630045019883604, 'hsv_s': 0.2617348232472283, 'hsv_v': 0.07201256895762376, 'degrees': 5.438995944523822, 'translate': 0.2806287538051211, 'scale': 0.4841087901678365, 'shear': 0.37700241840983606}" -Using CUDA Apex device0 _CudaDeviceProperties(name='GeForce RTX 2080 Ti', total_memory=11019MB) - - - -Start Tensorboard with "tensorboard --logdir=runs", view at http://localhost:6006/ - -Model Summary: 225 layers, 6.2681e+07 parameters, 6.2681e+07 gradients - -Optimizer groups: 76 .bias, 76 Conv2d.weight, 73 other - - - - 0%| | 0/4788 [00:00 - - train(hyp) # train normally - - File "train.py", line 275, in train - - img_size = random.randrange(grid_min, grid_max + 1) * gs - - File "/home/tomekb/miniconda3/envs/conda3.7/lib/python3.7/random.py", line 200, in randrange - - raise ValueError("empty range for randrange() (%d,%d, %d)" % (istart, istop, width)) - -ValueError: empty range for randrange() (32,25, -7) - -An error occured during running training-detect-confussion process - Traceback (most recent call last): - File "run_yolov3_process_bayes.py", line 144, in yolov3 - call_training_script(bayes_hyps) - File "run_yolov3_process_bayes.py", line 46, in call_training_script - call_subprocess(cmd) - File "/home/tomekb/yolov3/our_scripts/utils.py", line 14, in call_subprocess - raise RuntimeError("An error occured during calling subprocess") -RuntimeError: An error occured during calling subprocess - -Returning 1 from current bayessian iteration -num acquisition: 2, time elapsed: 900.92s -_______ CALLING TRAINING SCRIPT _______ -python -u train.py --epochs 30 --batch-size 3 --cfg ./cfg/yolov3-spp-21cls.cfg --data ./data/widok_01_21.data --multi-scale --img-size 640 704 --weights ./weights/yolov3-spp-ultralytics.pt --device 1 --adam --freeze-layers --experiment-dir ./experiments --hyp "{'giou': 6.313256877452881, 'cls': 34.999218580331444, 'cls_pw': 5.438396324649241, 'obj': 89.4862224157707, 'obj_pw': 6.724640386020911, 'iou_t': 0.71919193845806, 'lr0': 0.034745444052488327, 'lrf': 0.08571450911867988, 'momentum': 0.026499561286118967, 'weight_decay': 0.3677084109953611, 'fl_gamma': 1.7202371853925624, 'hsv_h': 0.030018797801517505, 'hsv_s': 0.10262883913858056, 'hsv_v': 0.6249211507435959, 'degrees': 9.00362513941426, 'translate': 0.9943085918713593, 'scale': 0.9754826652751983, 'shear': 0.9662288908007186}" -Using CUDA Apex device0 _CudaDeviceProperties(name='GeForce RTX 2080 Ti', total_memory=11019MB) - - - -Start Tensorboard with "tensorboard --logdir=runs", view at http://localhost:6006/ - -Model Summary: 225 layers, 6.2681e+07 parameters, 6.2681e+07 gradients - -Optimizer groups: 76 .bias, 76 Conv2d.weight, 73 other - - - - 0%| | 0/4788 [00:00 ./experiments/2020-07-27_20-05-32/confussion-matrix.tsv -An error occured during running training-detect-confussion process - Traceback (most recent call last): - File "run_yolov3_process_bayes.py", line 152, in yolov3 - y_val = 1 - ((y_dict['match'] * 10 - y_dict['false positives'] * 3) / y_dict['mistakes']) -ZeroDivisionError: division by zero - -Returning 1 from current bayessian iteration -num acquisition: 3, time elapsed: 9178.96s -_______ CALLING TRAINING SCRIPT _______ -python -u train.py --epochs 30 --batch-size 2 --cfg ./cfg/yolov3-spp-21cls.cfg --data ./data/widok_01_21.data --img-size 832 960 --weights ./weights/yolov3-spp-ultralytics.pt --device 1 --adam --freeze-layers --experiment-dir ./experiments --hyp "{'giou': 4.623461586044252, 'cls': 61.383559105749896, 'cls_pw': 2.6476226228621047, 'obj': 75.34258597434437, 'obj_pw': 3.0962823299661677, 'iou_t': 0.5381671587676429, 'lr0': 0.08110329920097356, 'lrf': 0.06054484048151723, 'momentum': 0.6365909459313417, 'weight_decay': 0.7623395511207529, 'fl_gamma': 8.526778403592587, 'hsv_h': 0.6947812504859171, 'hsv_s': 0.28654207778660146, 'hsv_v': 0.15309066407904748, 'degrees': 29.215210977715575, 'translate': 0.8357464975850669, 'scale': 0.5978816730945402, 'shear': 0.2317535796063369}" -Using CUDA Apex device0 _CudaDeviceProperties(name='GeForce RTX 2080 Ti', total_memory=11019MB) - - - -Start Tensorboard with "tensorboard --logdir=runs", view at http://localhost:6006/ - -Model Summary: 225 layers, 6.2681e+07 parameters, 6.2681e+07 gradients - -Optimizer groups: 76 .bias, 76 Conv2d.weight, 73 other - - - - 0%| | 0/4788 [00:00 - - train(hyp) # train normally - - File "train.py", line 336, in train - - multi_label=ni > n_burn) - - File "/home/tomekb/yolov3/test.py", line 99, in test - - output = non_max_suppression(inf_out, conf_thres=conf_thres, iou_thres=iou_thres, multi_label=multi_label) - - File "/home/tomekb/yolov3/utils/utils.py", line 537, in non_max_suppression - - i = torchvision.ops.boxes.nms(boxes, scores, iou_thres) - - File "/home/tomekb/miniconda3/envs/conda3.7/lib/python3.7/site-packages/torchvision/ops/boxes.py", line 35, in nms - - return torch.ops.torchvision.nms(boxes, scores, iou_threshold) - -RuntimeError: CUDA out of memory. Tried to allocate 6.04 GiB (GPU 0; 10.76 GiB total capacity; 677.92 MiB already allocated; 5.17 GiB free; 4.67 GiB reserved in total by PyTorch) - -An error occured during running training-detect-confussion process - Traceback (most recent call last): - File "run_yolov3_process_bayes.py", line 144, in yolov3 - call_training_script(bayes_hyps) - File "run_yolov3_process_bayes.py", line 46, in call_training_script - call_subprocess(cmd) - File "/home/tomekb/yolov3/our_scripts/utils.py", line 14, in call_subprocess - raise RuntimeError("An error occured during calling subprocess") -RuntimeError: An error occured during calling subprocess - -Returning 1 from current bayessian iteration -num acquisition: 4, time elapsed: 10609.60s -_______ CALLING TRAINING SCRIPT _______ -python -u train.py --epochs 30 --batch-size 3 --cfg ./cfg/yolov3-spp-21cls.cfg --data ./data/widok_01_21.data --multi-scale --img-size 640 832 --weights ./weights/yolov3-spp-ultralytics.pt --device 1 --adam --freeze-layers --experiment-dir ./experiments --hyp "{'giou': 9.549235370223307, 'cls': 56.2268071833936, 'cls_pw': 5.90919060262285, 'obj': 25.976376558768102, 'obj_pw': 5.141310140955158, 'iou_t': 0.7155329731241602, 'lr0': 0.046525720369811376, 'lrf': 0.06913493953291242, 'momentum': 0.6465608682422157, 'weight_decay': 0.5006409265110087, 'fl_gamma': 7.405891238589323, 'hsv_h': 0.961351275406733, 'hsv_s': 0.8038414263264594, 'hsv_v': 0.45859659526460983, 'degrees': 15.293788395057776, 'translate': 0.14029533080714618, 'scale': 0.34277550069222773, 'shear': 0.012166569537150451}" -Using CUDA Apex device0 _CudaDeviceProperties(name='GeForce RTX 2080 Ti', total_memory=11019MB) - - - -Start Tensorboard with "tensorboard --logdir=runs", view at http://localhost:6006/ - -Model Summary: 225 layers, 6.2681e+07 parameters, 6.2681e+07 gradients - -Optimizer groups: 76 .bias, 76 Conv2d.weight, 73 other - - - - 0%| | 0/4788 [00:00 - - train(hyp) # train normally - - File "train.py", line 336, in train - - multi_label=ni > n_burn) - - File "/home/tomekb/yolov3/test.py", line 99, in test - - output = non_max_suppression(inf_out, conf_thres=conf_thres, iou_thres=iou_thres, multi_label=multi_label) - - File "/home/tomekb/yolov3/utils/utils.py", line 537, in non_max_suppression - - i = torchvision.ops.boxes.nms(boxes, scores, iou_thres) - - File "/home/tomekb/miniconda3/envs/conda3.7/lib/python3.7/site-packages/torchvision/ops/boxes.py", line 35, in nms - - return torch.ops.torchvision.nms(boxes, scores, iou_threshold) - -RuntimeError: CUDA out of memory. Tried to allocate 5.37 GiB (GPU 0; 10.76 GiB total capacity; 670.48 MiB already allocated; 5.11 GiB free; 4.73 GiB reserved in total by PyTorch) - -An error occured during running training-detect-confussion process - Traceback (most recent call last): - File "run_yolov3_process_bayes.py", line 144, in yolov3 - call_training_script(bayes_hyps) - File "run_yolov3_process_bayes.py", line 46, in call_training_script - call_subprocess(cmd) - File "/home/tomekb/yolov3/our_scripts/utils.py", line 14, in call_subprocess - raise RuntimeError("An error occured during calling subprocess") -RuntimeError: An error occured during calling subprocess - -Returning 1 from current bayessian iteration -num acquisition: 5, time elapsed: 11401.26s -_______ CALLING TRAINING SCRIPT _______ -python -u train.py --epochs 30 --batch-size 4 --cfg ./cfg/yolov3-spp-21cls.cfg --data ./data/widok_01_21.data --img-size 960 1024 --weights ./weights/yolov3-spp-ultralytics.pt --device 1 --adam --freeze-layers --experiment-dir ./experiments --hyp "{'giou': 4.246756725292756, 'cls': 11.198057186315507, 'cls_pw': 3.098304619053107, 'obj': 62.61296660237411, 'obj_pw': 8.198351971243412, 'iou_t': 0.9968222134402118, 'lr0': 0.08781751025223361, 'lrf': 0.0721137300843447, 'momentum': 0.4849862271081299, 'weight_decay': 0.4682616773016629, 'fl_gamma': 1.0393521599184086, 'hsv_h': 0.10854949672981251, 'hsv_s': 0.6303870219739742, 'hsv_v': 0.5257176961368889, 'degrees': 22.418402657971637, 'translate': 0.31082264570582063, 'scale': 0.29282786854129794, 'shear': 0.01941008975448877}" -Using CUDA Apex device0 _CudaDeviceProperties(name='GeForce RTX 2080 Ti', total_memory=11019MB) - - - -Start Tensorboard with "tensorboard --logdir=runs", view at http://localhost:6006/ - -Model Summary: 225 layers, 6.2681e+07 parameters, 6.2681e+07 gradients - -Optimizer groups: 76 .bias, 76 Conv2d.weight, 73 other - - - - 0%| | 0/4788 [00:00 ./experiments/2020-07-27_21-53-25/confussion-matrix.tsv -An error occured during running training-detect-confussion process - Traceback (most recent call last): - File "run_yolov3_process_bayes.py", line 152, in yolov3 - y_val = 1 - ((y_dict['match'] * 10 - y_dict['false positives'] * 3) / y_dict['mistakes']) -ZeroDivisionError: division by zero - -Returning 1 from current bayessian iteration -num acquisition: 6, time elapsed: 15663.69s -_______ CALLING TRAINING SCRIPT _______ -python -u train.py --epochs 30 --batch-size 3 --cfg ./cfg/yolov3-spp-21cls.cfg --data ./data/widok_01_21.data --img-size 768 1024 --weights ./weights/yolov3-spp-ultralytics.pt --device 1 --adam --freeze-layers --experiment-dir ./experiments --hyp "{'giou': 2.7624106325181996, 'cls': 61.50183843060771, 'cls_pw': 8.469222909088703, 'obj': 56.22721762230748, 'obj_pw': 5.8281454817980505, 'iou_t': 0.34606360862484564, 'lr0': 0.06079478524038113, 'lrf': 0.06826768010503577, 'momentum': 0.9208798498843362, 'weight_decay': 0.5892609404206524, 'fl_gamma': 3.6350123658220346, 'hsv_h': 0.7056722377887225, 'hsv_s': 0.12329600034224986, 'hsv_v': 0.9382549731758573, 'degrees': 27.59828302534629, 'translate': 0.5629202322998729, 'scale': 0.5874782553728248, 'shear': 0.4745581471925713}" -Using CUDA Apex device0 _CudaDeviceProperties(name='GeForce RTX 2080 Ti', total_memory=11019MB) - - - -Start Tensorboard with "tensorboard --logdir=runs", view at http://localhost:6006/ - -Model Summary: 225 layers, 6.2681e+07 parameters, 6.2681e+07 gradients - -Optimizer groups: 76 .bias, 76 Conv2d.weight, 73 other - - - - 0%| | 0/4788 [00:00 - - train(hyp) # train normally - - File "train.py", line 336, in train - - multi_label=ni > n_burn) - - File "/home/tomekb/yolov3/test.py", line 99, in test - - output = non_max_suppression(inf_out, conf_thres=conf_thres, iou_thres=iou_thres, multi_label=multi_label) - - File "/home/tomekb/yolov3/utils/utils.py", line 537, in non_max_suppression - - i = torchvision.ops.boxes.nms(boxes, scores, iou_thres) - - File "/home/tomekb/miniconda3/envs/conda3.7/lib/python3.7/site-packages/torchvision/ops/boxes.py", line 35, in nms - - return torch.ops.torchvision.nms(boxes, scores, iou_threshold) - -RuntimeError: CUDA out of memory. Tried to allocate 5.07 GiB (GPU 0; 10.76 GiB total capacity; 722.74 MiB already allocated; 5.01 GiB free; 4.83 GiB reserved in total by PyTorch) - -An error occured during running training-detect-confussion process - Traceback (most recent call last): - File "run_yolov3_process_bayes.py", line 144, in yolov3 - call_training_script(bayes_hyps) - File "run_yolov3_process_bayes.py", line 46, in call_training_script - call_subprocess(cmd) - File "/home/tomekb/yolov3/our_scripts/utils.py", line 14, in call_subprocess - raise RuntimeError("An error occured during calling subprocess") -RuntimeError: An error occured during calling subprocess - -Returning 1 from current bayessian iteration -num acquisition: 7, time elapsed: 19288.29s -_______ CALLING TRAINING SCRIPT _______ -python -u train.py --epochs 30 --batch-size 1 --cfg ./cfg/yolov3-spp-21cls.cfg --data ./data/widok_01_21.data --multi-scale --img-size 960 1024 --weights ./weights/yolov3-spp-ultralytics.pt --device 1 --adam --freeze-layers --experiment-dir ./experiments --hyp "{'giou': 3.601286789106768, 'cls': 40.41479520993333, 'cls_pw': 8.072263351594946, 'obj': 93.43525212209255, 'obj_pw': 4.204996875240019, 'iou_t': 0.818581613633346, 'lr0': 0.05698088288204337, 'lrf': 0.05554575943768989, 'momentum': 0.034077353761861495, 'weight_decay': 0.32287984681247117, 'fl_gamma': 2.709306802989163, 'hsv_h': 0.8866403080515353, 'hsv_s': 0.6657477432898976, 'hsv_v': 0.6638833778874473, 'degrees': 19.993680608611307, 'translate': 0.6467890255815546, 'scale': 0.9327764385409818, 'shear': 0.4710194872536998}" -Using CUDA Apex device0 _CudaDeviceProperties(name='GeForce RTX 2080 Ti', total_memory=11019MB) - - - -Start Tensorboard with "tensorboard --logdir=runs", view at http://localhost:6006/ - -Model Summary: 225 layers, 6.2681e+07 parameters, 6.2681e+07 gradients - -Optimizer groups: 76 .bias, 76 Conv2d.weight, 73 other - - - - 0%| | 0/4788 [00:00 - - train(hyp) # train normally - - File "train.py", line 336, in train - - multi_label=ni > n_burn) - - File "/home/tomekb/yolov3/test.py", line 99, in test - - output = non_max_suppression(inf_out, conf_thres=conf_thres, iou_thres=iou_thres, multi_label=multi_label) - - File "/home/tomekb/yolov3/utils/utils.py", line 537, in non_max_suppression - - i = torchvision.ops.boxes.nms(boxes, scores, iou_thres) - - File "/home/tomekb/miniconda3/envs/conda3.7/lib/python3.7/site-packages/torchvision/ops/boxes.py", line 35, in nms - - return torch.ops.torchvision.nms(boxes, scores, iou_threshold) - -RuntimeError: CUDA out of memory. Tried to allocate 6.42 GiB (GPU 0; 10.76 GiB total capacity; 652.40 MiB already allocated; 5.01 GiB free; 4.83 GiB reserved in total by PyTorch) - -An error occured during running training-detect-confussion process - Traceback (most recent call last): - File "run_yolov3_process_bayes.py", line 144, in yolov3 - call_training_script(bayes_hyps) - File "run_yolov3_process_bayes.py", line 46, in call_training_script - call_subprocess(cmd) - File "/home/tomekb/yolov3/our_scripts/utils.py", line 14, in call_subprocess - raise RuntimeError("An error occured during calling subprocess") -RuntimeError: An error occured during calling subprocess - -Returning 1 from current bayessian iteration -num acquisition: 8, time elapsed: 31753.94s -_______ CALLING TRAINING SCRIPT _______ -python -u train.py --epochs 30 --batch-size 1 --cfg ./cfg/yolov3-spp-21cls.cfg --data ./data/widok_01_21.data --multi-scale --img-size 576 960 --weights ./weights/yolov3-spp-ultralytics.pt --device 1 --adam --freeze-layers --experiment-dir ./experiments --hyp "{'giou': 6.048138505765027, 'cls': 76.50129773787151, 'cls_pw': 4.123507076365048, 'obj': 48.12720999798328, 'obj_pw': 0.8552486525937786, 'iou_t': 0.15098540146956585, 'lr0': 0.023869581379601934, 'lrf': 0.03686949827098312, 'momentum': 0.8343698795150166, 'weight_decay': 0.1880353498217422, 'fl_gamma': 8.949542488478707, 'hsv_h': 0.2870057333868685, 'hsv_s': 0.19637192220212063, 'hsv_v': 0.2169847895418775, 'degrees': 10.262968760603645, 'translate': 0.03690325584184029, 'scale': 0.4667914088315841, 'shear': 0.4144343366176697}" -Using CUDA Apex device0 _CudaDeviceProperties(name='GeForce RTX 2080 Ti', total_memory=11019MB) - - - -Start Tensorboard with "tensorboard --logdir=runs", view at http://localhost:6006/ - -Model Summary: 225 layers, 6.2681e+07 parameters, 6.2681e+07 gradients - -Optimizer groups: 76 .bias, 76 Conv2d.weight, 73 other - - - - 0%| | 0/4788 [00:00 - - train(hyp) # train normally - - File "train.py", line 336, in train - - multi_label=ni > n_burn) - - File "/home/tomekb/yolov3/test.py", line 99, in test - - output = non_max_suppression(inf_out, conf_thres=conf_thres, iou_thres=iou_thres, multi_label=multi_label) - - File "/home/tomekb/yolov3/utils/utils.py", line 537, in non_max_suppression - - i = torchvision.ops.boxes.nms(boxes, scores, iou_thres) - - File "/home/tomekb/miniconda3/envs/conda3.7/lib/python3.7/site-packages/torchvision/ops/boxes.py", line 35, in nms - - return torch.ops.torchvision.nms(boxes, scores, iou_threshold) - -RuntimeError: Trying to create tensor with negative dimension -961626254: [-961626254] - -An error occured during running training-detect-confussion process - Traceback (most recent call last): - File "run_yolov3_process_bayes.py", line 144, in yolov3 - call_training_script(bayes_hyps) - File "run_yolov3_process_bayes.py", line 46, in call_training_script - call_subprocess(cmd) - File "/home/tomekb/yolov3/our_scripts/utils.py", line 14, in call_subprocess - raise RuntimeError("An error occured during calling subprocess") -RuntimeError: An error occured during calling subprocess - -Returning 1 from current bayessian iteration -num acquisition: 9, time elapsed: 34651.69s -_______ CALLING TRAINING SCRIPT _______ -python -u train.py --epochs 30 --batch-size 1 --cfg ./cfg/yolov3-spp-21cls.cfg --data ./data/widok_01_21.data --img-size 640 1024 --weights ./weights/yolov3-spp-ultralytics.pt --device 1 --adam --freeze-layers --experiment-dir ./experiments --hyp "{'giou': 9.78331902147645, 'cls': 67.92686466296935, 'cls_pw': 7.563445361920521, 'obj': 70.04003004697609, 'obj_pw': 9.24027457767048, 'iou_t': 0.4717054234555471, 'lr0': 0.023266497240161114, 'lrf': 0.01976363376216637, 'momentum': 0.3952390441220761, 'weight_decay': 0.8272285942424523, 'fl_gamma': 0.25479835194478273, 'hsv_h': 0.08047814066126147, 'hsv_s': 0.38728346932389535, 'hsv_v': 0.4606630937623931, 'degrees': 4.579264609112537, 'translate': 0.15988655515121253, 'scale': 0.2503882837081164, 'shear': 0.5698078076854681}" -Using CUDA Apex device0 _CudaDeviceProperties(name='GeForce RTX 2080 Ti', total_memory=11019MB) - - - -Start Tensorboard with "tensorboard --logdir=runs", view at http://localhost:6006/ - -Model Summary: 225 layers, 6.2681e+07 parameters, 6.2681e+07 gradients - -Optimizer groups: 76 .bias, 76 Conv2d.weight, 73 other - - - - 0%| | 0/4788 [00:00 - - train(hyp) # train normally - - File "train.py", line 294, in train - - scaled_loss.backward() - - File "/home/tomekb/miniconda3/envs/conda3.7/lib/python3.7/contextlib.py", line 119, in __exit__ - - next(self.gen) - - File "/home/tomekb/miniconda3/envs/conda3.7/lib/python3.7/site-packages/apex/amp/handle.py", line 123, in scale_loss - - optimizer._post_amp_backward(loss_scaler) - - File "/home/tomekb/miniconda3/envs/conda3.7/lib/python3.7/site-packages/apex/amp/_process_optimizer.py", line 249, in post_backward_no_master_weights - - post_backward_models_are_masters(scaler, params, stashed_grads) - - File "/home/tomekb/miniconda3/envs/conda3.7/lib/python3.7/site-packages/apex/amp/_process_optimizer.py", line 135, in post_backward_models_are_masters - - scale_override=(grads_have_scale, stashed_have_scale, out_scale)) - - File "/home/tomekb/miniconda3/envs/conda3.7/lib/python3.7/site-packages/apex/amp/scaler.py", line 183, in unscale_with_stashed - - out_scale/grads_have_scale, - -ZeroDivisionError: float division by zero - -An error occured during running training-detect-confussion process - Traceback (most recent call last): - File "run_yolov3_process_bayes.py", line 144, in yolov3 - call_training_script(bayes_hyps) - File "run_yolov3_process_bayes.py", line 46, in call_training_script - call_subprocess(cmd) - File "/home/tomekb/yolov3/our_scripts/utils.py", line 14, in call_subprocess - raise RuntimeError("An error occured during calling subprocess") -RuntimeError: An error occured during calling subprocess - -Returning 1 from current bayessian iteration -num acquisition: 10, time elapsed: 34833.43s