car-detection-bayes/our_scripts/generate_txt_from_xml.py

163 lines
5.5 KiB
Python
Raw Permalink Normal View History

2020-06-28 12:37:21 +00:00
# -*- coding: utf-8 -*-
import sys
import os
import re
import xml.etree.ElementTree as ET
from glob import glob
from os.path import join
from pathlib import Path
# First command-line argument: folder holding the input annotation files.
# generate_txt_from_xml() globs it for '*.xml'.
annotations = sys.argv[1]
# Second command-line argument: folder that receives the generated .txt files
# (one per converted .xml, same base name).
modified_annotations = sys.argv[2]
def convert(size, box):
    """Turn a pixel bounding box into image-relative centre/extent values.

    Args:
        size: ``(image_width, image_height)``.
        box: ``(xmin, xmax, ymin, ymax)`` -- note the x pair comes first,
            which is the order the caller in this file builds it in.

    Returns:
        ``(x, y, w, h)``: box centre and box width/height, each scaled by
        the reciprocal of the matching image dimension and rounded to four
        decimal places.
    """
    scale_x = 1. / (size[0])
    scale_y = 1. / (size[1])

    left, right, top, bottom = box[0], box[1], box[2], box[3]

    # The centre is shifted by one pixel; presumably this converts 1-based
    # annotation coordinates to 0-based ones -- TODO confirm with the
    # annotation tool's conventions.
    centre_x = (left + right) / 2.0 - 1
    centre_y = (top + bottom) / 2.0 - 1
    extent_x = right - left
    extent_y = bottom - top

    return (
        round(centre_x * scale_x, 4),
        round(centre_y * scale_y, 4),
        round(extent_x * scale_x, 4),
        round(extent_y * scale_y, 4),
    )
# Annotation labels grouped by output class id: the tuple at index ``i`` lists
# every label that is mapped to class ``i``.  Some ids merge several labels.
_CLASS_LABEL_GROUPS = (
    ('1. rower',),
    ('2. motocykl',),
    ('3. osobowy',),
    ('4. osobowy pickup',),
    ('5. osobowy dostawczy',),
    ('6. osobowy van 7-9',),
    ('7. dostawczy blaszak',),
    ('8. dostawczy zabudowany',),
    ('9. dostawczy pickup (w tym pomoc drog.)',),
    ('10. dostawczy VAN (osobowy)', '11. autobus maly 10-24'),
    ('12. autobus miejski', '13. autobus turystyczny i inny'),
    ('14. ciezarowy pow. 3,5t zabudowany', '17. ciezarowy z widoczna przyczepa'),
    ('15. ciezarowy pow. 3,5t otwarty (w tym duzy holownik)',),
    ('16. ciezarowy pow. 3,5t inny (wanna, gruszka, dzwig itp.)',),
    ('18. ciagnik siodlowy z widoczna naczepa', '19. ciagnik siodlowy bez naczepy'),
    ('23. inne pojazdy silnikowe', '20. camper', '21. woz strazacki',
     '22. ciagnik roliczy, koparka, spychacz'),
    ('24. przyczepa',),
    ('25. BUS-karetka/policja',),
    ('26. BUS brygadowka',),
    ('27. BUS sredni dostawczy',),
    ('28. BUS sredni osobowy',),
)

# Flat label -> class id lookup derived from the groups above.
_CLASS_NAME_TO_ID = {
    label: class_id
    for class_id, labels in enumerate(_CLASS_LABEL_GROUPS)
    for label in labels
}


def map_class_name_to_id(class_name, xml_document, class_distribution):
    """Return the numeric class id for an annotation label and count it.

    Args:
        class_name: Label text taken from an ``<object><name>`` element.
            The match is exact (case- and whitespace-sensitive).
        xml_document: Path of the XML file being processed; only used to
            make the error for an unknown label traceable.
        class_distribution: Mutable sequence of per-class counters indexed
            by class id.  The counter of the matched class is incremented
            in place.

    Returns:
        The class id, an int in the range 0-20.

    Raises:
        Exception: If ``class_name`` is not one of the known labels.  The
            exception args are ``('Unknown Class ', xml_document, class_name)``.
    """
    class_id = _CLASS_NAME_TO_ID.get(class_name)
    if class_id is None:
        raise Exception('Unknown Class ', xml_document, class_name)
    class_distribution[class_id] += 1
    return class_id
2020-06-28 12:37:21 +00:00
def generate_txt_from_xml():
    """Convert every verified annotation XML into a per-image ``.txt`` file.

    Reads all ``*.xml`` files from the module-level ``annotations`` folder.
    A file is converted only when its root element has ``verified="yes"``;
    other files are skipped.  For each converted file a text file with the
    same base name is written into ``modified_annotations`` containing one
    line per ``<object>``: the class id from ``map_class_name_to_id``
    followed by the four values returned by ``convert``, space-separated.

    Every processed path is printed, and the per-class object counts are
    printed once all files have been handled.

    Raises:
        Exception: Propagated from ``map_class_name_to_id`` when an object
            carries an unknown label.  No output file is written for the
            offending XML because all labels are resolved before writing.
    """
    # One counter per class id (ids 0-20).
    class_distribution = [0] * 21

    filepaths = glob(join(annotations, '*.xml'))

    # Report paths that occur more than once, in a single pass.
    seen = set()
    duplicates = set()
    for path in filepaths:
        if path in seen:
            duplicates.add(path)
        seen.add(path)
    if duplicates:
        print('Duplicate files : ')
        print(duplicates)

    for filepath in filepaths:
        txtpath = join(modified_annotations, re.sub(r"\.xml$", ".txt", os.path.basename(filepath)))

        # Close the input handle as soon as the tree is parsed.
        with open(filepath, mode='r', encoding='utf-8') as in_file:
            root = ET.parse(in_file).getroot()
        print(filepath)

        # Covers both "no attributes at all" and "no/other 'verified' value".
        if root.attrib.get('verified') != 'yes':
            continue

        size = root.find('size')
        w = int(size.find('width').text)
        h = int(size.find('height').text)

        # Resolve each label exactly once so class_distribution is not
        # incremented twice per object.
        objects = list(root.iter('object'))
        cls_ids = [
            map_class_name_to_id(obj.find('name').text, filepath, class_distribution)
            for obj in objects
        ]

        # Defensive: the mapper in this file raises on unknown labels, but a
        # -1 sentinel is still honoured by discarding the whole file.
        if -1 in cls_ids:
            print('File discarded.')
            continue

        lines = []
        for obj, cls_id in zip(objects, cls_ids):
            xmlbox = obj.find('bndbox')
            # convert() expects (xmin, xmax, ymin, ymax).
            b = (
                float(xmlbox.find('xmin').text),
                float(xmlbox.find('xmax').text),
                float(xmlbox.find('ymin').text),
                float(xmlbox.find('ymax').text),
            )
            bb = convert((w, h), b)
            lines.append(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')

        # Opening in 'w' mode creates or truncates the file; the context
        # manager guarantees the content is flushed and the handle closed.
        with open(txtpath, mode='w', encoding='utf-8') as out_file:
            out_file.writelines(lines)

    print(class_distribution)
# Script entry point: run the conversion using the two folders given on the
# command line. NOTE: there is no __main__ guard, so this also runs on import.
generate_txt_from_xml()