# -*- coding: utf-8 -*-
"""Convert a folder of XML bounding-box annotations into per-image TXT files.

Usage::

    python <this_script> <xml_annotations_dir> <txt_output_dir>

For every ``*.xml`` file whose root element carries ``verified="yes"`` a TXT
file with the same base name is written to the output directory.  Each line of
that file has the form ``<class_id> <x> <y> <w> <h>`` where the four numbers
are the box centre and size divided by the image width/height and rounded to
four decimals.
"""
import sys
import os
import re
import xml.etree.ElementTree as ET
from collections import Counter
from glob import glob
from os.path import join
from pathlib import Path

# Folder of XML annotation files (first command-line argument).
# Falls back to None so that importing this module never fails on a short argv.
annotations = sys.argv[1] if len(sys.argv) > 1 else None

# Folder that receives the generated TXT files (second command-line argument).
modified_annotations = sys.argv[2] if len(sys.argv) > 2 else None

# One tuple of annotation labels per output class; the position of the tuple
# is the numeric class id.  Several source labels may share one id.
_CLASS_GROUPS = (
    ('1. rower',),
    ('2. motocykl',),
    ('3. osobowy',),
    ('4. osobowy pickup',),
    ('5. osobowy dostawczy',),
    ('6. osobowy van 7-9',),
    ('7. dostawczy blaszak',),
    ('8. dostawczy zabudowany',),
    ('9. dostawczy pickup (w tym pomoc drog.)',),
    ('10. dostawczy VAN (osobowy)', '11. autobus maly 10-24'),
    ('12. autobus miejski', '13. autobus turystyczny i inny'),
    ('14. ciezarowy pow. 3,5t zabudowany',
     '17. ciezarowy z widoczna przyczepa'),
    ('15. ciezarowy pow. 3,5t otwarty (w tym duzy holownik)',),
    ('16. ciezarowy pow. 3,5t inny (wanna, gruszka, dzwig itp.)',),
    ('18. ciagnik siodlowy z widoczna naczepa',
     '19. ciagnik siodlowy bez naczepy'),
    ('23. inne pojazdy silnikowe', '20. camper', '21. woz strazacki',
     '22. ciagnik roliczy, koparka, spychacz'),
    ('24. przyczepa',),
    ('25. BUS-karetka/policja',),
    ('26. BUS brygadowka',),
    ('27. BUS sredni dostawczy',),
    ('28. BUS sredni osobowy',),
)

# Flat label -> class id lookup derived from the table above.
_CLASS_NAME_TO_ID = {}
for _class_id, _labels in enumerate(_CLASS_GROUPS):
    for _label in _labels:
        _CLASS_NAME_TO_ID[_label] = _class_id

_NUM_CLASSES = len(_CLASS_GROUPS)


def convert(size, box):
    """Turn an absolute pixel box into a normalised centre/size tuple.

    Args:
        size: ``(width, height)`` of the image.
        box: ``(xmin, xmax, ymin, ymax)`` of the bounding box.

    Returns:
        ``(x, y, w, h)``: box centre and box size, each multiplied by the
        reciprocal of the matching image dimension and rounded to 4 decimals.

    Raises:
        ZeroDivisionError: if the image width or height is 0.
    """
    dw = 1. / (size[0])
    dh = 1. / (size[1])
    # The centre is shifted by one pixel before normalising.  Presumably this
    # compensates for 1-based pixel coordinates in the annotations -- confirm
    # with the annotation tool before changing it.
    x = (box[0] + box[1]) / 2.0 - 1
    y = (box[2] + box[3]) / 2.0 - 1
    w = box[1] - box[0]
    h = box[3] - box[2]
    return (round(x * dw, 4), round(y * dh, 4),
            round(w * dw, 4), round(h * dh, 4))


def map_class_name_to_id(class_name, xml_document, class_distribution):
    """Return the numeric class id for ``class_name`` and count the hit.

    Args:
        class_name: label text taken from the annotation.
        xml_document: path of the annotation file; only used in the error.
        class_distribution: mutable per-class counter list; the entry of the
            resolved class is incremented in place.

    Returns:
        The integer class id.

    Raises:
        Exception: with args ``('Unknown Class ', xml_document, class_name)``
            when the label is not in the class table.
    """
    try:
        class_id = _CLASS_NAME_TO_ID[class_name]
    except KeyError:
        raise Exception('Unknown Class ', xml_document, class_name) from None
    class_distribution[class_id] += 1
    return class_id


def generate_txt_from_xml(annotations_dir=None, output_dir=None):
    """Write one TXT label file per verified XML annotation file.

    Args:
        annotations_dir: folder containing the ``*.xml`` files.  Defaults to
            the module-level ``annotations`` (first command-line argument).
        output_dir: existing folder receiving the ``*.txt`` files.  Defaults
            to the module-level ``modified_annotations`` (second argument).

    Returns:
        The per-class object counts (also printed), indexed by class id.

    Raises:
        ValueError: if a directory is neither passed nor given on the
            command line.
        Exception: propagated from ``map_class_name_to_id`` for an unknown
            label; the TXT file of the offending annotation is not written.
    """
    if annotations_dir is None:
        annotations_dir = annotations
    if output_dir is None:
        output_dir = modified_annotations
    if annotations_dir is None or output_dir is None:
        raise ValueError(
            'annotation and output directories must be given either as '
            'arguments or on the command line')

    class_distribution = [0] * _NUM_CLASSES
    # Sorted so that the processing order (and the log) is reproducible.
    filepaths = sorted(glob(join(annotations_dir, '*.xml')))

    duplicates = {
        path for path, hits in Counter(filepaths).items() if hits > 1
    }
    if duplicates:
        print('Duplicate files : ')
        print(duplicates)

    for filepath in filepaths:
        with open(filepath, mode='r', encoding='utf-8') as in_file:
            root = ET.parse(in_file).getroot()
        print(filepath)

        # Only annotations explicitly marked as verified are converted; a
        # missing attribute is treated the same as any value other than "yes".
        if root.attrib.get('verified') != 'yes':
            continue

        size = root.find('size')
        w = int(size.find('width').text)
        h = int(size.find('height').text)

        # Resolve every object before touching the output file so that an
        # unknown label aborts without leaving a partial TXT behind, and so
        # that each object is counted exactly once.
        lines = []
        for obj in root.iter('object'):
            class_label = obj.find('name').text
            cls_id = map_class_name_to_id(
                class_label, filepath, class_distribution)
            xmlbox = obj.find('bndbox')
            b = (float(xmlbox.find('xmin').text),
                 float(xmlbox.find('xmax').text),
                 float(xmlbox.find('ymin').text),
                 float(xmlbox.find('ymax').text))
            bb = convert((w, h), b)
            lines.append(
                str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')

        txt_name = os.path.splitext(os.path.basename(filepath))[0] + '.txt'
        txtpath = join(output_dir, txt_name)
        # An annotation without objects still produces an (empty) TXT file.
        with open(txtpath, mode='w', encoding='utf-8') as out_file:
            out_file.writelines(lines)

    print(class_distribution)
    return class_distribution


if __name__ == "__main__":
    if len(sys.argv) < 3:
        sys.exit(
            f'usage: {sys.argv[0]} <xml_annotations_dir> <txt_output_dir>')
    generate_txt_from_xml()