car-detection-bayes/our_scripts/generate_txt_from_xml.py

# -*- coding: utf-8 -*-
import sys
import os
import re
import xml.etree.ElementTree as ET
from glob import glob
from os.path import join
from pathlib import Path

# First command-line argument: directory that is globbed for '*.xml'
# annotation files by generate_txt_from_xml().
annotations = sys.argv[1]
# Second command-line argument: directory into which one .txt file per
# accepted XML file is written.
modified_annotations = sys.argv[2]


def convert(size, box):
    """Turn a pixel box into a normalised, centre-based ``(x, y, w, h)`` tuple.

    ``size`` is indexed as ``(width, height)`` and ``box`` as
    ``(xmin, xmax, ymin, ymax)``. The box centre is shifted by one pixel
    towards the origin on both axes, then centre and extent are scaled by
    the reciprocal of the image width/height and rounded to 4 decimals.
    """
    inv_width = 1. / size[0]
    inv_height = 1. / size[1]

    # Centre of the box, moved by -1 pixel on each axis before scaling.
    centre_x = (box[0] + box[1]) / 2.0 - 1
    centre_y = (box[2] + box[3]) / 2.0 - 1
    span_x = box[1] - box[0]
    span_y = box[3] - box[2]

    return (
        round(centre_x * inv_width, 4),
        round(centre_y * inv_height, 4),
        round(span_x * inv_width, 4),
        round(span_y * inv_height, 4),
    )


# Annotation labels grouped by output class id: the position of a group in
# this tuple is the id returned for every label listed inside it.
_CLASS_NAME_GROUPS = (
    ('1. rower',),
    ('2. motocykl',),
    ('3. osobowy',),
    ('4. osobowy pickup',),
    ('5. osobowy dostawczy',),
    ('6. osobowy van 7-9',),
    ('7. dostawczy blaszak', '27. BUS sredni dostawczy'),
    ('8. dostawczy zabudowany',),
    ('9. dostawczy pickup (w tym pomoc drog.)',),
    ('10. dostawczy VAN (osobowy)', '11. autobus maly 10-24', '28. BUS sredni osobowy'),
    ('12. autobus miejski', '13. autobus turystyczny i inny'),
    ('14. ciezarowy pow. 3,5t zabudowany', '17. ciezarowy z widoczna przyczepa'),
    ('15. ciezarowy pow. 3,5t otwarty (w tym duzy holownik)',),
    ('16. ciezarowy pow. 3,5t inny (wanna, gruszka, dzwig itp.)',),
    ('18. ciagnik siodlowy z widoczna naczepa', '19. ciagnik siodlowy bez naczepy'),
    ('23. inne pojazdy silnikowe', '20. camper', '21. woz strazacki', '22. ciagnik roliczy, koparka, spychacz'),
    ('24. przyczepa',),
    ('25. BUS-karetka/policja',),
    ('26. BUS brygadowka',),
)


def map_class_name_to_id(class_name, xml_document, class_distribution):
    """Return the numeric class id for an annotation label and count it.

    The label is looked up in the ordered label groups; on a match the
    counter at that id in ``class_distribution`` is incremented in place and
    the id (0-18) is returned.

    Raises:
        Exception: with args ``('Unknown Class ', xml_document, class_name)``
            when the label belongs to no group.
    """
    for class_id, labels in enumerate(_CLASS_NAME_GROUPS):
        if class_name in labels:
            class_distribution[class_id] += 1
            return class_id
    raise Exception('Unknown Class ', xml_document, class_name)


def generate_txt_from_xml():
    """Convert verified XML annotations into one txt label file per image.

    Every ``*.xml`` file in the module-level ``annotations`` directory is
    parsed. Files whose root element does not carry ``verified="yes"`` are
    skipped. For each remaining file a ``.txt`` file with the same base name
    is written into ``modified_annotations``; it holds one line per
    ``<object>`` in the form ``<class_id> <x> <y> <w> <h>``, where the box
    values come from ``convert``.

    Each object is mapped to its class id exactly once, so the distribution
    printed at the end counts every object a single time.

    Raises:
        Exception: propagated from ``map_class_name_to_id`` for a label it
            does not know.
    """
    # One counter per class id (0-18) produced by map_class_name_to_id.
    class_distribution = [0] * 19

    filepaths = glob(join(annotations, '*.xml'))

    # Linear duplicate scan (the previous list.count() approach was quadratic).
    seen = set()
    duplicates = set()
    for path in filepaths:
        if path in seen:
            duplicates.add(path)
        seen.add(path)
    if duplicates:
        print('Duplicate files : ')
        print(duplicates)

    for filepath in filepaths:
        # splitext also handles an upper-case '.XML' suffix, which glob can
        # return on case-insensitive filesystems.
        stem = os.path.splitext(os.path.basename(filepath))[0]
        txtpath = join(modified_annotations, stem + '.txt')

        # Close the input handle as soon as the tree is built.
        with open(filepath, mode='r', encoding='utf-8') as in_file:
            root = ET.parse(in_file).getroot()

        print(filepath)

        # Covers both an attribute-less root and a root lacking 'verified'.
        if root.attrib.get('verified') != 'yes':
            continue

        size = root.find('size')
        w = int(size.find('width').text)
        h = int(size.find('height').text)

        # Map every object exactly once; the mapper increments
        # class_distribution as a side effect, so a second call per object
        # would double the counts.
        labelled = []
        for obj in root.iter('object'):
            class_label = obj.find('name').text
            cls_id = map_class_name_to_id(class_label, filepath, class_distribution)
            labelled.append((cls_id, obj))

        # The mapper in this file raises on unknown labels; the -1 guard is
        # kept so a non-raising variant that returns -1 still discards the file.
        if any(cls_id == -1 for cls_id, _ in labelled):
            print('File discarded.')
            continue

        lines = []
        for cls_id, obj in labelled:
            xmlbox = obj.find('bndbox')
            b = (
                float(xmlbox.find('xmin').text),
                float(xmlbox.find('xmax').text),
                float(xmlbox.find('ymin').text),
                float(xmlbox.find('ymax').text),
            )
            bb = convert((w, h), b)
            lines.append(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')

        # Opening in 'w' mode creates the file (empty when there are no
        # objects); the context manager guarantees it is flushed and closed.
        with open(txtpath, mode='w', encoding='utf-8') as out_file:
            out_file.writelines(lines)

    print(class_distribution)

# Run the conversion as soon as the file is executed; the input and output
# directories are the two sys.argv values read at the top of the file.
generate_txt_from_xml()
Update 2020-06-28 12:37:21 +00:00			`# - - coding: utf- 8 - -`
			`import sys`
			`import os`
			`import re`
			`import xml.etree.ElementTree as ET`
			`from glob import glob`
			`from os.path import join`
			`from pathlib import Path`

			`# This should just be a folder of xmls`
			`annotations = sys.argv[1]`
			`# Then you have a folder of txts.`
			`modified_annotations = sys.argv[2]`


			`def convert(size, box):`
			`dw = 1. / (size[0])`
			`dh = 1. / (size[1])`
			`x = (box[0] + box[1]) / 2.0 - 1`
			`y = (box[2] + box[3]) / 2.0 - 1`
			`w = box[1] - box[0]`
			`h = box[3] - box[2]`
			`x = round(x * dw, 4)`
			`w = round(w * dw, 4)`
			`y = round(y * dh, 4)`
			`h = round(h * dh, 4)`
			`return (x, y, w, h)`


			`def map_class_name_to_id(class_name, xml_document, class_distribution):`
			`if class_name in ['1. rower']:`
			`class_distribution[0] += 1`
			`return 0`
			`elif class_name in ['2. motocykl']:`
			`class_distribution[1] += 1`
			`return 1`
			`elif class_name in ['3. osobowy']:`
			`class_distribution[2] += 1`
			`return 2`
			`elif class_name in ['4. osobowy pickup']:`
			`class_distribution[3] += 1`
			`return 3`
			`elif class_name in ['5. osobowy dostawczy']:`
			`class_distribution[4] += 1`
			`return 4`
			`elif class_name in ['6. osobowy van 7-9']:`
			`class_distribution[5] += 1`
			`return 5`
Update 2020-06-30 19:51:10 +00:00			`elif class_name in ['7. dostawczy blaszak', '27. BUS sredni dostawczy']:`
Update 2020-06-28 12:37:21 +00:00			`class_distribution[6] += 1`
			`return 6`
			`elif class_name in ['8. dostawczy zabudowany']:`
			`class_distribution[7] += 1`
			`return 7`
			`elif class_name in ['9. dostawczy pickup (w tym pomoc drog.)']:`
			`class_distribution[8] += 1`
			`return 8`
Update 2020-06-30 19:51:10 +00:00			`elif class_name in ['10. dostawczy VAN (osobowy)', '11. autobus maly 10-24', '28. BUS sredni osobowy']:`
Update 2020-06-28 12:37:21 +00:00			`class_distribution[9] += 1`
			`return 9`
Update 2020-06-30 19:51:10 +00:00			`elif class_name in ['12. autobus miejski', '13. autobus turystyczny i inny']:`
Update 2020-06-28 12:37:21 +00:00			`class_distribution[10] += 1`
			`return 10`
Update 2020-06-30 19:51:10 +00:00			`elif class_name in ['14. ciezarowy pow. 3,5t zabudowany', '17. ciezarowy z widoczna przyczepa']:`
Update 2020-06-28 12:37:21 +00:00			`class_distribution[11] += 1`
			`return 11`
Update 2020-06-30 19:51:10 +00:00			`elif class_name in ['15. ciezarowy pow. 3,5t otwarty (w tym duzy holownik)']:`
Update 2020-06-28 12:37:21 +00:00			`class_distribution[12] += 1`
			`return 12`
Update 2020-06-30 19:51:10 +00:00			`elif class_name in ['16. ciezarowy pow. 3,5t inny (wanna, gruszka, dzwig itp.)']:`
Update 2020-06-28 12:37:21 +00:00			`class_distribution[13] += 1`
			`return 13`
Update 2020-06-30 19:51:10 +00:00			`elif class_name in ['18. ciagnik siodlowy z widoczna naczepa', '19. ciagnik siodlowy bez naczepy']:`
Update 2020-06-28 12:37:21 +00:00			`class_distribution[14] += 1`
			`return 14`
Update 2020-06-30 19:51:10 +00:00			`elif class_name in ['23. inne pojazdy silnikowe', '20. camper', '21. woz strazacki', '22. ciagnik roliczy, koparka, spychacz']:`
Update 2020-06-28 12:37:21 +00:00			`class_distribution[15] += 1`
			`return 15`
			`elif class_name in ['24. przyczepa']:`
			`class_distribution[16] += 1`
			`return 16`
			`elif class_name in ['25. BUS-karetka/policja']:`
			`class_distribution[17] += 1`
			`return 17`
Update 2020-06-30 19:51:10 +00:00			`elif class_name in ['26. BUS brygadowka']:`
			`class_distribution[18] += 1`
			`return 18`
Update 2020-06-28 12:37:21 +00:00			`else:`
			`raise Exception('Unknown Class ', xml_document, class_name)`
Update 2020-06-30 19:51:10 +00:00			`#print(f'{xml_document.split("/")[-1]} {class_name}')`
Update 2020-06-28 12:37:21 +00:00

			`def generate_txt_from_xml():`
Update 2020-06-30 19:51:10 +00:00			`class_distribution = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]`

			`filepaths = glob(join(annotations , '*.xml'))`

			`duplicates = set([x for x in filepaths if filepaths.count(x) > 1])`
			`if duplicates:`
			`print('Duplicate files : ')`
			`print(duplicates)`
Update 2020-06-28 12:37:21 +00:00
			`for filepath in filepaths:`
			`txtpath = join(modified_annotations, re.sub(r"\.xml$", ".txt", os.path.basename(filepath)))`

			`in_file = open(filepath, mode='r', encoding='utf-8')`

			`tree = ET.parse(in_file)`
			`root = tree.getroot()`
Update 2020-06-30 19:51:10 +00:00
			`print(filepath)`

			`if root.attrib == {}:`
			`continue`
			`if root.attrib['verified'] != 'yes':`
			`continue`

Update 2020-06-28 12:37:21 +00:00			`size = root.find('size')`
			`w = int(size.find('width').text)`
			`h = int(size.find('height').text)`

			`good_file = True`

			`for obj in root.iter('object'):`
			`#difficult = obj.find('difficult').text`
			`class_label = obj.find('name').text`
			`#if int(difficult) == 1:`
			`# raise Exception("Difficult == 1")`
			`cls_id = map_class_name_to_id(class_label, filepath, class_distribution)`
			`if cls_id == -1 :`
			`good_file = False`

			`if not good_file :`
			`print('File discarded.')`
			`continue`

			`Path(txtpath).touch()`
			`out_file = open(txtpath, mode='w', encoding='utf-8')`

			`for obj in root.iter('object'):`
			`#difficult = obj.find('difficult').text`
			`class_label = obj.find('name').text`
			`#if int(difficult) == 1:`
			`# raise Exception("Difficult == 1")`
			`cls_id = map_class_name_to_id(class_label, filepath, class_distribution)`
			`if cls_id != -1 :`
			`xmlbox = obj.find('bndbox')`
			`b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text), float(xmlbox.find('ymin').text),`
			`float(xmlbox.find('ymax').text))`
			`bb = convert((w, h), b)`
			`out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')`


			`print(class_distribution)`

			`generate_txt_from_xml()`