Source code for dynamicdl.processing.xmlfile

from typing import Union, Any
import xmltodict

from .._utils import load_config
from ..parsing.static import Static
from ..parsing.generic import Generic
from .datafile import DataFile

# Result of `load_config()`, evaluated once when this module is imported.
# NOTE(review): `config` is not referenced in the visible part of this
# module - confirm whether other code relies on it before removing.
config = load_config()

class XMLFile(DataFile):
    '''
    Annotation object describing the expected layout of an XML file.

    `XMLFile` mirrors `JSONFile` in hierarchical structure and parsing. The
    one key difference is that `AmbiguousList` is needed in place of
    `GenericList`: when several tags share a name they are parsed as a list,
    whereas a single tag of a given name is parsed as one item. For this
    reason list objects in the form are interpreted as `AmbiguousList`; a
    `GenericList` has to be instantiated manually if one is desired.

    The form follows the hierarchy of the document, just as in `JSONFile`.
    Here is a snippet from the Oxford-IIIT Pets Dataset:

    .. code-block:: xml

        <annotation>
            <folder>OXIIIT</folder>
            <filename>Abyssinian_1.jpg</filename>
            <source>
                <database>OXFORD-IIIT Pet Dataset</database>
                <annotation>OXIIIT</annotation>
                <image>flickr</image>
            </source>
            <size>
                <width>600</width>
                <height>400</height>
                <depth>3</depth>
            </size>
            <segmented>0</segmented>
            <object>
                <name>cat</name>
                <pose>Frontal</pose>
                <truncated>0</truncated>
                <occluded>0</occluded>
                <bndbox>
                    <xmin>333</xmin>
                    <ymin>72</ymin>
                    <xmax>425</xmax>
                    <ymax>158</ymax>
                </bndbox>
                <difficult>0</difficult>
            </object>
        </annotation>

    Extraneous information does not need to be specified; only the fields
    of interest are listed in the form:

    .. code-block:: python

        XMLFile({
            "annotation": {
                "filename": Generic("{}.jpg", DT.IMAGE_NAME),
                "object": AmbiguousList({
                    "name": DT.BBOX_CLASS_NAME,
                    "bndbox": {
                        "xmin": DT.XMIN,
                        "ymin": DT.YMIN,
                        "xmax": DT.XMAX,
                        "ymax": DT.YMAX
                    }
                })
            }
        })

    :param form: The form which matches the data to be read from `XMLFile`.
    :type form: dict[str | DataType | Static | Generic | Alias, Any] | list[Any]
    '''

    def __init__(self, form: dict[Union[Static, Generic], Any]) -> None:
        # Stored as given; it is only consumed later, by `parse`.
        self.form = form

    def parse(
        self,
        path: str,
        curr_path: list[str]
    ) -> dict:
        '''
        Read the XML file at `path` and match its contents against the form.

        The file is read as UTF-8 text, converted with `xmltodict.parse`, and
        the result is handed to `expand_generics` together with `curr_path`
        and `self.form`, with the `xml` flag enabled.

        :param path: Location of the XML file to read.
        :type path: str
        :param curr_path: Passed through unchanged as the first argument of
            `expand_generics`.
        :type curr_path: list[str]
        :return: Whatever `expand_generics` returns for this document.
        :rtype: dict
        '''
        # Imported at call time rather than at module import
        # (presumably to avoid a circular import - confirm).
        from .._main._engine import expand_generics

        with open(path, encoding='utf-8') as xml_handle:
            raw_xml = xml_handle.read()

        document = xmltodict.parse(raw_xml)
        return expand_generics(curr_path, document, self.form, xml=True)