Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add general XML de-serialization to Python #533

Merged
merged 1 commit into from
Oct 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
332 changes: 332 additions & 0 deletions aas_core_codegen/python/xmlization/_generate.py
Original file line number Diff line number Diff line change
Expand Up @@ -528,6 +528,277 @@ def {function_name}(
)


def _generate_read_from_iterparse(
    aas_module: python_common.QualifiedModuleName,
) -> Stripped:
    """
    Generate the general read function to parse an instance from iterparse.

    The generated ``from_iterparse`` pulls the first ``(event, element)`` pair
    from the iterator, raises ``DeserializationException`` on end-of-input or
    if the first event is not ``'start'``, and otherwise delegates to
    ``_read_as_element``. If the delegate raises, the start element is prepended
    to the exception path before re-raising.

    :param aas_module:
        qualified name of the generated AAS module; it is only interpolated
        into the usage example of the generated docstring
    :return: source code of the generated function
    """
    # The generated ``from_stream``, ``from_file`` and ``from_str`` call this
    # function by its literal name, so the name is kept as a plain literal.
    function_name = "from_iterparse"

    # The example in the generated docstring is indented relative to
    # the ``.. code-block::`` directive, and the ``with`` body and the call
    # arguments are nested so that the snippet is valid Python when copied.
    return Stripped(
        f"""\
def {function_name}(
{I}iterator: Iterator[Tuple[str, Element]]
) -> aas_types.Class:
{I}\"\"\"
{I}Read an instance from the :paramref:`iterator`.

{I}The type of the instance is determined by the very first start element.

{I}Example usage:

{I}.. code-block::

{I}    import pathlib
{I}    import xml.etree.ElementTree as ET

{I}    import {aas_module}.xmlization as aas_xmlization

{I}    path = pathlib.Path(...)
{I}    with path.open("rt") as fid:
{I}        iterator = ET.iterparse(
{I}            source=fid,
{I}            events=['start', 'end']
{I}        )
{I}        instance = aas_xmlization.{function_name}(
{I}            iterator
{I}        )

{I}    # Do something with the ``instance``

{I}:param iterator:
{II}Input stream of ``(event, element)`` coming from
{II}:py:func:`xml.etree.ElementTree.iterparse` with the argument
{II}``events=["start", "end"]``
{I}:raise: :py:class:`DeserializationException` if unexpected input
{I}:return:
{II}Instance of :py:class:`.types.Class` read from the :paramref:`iterator`
{I}\"\"\"
{I}next_event_element = next(iterator, None)
{I}if next_event_element is None:
{II}raise DeserializationException(
{III}# fmt: off
{III}"Expected the start element of an instance, "
{III}"but got the end-of-input"
{III}# fmt: on
{II})

{I}next_event, next_element = next_event_element
{I}if next_event != 'start':
{II}raise DeserializationException(
{III}f"Expected the start element of an instance, "
{III}f"but got event {{next_event!r}} and element {{next_element.tag!r}}"
{II})

{I}try:
{II}return _read_as_element(
{III}next_element,
{III}iterator
{II})
{I}except DeserializationException as exception:
{II}exception.path._prepend(ElementSegment(next_element))
{II}raise exception"""
    )


def _generate_read_from_stream(
    aas_module: python_common.QualifiedModuleName,
) -> Stripped:
    """
    Generate the general read function to parse an instance from a text stream.

    The generated function calls ``has_iterparse.iterparse`` on the stream with
    the events ``['start', 'end']``, wraps the resulting iterator with
    ``_with_elements_cleared_after_yield`` and passes it to ``from_iterparse``.

    :param aas_module:
        qualified name of the generated AAS module; it is only interpolated
        into the usage example of the generated docstring
    :return: source code of the generated function
    """
    function_name = python_naming.function_name(Identifier("from_stream"))

    # The example in the generated docstring is indented relative to
    # the ``.. code-block::`` directive, and the ``with`` body is nested so that
    # the snippet is valid Python when copied.
    return Stripped(
        f"""\
def {function_name}(
{I}stream: TextIO,
{I}has_iterparse: HasIterparse = xml.etree.ElementTree
) -> aas_types.Class:
{I}\"\"\"
{I}Read an instance from the :paramref:`stream`.

{I}The type of the instance is determined by the very first start element.

{I}Example usage:

{I}.. code-block::

{I}    import {aas_module}.xmlization as aas_xmlization

{I}    with open_some_stream_over_network(...) as stream:
{I}        instance = aas_xmlization.{function_name}(
{I}            stream
{I}        )

{I}    # Do something with the ``instance``

{I}:param stream:
{II}representing an instance in XML
{I}:param has_iterparse:
{II}Module containing ``iterparse`` function.

{II}Default is to use :py:mod:`xml.etree.ElementTree` from the standard
{II}library. If you have to deal with malicious input, consider using
{II}a library such as `defusedxml.ElementTree`_.
{I}:raise: :py:class:`DeserializationException` if unexpected input
{I}:return:
{II}Instance read from :paramref:`stream`
{I}\"\"\"
{I}iterator = has_iterparse.iterparse(
{II}stream,
{II}['start', 'end']
{I})
{I}return from_iterparse(
{II}_with_elements_cleared_after_yield(iterator)
{I})"""
    )


def _generate_read_from_file(aas_module: python_common.QualifiedModuleName) -> Stripped:
    """
    Generate the general read function to parse an instance from a file.

    The generated function opens the file at ``path`` in text mode with
    the encoding ``utf-8``, calls ``has_iterparse.iterparse`` on it with
    the events ``['start', 'end']``, wraps the resulting iterator with
    ``_with_elements_cleared_after_yield`` and passes it to ``from_iterparse``.

    :param aas_module:
        qualified name of the generated AAS module; it is only interpolated
        into the usage example of the generated docstring
    :return: source code of the generated function
    """
    function_name = python_naming.function_name(Identifier("from_file"))

    # The generated docstring states how the type of the instance is picked,
    # in line with the docstrings generated for ``from_iterparse`` and
    # ``from_stream``. The example is indented relative to the ``.. code-block::``
    # directive with the call argument nested.
    #
    # NOTE(review): the generated code always decodes the file as UTF-8;
    # presumably an encoding declared in the XML prolog is not consulted --
    # confirm whether this is intended.
    return Stripped(
        f"""\
def {function_name}(
{I}path: PathLike,
{I}has_iterparse: HasIterparse = xml.etree.ElementTree
) -> aas_types.Class:
{I}\"\"\"
{I}Read an instance from the file at the :paramref:`path`.

{I}The type of the instance is determined by the very first start element.

{I}Example usage:

{I}.. code-block::

{I}    import pathlib
{I}    import {aas_module}.xmlization as aas_xmlization

{I}    path = pathlib.Path(...)
{I}    instance = aas_xmlization.{function_name}(
{I}        path
{I}    )

{I}    # Do something with the ``instance``

{I}:param path:
{II}to the file representing an instance in XML
{I}:param has_iterparse:
{II}Module containing ``iterparse`` function.

{II}Default is to use :py:mod:`xml.etree.ElementTree` from the standard
{II}library. If you have to deal with malicious input, consider using
{II}a library such as `defusedxml.ElementTree`_.
{I}:raise: :py:class:`DeserializationException` if unexpected input
{I}:return:
{II}Instance read from the file at :paramref:`path`
{I}\"\"\"
{I}with open(os.fspath(path), "rt", encoding='utf-8') as fid:
{II}iterator = has_iterparse.iterparse(
{III}fid,
{III}['start', 'end']
{II})
{II}return from_iterparse(
{III}_with_elements_cleared_after_yield(iterator)
{II})"""
    )


def _generate_read_from_str(aas_module: python_common.QualifiedModuleName) -> Stripped:
    """
    Generate the general read function to parse an instance from a string.

    The generated function wraps ``text`` in an ``io.StringIO``, calls
    ``has_iterparse.iterparse`` on it with the events ``['start', 'end']``,
    wraps the resulting iterator with ``_with_elements_cleared_after_yield``
    and passes it to ``from_iterparse``.

    :param aas_module:
        qualified name of the generated AAS module; it is only interpolated
        into the usage example of the generated docstring
    :return: source code of the generated function
    """
    function_name = python_naming.function_name(Identifier("from_str"))

    # The example in the generated docstring does not import ``pathlib`` since
    # nothing in the snippet uses it. The docstring also states how the type of
    # the instance is picked, in line with the docstrings generated for
    # ``from_iterparse`` and ``from_stream``.
    return Stripped(
        f"""\
def {function_name}(
{I}text: str,
{I}has_iterparse: HasIterparse = xml.etree.ElementTree
) -> aas_types.Class:
{I}\"\"\"
{I}Read an instance from the :paramref:`text`.

{I}The type of the instance is determined by the very first start element.

{I}Example usage:

{I}.. code-block::

{I}    import {aas_module}.xmlization as aas_xmlization

{I}    text = "<...>...</...>"
{I}    instance = aas_xmlization.{function_name}(
{I}        text
{I}    )

{I}    # Do something with the ``instance``

{I}:param text:
{II}representing an instance in XML
{I}:param has_iterparse:
{II}Module containing ``iterparse`` function.

{II}Default is to use :py:mod:`xml.etree.ElementTree` from the standard
{II}library. If you have to deal with malicious input, consider using
{II}a library such as `defusedxml.ElementTree`_.
{I}:raise: :py:class:`DeserializationException` if unexpected input
{I}:return:
{II}Instance read from :paramref:`text`
{I}\"\"\"
{I}iterator = has_iterparse.iterparse(
{II}io.StringIO(text),
{II}['start', 'end']
{I})
{I}return from_iterparse(
{II}_with_elements_cleared_after_yield(iterator)
{I})"""
    )


def _generate_general_read_as_element(
    symbol_table: intermediate.SymbolTable,
) -> Stripped:
    """
    Generate ``_read_as_element`` which dispatches on the tag of the start element.

    The generated function strips the namespace from the element tag with
    ``_parse_element_tag``, looks the result up in the general dispatch map and
    calls the function found there with the element and the iterator. If the tag
    is not in the map, the generated function raises
    ``DeserializationException``.

    :param symbol_table: not read by this generator
    :return: source code of the generated function
    """
    dispatch_map_name = python_naming.private_constant_name(
        Identifier("general_dispatch")
    )

    # Statements of the generated function, written without the leading
    # indention; they are indented as a whole further below.
    lookup_and_call = Stripped(
        f"""\
tag_wo_ns = _parse_element_tag(element)
read_as_sequence = {dispatch_map_name}.get(
{I}tag_wo_ns,
{I}None
)

if read_as_sequence is None:
{I}raise DeserializationException(
{II}f"Expected the element tag to be a valid model type "
{II}f"of a concrete instance, "
{II}f"but got tag {{tag_wo_ns!r}}"
{I})

return read_as_sequence(
{I}element,
{I}iterator
)"""
    )

    # The first line is indented explicitly in the template below,
    # the remaining lines are indented here.
    indented_statements = indent_but_first_line(lookup_and_call, I)

    function_code = f"""\
def _read_as_element(
{I}element: Element,
{I}iterator: Iterator[Tuple[str, Element]]
) -> aas_types.Class:
{I}\"\"\"
{I}Read an instance from :paramref:`iterator`, including the end element.

{I}:param element: start element
{I}:param iterator:
{II}Input stream of ``(event, element)`` coming from
{II}:py:func:`xml.etree.ElementTree.iterparse` with the argument
{II}``events=["start", "end"]``
{I}:raise: :py:class:`DeserializationException` if unexpected input
{I}:return: parsed instance
{I}\"\"\"
{I}{indented_statements}"""

    return Stripped(function_code)


_READ_FUNCTION_BY_PRIMITIVE_TYPE = {
intermediate.PrimitiveType.BOOL: "_read_bool_from_element_text",
intermediate.PrimitiveType.INT: "_read_int_from_element_text",
Expand Down Expand Up @@ -1071,6 +1342,54 @@ def _generate_dispatch_map_for_class(
return Stripped(mapping_writer.getvalue())


def _generate_general_dispatch_map(symbol_table: intermediate.SymbolTable) -> Stripped:
    """
    Generate the general mapping from XML class names to read-as-sequence functions.

    The generated constant has one entry for every concrete class of
    the :paramref:`symbol_table`. The key is the XML class name as a string
    literal, and the value is the name of the private function reading
    the class as a sequence.

    :param symbol_table: supplies the concrete classes to be listed
    :return: source code of the generated mapping, including its ``#:`` comment
    """
    mapping_name = python_naming.private_constant_name(Identifier("general_dispatch"))

    documentation_comment = """\
#: Dispatch XML class names to read-as-sequence functions
#: corresponding to the concrete classes
"""

    # Annotated head of the assignment up to and including the opening brace
    opening = f"""\
{mapping_name}: Mapping[
{I}str,
{I}Callable[
{II}[
{III}Element,
{III}Iterator[Tuple[str, Element]]
{II}],
{II}aas_types.Class
{I}]
] = {{
"""

    parts = [documentation_comment, opening]

    for concrete_cls in symbol_table.concrete_classes:
        reader_name = python_naming.private_function_name(
            Identifier(f"read_{concrete_cls.name}_as_sequence")
        )
        key_literal = python_common.string_literal(
            naming.xml_class_name(concrete_cls.name)
        )

        # One entry per line, each terminated with a comma and a new line
        parts.append(f"{I}{key_literal}: {reader_name},\n")

    parts.append("}")

    return Stripped("".join(parts))


def _generate_reader_and_setter_map(cls: intermediate.ConcreteClass) -> Stripped:
"""Generate the mapping property name 🠒 read function."""
# fmt: off
Expand Down Expand Up @@ -2198,6 +2517,15 @@ def _with_elements_cleared_after_yield(

blocks.append(_generate_read_cls_from_str(cls=cls, aas_module=aas_module))

blocks.extend(
[
_generate_read_from_iterparse(aas_module=aas_module),
_generate_read_from_stream(aas_module=aas_module),
_generate_read_from_file(aas_module=aas_module),
_generate_read_from_str(aas_module=aas_module),
]
)

blocks.extend(
[
Stripped(
Expand Down Expand Up @@ -2595,6 +2923,8 @@ def _read_bytes_from_element_text(
else:
assert_never(our_type)

blocks.append(_generate_general_read_as_element(symbol_table=symbol_table))

for cls in symbol_table.classes:
if isinstance(cls, intermediate.AbstractClass):
blocks.append(_generate_dispatch_map_for_class(cls=cls))
Expand All @@ -2608,6 +2938,8 @@ def _read_bytes_from_element_text(
else:
assert_never(cls)

blocks.append(_generate_general_dispatch_map(symbol_table=symbol_table))

blocks.append(Stripped("# endregion"))

blocks.append(Stripped("# region Serialization"))
Expand Down
Loading
Loading