Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor doc generation #95

Merged
merged 3 commits into from
Jan 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
125 changes: 10 additions & 115 deletions ifex/model/ifex_ast_doc.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,79 +12,14 @@
"""

from dataclasses import fields
from typing import Union, get_origin, get_args
from ifex.model.ifex_ast import Namespace
import re, itertools

#
# These helper functions determine the type of object, or they coerce
# special case handling into a simpler expression that can be used in later
# functions.
#

# typing.Optional?
def is_optional(field):
    """Tell whether a type hint is ``Optional[...]``.

    ``Optional[X]`` is represented by typing as a ``Union`` that has
    ``NoneType`` among its arguments, so that is what is tested here.
    """
    if get_origin(field) is not Union:
        return False
    return type(None) in get_args(field)


# typing.ForwardRef?
def is_forwardref(field):
    """Tell whether the given object is a ``ForwardRef``.

    The check is made on the class name of the object, not on identity
    with the typing.ForwardRef class.
    """
    class_name = type(field).__name__
    return class_name == 'ForwardRef'


# typing.List[<something>]? (also Optional[List[<something>]])
def is_list(field):
    """Tell whether a dataclass field is declared as a List.

    Fields typed as plain ``str`` or ``int`` are rejected directly. For
    everything else the Optional wrapper (if any) is stripped with
    actual_type() and the remaining type is compared *by name* with 'List'.
    """
    # Simple built-in types can never be lists.
    if field.type in (str, int):
        return False

    # Comparing names (strings) rather than using "is" / issubclass() is
    # not ideal, but it is the test that has been found to work here.
    unwrapped = actual_type(field)
    return unwrapped.__name__ == 'List'


# This takes care about the fact that ForwardRef does not have
# a member __name__ (because it's not actually a type, as such)
# Instead it has __forward_arg__ which is a string containing
# the referenced type name.
def type_name(ttype):
    """Return the name of a type, with support for ForwardRef objects.

    A ForwardRef carries the referenced type name as a string in
    ``__forward_arg__``; any other type is asked for its ``__name__``.
    """
    return ttype.__forward_arg__ if is_forwardref(ttype) else ttype.__name__


# This strips off Optional[] from the type hierarchy so that we are left
# with the "real" inner type. (It can still be a List of Something)
def actual_type(field):
    """Return the type of a field with any Optional[] wrapper removed.

    If the argument itself is a str or int *value*, its type is returned.
    Otherwise the argument is expected to have a ``.type`` attribute holding
    the type hint: for Optional[X] the first Union argument is returned,
    and any other hint is returned unchanged (it may still be a List).
    """
    value_type = type(field)
    if value_type in (str, int):
        return value_type

    hint = field.type
    if is_optional(hint):
        # Optional[X] is Union[X, None]; take the first argument.
        return get_args(hint)[0]
    return hint


def actual_type_name(field):
    """Return the name of the field's type after Optional[] is stripped."""
    unwrapped = actual_type(field)
    return type_name(unwrapped)


# Return the type of members of a List
# Only call if it is already known to be a List.
def list_member_type(field):
    """Return the element type of a List field.

    Only meaningful when the field is already known to be a List: the
    first type argument of the (Optional-stripped) type is returned.
    """
    list_hint = actual_type(field)
    type_arguments = get_args(list_hint)
    return type_arguments[0]


def list_member_type_name(field):
    """Return the name of the element type of a List field."""
    member = list_member_type(field)
    return type_name(member)


from ifex.model.ifex_ast_introspect import walk_type_tree, field_is_list, is_optional, type_name, field_actual_type, field_inner_type
import re,itertools

#
# Document generation functions
#


def markdown_heading(n: int, s: str):
for _ in range(n):
print("#", end='')
Expand All @@ -96,14 +31,15 @@ def markdown_table_row(field):
print(f"| {field.name} | ", end='')
if field.type is str:
print("A single **str**", end='')
elif is_list(field):
print(f"A list of **{list_member_type_name(field)}**_s_", end='')
elif field_is_list(field):
print(f"A list of **{type_name(field_inner_type(field))}**_s_", end='')
else:
print(f"A single **{actual_type_name(field)}**", end='')
print(f"A single **{type_name(field_actual_type(field))}**", end='')

print(docstring(field), end='')
print(" |")


def determine_indentation(s):
count = 0
# groupby() will collect up repeating characters (like space) so we can
Expand All @@ -123,6 +59,7 @@ def determine_indentation(s):
break
return count


def docstring(item):

# We can't remove all whitespace at start of every line because this
Expand All @@ -143,12 +80,14 @@ def docstring(item):
else:
return ""


def markdown_table(fields):
print(f"|Field Name|Required contents|")
print(f"|Field Name|Contents|")
print(f"|-----|-----------|")
for f in fields:
markdown_table_row(f)


def document_fields(node):
name = type_name(node)

Expand All @@ -165,50 +104,6 @@ def document_fields(node):
print("\n")


import typing
def walk_type_tree(node, process, seen=None):
    """Walk the AST class hierarchy as defined by @dataclasses with type
    hints from typing module.

    Performs a depth-first traversal. Parent node is processed first, then its
    children, going as deep as possible before backtracking. Type names that
    have already been seen before are identical so recursion is cut off there.
    The given hook function "process" is called for every unique type.

    Arguments: node    = a @dataclass class
               process = function to call for each node
               seen    = dict of type names already visited. Leave it out
                         (None) to start a fresh traversal; it is passed
                         along internally during recursion.
    """

    # A fresh dict per top-level call. A mutable default ({}) would be
    # shared between calls, so a second traversal would skip every type.
    if seen is None:
        seen = {}

    # (No need to document, or recurse on the following types).
    # This is tested before asking for the type name, because not every
    # typing object is guaranteed to provide __name__.
    if node in [str, int, typing.Any]:
        return

    # Skip duplicates (like Namespace, it appears more than once in AST model)
    name = type_name(node)
    if seen.get(name):
        return

    # Process this node
    process(node)
    seen[name] = True

    # ForwardRef will fail if we try to recurse its children.
    # However, the types that are handled with ForwardRef (Namespace)
    # ought to appear anyhow somewhere in the recursion.
    if is_forwardref(node):
        return

    # Next, recurse on each AST type used in child fields (stripping
    # away 'List' and 'Optional' to get to the interesting class)
    for n in fields(node):
        if is_list(n):
            # Document Node types that are found inside Lists
            walk_type_tree(list_member_type(n), process, seen)
        else:
            # Document Node types found directly
            walk_type_tree(actual_type(n), process, seen)


if __name__ == "__main__":
    # When run as a program: walk the type tree starting at Namespace and
    # call document_fields for every unique type found.
    walk_type_tree(Namespace, document_fields)


197 changes: 197 additions & 0 deletions ifex/model/ifex_ast_introspect.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,197 @@
# SPDX-License-Identifier: MPL-2.0

# (C) 2023 MBition GmbH
# (C) 2022 Novaspring AB
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at https://mozilla.org/MPL/2.0/.

"""
Provide helper functions to inspect the IFEX Core IDL language definition,
as it is defined by the class tree/hierarchy (not an inheritance hierarchy)
in the `ifex_ast` python file. These functions can be used by any other code
that needs to process this underlying meta-model. It helps to ensure that the
fundamental language is defined in a single file. """

from ifex.model import ifex_ast
from dataclasses import is_dataclass, fields
from typing import get_args, get_origin, List, Optional, Union, Any, ForwardRef
import typing

# As we traverse the "tree" of dataclass definitions, it can be quite difficult
# to keep track of which type each variable has. Here is an explanation of how
# we try to keep track:
#
# The following functions come in two flavors each. Functions like: is_xxx()
# take an object, which is an instance of typing.<some class> which is
# essentially an object that indicates the "type hint" using concepts from
# the 'typing' python module. Examples are: Optional, List, Union, Any, etc.
# We here call variables that reference such a typing.Something object a
# type_indicator. It corresponds to the type hint information on the right
# side of the colon : in an expression like this:
#
# namespaces: Optional[List[Namespace]]
#
# The type_indicator is the: `Optional[List[Namespace]]`
# (or if fully qualified: `typing.Optional[typing.List[ifex_ast.Namespace]]`)
# Note that instead of being a dataclass like ifex_ast.Namespace, the inner
# type can of course be a built-in simple type like str. e.g. typing.List[str]
#
# Next, in the 'dataclasses' python module we find the function fields().
# It returns a list that represents the fields (members) of the dataclass.
# Each field is represented by an object (an instance of the dataclasses.Field
# class). We name variables that refer to such Field() instances as `field`.
# A field thus represents a member variable in the python (data)class.
# A field object contains several pieces of information, such as the name of the member
# variable (field.name), and the `.type` member, which gives us the
# type_indicator as described above.
#
# For each is_xxx() function, there is a convenience function named field_is_xxx()
# which takes an instance of the field itself, instead of the field's type.
# As you can see, most of those functions simply reference the field.type
# member to get the type_indicator, and then pass it to the is_xxx() function.
#
# NOTE: Here in the descriptions we might refer to an object's "type" when we
# strictly mean its Type Indicator. Since typing in python is dynamic,
# the actual type of an object could be different (and can be somewhat fungible
# too in theory, but generally not in this code).

def is_dataclass_type(cls):
    """Report whether the argument is a dataclass.

    This is a thin wrapper that returns the result of
    dataclasses.is_dataclass() unchanged.
    """
    result = is_dataclass(cls)
    return result

def is_optional(type_indicator):
    """Check if the type indicator is Optional.

    Returns True for ``Optional[X]`` / ``Union[X, None]`` and, on Python
    versions that support it, for the equivalent ``X | None`` spelling.
    Returns False for everything else, including Unions without None.
    """
    # Local import: `types` is only needed here to look up UnionType.
    import types

    # Note: Inside typing, Optional[MyType] is actually a Union of <MyType, None>.
    # A hint written as `MyType | None` has the origin types.UnionType instead
    # (that attribute does not exist on older Python versions, hence getattr).
    origin = get_origin(type_indicator)
    pep604_union = getattr(types, "UnionType", None)
    is_union = origin is Union or (pep604_union is not None and origin is pep604_union)

    # It is an Optional only if None is one of the Union members.
    return is_union and type(None) in get_args(type_indicator)

def field_is_optional(field):
    """Field flavor of is_optional(): test the type hint stored in field.type."""
    type_indicator = field.type
    return is_optional(type_indicator)

def is_list(type_indicator):
    """Tell whether the type indicator is a List, also when wrapped in Optional.

    Both ``List[X]`` and ``Optional[List[X]]`` give True.
    """
    if not is_optional(type_indicator):
        # Plain case: typing.List[X] reports the built-in list as its origin.
        return get_origin(type_indicator) is list

    # Wrapped in Optional: unwrap to the inner "actual type" and test that.
    unwrapped = actual_type(type_indicator)
    return is_list(unwrapped)

def field_is_list(field):
    """Field flavor of is_list(): test the type hint stored in field.type."""
    type_indicator = field.type
    return is_list(type_indicator)

def inner_type(type_indicator):
    """Return the element type X of a ``List[X]`` or ``Optional[List[X]]``.

    If the type indicator is not a List, nothing is extracted and the
    result is None, so callers should check is_list() first.
    """
    if not is_list(type_indicator):
        return None

    # Strip a possible Optional wrapper, then take the List's type argument.
    list_indicator = actual_type(type_indicator)
    return get_args(list_indicator)[0]

def field_inner_type(field):
    """Field flavor of inner_type(): return the element type of a List field.

    The type hint is read from field.type and handed to inner_type().
    """
    type_indicator = field.type
    return inner_type(type_indicator)

def actual_type(type_indicator):
    """Return the type X for a type indicator that is Optional[X].
    (Returns the type X also if input was non-optional)

    For an Optional, the first Union member that is not NoneType is
    returned, so ``Union[None, X]`` yields X just like ``Optional[X]`` does.
    If the Union has several non-None members, only the first is returned.
    """
    # Anything that is not an Optional (str, int, List[X], a dataclass, ...)
    # is already the "actual" type.
    if not is_optional(type_indicator):
        return type_indicator

    # Do not rely on the position of None inside the Union: Optional[X] is
    # stored as (X, NoneType), but Union[None, X] is stored as (NoneType, X).
    none_type = type(None)
    return next(member for member in get_args(type_indicator)
                if member is not none_type)

def field_actual_type(field):
    """Field flavor of actual_type(): strip Optional[] from the field's type.

    The type hint is read from field.type and handed to actual_type().
    """
    type_indicator = field.type
    return actual_type(type_indicator)

def is_forwardref(type_indicator):
    """Tell whether the type indicator is exactly a typing.ForwardRef object."""
    indicator_class = type(type_indicator)
    return indicator_class is ForwardRef

def field_is_forwardref(field):
    """Field flavor of is_forwardref(): test the type hint stored in field.type."""
    type_indicator = field.type
    return is_forwardref(type_indicator)

# This takes care of the fact that ForwardRef does not have a member
# __name__ (because it's not actually a type, as such). Instead it has
# __forward_arg__ which is a *string* containing the referenced type name.
def type_name(type_indicator):
    """Return the name of the type that the type indicator stands for.

    Ordinary types answer with their ``__name__``. A ForwardRef answers with
    the string it holds in ``__forward_arg__``.
    """
    if not is_forwardref(type_indicator):
        return type_indicator.__name__
    return type_indicator.__forward_arg__

# Module-wide switch: when True, walk_type_tree() prints extra notes
# (skipped types, names of list fields) while it traverses the tree.
VERBOSE = False

# Tree processing function:
def walk_type_tree(node, process, seen=None):
    """Walk the AST class hierarchy as defined by @dataclasses with type
    hints from typing module.

    Performs a depth-first traversal. Parent node is processed first, then its
    children, going as deep as possible before backtracking. Type names that
    have already been seen before are identical so recursion is cut off there.
    The given hook function "process" is called for every unique type.

    Arguments: node    = a @dataclass class
               process = a "callback" function to call for each node
               seen    = dict of type names already visited. Leave it out
                         (None) to start a fresh traversal; it is passed
                         along internally during recursion.
    """

    # A fresh dict per top-level call. A mutable default ({}) would be
    # shared between calls, so a second traversal would skip every type.
    if seen is None:
        seen = {}

    # (No need to document, or recurse on the following types):
    # FIXME: this is correct for our documentation generation but maybe not for all cases
    if node in [str, int, typing.Any]:
        return

    # Skip duplicates (like Namespace, it appears more than once in the AST model)
    name = type_name(node)
    if seen.get(name):
        if VERBOSE:
            print(f" note: a field of type {name} was skipped")
        return

    # Mark as seen before processing, so the type is handled only once.
    seen[name] = True

    # Process this node
    process(node)

    # ForwardRef will fail if we try to recurse over its children.
    # However, the types that are handled with ForwardRef (Namespace)
    # ought to appear anyhow somewhere else in the recursion, so we
    # skip them.
    if is_forwardref(node):
        return

    # Next, recurse on each AST type used in child fields (stripping
    # away 'List' and 'Optional' to get to the interesting class)
    for f in fields(node):
        if field_is_list(f):
            if VERBOSE:
                print(f" field: {f.name}")
            # Document Node types that are found inside Lists
            walk_type_tree(field_inner_type(f), process, seen)
        else:
            # Document Node types found directly
            walk_type_tree(field_actual_type(f), process, seen)


# Test code:

# Comment: Here's one way to get the typing hints of a member of a
# dataclass from typing import: get_type_hints
# print(get_type_hints(ifex_ast.Namespace)['interface'])

# Simple processor function for testing - just print the text representation of the node
def _simple_process(arg):
    """Test callback for walk_type_tree(): print the node it is given.

    As a side effect it turns on the module-level VERBOSE flag.
    """
    global VERBOSE

    # Enable the extra notes for the rest of the run.
    VERBOSE = True

    # The text representation of the node is all we want to see here.
    print(arg)

# Run module as a program - for testing/development only:
if __name__ == "__main__":
    # Self-test: walk the type tree starting at ifex_ast.Namespace and print
    # each unique type through _simple_process.
    print("TEST: Note that already seen types are skipped, and this is a depth-first search => The structure of the tree is not easily seen from this output.")
    walk_type_tree(ifex_ast.Namespace, _simple_process)
Loading