Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix the nx_char type for numpy to and . #554

Open
wants to merge 20 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 17 commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
145 changes: 77 additions & 68 deletions src/pynxtools/dataconverter/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ def _log(self, path: str, log_type: ValidationProblem, value: Optional[Any], *ar
)
elif log_type == ValidationProblem.InvalidEnum:
logger.warning(
f"The value at {path} should be on of the following strings: {value}"
f"The value at {path} should be one of the following: {value}"
)
elif log_type == ValidationProblem.MissingRequiredGroup:
logger.warning(f"The required group, {path}, hasn't been supplied.")
Expand Down Expand Up @@ -579,83 +579,88 @@ def is_value_valid_element_of_enum(value, elist) -> Tuple[bool, list]:
NUMPY_INT_TYPES = (np.short, np.intc, np.int_)
NUMPY_UINT_TYPES = (np.ushort, np.uintc, np.uint)
# np int for np version 1.26.0
np_int = (
np.intc,
np.int_,
np.intp,
np.int8,
np.int16,
np.int32,
np.int64,
np.uint8,
np.uint16,
np.uint32,
np.uint64,
np.unsignedinteger,
np.signedinteger,
)
np_float = (np.float16, np.float32, np.float64, np.floating)
np_bytes = (np.bytes_, np.byte, np.ubyte)
np_char = (np.str_, np.char.chararray, *np_bytes)
np_int = (np.integer,)
np_float = (np.floating,)
# Not to be confused with `np.byte` and `np.ubyte`, these store
# an integer of `8bit` and `unsigned 8bit` respectively.
np_bytes = (np.bytes_,)
np_char = (np.str_, np.bytes_) # Only numpy Unicode string and Byte string
np_bool = (np.bool_,)
np_complex = (np.complex64, np.complex128, np.cdouble, np.csingle)
np_complex = (np.complex64, np.complex128, np.cdouble, np.csingle, np.complex_)
NEXUS_TO_PYTHON_DATA_TYPES = {
"ISO8601": (str,),
"NX_BINARY": (
bytes,
bytearray,
np.ndarray,
*np_bytes,
),
"NX_BOOLEAN": (bool, np.ndarray, *np_bool),
"NX_CHAR": (str, np.ndarray, *np_char),
"NX_BOOLEAN": (bool, *np_bool),
"NX_CHAR": (str, *np_char),
"NX_DATE_TIME": (str,),
"NX_FLOAT": (float, np.ndarray, *np_float),
"NX_INT": (int, np.ndarray, *np_int),
"NX_UINT": (np.ndarray, np.unsignedinteger),
"NX_FLOAT": (float, *np_float),
"NX_INT": (int, *np_int),
"NX_UINT": (
np.unsignedinteger,
np.uint,
),
"NX_NUMBER": (
int,
float,
np.ndarray,
*np_int,
*np_float,
dict,
),
"NX_POSINT": (
int,
np.ndarray,
np.signedinteger,
), # > 0 is checked in is_valid_data_field()
"NX_COMPLEX": (complex, np.ndarray, *np_complex),
"NXDL_TYPE_UNAVAILABLE": (str,), # Defaults to a string if a type is not provided.
"NX_COMPLEX": (complex, *np_complex),
"NXDL_TYPE_UNAVAILABLE": (
str,
*np_char,
), # Defaults to a string if a type is not provided.
"NX_CHAR_OR_NUMBER": (
str,
int,
float,
np.ndarray,
*np_char,
*np_int,
*np_float,
dict,
),
}


def check_all_children_for_callable(objects: list, check: Callable, *args) -> bool:
"""Checks whether all objects in list are validated by given callable."""
for obj in objects:
if not check(obj, *args):
return False
def check_all_children_for_callable(
objects: Union[list, np.ndarray],
checker: Optional[Callable] = None,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suggest to rename this to callable

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

or test_function

accepted_types: Optional[tuple] = None,
) -> bool:
"""Checks whether all objects in list or numpy array are validated
by given callable and types.
"""

return True
if checker is not None:
for obj in objects:
args = (obj, accepted_types) if accepted_types is not None else (obj,)
if not checker(*args):
return False
return True
if isinstance(objects, tuple):
return False
if isinstance(objects, list):
# Handles list and list of list
return all([type(elem) in accepted_types for elem in objects])
if isinstance(objects, np.ndarray):
return any([np.issubdtype(objects.dtype, type_) for type_ in accepted_types])

return False


def is_valid_data_type(value, accepted_types):
"""Checks whether the given value or its children are of an accepted type."""
if not isinstance(value, list):

if not isinstance(value, (list, np.ndarray)):
return isinstance(value, accepted_types)

return check_all_children_for_callable(value, isinstance, accepted_types)
return check_all_children_for_callable(objects=value, accepted_types=accepted_types)


def is_positive_int(value):
Expand All @@ -665,56 +670,53 @@ def is_greater_than(num):
return num.flat[0] > 0 if isinstance(num, np.ndarray) else num > 0

if isinstance(value, list):
return check_all_children_for_callable(value, is_greater_than)
return check_all_children_for_callable(objects=value, checker=is_greater_than)

return value.flat[0] > 0 if isinstance(value, np.ndarray) else value > 0


def convert_str_to_bool_safe(value):
def convert_str_to_bool_safe(value: str) -> Optional[bool]:
"""Only returns True or False if someone mistakenly adds quotation marks but mean a bool.

For everything else it returns None.
For everything else it raises a ValueError.
"""
if value.lower() == "true":
return True
if value.lower() == "false":
return False
return None
raise ValueError(f"Could not interpret string '{value}' as boolean.")


def is_valid_data_field(value, nxdl_type, path):
# todo: Check this funciton and wtire test for it. It seems the funciton is not
def is_valid_data_field(value: Any, nxdl_type: str, nxdl_enum: list, path: str) -> Any:
# todo: Check this function and write test for it. It seems the function is not
# working as expected.
"""Checks whether a given value is valid according to what is defined in the NXDL.

This function will also try to convert typical types, for example int to float,
and return the successful conversion.
"""Checks whether a given value is valid according to the type defined in the NXDL.

If it fails to convert, it raises an Exception.
This function only tries to convert boolean value in str format (e.g. "true" ) to
python Boolean (True). In case, it fails to convert, it raises an Exception.

Returns two values: first, boolean (True if the the value corresponds to nxdl_type,
False otherwise) and second, result of attempted conversion or the original value
(if conversion is not needed or impossible)
Return:
value: the possibly converted data value
"""
accepted_types = NEXUS_TO_PYTHON_DATA_TYPES[nxdl_type]
output_value = value

accepted_types = NEXUS_TO_PYTHON_DATA_TYPES[nxdl_type]
# Do not count the dict as it represents a link value
if not isinstance(value, dict) and not is_valid_data_type(value, accepted_types):
try:
if accepted_types[0] is bool and isinstance(value, str):
# try to convert string to bool
if accepted_types[0] is bool and isinstance(value, str):
try:
value = convert_str_to_bool_safe(value)
if value is None:
raise ValueError
output_value = accepted_types[0](value)
except ValueError:
except (ValueError, TypeError):
collector.collect_and_log(
path, ValidationProblem.InvalidType, accepted_types, nxdl_type
)
else:
collector.collect_and_log(
path, ValidationProblem.InvalidType, accepted_types, nxdl_type
)
return False, value

if nxdl_type == "NX_POSINT" and not is_positive_int(value):
collector.collect_and_log(path, ValidationProblem.IsNotPosInt, value)
return False, value

if nxdl_type in ("ISO8601", "NX_DATE_TIME"):
iso8601 = re.compile(
Expand All @@ -724,9 +726,16 @@ def is_valid_data_field(value, nxdl_type, path):
results = iso8601.search(value)
if results is None:
collector.collect_and_log(path, ValidationProblem.InvalidDatetime, value)
return False, value

return True, output_value
# Check enumeration
if nxdl_enum is not None and value not in nxdl_enum:
collector.collect_and_log(
path,
ValidationProblem.InvalidEnum,
nxdl_enum,
)

return value


@lru_cache(maxsize=None)
Expand Down
17 changes: 13 additions & 4 deletions src/pynxtools/dataconverter/nexus_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -761,7 +761,7 @@ class NexusEntity(NexusNode):
type: Literal["field", "attribute"]
unit: Optional[NexusUnitCategory] = None
dtype: NexusType = "NX_CHAR"
items: Optional[List[str]] = None
items: Optional[List[Any]] = None
shape: Optional[Tuple[Optional[int], ...]] = None

def _set_type(self):
Expand Down Expand Up @@ -790,14 +790,23 @@ def _set_items(self):
based on the values in the inheritance chain.
The first vale found is used.
"""
if not self.dtype == "NX_CHAR":
return
for elem in self.inheritance:
enum = elem.find(f"nx:enumeration", namespaces=namespaces)
if enum is not None:
self.items = []
for items in enum.findall(f"nx:item", namespaces=namespaces):
self.items.append(items.attrib["value"])
value = items.attrib["value"]
if value[0] == "[" and value[-1] == "]":
import ast

try:
self.items.append(ast.literal_eval(value))
except (ValueError, SyntaxError):
raise Exception(
f"Error parsing enumeration item in the provided NXDL: {value}"
)
else:
self.items.append(value)
return

def _set_shape(self):
Expand Down
Loading
Loading