diff --git a/CHANGELOG.md b/CHANGELOG.md index 5a878c2..e63edd8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,7 +2,7 @@ ## Version 3.4.0 ## -In this release, test coverage is 74%. +In this release, test coverage is 76%. 💥 New features: * `dataset.io.h5fmt.HDF5Reader.read` method: added new `default` argument to set @@ -16,6 +16,11 @@ In this release, test coverage is 74%. automatically accepted or not (default is `None`, meaning no automatic acceptance): this allows more coverage of the test suite. For now, this attribute has only been proven useful in `tests/dataset/test_all_features.py`. + * Added unit tests for HDF5 and JSON serialization/deserialization: + * Testing an arbitrary data model saved/loaded to/from HDF5 and JSON files, + with various data sets and other data types. + * Testing for backward compatibility with previous versions of the data model + (e.g. new attributes, removed attributes, etc.) API breaking changes: * `guidata.dataset.io` module is now deprecated and will be removed in a future diff --git a/guidata/io/h5fmt.py b/guidata/io/h5fmt.py index f915495..77fec73 100644 --- a/guidata/io/h5fmt.py +++ b/guidata/io/h5fmt.py @@ -25,11 +25,11 @@ class TypeConverter: """Handles conversion between types for HDF5 serialization. Args: - to_type (Any): The target type for the HDF5 representation. - from_type (Any | None): The original type from the HDF5 representation. - Defaults to `to_type` if not specified. + to_type: The target type for the HDF5 representation. + from_type: The original type from the HDF5 representation. + Defaults to `to_type` if not specified. - Note: + .. note:: Instances of this class are used to ensure data consistency when serializing and deserializing data to and from HDF5 format. """ @@ -46,10 +46,10 @@ def to_hdf(self, value: Any) -> Any: """Converts the value to the target type for HDF5 serialization. Args: - value (Any): The value to be converted. + value: The value to be converted. 
Returns: - Any: The converted value in the target type. + The converted value in the target type. Raises: Exception: If the conversion to the target type fails. @@ -64,10 +64,10 @@ def from_hdf(self, value: Any) -> Any: """Converts the value from the HDF5 representation to target type. Args: - value (Any): The HDF5 value to be converted. + value: The HDF5 value to be converted. Returns: - Any: The converted value in the original type. + The converted value in the original type. """ return self._from_type(value) @@ -76,13 +76,13 @@ class Attr: """Helper class representing class attribute for HDF5 serialization. Args: - hdf_name (str): Name of the attribute in the HDF5 file. - struct_name (str | None): Name of the attribute in the object. - Defaults to `hdf_name` if not specified. - type (TypeConverter | None): Attribute type. If None, type is guessed. - optional (bool): If True, attribute absence will not raise error. + hdf_name: Name of the attribute in the HDF5 file. + struct_name: Name of the attribute in the object. + Defaults to `hdf_name` if not specified. + type: Attribute type. If None, type is guessed. + optional: If True, attribute absence will not raise error. - Note: + .. note:: This class manages serialization and deserialization of the object's attributes to and from HDF5 format. """ @@ -103,10 +103,10 @@ def get_value(self, struct: Any) -> Any: """Get the value of the attribute from the object. Args: - struct (Any): The object to extract the attribute from. + struct: The object to extract the attribute from. Returns: - Any: The value of the attribute. + The value of the attribute. """ if self.optional: return getattr(struct, self.struct_name, None) @@ -116,8 +116,8 @@ def set_value(self, struct: Any, value: Any) -> None: """Set the value of the attribute in the object. Args: - struct (Any): The object to set the attribute value in. - value (Any): The value to set. + struct: The object to set the attribute value in. + value: The value to set. 
""" setattr(struct, self.struct_name, value) @@ -125,8 +125,8 @@ def save(self, group: h5py.Group, struct: Any) -> None: """Save the attribute to an HDF5 group. Args: - group (h5py.Group): The HDF5 group to save the attribute to. - struct (Any): The object to save the attribute from. + group: The HDF5 group to save the attribute to. + struct: The object to save the attribute from. Raises: Exception: If an error occurs while saving the attribute. @@ -148,8 +148,8 @@ def load(self, group: h5py.Group, struct: Any) -> None: """Load the attribute from an HDF5 group into an object. Args: - group (h5py.Group): The HDF5 group to load the attribute from. - struct (Any): The object to load the attribute into. + group: The HDF5 group to load the attribute from. + struct: The object to load the attribute into. Raises: KeyError: If the attribute is not found in the HDF5 group. @@ -171,9 +171,9 @@ def createdset(group: h5py.Group, name: str, value: np.ndarray | list) -> None: Creates a dataset in the provided HDF5 group. Args: - group (h5py.Group): The group in the HDF5 file to add the dataset to. - name (str): The name of the dataset. - value (np.ndarray or list): The data to be stored in the dataset. + group: The group in the HDF5 file to add the dataset to. + name: The name of the dataset. + value: The data to be stored in the dataset. Returns: None @@ -187,13 +187,11 @@ class Dset(Attr): Handles the conversion of the scalar value, if any. Args: - hdf_name (str): The name of the HDF5 attribute. - struct_name (str): The name of the structure. Defaults to None. - type (type): The expected data type of the attribute. - Defaults to None. - scalar (Callable): Function to convert the scalar value, if any. - Defaults to None. - optional (bool): Whether the attribute is optional. Defaults to False. + hdf_name: The name of the HDF5 attribute. + struct_name: The name of the structure. Defaults to None. + type: The expected data type of the attribute. Defaults to None. 
+ scalar: Function to convert the scalar value, if any. Defaults to None. + optional: Whether the attribute is optional. Defaults to False. """ def __init__( @@ -212,8 +210,8 @@ def save(self, group: h5py.Group, struct: Any) -> None: Save the attribute to the given HDF5 group. Args: - group (h5py.Group): The group in the HDF5 file to save the attribute to. - struct (Any): The structure containing the attribute. + group: The group in the HDF5 file to save the attribute to. + struct: The structure containing the attribute. """ value = self.get_value(struct) if isinstance(value, float): @@ -238,8 +236,8 @@ def load(self, group: h5py.Group, struct: Any) -> None: Load the attribute from the given HDF5 group. Args: - group (h5py.Group): The group in the HDF5 file to load the attribute from. - struct (Any): The structure to load the attribute into. + group: The group in the HDF5 file to load the attribute from. + struct: The structure to load the attribute into. Raises: KeyError: If the attribute cannot be found in the HDF5 group. @@ -265,13 +263,11 @@ class Dlist(Dset): handle lists specifically. Args: - hdf_name (str): The name of the HDF5 attribute. - struct_name (str): The name of the structure. Defaults to None. - type (type): The expected data type of the attribute. - Defaults to None. - scalar (Callable): Function to convert the scalar value, if any. - Defaults to None. - optional (bool): Whether the attribute is optional. Defaults to False. + hdf_name: The name of the HDF5 attribute. + struct_name: The name of the structure. Defaults to None. + type: The expected data type of the attribute. Defaults to None. + scalar: Function to convert the scalar value, if any. Defaults to None. + optional: Whether the attribute is optional. Defaults to False. """ def get_value(self, struct: Any) -> np.ndarray: @@ -279,11 +275,10 @@ def get_value(self, struct: Any) -> np.ndarray: Returns the value of the attribute in the given structure as a numpy array. 
Args: - struct (Any): The structure containing the attribute. + struct: The structure containing the attribute. Returns: - np.ndarray: The value of the attribute in the given structure as a - numpy array. + The value of the attribute in the given structure as a numpy array. """ return np.array(getattr(struct, self.struct_name)) @@ -293,9 +288,8 @@ def set_value(self, struct: Any, value: np.ndarray) -> None: the values of the given numpy array. Args: - struct (Any): The structure in which to set the attribute. - value (np.ndarray): A numpy array containing the values to set the - attribute to. + struct: The structure in which to set the attribute. + value: A numpy array containing the values to set the attribute to. """ setattr(struct, self.struct_name, list(value)) @@ -309,7 +303,7 @@ class H5Store: Class for managing HDF5 files. Args: - filename (str): The name of the HDF5 file. + filename: The name of the HDF5 file. """ def __init__(self, filename: str) -> None: @@ -321,10 +315,10 @@ def open(self, mode: str = "a") -> h5py._hl.files.File: Opens an HDF5 file in the given mode. Args: - mode (str): The mode in which to open the file. Defaults to "a". + mode: The mode in which to open the file. Defaults to "a". Returns: - h5py._hl.files.File: The opened HDF5 file. + The opened HDF5 file. Raises: Exception: If there is an error while trying to open the file. @@ -357,7 +351,7 @@ def __enter__(self) -> "H5Store": Support for 'with' statement. Returns: - H5Store: The instance of the class itself. + The instance of the class itself. """ return self @@ -372,11 +366,11 @@ def generic_save(self, parent: Any, source: Any, structure: list[Attr]) -> None: Saves the data from source into the file using 'structure' as a descriptor. Args: - parent (Any): The parent HDF5 group. - source (Any): The source of the data to save. - structure (List[Attr]): A list of attribute descriptors (Attr, Dset, - Dlist, etc.) 
that describe the conversion of data and the names - of the attributes in the source and in the file. + parent: The parent HDF5 group. + source: The source of the data to save. + structure: A list of attribute descriptors (Attr, Dset, Dlist, etc.) that + describes the conversion of data and the names of the attributes in the + source and in the file. """ for instr in structure: instr.save(parent, source) @@ -386,11 +380,11 @@ def generic_load(self, parent: Any, dest: Any, structure: list[Attr]) -> None: Loads the data from the file into 'dest' using 'structure' as a descriptor. Args: - parent (Any): The parent HDF5 group. - dest (Any): The destination to load the data into. - structure (List[Attr]): A list of attribute descriptors (Attr, Dset, - Dlist, etc.) that describe the conversion of data and the names - of the attributes in the file and in the destination. + parent: The parent HDF5 group. + dest: The destination to load the data into. + structure: A list of attribute descriptors (Attr, Dset, Dlist, etc.) that + describes the conversion of data and the names of the attributes in the + file and in the destination. Raises: Exception: If there is an error while trying to load an item. @@ -413,7 +407,7 @@ class HDF5Handler(H5Store, BaseIOHandler): Base HDF5 I/O Handler object. Inherits from H5Store and BaseIOHandler. Args: - filename (str): The name of the HDF5 file. + filename: The name of the HDF5 file. """ def __init__(self, filename: str) -> None: @@ -425,7 +419,7 @@ def get_parent_group(self) -> h5py._hl.group.Group: Returns the parent group in the HDF5 file based on the current option. Returns: - h5py._hl.group.Group: The parent group in the HDF5 file. + The parent group in the HDF5 file. """ parent = self.h5 for option in self.option[:-1]: @@ -442,7 +436,7 @@ class HDF5Writer(HDF5Handler, WriterMixin): Writer for HDF5 files. Inherits from HDF5Handler and WriterMixin. Args: - filename (str): The name of the HDF5 file. + filename: The name of the HDF5 file. 
""" def __init__(self, filename: str) -> None: @@ -453,9 +447,9 @@ def write(self, val: Any, group_name: str | None = None) -> None: """ Write a value depending on its type, optionally within a named group. Args: - val (Any): The value to be written. - group_name (Optional[str]): The name of the group. If provided, the group - context will be used for writing the value. + val: The value to be written. + group_name: The name of the group. If provided, the group + context will be used for writing the value. """ if group_name: self.begin(group_name) @@ -492,7 +486,7 @@ def write_any(self, val: Any) -> None: Write the value to the HDF5 file as an attribute. Args: - val (Any): The value to write. + val: The value to write. """ group = self.get_parent_group() group.attrs[self.option[-1]] = val @@ -504,7 +498,7 @@ def write_bool(self, val: bool) -> None: Write the boolean value to the HDF5 file as an attribute. Args: - val (bool): The boolean value to write. + val: The boolean value to write. """ self.write_int(int(val)) @@ -513,7 +507,7 @@ def write_array(self, val: np.ndarray) -> None: Write the numpy array value to the HDF5 file. Args: - val (np.ndarray): The numpy array value to write. + val: The numpy array value to write. """ group = self.get_parent_group() group[self.option[-1]] = val @@ -544,7 +538,7 @@ def write_dict(self, val: dict[str, Any]) -> None: """Write dictionary to h5 file Args: - val (dict[str, Any]): dictionary to write + val: dictionary to write """ # Check if keys are all strings, raise an error if not if not all(isinstance(key, str) for key in val.keys()): @@ -562,9 +556,8 @@ def write_object_list(self, seq: Sequence[Any] | None, group_name: str) -> None: Objects must implement the DataSet-like `serialize` method. Args: - seq (Sequence[Any]): The object sequence to write. - Defaults to None. - group_name (str): The name of the group in which to write the objects. + seq: The object sequence to write. Defaults to None. 
+ group_name: The name of the group in which to write the objects. """ with self.group(group_name): if seq is None: @@ -594,7 +587,7 @@ class HDF5Reader(HDF5Handler): Reader for HDF5 files. Inherits from HDF5Handler. Args: - filename (str): The name of the HDF5 file. + filename: The name of the HDF5 file. """ def __init__(self, filename: str): @@ -652,7 +645,7 @@ def read_any(self) -> str | bytes: Read a value from the current group as a generic type. Returns: - Union[str, bytes]: The read value. + The read value. """ group = self.get_parent_group() try: @@ -673,7 +666,7 @@ def read_bool(self) -> bool | None: Read a boolean value from the current group. Returns: - Optional[bool]: The read boolean value. + The read boolean value, or None if the value is not found. """ val = self.read_any() if val != "": @@ -684,7 +677,7 @@ def read_int(self) -> int | None: Read an integer value from the current group. Returns: - Optional[int]: The read integer value. + The read integer value, or None if the value is not found. """ val = self.read_any() if val != "": @@ -695,7 +688,7 @@ def read_float(self) -> float | None: Read a float value from the current group. Returns: - Optional[float]: The read float value. + The read float value, or None if the value is not found. """ val = self.read_any() if val != "": @@ -708,7 +701,7 @@ def read_array(self) -> np.ndarray: Read a numpy array from the current group. Returns: - np.ndarray: The read numpy array. + The read numpy array. """ group = self.get_parent_group() return group[self.option[-1]][...] @@ -754,7 +747,7 @@ def read_dict(self) -> dict[str, Any]: """Read dictionary from h5 file Returns: - dict[str, Any]: dictionary read from h5 file + Dictionary read from h5 file """ group = self.get_parent_group() dict_group = group[self.option[-1]] @@ -796,16 +789,17 @@ def read_object_list( klass: type[Any], progress_callback: Callable[[int], bool] | None = None, ) -> list[Any]: - """ - Read an object sequence from a group. 
+ """Read an object sequence from a group. Objects must implement the DataSet-like `deserialize` method. `klass` is the object class which constructor requires no argument. - progress_callback: if not None, this function is called with - an integer argument (progress: 0 --> 100). Function returns the - `cancel` state (True: progress dialog has been canceled, False - otherwise) + Args: + group_name: The name of the group to read the object sequence from. + klass: The object class which constructor requires no argument. + progress_callback: A function to call with an integer argument (progress: + 0 --> 100). The function returns the `cancel` state (True: progress + dialog has been canceled, False otherwise). """ with self.group(group_name): try: diff --git a/guidata/io/jsonfmt.py b/guidata/io/jsonfmt.py index 7324746..d95ab28 100644 --- a/guidata/io/jsonfmt.py +++ b/guidata/io/jsonfmt.py @@ -12,8 +12,12 @@ # pylint: disable=invalid-name # Allows short reference names like x, y, ... +from __future__ import annotations + import json import os +from collections.abc import Callable, Sequence +from typing import Any from uuid import uuid1 import numpy as np @@ -24,7 +28,7 @@ class CustomJSONEncoder(json.JSONEncoder): """Custom JSON Encoder""" - def default(self, o): + def default(self, o: Any) -> Any: """Override JSONEncoder method""" if isinstance(o, np.ndarray): olist = o.tolist() @@ -46,10 +50,10 @@ def default(self, o): class CustomJSONDecoder(json.JSONDecoder): """Custom JSON Decoder""" - def __init__(self, *args, **kwargs): + def __init__(self, *args, **kwargs) -> None: json.JSONDecoder.__init__(self, object_hook=self.object_hook, *args, **kwargs) - def __iterate_dict(self, obj): + def __iterate_dict(self, obj: Any) -> Any: """Iterate dictionaries""" if isinstance(obj, list) and len(obj) == 3: family, data, dtypestr = obj @@ -68,7 +72,7 @@ def __iterate_dict(self, obj): obj[key] = self.__iterate_dict(value) return obj - def object_hook(self, obj: dict): # pylint: 
disable=E0202 + def object_hook(self, obj: dict) -> dict: # pylint: disable=E0202 """Object hook""" for key, value in list(obj.items()): obj[key] = self.__iterate_dict(value) @@ -82,42 +86,57 @@ class JSONHandler(BaseIOHandler): filename: JSON filename (if None, use `jsontext` attribute) """ - def __init__(self, filename=None): + def __init__(self, filename: str | None = None) -> None: super().__init__() self.jsondata = {} - self.jsontext = None + self.jsontext: str | None = None self.filename = filename - def get_parent_group(self): + def get_parent_group(self) -> dict: """Get parent group""" parent = self.jsondata for option in self.option[:-1]: parent = parent.setdefault(option, {}) return parent - def set_json_dict(self, jsondata: dict): - """Set JSON data dictionary""" + def set_json_dict(self, jsondata: dict) -> None: + """Set JSON data dictionary + + Args: + jsondata: JSON data dictionary + """ self.jsondata = jsondata def get_json_dict(self) -> dict: """Return JSON data dictionary""" return self.jsondata - def get_json(self, indent=None): - """Get JSON string""" + def get_json(self, indent: int | None = None) -> str | None: + """Get JSON string + + Args: + indent: Indentation level + + Returns: + JSON string + """ if self.jsondata is not None: return json.dumps(self.jsondata, indent=indent, cls=CustomJSONEncoder) return None - def load(self): + def load(self) -> None: """Load JSON file""" if self.filename is not None: with open(self.filename, mode="rb") as fdesc: self.jsontext = fdesc.read().decode() self.jsondata = json.loads(self.jsontext, cls=CustomJSONDecoder) - def save(self, path=None): - """Save JSON file""" + def save(self, path: str | None = None) -> None: + """Save JSON file + + Args: + path: Path to save the JSON file (if None, implies current directory) + """ if self.filename is not None: filepath = self.filename if path: @@ -125,19 +144,19 @@ def save(self, path=None): with open(filepath, mode="wb") as fdesc: 
fdesc.write(self.get_json(indent=4).encode()) - def close(self): + def close(self) -> None: """Expected close method: do nothing for JSON I/O handler classes""" class JSONWriter(JSONHandler, WriterMixin): """Class handling JSON serialization""" - def write_any(self, val): + def write_any(self, val) -> None: """Write any value type""" group = self.get_parent_group() group[self.option[-1]] = val - def write_none(self): + def write_none(self) -> None: """Write None""" self.write_any(None) @@ -145,9 +164,15 @@ def write_none(self): write_array ) = write_any - def write_object_list(self, seq, group_name): - """Write object sequence in group. - Objects must implement the DataSet-like `serialize` method""" + def write_object_list(self, seq: Sequence[Any] | None, group_name: str) -> None: + """ + Write an object sequence to the JSON data in a group. + Objects must implement the DataSet-like `serialize` method. + + Args: + seq: The object sequence to write. Defaults to None. + group_name: The name of the group in which to write the objects. + """ with self.group(group_name): if seq is None: self.write_none() @@ -164,6 +189,12 @@ def write_object_list(self, seq, group_name): self.write(ids, "IDs") +class NoDefault: + """Class to represent the absence of a default value.""" + + pass + + class JSONReader(JSONHandler): """Class handling JSON deserialization @@ -171,7 +202,7 @@ class JSONReader(JSONHandler): fname_or_jsontext: JSON filename or JSON text """ - def __init__(self, fname_or_jsontext): + def __init__(self, fname_or_jsontext: str) -> None: """JSONReader constructor""" JSONHandler.__init__(self, fname_or_jsontext) if fname_or_jsontext is not None and not os.path.isfile(fname_or_jsontext): @@ -179,44 +210,74 @@ def __init__(self, fname_or_jsontext): self.jsontext = fname_or_jsontext self.load() - def read(self, group_name=None, func=None, instance=None): - """Read value within current group or group_name.
- - Optional argument `instance` is an object which - implements the DataSet-like `deserialize` method.""" + def read( + self, + group_name: str | None = None, + func: Callable[[], Any] | None = None, + instance: Any | None = None, + default: Any | NoDefault = NoDefault, + ) -> Any: + """ + Read a value from the current group or specified group_name. + + Args: + group_name: The name of the group to read from. Defaults to None. + func: The function to use for reading the value. Defaults to None. + instance: An object that implements the DataSet-like `deserialize` method. + Defaults to None. + default: The default value to return if the value is not found. + Defaults to `NoDefault` (no default value: raises an exception if the + value is not found). + + Returns: + The read value. + """ if group_name: self.begin(group_name) - if instance is None: - if func is None: - func = self.read_any - val = func() - else: - group = self.get_parent_group() - if group_name not in group: - # This is an attribute (not a group), meaning that - # the object was None when deserializing it - val = None + try: + if instance is None: + if func is None: + func = self.read_any + val = func() else: - instance.deserialize(self) - val = instance + group = self.get_parent_group() + if group_name not in group: + # This is an attribute (not a group), meaning that + # the object was None when deserializing it + val = None + else: + instance.deserialize(self) + val = instance + except Exception: # pylint:disable=broad-except + if default is NoDefault: + raise + val = default if group_name: self.end(group_name) return val - def read_any(self): + def read_any(self) -> Any: """Read any value type""" group = self.get_parent_group() return group[self.option[-1]] - def read_object_list(self, group_name, klass, progress_callback=None): - """Read object sequence in group. 
+ def read_object_list( + self, + group_name: str, + klass: type[Any], + progress_callback: Callable[[int], bool] | None = None, + ) -> list[Any]: + """Read an object sequence from a group. + Objects must implement the DataSet-like `deserialize` method. `klass` is the object class which constructor requires no argument. - progress_callback: if not None, this function is called with - an integer argument (progress: 0 --> 100). Function returns the - `cancel` state (True: progress dialog has been canceled, False - otherwise) + Args: + group_name: The name of the group to read the object sequence from. + klass: The object class which constructor requires no argument. + progress_callback: A function to call with an integer argument (progress: + 0 --> 100). The function returns the `cancel` state (True: progress + dialog has been canceled, False otherwise). """ with self.group(group_name): try: diff --git a/guidata/tests/unit/test_h5fmt.py b/guidata/tests/unit/test_h5fmt.py new file mode 100644 index 0000000..22c2686 --- /dev/null +++ b/guidata/tests/unit/test_h5fmt.py @@ -0,0 +1,190 @@ +# -*- coding: utf-8 -*- +# +# Licensed under the terms of the BSD 3-Clause +# (see guidata/LICENSE for details) + +""" +Test HDF5 I/O +------------- + +Testing various use cases of HDF5 I/O: + +* Serialize and deserialize a data model, handling versioning and compatibility breaks. +""" + +from __future__ import annotations + +import atexit +import os +import os.path as osp + +import guidata.dataset as gds +from guidata.env import execenv +from guidata.io import HDF5Reader, HDF5Writer + + +# The following class represents a data model that we want to serialize and deserialize. +# This is the first version of the data model. 
+class MyFirstDataSetV10(gds.DataSet): + """First data set version 1.0""" + + alpha = gds.FloatItem("Alpha", default=0.0) + number = gds.IntItem("Number", default=0) + text = gds.StringItem("Text", default="") + + +class MySecondDataSetV10(gds.DataSet): + """Second data set version 1.0""" + + length = gds.FloatItem("Length", default=0.0) + duration = gds.IntItem("Duration", default=0) + + +class MyDataObjectV10: + """Data object version 1.0""" + + def __init__(self, title: str = "") -> None: + self.title = title + self.metadata = {"author": "John Doe", "age": 24, "skills": ["Python", "C++"]} + + def __str__(self) -> str: + """Return the string representation of the object""" + return f"{self.__class__.__name__}({self.title})" + + def serialize(self, writer: HDF5Writer) -> None: + """Serialize the data model to an HDF5 file""" + writer.write(self.title, "title") + with writer.group("metadata"): + writer.write_dict(self.metadata) + + def deserialize(self, reader: HDF5Reader) -> None: + """Deserialize the data model from an HDF5 file""" + self.title = reader.read("title") + with reader.group("metadata"): + self.metadata = reader.read_dict() + + +class MyDataModelV10: + """Data model version 1.0""" + + VERSION = "1.0" + MYDATAOBJCLASS = MyDataObjectV10 + MYDATASETCLASS1 = MyFirstDataSetV10 + MYDATASETCLASS2 = MySecondDataSetV10 + + def __init__(self) -> None: + self.obj1 = MyDataObjectV10("first_obj_title") + self.obj2 = MyDataObjectV10("second_obj_title") + self.obj3 = MyDataObjectV10("third_obj_title") + self.param1 = MyFirstDataSetV10() + self.param2 = MySecondDataSetV10() + + def __str__(self) -> str: + """Return the string representation of the object""" + text = f"{self.__class__.__name__}:" + text += f"\n {self.obj1}" + text += f"\n {self.obj2}" + text += f"\n {self.obj3}" + text += f"\n {self.param1}" + text += f"\n {self.param2}" + return text + + def save(self, filename: str) -> None: + """Save the data model to an HDF5 file""" + objs = [self.obj1,
self.obj2] + writer = HDF5Writer(filename) + writer.write(self.VERSION, "created_version") + writer.write_object_list(objs, "ObjList") + writer.write(self.obj3, "IndividualObj") + writer.write(self.param1, "Param1") + writer.write(self.param2, "Param2") + writer.close() + + def load(self, filename: str) -> None: + """Load the data model from an HDF5 file""" + reader = HDF5Reader(filename) + created_version = reader.read("created_version") + self.obj1, self.obj2 = reader.read_object_list("ObjList", self.MYDATAOBJCLASS) + self.obj3 = reader.read("IndividualObj", self.MYDATAOBJCLASS) + self.param1 = reader.read("Param1", self.MYDATASETCLASS1) + self.param2 = reader.read("Param2", self.MYDATASETCLASS2) + execenv.print("Created version:", created_version) + execenv.print("Current version:", self.VERSION) + execenv.print("Model data:", self) + reader.close() + + +# The following class represents a new version of the data model: let's assume that +# it replaces the previous version and we want to be able to deserialize the old +# version as well as the new version.
+class MyFirstDataSetV11(MyFirstDataSetV10): + """First data set version 1.1""" + + # Adding a new item + beta = gds.FloatItem("Beta", default=0.0) + + +class MySecondDataSetV11(gds.DataSet): + """Second data set version 1.1""" + + # Redefining the data set with new items (replacing the previous version) + width = gds.FloatItem("Width", default=10.0) + height = gds.FloatItem("Height", default=20.0) + + +class MyDataObjectV11(MyDataObjectV10): + """Data object version 1.1""" + + def __init__(self, title: str = "", subtitle: str = "") -> None: + super().__init__(title) + self.subtitle = subtitle # New attribute + + def __str__(self) -> str: + """Return the string representation of the object""" + return f"{self.__class__.__name__}({self.title}, {self.subtitle})" + + def serialize(self, writer: HDF5Writer): + """Serialize the data model to an HDF5 file""" + super().serialize(writer) + writer.write(self.subtitle, "subtitle") + + def deserialize(self, reader: HDF5Reader): + """Deserialize the data model from an HDF5 file""" + super().deserialize(reader) + # Handling compatibility with the previous version is done by providing a + # default value for the new attribute: + self.subtitle = reader.read("subtitle", default="") + + +class MyDataModelV11(MyDataModelV10): + """Data model version 1.1""" + + VERSION = "1.1" + MYDATAOBJCLASS = MyDataObjectV11 + MYDATASETCLASS1 = MyFirstDataSetV11 + MYDATASETCLASS2 = MySecondDataSetV11 + + def __init__(self) -> None: + self.obj1 = MyDataObjectV11("first_obj_title") + self.obj2 = MyDataObjectV11("second_obj_title") + self.obj3 = MyDataObjectV11("third_obj_title") + self.param1 = MyFirstDataSetV11() + self.param2 = MySecondDataSetV11() + + +def test_hdf5_datamodel_compatiblity(): + """Test HDF5 I/O with data model compatibility""" + path = osp.abspath("test.h5") + atexit.register(lambda: os.unlink(path)) + # Serialize the first version of the data model + model_v10 = MyDataModelV10() + model_v10.save(path) + # Deserialize the first 
version of the data model + model_v10.load(path) + # Deserialize using the new version of the data model + model_v11 = MyDataModelV11() + model_v11.load(path) + + +if __name__ == "__main__": + test_hdf5_datamodel_compatiblity() diff --git a/guidata/tests/unit/test_jsonfmt.py b/guidata/tests/unit/test_jsonfmt.py new file mode 100644 index 0000000..e4f2ac1 --- /dev/null +++ b/guidata/tests/unit/test_jsonfmt.py @@ -0,0 +1,130 @@ +# -*- coding: utf-8 -*- +# +# Licensed under the terms of the BSD 3-Clause +# (see guidata/LICENSE for details) + +""" +Test JSON I/O +------------- + +Testing various use cases of JSON I/O: + +* Serialize and deserialize a data model, handling versioning and compatibility breaks. +""" + +from __future__ import annotations + +import os.path as osp + +from guidata.env import execenv +from guidata.io import JSONReader, JSONWriter + + +# The following class represents a data model that we want to serialize and deserialize. +# This is the first version of the data model. 
+class MyDataObjectV10: + """Data object version 1.0""" + + def __init__(self, title: str = "") -> None: + self.title = title + + def __str__(self) -> str: + """Return the string representation of the object""" + return f"{self.__class__.__name__}({self.title})" + + def serialize(self, writer: JSONWriter) -> None: + """Serialize the data model to a JSON file""" + writer.write(self.title, "title") + + def deserialize(self, reader: JSONReader) -> None: + """Deserialize the data model from a JSON file""" + self.title = reader.read("title") + + +class MyDataModelV10: + """Data model version 1.0""" + + VERSION = "1.0" + MYDATAOBJCLASS = MyDataObjectV10 + + def __init__(self) -> None: + self.obj1 = MyDataObjectV10("first_obj_title") + self.obj2 = MyDataObjectV10("second_obj_title") + + def __str__(self) -> str: + """Return the string representation of the object""" + return f"{self.__class__.__name__}({self.obj1}, {self.obj2})" + + def save(self, filename: str) -> None: + """Save the data model to a JSON file""" + objs = [self.obj1, self.obj2] + writer = JSONWriter(filename) + writer.write(self.VERSION, "created_version") + writer.write_object_list(objs, "ObjList") + writer.save() + + def load(self, filename: str) -> None: + """Load the data model from a JSON file""" + reader = JSONReader(filename) + created_version = reader.read("created_version") + self.obj1, self.obj2 = reader.read_object_list("ObjList", self.MYDATAOBJCLASS) + execenv.print("Created version:", created_version) + execenv.print("Current version:", self.VERSION) + execenv.print("Model data:", self) + reader.close() + + +# The following class represents a new version of the data model: let's assume that +# it replaces the previous version and we want to be able to deserialize the old +# version as well as the new version.
+class MyDataObjectV11(MyDataObjectV10): + """Data object version 1.1""" + + def __init__(self, title: str = "", subtitle: str = "") -> None: + super().__init__(title) + self.subtitle = subtitle # New attribute + + def __str__(self) -> str: + """Return the string representation of the object""" + return f"{self.__class__.__name__}({self.title}, {self.subtitle})" + + def serialize(self, writer: JSONWriter): + """Serialize the data model to a JSON file""" + super().serialize(writer) + writer.write(self.subtitle, "subtitle") + + def deserialize(self, reader: JSONReader): + """Deserialize the data model from a JSON file""" + super().deserialize(reader) + # Handling compatibility with the previous version is done by providing a + # default value for the new attribute: + self.subtitle = reader.read("subtitle", default="") + + +class MyDataModelV11(MyDataModelV10): + """Data model version 1.1""" + + VERSION = "1.1" + MYDATAOBJCLASS = MyDataObjectV11 + + def __init__(self) -> None: + self.obj1 = MyDataObjectV11("first_obj_title") + self.obj2 = MyDataObjectV11("second_obj_title") + + +def test_json_datamodel_compatiblity(): + """Test JSON I/O with data model compatibility""" + path = osp.abspath("test.json") + # atexit.register(lambda: os.unlink(path)) + # Serialize the first version of the data model + model_v10 = MyDataModelV10() + model_v10.save(path) + # Deserialize the first version of the data model + model_v10.load(path) + # Deserialize using the new version of the data model + model_v11 = MyDataModelV11() + model_v11.load(path) + + +if __name__ == "__main__": + test_json_datamodel_compatiblity()