From 7085f0c324ab9e08918c17ab1fb476c3f4a0cb14 Mon Sep 17 00:00:00 2001 From: Mauko Quiroga Date: Fri, 4 Oct 2024 22:21:08 +0200 Subject: [PATCH 1/6] docs(storage): add typing (#1235) --- openfisca_core/data_storage/__init__.py | 7 ++- .../data_storage/in_memory_storage.py | 20 ++++--- .../data_storage/on_disk_storage.py | 44 +++++++++----- openfisca_core/data_storage/types.py | 14 +++++ openfisca_core/indexed_enums/enum.py | 5 +- openfisca_core/indexed_enums/enum_array.py | 13 ++--- openfisca_core/indexed_enums/types.py | 4 +- openfisca_core/types.py | 58 +++++++++++++++++-- openfisca_tasks/lint.mk | 1 + 9 files changed, 124 insertions(+), 42 deletions(-) create mode 100644 openfisca_core/data_storage/types.py diff --git a/openfisca_core/data_storage/__init__.py b/openfisca_core/data_storage/__init__.py index e2b4d8911d..9f63047fbd 100644 --- a/openfisca_core/data_storage/__init__.py +++ b/openfisca_core/data_storage/__init__.py @@ -21,5 +21,8 @@ # # See: https://www.python.org/dev/peps/pep-0008/#imports -from .in_memory_storage import InMemoryStorage # noqa: F401 -from .on_disk_storage import OnDiskStorage # noqa: F401 +from . import types +from .in_memory_storage import InMemoryStorage +from .on_disk_storage import OnDiskStorage + +__all__ = ["InMemoryStorage", "OnDiskStorage", "types"] diff --git a/openfisca_core/data_storage/in_memory_storage.py b/openfisca_core/data_storage/in_memory_storage.py index e6a5a866ce..40b68df503 100644 --- a/openfisca_core/data_storage/in_memory_storage.py +++ b/openfisca_core/data_storage/in_memory_storage.py @@ -1,8 +1,14 @@ +from __future__ import annotations + +from collections.abc import KeysView, MutableMapping + import numpy from openfisca_core import periods from openfisca_core.periods import DateUnit +from . import types as t + class InMemoryStorage: """Storing and retrieving calculated vectors in memory. 
@@ -16,13 +22,13 @@ class InMemoryStorage: is_eternal: bool #: A dictionary containing data that has been stored in memory. - _arrays: dict + _arrays: MutableMapping[t.Period, t.Array[t.DTypeGeneric]] - def __init__(self, is_eternal=False) -> None: + def __init__(self, is_eternal: bool = False) -> None: self._arrays = {} self.is_eternal = is_eternal - def get(self, period): + def get(self, period: None | t.Period = None) -> None | t.Array[t.DTypeGeneric]: """Retrieve the data for the specified period from memory. Args: @@ -57,7 +63,7 @@ def get(self, period): return None return values - def put(self, value, period) -> None: + def put(self, value: t.Array[t.DTypeGeneric], period: None | t.Period) -> None: """Store the specified data in memory for the specified period. Args: @@ -87,7 +93,7 @@ def put(self, value, period) -> None: self._arrays[period] = value - def delete(self, period=None) -> None: + def delete(self, period: None | t.Period = None) -> None: """Delete the data for the specified period from memory. Args: @@ -137,7 +143,7 @@ def delete(self, period=None) -> None: if not period.contains(period_item) } - def get_known_periods(self): + def get_known_periods(self) -> KeysView[t.Period]: """List of storage's known periods. Returns: @@ -161,7 +167,7 @@ def get_known_periods(self): return self._arrays.keys() - def get_memory_usage(self): + def get_memory_usage(self) -> t.MemoryUsage: """Memory usage of the storage. Returns: diff --git a/openfisca_core/data_storage/on_disk_storage.py b/openfisca_core/data_storage/on_disk_storage.py index 4b40756138..495820c789 100644 --- a/openfisca_core/data_storage/on_disk_storage.py +++ b/openfisca_core/data_storage/on_disk_storage.py @@ -1,3 +1,7 @@ +from __future__ import annotations + +from collections.abc import KeysView, MutableMapping + import os import shutil @@ -7,6 +11,8 @@ from openfisca_core.indexed_enums import EnumArray from openfisca_core.periods import DateUnit +from . 
import types as t + class OnDiskStorage: """Storing and retrieving calculated vectors on disk. @@ -28,13 +34,16 @@ class OnDiskStorage: preserve_storage_dir: bool #: Mapping of file paths to possible Enum values. - _enums: dict + _enums: MutableMapping[str, type[t.Enum]] #: Mapping of periods to file paths. - _files: dict + _files: MutableMapping[t.Period, str] def __init__( - self, storage_dir, is_eternal=False, preserve_storage_dir=False + self, + storage_dir: str, + is_eternal: bool = False, + preserve_storage_dir: bool = False, ) -> None: self._files = {} self._enums = {} @@ -42,7 +51,7 @@ def __init__( self.preserve_storage_dir = preserve_storage_dir self.storage_dir = storage_dir - def _decode_file(self, file): + def _decode_file(self, file: str) -> t.Array[t.DTypeGeneric]: """Decode a file by loading its contents as a ``numpy`` array. Args: @@ -82,11 +91,15 @@ def _decode_file(self, file): """ enum = self._enums.get(file) + if enum is not None: return EnumArray(numpy.load(file), enum) - return numpy.load(file) - def get(self, period): + array: t.Array[t.DTypeGeneric] = numpy.load(file) + + return array + + def get(self, period: None | t.Period = None) -> None | t.Array[t.DTypeGeneric]: """Retrieve the data for the specified period from disk. Args: @@ -124,7 +137,7 @@ def get(self, period): return None return self._decode_file(values) - def put(self, value, period) -> None: + def put(self, value: t.Array[t.DTypeGeneric], period: None | t.Period) -> None: """Store the specified data on disk for the specified period. 
Args: @@ -156,13 +169,13 @@ def put(self, value, period) -> None: filename = str(period) path = os.path.join(self.storage_dir, filename) + ".npy" - if isinstance(value, EnumArray): + if isinstance(value, EnumArray) and value.possible_values is not None: self._enums[path] = value.possible_values value = value.view(numpy.ndarray) numpy.save(path, value) self._files[period] = path - def delete(self, period=None) -> None: + def delete(self, period: None | t.Period = None) -> None: """Delete the data for the specified period from disk. Args: @@ -208,14 +221,13 @@ def delete(self, period=None) -> None: period = periods.period(DateUnit.ETERNITY) period = periods.period(period) - if period is not None: - self._files = { - period_item: value - for period_item, value in self._files.items() - if not period.contains(period_item) - } + self._files = { + period_item: value + for period_item, value in self._files.items() + if not period.contains(period_item) + } - def get_known_periods(self): + def get_known_periods(self) -> KeysView[t.Period]: """List of storage's known periods. Returns: diff --git a/openfisca_core/data_storage/types.py b/openfisca_core/data_storage/types.py new file mode 100644 index 0000000000..db71abbf57 --- /dev/null +++ b/openfisca_core/data_storage/types.py @@ -0,0 +1,14 @@ +from typing_extensions import TypedDict + +from openfisca_core.types import Array, DTypeGeneric, Enum, Period + + +class MemoryUsage(TypedDict, total=True): + """Memory usage information.""" + + cell_size: float + nb_arrays: int + total_nb_bytes: int + + +__all__ = ["Array", "DTypeGeneric", "Enum", "Period"] diff --git a/openfisca_core/indexed_enums/enum.py b/openfisca_core/indexed_enums/enum.py index ec1afa45a9..a6fd5d7f98 100644 --- a/openfisca_core/indexed_enums/enum.py +++ b/openfisca_core/indexed_enums/enum.py @@ -1,14 +1,13 @@ from __future__ import annotations -import enum - import numpy +from . 
import types as t from .config import ENUM_ARRAY_DTYPE from .enum_array import EnumArray -class Enum(enum.Enum): +class Enum(t.Enum): """Enum based on `enum34 <https://pypi.python.org/pypi/enum34/>`_, whose items have an index. """ diff --git a/openfisca_core/indexed_enums/enum_array.py b/openfisca_core/indexed_enums/enum_array.py index 1b6c512b8e..a1479d5b82 100644 --- a/openfisca_core/indexed_enums/enum_array.py +++ b/openfisca_core/indexed_enums/enum_array.py @@ -1,17 +1,14 @@ from __future__ import annotations -import typing from typing import Any, NoReturn +from typing_extensions import Self import numpy from . import types as t -if typing.TYPE_CHECKING: - from openfisca_core.indexed_enums import Enum - -class EnumArray(numpy.ndarray): +class EnumArray(t.EnumArray): """NumPy array subclass representing an array of enum items. EnumArrays are encoded as ``int`` arrays to improve performance @@ -22,9 +19,9 @@ class EnumArray(numpy.ndarray): # https://docs.scipy.org/doc/numpy-1.13.0/user/basics.subclassing.html#slightly-more-realistic-example-attribute-added-to-existing-array.
def __new__( cls, - input_array: t.Array[numpy.int16], - possible_values: type[Enum] | None = None, - ) -> EnumArray: + input_array: t.Array[t.DTypeEnum], + possible_values: None | type[t.Enum] = None, + ) -> Self: obj = numpy.asarray(input_array).view(cls) obj.possible_values = possible_values return obj diff --git a/openfisca_core/indexed_enums/types.py b/openfisca_core/indexed_enums/types.py index 43c38780ff..d69eb098a0 100644 --- a/openfisca_core/indexed_enums/types.py +++ b/openfisca_core/indexed_enums/types.py @@ -1,3 +1,3 @@ -from openfisca_core.types import Array +from openfisca_core.types import Array, DTypeEnum, Enum, EnumArray -__all__ = ["Array"] +__all__ = ["Array", "DTypeEnum", "Enum", "EnumArray"] diff --git a/openfisca_core/types.py b/openfisca_core/types.py index 711e6c512f..b922cde092 100644 --- a/openfisca_core/types.py +++ b/openfisca_core/types.py @@ -3,24 +3,57 @@ from collections.abc import Iterable, Sequence, Sized from numpy.typing import NDArray from typing import Any, NewType, TypeVar, Union -from typing_extensions import Protocol, TypeAlias +from typing_extensions import Protocol, Self, TypeAlias + +import abc +import enum import numpy import pendulum -_N_co = TypeVar("_N_co", bound=numpy.generic, covariant=True) +#: Generic covariant type var. +_T_co = TypeVar("_T_co", covariant=True) + +# Commons + +#: Type var for numpy arrays. +_N_co = TypeVar("_N_co", covariant=True, bound="DTypeGeneric") #: Type representing an numpy array. Array: TypeAlias = NDArray[_N_co] +#: Type var for array-like objects. _L = TypeVar("_L") #: Type representing an array-like object. ArrayLike: TypeAlias = Sequence[_L] -#: Generic type vars. -_T_co = TypeVar("_T_co", covariant=True) +#: Type for bool arrays. +DTypeBool: TypeAlias = numpy.bool_ + +#: Type for int arrays. +DTypeInt: TypeAlias = numpy.int32 + +#: Type for float arrays. +DTypeFloat: TypeAlias = numpy.float32 + +#: Type for string arrays. 
+DTypeStr: TypeAlias = numpy.str_ + +#: Type for bytes arrays. +DTypeBytes: TypeAlias = numpy.bytes_ + +#: Type for Enum arrays. +DTypeEnum: TypeAlias = numpy.int16 +#: Type for date arrays. +DTypeDate: TypeAlias = numpy.datetime64 + +#: Type for "object" arrays. +DTypeObject: TypeAlias = numpy.object_ + +#: Type for "generic" arrays. +DTypeGeneric: TypeAlias = numpy.generic # Entities @@ -72,6 +105,22 @@ def key(self, /) -> RoleKey: ... def plural(self, /) -> None | RolePlural: ... +# Indexed enums + + +class Enum(enum.Enum, metaclass=enum.EnumMeta): + index: int + + +class EnumArray(Array[DTypeEnum], metaclass=abc.ABCMeta): + possible_values: None | type[Enum] + + @abc.abstractmethod + def __new__( + cls, input_array: Array[DTypeEnum], possible_values: None | type[Enum] = ... + ) -> Self: ... + + # Holders @@ -130,6 +179,7 @@ def start(self, /) -> Instant: ... def size(self, /) -> int: ... @property def stop(self, /) -> Instant: ... + def contains(self, other: Period, /) -> bool: ... def offset(self, offset: str | int, unit: None | DateUnit = None, /) -> Period: ... diff --git a/openfisca_tasks/lint.mk b/openfisca_tasks/lint.mk index 912d0567dc..2f3cbba272 100644 --- a/openfisca_tasks/lint.mk +++ b/openfisca_tasks/lint.mk @@ -41,6 +41,7 @@ check-types: @$(call print_help,$@:) @python -m mypy \ openfisca_core/commons \ + openfisca_core/data_storage \ openfisca_core/entities \ openfisca_core/periods \ openfisca_core/types.py From faeffc4e2c29a532b5c72df42ad465986b8427ad Mon Sep 17 00:00:00 2001 From: Mauko Quiroga Date: Fri, 4 Oct 2024 22:22:55 +0200 Subject: [PATCH 2/6] chore(make): fix typo (#1235) --- openfisca_tasks/lint.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openfisca_tasks/lint.mk b/openfisca_tasks/lint.mk index 2f3cbba272..a3f5a8e45a 100644 --- a/openfisca_tasks/lint.mk +++ b/openfisca_tasks/lint.mk @@ -29,7 +29,7 @@ lint-doc-%: @## @## They can be integrated into setup.cfg once all checks pass. 
@## The reason they're here is because otherwise we wouldn't be - @## able to integrate documentation improvements progresively. + @## able to integrate documentation improvements progressively. @## @$(call print_help,$(subst $*,%,$@:)) @python -m flake8 --select=D101,D102,D103,DAR openfisca_core/$* From f5627d35ace4d88f935f081ad2fde9ef45aa912d Mon Sep 17 00:00:00 2001 From: Mauko Quiroga Date: Fri, 4 Oct 2024 22:27:43 +0200 Subject: [PATCH 3/6] docs(storage): add __all__ (#1235) --- openfisca_core/data_storage/in_memory_storage.py | 3 +++ openfisca_core/data_storage/on_disk_storage.py | 3 +++ 2 files changed, 6 insertions(+) diff --git a/openfisca_core/data_storage/in_memory_storage.py b/openfisca_core/data_storage/in_memory_storage.py index 40b68df503..18387ff648 100644 --- a/openfisca_core/data_storage/in_memory_storage.py +++ b/openfisca_core/data_storage/in_memory_storage.py @@ -196,3 +196,6 @@ def get_memory_usage(self) -> t.MemoryUsage: "total_nb_bytes": array.nbytes * nb_arrays, "cell_size": array.itemsize, } + + +__all__ = ["InMemoryStorage"] diff --git a/openfisca_core/data_storage/on_disk_storage.py b/openfisca_core/data_storage/on_disk_storage.py index 495820c789..d1b8e2c4e0 100644 --- a/openfisca_core/data_storage/on_disk_storage.py +++ b/openfisca_core/data_storage/on_disk_storage.py @@ -308,3 +308,6 @@ def __del__(self) -> None: parent_dir = os.path.abspath(os.path.join(self.storage_dir, os.pardir)) if not os.listdir(parent_dir): shutil.rmtree(parent_dir) + + +__all__ = ["OnDiskStorage"] From 19b9755709e2e025d6b5e4fb7e205017a708601f Mon Sep 17 00:00:00 2001 From: Mauko Quiroga Date: Fri, 4 Oct 2024 22:31:04 +0200 Subject: [PATCH 4/6] chore: version bump (fixes #1235) --- CHANGELOG.md | 6 ++++++ setup.py | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b58b1a74de..7f900c330e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,11 @@ # Changelog +### 42.0.7 
[#1264](https://github.com/openfisca/openfisca-core/pull/1264) + +#### Technical changes + +- Add typing to `data_storage` module + ### 42.0.6 [#1263](https://github.com/openfisca/openfisca-core/pull/1263) #### Documentation diff --git a/setup.py b/setup.py index 1e4a464795..202e5e449c 100644 --- a/setup.py +++ b/setup.py @@ -70,7 +70,7 @@ setup( name="OpenFisca-Core", - version="42.0.6", + version="42.0.7", author="OpenFisca Team", author_email="contact@openfisca.org", classifiers=[ From e4c079f5a2a68787d1984cdda1f1a4aaaba92b92 Mon Sep 17 00:00:00 2001 From: Mauko Quiroga Date: Sat, 5 Oct 2024 00:30:09 +0200 Subject: [PATCH 5/6] ci: fix upload to conda --- .github/workflows/_before-conda.yaml | 2 +- .github/workflows/merge.yaml | 11 ++++------- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/.github/workflows/_before-conda.yaml b/.github/workflows/_before-conda.yaml index 7528a6a1c2..06d0067eff 100644 --- a/.github/workflows/_before-conda.yaml +++ b/.github/workflows/_before-conda.yaml @@ -66,7 +66,7 @@ jobs: if: steps.cache-env.outputs.cache-hit != 'true' - name: Install dependencies - run: mamba install boa rattler-build anaconda-client + run: mamba install boa rattler-build if: steps.cache-env.outputs.cache-hit != 'true' - name: Update conda & dependencies diff --git a/.github/workflows/merge.yaml b/.github/workflows/merge.yaml index 57e0bb80a3..31e863a96b 100644 --- a/.github/workflows/merge.yaml +++ b/.github/workflows/merge.yaml @@ -217,12 +217,11 @@ jobs: use-mamba: true - name: Publish to conda - shell: bash -l {0} run: | - anaconda upload ~/conda-rel/noarch/openfisca-core-* \ - --token ${{ secrets.ANACONDA_TOKEN }} - --user openfisca - --force + rattler-build upload anaconda ~/conda-rel/noarch/*.conda \ + --force \ + --owner openfisca \ + --api-key ${{ secrets.ANACONDA_TOKEN }} test-on-windows: runs-on: windows-2019 @@ -245,9 +244,7 @@ jobs: uses: actions/checkout@v4 - name: Install with conda - shell: bash -l {0} run: conda install -c 
openfisca openfisca-core - name: Test openfisca - shell: bash -l {0} run: openfisca --help From 2005df15dd8f85e75affc2a5103c8e9480c90846 Mon Sep 17 00:00:00 2001 From: Mauko Quiroga Date: Sat, 5 Oct 2024 01:15:58 +0200 Subject: [PATCH 6/6] ci: set output to conda v2 --- .conda/openfisca-core/meta.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.conda/openfisca-core/meta.yaml b/.conda/openfisca-core/meta.yaml index be31e84b95..1c90e6191e 100644 --- a/.conda/openfisca-core/meta.yaml +++ b/.conda/openfisca-core/meta.yaml @@ -44,8 +44,10 @@ test: outputs: - name: openfisca-core + type: conda_v2 - name: openfisca-core-api + type: conda_v2 build: noarch: python requirements: @@ -61,6 +63,7 @@ outputs: - {{ pin_subpackage('openfisca-core', exact=True) }} - name: openfisca-core-dev + type: conda_v2 build: noarch: python requirements: