From 88eb8ca9056f17ae50a1aa451398b82f1f060aa1 Mon Sep 17 00:00:00 2001
From: Weddy Gikunda <110189834+caviere@users.noreply.github.com>
Date: Thu, 3 Nov 2022 19:48:54 +0300
Subject: [PATCH 001/213] Add documentation for find/findall using visit
 (#1241)

* Add documentation for find/findall using visit

* Remove whitespace

* Fix print result

* Fix indentation issue in the docstring

* Indent literal block
---
 zarr/hierarchy.py | 70 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 70 insertions(+)

diff --git a/zarr/hierarchy.py b/zarr/hierarchy.py
index 12ca34e7e9..f2188217a8 100644
--- a/zarr/hierarchy.py
+++ b/zarr/hierarchy.py
@@ -720,6 +720,76 @@ def visit(self, func):
             baz
             quux

+        Searching for members matching some name query can be implemented
+        using ``visit``, that is, ``find`` and ``findall``. Consider the
+        following tree::
+
+            /
+            ├── aaa
+            │   └── bbb
+            │       └── ccc
+            │           └── aaa
+            ├── bar
+            └── foo
+
+        It is created as follows:
+
+        >>> root = zarr.group()
+        >>> foo = root.create_group("foo")
+        >>> bar = root.create_group("bar")
+        >>> root.create_group("aaa").create_group("bbb").create_group("ccc").create_group("aaa")
+        <zarr.hierarchy.Group '/aaa/bbb/ccc/aaa'>
+
+        For ``find``, the first path that matches a given pattern (for
+        example "aaa") is returned. Note that the visit function returns a
+        non-None value to stop further iteration.
+
+        >>> import re
+        >>> pattern = re.compile("aaa")
+        >>> found = None
+        >>> def find(path):
+        ...     global found
+        ...     if pattern.search(path) is not None:
+        ...         found = path
+        ...         return True
+        ...
+        >>> root.visit(find)
+        True
+        >>> print(found)
+        aaa
+
+        For ``findall``, all the results are gathered into a list:
+
+        >>> pattern = re.compile("aaa")
+        >>> found = []
+        >>> def findall(path):
+        ...     if pattern.search(path) is not None:
+        ...         found.append(path)
+        ...
+        >>> root.visit(findall)
+        >>> print(found)
+        ['aaa', 'aaa/bbb', 'aaa/bbb/ccc', 'aaa/bbb/ccc/aaa']
+
+        To match only on the last part of the path, use a greedy regex to
+        filter out the prefix:
+
+        >>> prefix_pattern = re.compile(r".*/")
+        >>> pattern = re.compile("aaa")
+        >>> found = []
+        >>> def findall(path):
+        ...     match = prefix_pattern.match(path)
+        ...     if match is None:
+        ...         name = path
+        ...     else:
+        ...         _, end = match.span()
+        ...         name = path[end:]
+        ...     if pattern.search(name) is not None:
+        ...         found.append(path)
+        ...     return None
+        ...
+        >>> root.visit(findall)
+        >>> print(found)
+        ['aaa', 'aaa/bbb/ccc/aaa']
         """

         base_len = len(self.name)

From 3d2cd61a6329b05d872694e44447619f4707283b Mon Sep 17 00:00:00 2001
From: jakirkham
Date: Thu, 3 Nov 2022 14:13:11 -0700
Subject: [PATCH 002/213] Add `license_files` to `pyproject.toml` (#1247)

---
 LICENSE => LICENSE.txt | 0
 README.md              | 2 +-
 docs/license.rst       | 2 +-
 pyproject.toml         | 2 ++
 4 files changed, 4 insertions(+), 2 deletions(-)
 rename LICENSE => LICENSE.txt (100%)

diff --git a/LICENSE b/LICENSE.txt
similarity index 100%
rename from LICENSE
rename to LICENSE.txt

diff --git a/README.md b/README.md
index 3576d0c30e..b035ffa597 100644
--- a/README.md
+++ b/README.md
@@ -31,7 +31,7 @@
     License
-    [HTML badge cell: "license" badge linking to LICENSE]
+    [HTML badge cell: "license" badge linking to LICENSE.txt]
     license

diff --git a/docs/license.rst b/docs/license.rst
index d47e1b2b34..8f93aa7d66 100644
--- a/docs/license.rst
+++ b/docs/license.rst
@@ -1,4 +1,4 @@
 License
 =======

-.. include:: ../LICENSE
\ No newline at end of file
+.. include:: ../LICENSE.txt
diff --git a/pyproject.toml b/pyproject.toml
index 7cd7b5a025..7ef173879e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -34,6 +34,7 @@ classifiers = [
     'Programming Language :: Python :: 3.9',
     'Programming Language :: Python :: 3.10',
 ]
+license = { text = "MIT" }

 [project.optional-dependencies]
 jupyter = [
@@ -52,6 +53,7 @@ Homepage = "https://github.com/zarr-developers/zarr-python"

 [tool.setuptools]
 packages = ["zarr", "zarr._storage", "zarr.tests"]
+license-files = ["LICENSE.txt"]

 [tool.setuptools_scm]
 version_scheme = "guess-next-dev"

From aa84fcbb896e8c269cbc9e2c1a6f3fa419450953 Mon Sep 17 00:00:00 2001
From: AWA BRANDON AWA <51425873+DON-BRAN@users.noreply.github.com>
Date: Thu, 3 Nov 2022 22:58:50 +0100
Subject: [PATCH 003/213] updated docs/contributing.rst (#1243)

Co-authored-by: jakirkham
---
 docs/contributing.rst | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/docs/contributing.rst b/docs/contributing.rst
index d9e222b643..5bfd5878b2 100644
--- a/docs/contributing.rst
+++ b/docs/contributing.rst
@@ -92,7 +92,7 @@ the repository, you can do something like the following::

     $ mkdir -p ~/pyenv/zarr-dev
     $ python -m venv ~/pyenv/zarr-dev
     $ source ~/pyenv/zarr-dev/bin/activate
-    $ pip install -r requirements_dev_minimal.txt -r requirements_dev_numpy.txt
+    $ pip install -r requirements_dev_minimal.txt -r requirements_dev_numpy.txt -r requirements_rtfd.txt
     $ pip install -e .

 To verify that your development environment is working, you can run the unit tests::
@@ -248,6 +248,7 @@ The documentation can be built locally by running::

     $ cd docs
     $ make clean; make html
+    $ open _build/html/index.html

 The resulting built documentation will be available in the ``docs/_build/html`` folder.

From bbc66df11ef0c731ebece330c1e60c38f1f36aed Mon Sep 17 00:00:00 2001
From: jakirkham
Date: Fri, 4 Nov 2022 08:55:21 -0700
Subject: [PATCH 004/213] Delete unused files (#1251)

* Delete build.cmd

This file was leftover from before numcodecs was split from zarr-python.
It was used to aid in compiling the Cython extensions on Windows.
However, zarr-python is pure Python, so there is no need to keep this
file. It also doesn't appear to be used here. So drop it.

* Update release.rst

* Delete unused `.gitmodules` as well

This was for pointing to Blosc. Again not needed after the
zarr-python/numcodecs split.

* Drop `release.txt`

This is unused. Releases are handled through GitHub Actions. One should
look there if attempting to reproduce outside of that infrastructure.
* Drop unused `windows_conda_dev.txt` * Generalize release entry --- .gitmodules | 0 build.cmd | 45 ------------------------------------------- docs/release.rst | 3 +++ release.txt | 10 ---------- windows_conda_dev.txt | 9 --------- 5 files changed, 3 insertions(+), 64 deletions(-) delete mode 100644 .gitmodules delete mode 100644 build.cmd delete mode 100644 release.txt delete mode 100644 windows_conda_dev.txt diff --git a/.gitmodules b/.gitmodules deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/build.cmd b/build.cmd deleted file mode 100644 index 4e402d5e21..0000000000 --- a/build.cmd +++ /dev/null @@ -1,45 +0,0 @@ -:: To build extensions for 64 bit Python 3, we need to configure environment -:: variables to use the MSVC 2010 C++ compilers from GRMSDKX_EN_DVD.iso of: -:: MS Windows SDK for Windows 7 and .NET Framework 4 (SDK v7.1) -:: -:: To build extensions for 64 bit Python 2, we need to configure environment -:: variables to use the MSVC 2008 C++ compilers from GRMSDKX_EN_DVD.iso of: -:: MS Windows SDK for Windows 7 and .NET Framework 3.5 (SDK v7.0) -:: -:: 32 bit builds do not require specific environment configurations. -:: -:: Note: this script needs to be run with the /E:ON and /V:ON flags for the -:: cmd interpreter, at least for (SDK v7.0) -:: -:: More details at: -:: https://github.com/cython/cython/wiki/64BitCythonExtensionsOnWindows -:: https://stackoverflow.com/a/13751649/163740 -:: -:: Author: Olivier Grisel -:: License: CC0 1.0 Universal: https://creativecommons.org/publicdomain/zero/1.0/ -@ECHO OFF - -SET COMMAND_TO_RUN=%* -SET WIN_SDK_ROOT=C:\Program Files\Microsoft SDKs\Windows - -SET MAJOR_PYTHON_VERSION="%PYTHON_VERSION:~0,1%" -IF %MAJOR_PYTHON_VERSION% == "3" ( - SET WINDOWS_SDK_VERSION="v7.1" -) ELSE ( - ECHO Unsupported Python version: "%MAJOR_PYTHON_VERSION%" - EXIT 1 -) - -IF "%DISTUTILS_USE_SDK%"=="1" ( - ECHO Configuring Windows SDK %WINDOWS_SDK_VERSION% for Python %MAJOR_PYTHON_VERSION% on a 64 bit architecture - SET DISTUTILS_USE_SDK=1 - SET MSSdk=1 - "%WIN_SDK_ROOT%\%WINDOWS_SDK_VERSION%\Setup\WindowsSdkVer.exe" -q -version:%WINDOWS_SDK_VERSION% - "%WIN_SDK_ROOT%\%WINDOWS_SDK_VERSION%\Bin\SetEnv.cmd" /x64 /release - ECHO Executing: %COMMAND_TO_RUN% - call %COMMAND_TO_RUN% || EXIT 1 -) ELSE ( - ECHO Using default MSVC build environment - ECHO Executing: %COMMAND_TO_RUN% - call %COMMAND_TO_RUN% || EXIT 1 -) diff --git a/docs/release.rst b/docs/release.rst index 5ff8f74f29..647f722f69 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -20,6 +20,9 @@ Maintenance * Simplify if/else statement. By :user:`Dimitri Papadopoulos Orfanos ` :issue:`1227`. +* Delete unused files. + By :user:`John Kirkham ` :issue:`1251`. + * Migrate to ``pyproject.toml`` and remove redundant infrastructure. By :user:`Saransh Chopra ` :issue:`1158`. 
diff --git a/release.txt b/release.txt deleted file mode 100644 index d1cefef47c..0000000000 --- a/release.txt +++ /dev/null @@ -1,10 +0,0 @@ -# version=x.x.x -echo $version -git tag -a v$version -m v$version -git push --tags -# Install `build` if not present with `python -m pip install build` or similar -# for building Zarr -python -m build -# Install `twine` if not present with `python -m pip install twine` or similar -# for publishing Zarr to PyPI -twine upload dist/zarr-${version}.tar.gz diff --git a/windows_conda_dev.txt b/windows_conda_dev.txt deleted file mode 100644 index 576674827d..0000000000 --- a/windows_conda_dev.txt +++ /dev/null @@ -1,9 +0,0 @@ -coverage -fasteners -flake8 -monotonic -msgpack-python -numcodecs -numpy -setuptools_scm -twine From 9434acfa04295d35ae2a328654f297a700a67169 Mon Sep 17 00:00:00 2001 From: jakirkham Date: Sat, 5 Nov 2022 19:03:13 -0700 Subject: [PATCH 005/213] Consolidate `.coveragerc` into `pyproject.toml` (#1250) --- .coveragerc | 9 --------- .github/workflows/python-package.yml | 2 +- docs/contributing.rst | 2 +- docs/release.rst | 3 +++ pyproject.toml | 11 +++++++++++ 5 files changed, 16 insertions(+), 11 deletions(-) delete mode 100644 .coveragerc diff --git a/.coveragerc b/.coveragerc deleted file mode 100644 index 728a27d322..0000000000 --- a/.coveragerc +++ /dev/null @@ -1,9 +0,0 @@ -[run] -omit = - zarr/meta_v1.py - bench/compress_normal.py - -[report] -exclude_lines = - pragma: no cover - pragma: ${PY_MAJOR_VERSION} no cover diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 9abb7c7866..bb04269f07 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -74,7 +74,7 @@ jobs: conda activate zarr-env mkdir ~/blob_emulator azurite -l ~/blob_emulator --debug debug.log 2>&1 > stdouterr.log & - pytest --cov=zarr --cov-config=.coveragerc --doctest-plus --cov-report xml --cov=./ --timeout=300 + pytest --cov=zarr --cov-config=pyproject.toml --doctest-plus --cov-report xml --cov=./ --timeout=300 - uses: codecov/codecov-action@v3 with: #token: ${{ secrets.CODECOV_TOKEN }} # not required for public repos diff --git a/docs/contributing.rst b/docs/contributing.rst index 5bfd5878b2..dc6beb0094 100644 --- a/docs/contributing.rst +++ b/docs/contributing.rst @@ -214,7 +214,7 @@ Zarr maintains 100% test coverage under the latest Python stable release (curren Python 3.8). Both unit tests and docstring doctests are included when computing coverage. Running:: - $ python -m pytest -v --cov=zarr --cov-config=.coveragerc zarr + $ python -m pytest -v --cov=zarr --cov-config=pyproject.toml zarr will automatically run the test suite with coverage and produce a coverage report. This should be 100% before code can be accepted into the main code base. diff --git a/docs/release.rst b/docs/release.rst index 647f722f69..371dbf01a8 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -26,6 +26,9 @@ Maintenance * Migrate to ``pyproject.toml`` and remove redundant infrastructure. By :user:`Saransh Chopra ` :issue:`1158`. +* Migrate coverage to ``pyproject.toml``. + By :user:`John Kirkham ` :issue:`1250`. + .. 
_release_2.13.3: 2.13.3 diff --git a/pyproject.toml b/pyproject.toml index 7ef173879e..1592b9887a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -50,6 +50,17 @@ Discussions = "https://github.com/zarr-developers/zarr-python/discussions" Documentation = "https://zarr.readthedocs.io/" Homepage = "https://github.com/zarr-developers/zarr-python" +[tool.coverage.report] +exclude_lines = [ + "pragma: no cover", + "pragma: ${PY_MAJOR_VERSION} no cover", +] + +[tool.coverage.run] +omit = [ + "zarr/meta_v1.py", + "bench/compress_normal.py", +] [tool.setuptools] packages = ["zarr", "zarr._storage", "zarr.tests"] From 6d0eeaa7e0bdf0c8049467571483d1eccb6191b5 Mon Sep 17 00:00:00 2001 From: Saransh Chopra Date: Wed, 9 Nov 2022 23:03:25 +0530 Subject: [PATCH 006/213] Update release.rst with maintenance PRs (#1252) --- docs/release.rst | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/docs/release.rst b/docs/release.rst index 371dbf01a8..2616b184bc 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -17,17 +17,27 @@ Unreleased Maintenance ~~~~~~~~~~~ +* Migrate to ``pyproject.toml`` and remove redundant infrastructure. + By :user:`Saransh Chopra ` :issue:`1158`. + +* Require ``setuptools`` 64.0.0+ + By :user:`Saransh Chopra ` :issue:`1193`. + +* Pin action versions (pypi-publish, setup-miniconda) for dependabot + By :user:`Saransh Chopra ` :issue:`1205`. + +* Remove ``tox`` support + By :user:`Saransh Chopra ` :issue:`1219`. + * Simplify if/else statement. By :user:`Dimitri Papadopoulos Orfanos ` :issue:`1227`. +* Migrate coverage to ``pyproject.toml``. + By :user:`John Kirkham ` :issue:`1250`. + * Delete unused files. By :user:`John Kirkham ` :issue:`1251`. -* Migrate to ``pyproject.toml`` and remove redundant infrastructure. - By :user:`Saransh Chopra ` :issue:`1158`. - -* Migrate coverage to ``pyproject.toml``. - By :user:`John Kirkham ` :issue:`1250`. .. _release_2.13.3: From bae14230f277621fb03b479a441711d42f682e40 Mon Sep 17 00:00:00 2001 From: Dimitri Papadopoulos Orfanos <3234522+DimitriPapadopoulos@users.noreply.github.com> Date: Wed, 9 Nov 2022 18:33:51 +0100 Subject: [PATCH 007/213] Add missing newline at EOF (#1253) --- data/donotdelete | 2 +- docs/api/hierarchy.rst | 2 +- docs/installation.rst | 2 +- requirements_rtfd.txt | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/data/donotdelete b/data/donotdelete index 1e9ef93e26..b0c96f7ee5 100644 --- a/data/donotdelete +++ b/data/donotdelete @@ -1 +1 @@ -This directory is used for data files created during testing. \ No newline at end of file +This directory is used for data files created during testing. diff --git a/docs/api/hierarchy.rst b/docs/api/hierarchy.rst index 88b9c0fd88..11a5575144 100644 --- a/docs/api/hierarchy.rst +++ b/docs/api/hierarchy.rst @@ -38,4 +38,4 @@ Groups (``zarr.hierarchy``) .. automethod:: zeros_like .. automethod:: ones_like .. automethod:: full_like - .. automethod:: move \ No newline at end of file + .. 
automethod:: move diff --git a/docs/installation.rst b/docs/installation.rst index a07c1c42e1..8553d451cb 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -28,4 +28,4 @@ To work with Zarr source code in development, install from GitHub:: To verify that Zarr has been fully installed, run the test suite:: $ pip install pytest - $ python -m pytest -v --pyargs zarr \ No newline at end of file + $ python -m pytest -v --pyargs zarr diff --git a/requirements_rtfd.txt b/requirements_rtfd.txt index 0a7d90358e..553384e0bd 100644 --- a/requirements_rtfd.txt +++ b/requirements_rtfd.txt @@ -7,4 +7,4 @@ sphinx-copybutton sphinx-rtd-theme numpydoc numpy!=1.21.0 -msgpack-python==0.5.6 \ No newline at end of file +msgpack-python==0.5.6 From 810ec5a8a902d1b0acf856883172faf1b6165624 Mon Sep 17 00:00:00 2001 From: jakirkham Date: Thu, 10 Nov 2022 06:19:00 -0800 Subject: [PATCH 008/213] Add `.flake8` to configure Flake8 (#1249) --- .flake8 | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 .flake8 diff --git a/.flake8 b/.flake8 new file mode 100644 index 0000000000..7da1f9608e --- /dev/null +++ b/.flake8 @@ -0,0 +1,2 @@ +[flake8] +max-line-length = 100 From b02040471f9b7e3e1aefd4f6b90b52c76efe32b8 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 10 Nov 2022 15:47:10 +0100 Subject: [PATCH 009/213] Bump fsspec from 2022.10.0 to 2022.11.0 (#1255) * Bump fsspec from 2022.10.0 to 2022.11.0 Bumps [fsspec](https://github.com/fsspec/filesystem_spec) from 2022.10.0 to 2022.11.0. - [Release notes](https://github.com/fsspec/filesystem_spec/releases) - [Commits](https://github.com/fsspec/filesystem_spec/compare/2022.10.0...2022.11.0) --- updated-dependencies: - dependency-name: fsspec dependency-type: direct:development update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] * Bump s3fs as well close: https://github.com/zarr-developers/zarr-python/pull/1254 Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Josh Moore --- requirements_dev_optional.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements_dev_optional.txt b/requirements_dev_optional.txt index fa57d8270c..a30344061e 100644 --- a/requirements_dev_optional.txt +++ b/requirements_dev_optional.txt @@ -19,6 +19,6 @@ pytest-cov==4.0.0 pytest-doctestplus==0.12.1 pytest-timeout==2.1.0 h5py==3.7.0 -fsspec==2022.10.0 -s3fs==2022.10.0 +fsspec==2022.11.0 +s3fs==2022.11.0 moto[server]>=4.0.8 From 0a25ab2979949bb300204a8b6ed016536b8f346c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 15 Nov 2022 01:30:47 -0800 Subject: [PATCH 010/213] chore: update pre-commit hooks (#1262) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/pre-commit/mirrors-mypy: v0.982 → v0.990](https://github.com/pre-commit/mirrors-mypy/compare/v0.982...v0.990) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index fd1619eefa..6eec599124 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -24,7 +24,7 @@ repos: hooks: - id: check-yaml - repo: https://github.com/pre-commit/mirrors-mypy - rev: v0.982 + rev: v0.990 hooks: - id: mypy files: zarr From 090de2c3ff3f117ef9cca84a7c952412dc0553cc Mon Sep 17 00:00:00 2001 From: jakirkham Date: Tue, 15 Nov 2022 03:14:04 -0800 Subject: [PATCH 011/213] Use `conda-incubator/setup-miniconda@v2.2.0` (#1263) --- .github/workflows/minimal.yml | 2 +- .github/workflows/python-package.yml | 2 +- .github/workflows/windows-testing.yml | 2 +- docs/release.rst | 3 +++ 4 files changed, 6 insertions(+), 3 deletions(-) diff --git a/.github/workflows/minimal.yml b/.github/workflows/minimal.yml index 3eb414059c..2cde38e081 100644 --- a/.github/workflows/minimal.yml +++ b/.github/workflows/minimal.yml @@ -15,7 +15,7 @@ jobs: steps: - uses: actions/checkout@v3 - name: Setup Miniconda - uses: conda-incubator/setup-miniconda@v2 + uses: conda-incubator/setup-miniconda@v2.2.0 with: channels: conda-forge environment-file: environment.yml diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index bb04269f07..872ce52343 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -40,7 +40,7 @@ jobs: with: fetch-depth: 0 - name: Setup Miniconda - uses: conda-incubator/setup-miniconda@master + uses: conda-incubator/setup-miniconda@v2.2.0 with: channels: conda-forge python-version: ${{ matrix.python-version }} diff --git a/.github/workflows/windows-testing.yml b/.github/workflows/windows-testing.yml index af8bae8cf1..ea1d0f64c9 100644 --- a/.github/workflows/windows-testing.yml +++ b/.github/workflows/windows-testing.yml @@ -21,7 +21,7 @@ jobs: - uses: actions/checkout@v3 with: fetch-depth: 0 - - uses: conda-incubator/setup-miniconda@v2 + - uses: conda-incubator/setup-miniconda@v2.2.0 with: auto-update-conda: true python-version: ${{ matrix.python-version }} diff --git a/docs/release.rst b/docs/release.rst index 2616b184bc..d68cafd29c 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -35,6 
+35,9 @@ Maintenance * Migrate coverage to ``pyproject.toml``. By :user:`John Kirkham ` :issue:`1250`. +* Use ``conda-incubator/setup-miniconda@v2.2.0``. + By :user:`John Kirkham ` :issue:`1263`. + * Delete unused files. By :user:`John Kirkham ` :issue:`1251`. From b2f088fb099a72004a0f6590f1e1d09d35cf3921 Mon Sep 17 00:00:00 2001 From: jakirkham Date: Tue, 15 Nov 2022 10:20:53 -0800 Subject: [PATCH 012/213] Fix coverage (#1264) --- docs/release.rst | 3 +++ zarr/hierarchy.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/docs/release.rst b/docs/release.rst index d68cafd29c..e859279ef2 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -32,6 +32,9 @@ Maintenance * Simplify if/else statement. By :user:`Dimitri Papadopoulos Orfanos ` :issue:`1227`. +* Get coverage up to 100%. + By :user:`John Kirkham ` :issue:`1264`. + * Migrate coverage to ``pyproject.toml``. By :user:`John Kirkham ` :issue:`1250`. diff --git a/zarr/hierarchy.py b/zarr/hierarchy.py index f2188217a8..82323c7208 100644 --- a/zarr/hierarchy.py +++ b/zarr/hierarchy.py @@ -1430,7 +1430,7 @@ def open_group(store=None, mode='a', cache_attrs=True, synchronizer=None, path=N mode=mode, zarr_version=zarr_version) if getattr(chunk_store, '_store_version', DEFAULT_ZARR_VERSION) != zarr_version: - raise ValueError( + raise ValueError( # pragma: no cover "zarr_version of store and chunk_store must match" ) From d5891b2b3cbacc598f8ef9ab0739bd6a2d0182a8 Mon Sep 17 00:00:00 2001 From: Saransh Chopra Date: Wed, 16 Nov 2022 01:10:01 +0530 Subject: [PATCH 013/213] Workflow to label PRs with "needs release notes" (#1239) --- .github/labeler.yml | 2 ++ .github/workflows/needs_release_notes.yml | 13 +++++++++++++ docs/release.rst | 3 +++ 3 files changed, 18 insertions(+) create mode 100644 .github/labeler.yml create mode 100644 .github/workflows/needs_release_notes.yml diff --git a/.github/labeler.yml b/.github/labeler.yml new file mode 100644 index 0000000000..dbc3b95333 --- /dev/null +++ b/.github/labeler.yml @@ -0,0 +1,2 @@ +needs release notes: +- all: ['!docs/release.rst'] diff --git a/.github/workflows/needs_release_notes.yml b/.github/workflows/needs_release_notes.yml new file mode 100644 index 0000000000..b0b8b7c97d --- /dev/null +++ b/.github/workflows/needs_release_notes.yml @@ -0,0 +1,13 @@ +name: "Pull Request Labeler" + +on: + - pull_request_target + +jobs: + triage: + runs-on: ubuntu-latest + steps: + - uses: actions/labeler@main + with: + repo-token: ${{ secrets.GITHUB_TOKEN }} + sync-labels: true diff --git a/docs/release.rst b/docs/release.rst index e859279ef2..8642ed8b6f 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -29,6 +29,9 @@ Maintenance * Remove ``tox`` support By :user:`Saransh Chopra ` :issue:`1219`. +* Add workflow to label PRs with "needs release notes". + By :user:`Saransh Chopra ` :issue:`1239`. + * Simplify if/else statement. By :user:`Dimitri Papadopoulos Orfanos ` :issue:`1227`. From 19d159f5532a2432f8417eaa96864eb5296e447d Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 18 Nov 2022 07:54:01 +0100 Subject: [PATCH 014/213] Bump pymongo from 4.3.2 to 4.3.3 (#1270) Bumps [pymongo](https://github.com/mongodb/mongo-python-driver) from 4.3.2 to 4.3.3. 
- [Release notes](https://github.com/mongodb/mongo-python-driver/releases) - [Changelog](https://github.com/mongodb/mongo-python-driver/blob/master/doc/changelog.rst) - [Commits](https://github.com/mongodb/mongo-python-driver/compare/4.3.2...4.3.3) --- updated-dependencies: - dependency-name: pymongo dependency-type: direct:development update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- requirements_dev_optional.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements_dev_optional.txt b/requirements_dev_optional.txt index a30344061e..f4853e7a0e 100644 --- a/requirements_dev_optional.txt +++ b/requirements_dev_optional.txt @@ -11,7 +11,7 @@ azure-storage-blob==12.14.1 # pyup: ignore redis==4.3.4 types-redis types-setuptools -pymongo==4.3.2 +pymongo==4.3.3 # optional test requirements coverage flake8==5.0.4 From 42da4aa2b2d6b6e79a6f3d6629e3d1837af8e9b9 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 21 Nov 2022 09:22:48 +0100 Subject: [PATCH 015/213] Bump numpy from 1.23.4 to 1.23.5 (#1272) Bumps [numpy](https://github.com/numpy/numpy) from 1.23.4 to 1.23.5. - [Release notes](https://github.com/numpy/numpy/releases) - [Changelog](https://github.com/numpy/numpy/blob/main/doc/RELEASE_WALKTHROUGH.rst) - [Commits](https://github.com/numpy/numpy/compare/v1.23.4...v1.23.5) --- updated-dependencies: - dependency-name: numpy dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- requirements_dev_numpy.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements_dev_numpy.txt b/requirements_dev_numpy.txt index b1c56d3f7d..f3f810368b 100644 --- a/requirements_dev_numpy.txt +++ b/requirements_dev_numpy.txt @@ -1,4 +1,4 @@ # Break this out into a separate file to allow testing against # different versions of numpy. This file should pin to the latest # numpy version. -numpy==1.23.4 +numpy==1.23.5 From 5f5c868e656a021825c85e042965640b87685f26 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 22 Nov 2022 09:25:19 +0100 Subject: [PATCH 016/213] chore: update pre-commit hooks (#1273) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/pre-commit/mirrors-mypy: v0.990 → v0.991](https://github.com/pre-commit/mirrors-mypy/compare/v0.990...v0.991) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 6eec599124..7541e60500 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -24,7 +24,7 @@ repos: hooks: - id: check-yaml - repo: https://github.com/pre-commit/mirrors-mypy - rev: v0.990 + rev: v0.991 hooks: - id: mypy files: zarr From e4668e04b757c86c04e8e3f0659e14bf2a85acb7 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 23 Nov 2022 10:15:53 +0100 Subject: [PATCH 017/213] Bump redis from 4.3.4 to 4.3.5 (#1275) Bumps [redis](https://github.com/redis/redis-py) from 4.3.4 to 4.3.5. 
- [Release notes](https://github.com/redis/redis-py/releases) - [Changelog](https://github.com/redis/redis-py/blob/master/CHANGES) - [Commits](https://github.com/redis/redis-py/compare/v4.3.4...v4.3.5) --- updated-dependencies: - dependency-name: redis dependency-type: direct:development update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- requirements_dev_optional.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements_dev_optional.txt b/requirements_dev_optional.txt index f4853e7a0e..365787bd26 100644 --- a/requirements_dev_optional.txt +++ b/requirements_dev_optional.txt @@ -8,7 +8,7 @@ ipywidgets==8.0.2 # don't let pyup change pinning for azure-storage-blob, need to pin to older # version to get compatibility with azure storage emulator on appveyor (FIXME) azure-storage-blob==12.14.1 # pyup: ignore -redis==4.3.4 +redis==4.3.5 types-redis types-setuptools pymongo==4.3.3 From 596d9c0e2a05e95b106831d0331aae37c68c83d7 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 5 Dec 2022 08:47:52 +0100 Subject: [PATCH 018/213] Bump redis from 4.3.5 to 4.4.0 (#1282) Bumps [redis](https://github.com/redis/redis-py) from 4.3.5 to 4.4.0. - [Release notes](https://github.com/redis/redis-py/releases) - [Changelog](https://github.com/redis/redis-py/blob/master/CHANGES) - [Commits](https://github.com/redis/redis-py/compare/v4.3.5...v4.4.0) --- updated-dependencies: - dependency-name: redis dependency-type: direct:development update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- requirements_dev_optional.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements_dev_optional.txt b/requirements_dev_optional.txt index 365787bd26..1f1df63287 100644 --- a/requirements_dev_optional.txt +++ b/requirements_dev_optional.txt @@ -8,7 +8,7 @@ ipywidgets==8.0.2 # don't let pyup change pinning for azure-storage-blob, need to pin to older # version to get compatibility with azure storage emulator on appveyor (FIXME) azure-storage-blob==12.14.1 # pyup: ignore -redis==4.3.5 +redis==4.4.0 types-redis types-setuptools pymongo==4.3.3 From b3cda2b3318c93a96266f62e383b3b45f105f389 Mon Sep 17 00:00:00 2001 From: Ryan Abernathey Date: Tue, 6 Dec 2022 04:11:08 -0500 Subject: [PATCH 019/213] Fix double counting V3 groups bug (#1268) * fix double counting groups bug * add release notes Co-authored-by: Josh Moore --- docs/release.rst | 6 ++++++ zarr/hierarchy.py | 34 ++++++++++++++++------------------ zarr/tests/test_hierarchy.py | 13 +++++++++++++ 3 files changed, 35 insertions(+), 18 deletions(-) diff --git a/docs/release.rst b/docs/release.rst index 8642ed8b6f..e15132b60b 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -14,6 +14,12 @@ Unreleased # .. warning:: # Pre-release! Use :command:`pip install --pre zarr` to evaluate this release. +* Fix bug that caused double counting of groups in ``groups()`` and ``group_keys()`` + methods with V3 stores. + By :user:`Ryan Abernathey ` :issue:`1228`. + +.. 
_release_2.13.2: + Maintenance ~~~~~~~~~~~ diff --git a/zarr/hierarchy.py b/zarr/hierarchy.py index 82323c7208..0dae921500 100644 --- a/zarr/hierarchy.py +++ b/zarr/hierarchy.py @@ -511,9 +511,15 @@ def group_keys(self): else: dir_name = meta_root + self._path group_sfx = '.group' + self._metadata_key_suffix - for key in sorted(listdir(self._store, dir_name)): + # The fact that we call sorted means this can't be a streaming generator. + # The keys are already in memory. + all_keys = sorted(listdir(self._store, dir_name)) + for key in all_keys: if key.endswith(group_sfx): key = key[:-len(group_sfx)] + if key in all_keys: + # otherwise we will double count this group + continue path = self._key_prefix + key if path.endswith(".array" + self._metadata_key_suffix): # skip array keys @@ -552,24 +558,16 @@ def groups(self): zarr_version=self._version) else: - dir_name = meta_root + self._path - group_sfx = '.group' + self._metadata_key_suffix - for key in sorted(listdir(self._store, dir_name)): - if key.endswith(group_sfx): - key = key[:-len(group_sfx)] + for key in self.group_keys(): path = self._key_prefix + key - if path.endswith(".array" + self._metadata_key_suffix): - # skip array keys - continue - if contains_group(self._store, path, explicit_only=False): - yield key, Group( - self._store, - path=path, - read_only=self._read_only, - chunk_store=self._chunk_store, - cache_attrs=self.attrs.cache, - synchronizer=self._synchronizer, - zarr_version=self._version) + yield key, Group( + self._store, + path=path, + read_only=self._read_only, + chunk_store=self._chunk_store, + cache_attrs=self.attrs.cache, + synchronizer=self._synchronizer, + zarr_version=self._version) def array_keys(self, recurse=False): """Return an iterator over member names for arrays only. 
diff --git a/zarr/tests/test_hierarchy.py b/zarr/tests/test_hierarchy.py index 8d1fabbed3..7d87b6d404 100644 --- a/zarr/tests/test_hierarchy.py +++ b/zarr/tests/test_hierarchy.py @@ -770,6 +770,19 @@ def visitor1(val, *args): g1.store.close() + # regression test for https://github.com/zarr-developers/zarr-python/issues/1228 + def test_double_counting_group_v3(self): + root_group = self.create_group() + group_names = ["foo", "foo-", "foo_"] + for name in group_names: + sub_group = root_group.create_group(name) + sub_group.create("bar", shape=10, dtype="i4") + assert list(root_group.group_keys()) == sorted(group_names) + assert list(root_group.groups()) == [ + (name, root_group[name]) + for name in sorted(group_names) + ] + def test_empty_getitem_contains_iterators(self): # setup g = self.create_group() From e7c0eb45ad1f571a8da3bfa9e23586848296e66c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 7 Dec 2022 10:44:47 +0100 Subject: [PATCH 020/213] chore: update pre-commit hooks (#1278) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/PyCQA/flake8: 5.0.4 → 6.0.0](https://github.com/PyCQA/flake8/compare/5.0.4...6.0.0) - [github.com/pre-commit/pre-commit-hooks: v4.3.0 → v4.4.0](https://github.com/pre-commit/pre-commit-hooks/compare/v4.3.0...v4.4.0) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 7541e60500..1f629ccf76 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -6,7 +6,7 @@ default_language_version: python: python3 repos: - repo: https://github.com/PyCQA/flake8 - rev: 5.0.4 + rev: 6.0.0 hooks: - id: flake8 args: [ @@ -20,7 +20,7 @@ repos: - id: codespell args: ["-L", "ba,ihs,kake,nd,noe,nwo,te", "-S", "fixture"] - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.3.0 + rev: v4.4.0 hooks: - id: check-yaml - repo: https://github.com/pre-commit/mirrors-mypy From 1af77b63ad8d51a5a8dc2cd923bf73cb8abe5a64 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 16 Dec 2022 00:39:30 -0800 Subject: [PATCH 021/213] Bump numcodecs from 0.10.2 to 0.11.0 (#1300) Bumps [numcodecs](https://github.com/zarr-developers/numcodecs) from 0.10.2 to 0.11.0. - [Release notes](https://github.com/zarr-developers/numcodecs/releases) - [Changelog](https://github.com/zarr-developers/numcodecs/blob/main/docs/release.rst) - [Commits](https://github.com/zarr-developers/numcodecs/compare/v0.10.2...v0.11.0) --- updated-dependencies: - dependency-name: numcodecs dependency-type: direct:development update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- requirements_dev_minimal.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements_dev_minimal.txt b/requirements_dev_minimal.txt index b76b398a16..3f96d79850 100644 --- a/requirements_dev_minimal.txt +++ b/requirements_dev_minimal.txt @@ -1,7 +1,7 @@ # library requirements asciitree==0.3.3 fasteners==0.18 -numcodecs==0.10.2 +numcodecs==0.11.0 msgpack-python==0.5.6 setuptools-scm==7.0.5 # test requirements From 13f7d0d77530a7d59b7f3d4bc80150d436c65061 Mon Sep 17 00:00:00 2001 From: Kola Babalola Date: Wed, 21 Dec 2022 07:38:37 +0000 Subject: [PATCH 022/213] Fix minor indexing errors in tutorial and specification examples of documentation (#1277) --- docs/spec/v1.rst | 6 +++--- docs/spec/v2.rst | 6 +++--- docs/tutorial.rst | 4 ++-- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/docs/spec/v1.rst b/docs/spec/v1.rst index d8598c88c4..8584b24e6d 100644 --- a/docs/spec/v1.rst +++ b/docs/spec/v1.rst @@ -144,9 +144,9 @@ are converted to strings and concatenated with the period character ('.') separating each index. For example, given an array with shape (10000, 10000) and chunk shape (1000, 1000) there will be 100 chunks laid out in a 10 by 10 grid. The chunk with indices (0, 0) provides -data for rows 0-1000 and columns 0-1000 and is stored under the key -'0.0'; the chunk with indices (2, 4) provides data for rows 2000-3000 -and columns 4000-5000 and is stored under the key '2.4'; etc. +data for rows 0-999 and columns 0-999 and is stored under the key +'0.0'; the chunk with indices (2, 4) provides data for rows 2000-2999 +and columns 4000-4999 and is stored under the key '2.4'; etc. There is no need for all chunks to be present within an array store. If a chunk is not present then it is considered to be in an diff --git a/docs/spec/v2.rst b/docs/spec/v2.rst index 6d11fd1acc..45e6afb320 100644 --- a/docs/spec/v2.rst +++ b/docs/spec/v2.rst @@ -216,9 +216,9 @@ To form a string key for a chunk, the indices are converted to strings and concatenated with the period character (".") separating each index. For example, given an array with shape (10000, 10000) and chunk shape (1000, 1000) there will be 100 chunks laid out in a 10 by 10 grid. The chunk with indices -(0, 0) provides data for rows 0-1000 and columns 0-1000 and is stored under the -key "0.0"; the chunk with indices (2, 4) provides data for rows 2000-3000 and -columns 4000-5000 and is stored under the key "2.4"; etc. +(0, 0) provides data for rows 0-999 and columns 0-999 and is stored under the +key "0.0"; the chunk with indices (2, 4) provides data for rows 2000-2999 and +columns 4000-4999 and is stored under the key "2.4"; etc. There is no need for all chunks to be present within an array store. If a chunk is not present then it is considered to be in an uninitialized state. An diff --git a/docs/tutorial.rst b/docs/tutorial.rst index 411ce0a163..43e42faf6b 100644 --- a/docs/tutorial.rst +++ b/docs/tutorial.rst @@ -525,9 +525,9 @@ When the indexing arrays have different shapes, they are broadcast together. 
That is, the following two calls are equivalent:: >>> z[1, [1, 3]] - array([5, 7]) + array([6, 8]) >>> z[[1, 1], [1, 3]] - array([5, 7]) + array([6, 8]) Indexing with a mask array ~~~~~~~~~~~~~~~~~~~~~~~~~~ From 5aff4cb2a3114e4fbdeb2692856789827237eb48 Mon Sep 17 00:00:00 2001 From: Saransh Chopra Date: Wed, 21 Dec 2022 14:26:37 +0530 Subject: [PATCH 023/213] Skip labeler for bot PRs (#1271) --- .github/workflows/needs_release_notes.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/needs_release_notes.yml b/.github/workflows/needs_release_notes.yml index b0b8b7c97d..d81ee0bdc4 100644 --- a/.github/workflows/needs_release_notes.yml +++ b/.github/workflows/needs_release_notes.yml @@ -5,6 +5,7 @@ on: jobs: triage: + if: ${{ github.event.pull_request.user.login != 'dependabot[bot]' }} && ${{ github.event.pull_request.user.login != 'pre-commit-ci[bot]' }} runs-on: ubuntu-latest steps: - uses: actions/labeler@main From ab101e1c9ab094f6817ea78ff08320a513ab35cd Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 21 Dec 2022 09:56:52 +0100 Subject: [PATCH 024/213] Bump numpy from 1.23.5 to 1.24.0 (#1301) Bumps [numpy](https://github.com/numpy/numpy) from 1.23.5 to 1.24.0. - [Release notes](https://github.com/numpy/numpy/releases) - [Changelog](https://github.com/numpy/numpy/blob/main/doc/RELEASE_WALKTHROUGH.rst) - [Commits](https://github.com/numpy/numpy/compare/v1.23.5...v1.24.0) --- updated-dependencies: - dependency-name: numpy dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- requirements_dev_numpy.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements_dev_numpy.txt b/requirements_dev_numpy.txt index f3f810368b..6b5b91b6c8 100644 --- a/requirements_dev_numpy.txt +++ b/requirements_dev_numpy.txt @@ -1,4 +1,4 @@ # Break this out into a separate file to allow testing against # different versions of numpy. This file should pin to the latest # numpy version. -numpy==1.23.5 +numpy==1.24.0 From 4b91976e89b3d907c530ee2223e3277ed12bb931 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 21 Dec 2022 09:57:07 +0100 Subject: [PATCH 025/213] Bump setuptools-scm from 7.0.5 to 7.1.0 (#1302) Bumps [setuptools-scm](https://github.com/pypa/setuptools_scm) from 7.0.5 to 7.1.0. - [Release notes](https://github.com/pypa/setuptools_scm/releases) - [Changelog](https://github.com/pypa/setuptools_scm/blob/main/CHANGELOG.rst) - [Commits](https://github.com/pypa/setuptools_scm/compare/v7.0.5...v7.1.0) --- updated-dependencies: - dependency-name: setuptools-scm dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- requirements_dev_minimal.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements_dev_minimal.txt b/requirements_dev_minimal.txt index 3f96d79850..d5e0798e86 100644 --- a/requirements_dev_minimal.txt +++ b/requirements_dev_minimal.txt @@ -3,6 +3,6 @@ asciitree==0.3.3 fasteners==0.18 numcodecs==0.11.0 msgpack-python==0.5.6 -setuptools-scm==7.0.5 +setuptools-scm==7.1.0 # test requirements pytest==7.2.0 From 03853c97b1f21c0cb584ea4d554e4c0f7aad912f Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 21 Dec 2022 11:28:28 +0100 Subject: [PATCH 026/213] Bump ipywidgets from 8.0.2 to 8.0.3 (#1287) Bumps [ipywidgets](https://github.com/jupyter-widgets/ipywidgets) from 8.0.2 to 8.0.3. - [Release notes](https://github.com/jupyter-widgets/ipywidgets/releases) - [Commits](https://github.com/jupyter-widgets/ipywidgets/compare/8.0.2...8.0.3) --- updated-dependencies: - dependency-name: ipywidgets dependency-type: direct:development update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- requirements_dev_optional.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements_dev_optional.txt b/requirements_dev_optional.txt index 1f1df63287..77a2b5abdf 100644 --- a/requirements_dev_optional.txt +++ b/requirements_dev_optional.txt @@ -3,7 +3,7 @@ lmdb==1.3.0; sys_platform != 'win32' # optional library requirements for Jupyter ipytree==0.2.2 -ipywidgets==8.0.2 +ipywidgets==8.0.3 # optional library requirements for services # don't let pyup change pinning for azure-storage-blob, need to pin to older # version to get compatibility with azure storage emulator on appveyor (FIXME) From d7e568ddd25151970489661ccfce67cadd919ab7 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 21 Dec 2022 11:29:18 +0100 Subject: [PATCH 027/213] Remove flake8 dependency (#1276) * Bump flake8 from 5.0.4 to 6.0.0 Bumps [flake8](https://github.com/pycqa/flake8) from 5.0.4 to 6.0.0. - [Release notes](https://github.com/pycqa/flake8/releases) - [Commits](https://github.com/pycqa/flake8/compare/5.0.4...6.0.0) --- updated-dependencies: - dependency-name: flake8 dependency-type: direct:development update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] * Attempt removing flake8 Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Josh Moore --- requirements_dev_optional.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/requirements_dev_optional.txt b/requirements_dev_optional.txt index 77a2b5abdf..25df1e19cc 100644 --- a/requirements_dev_optional.txt +++ b/requirements_dev_optional.txt @@ -14,7 +14,6 @@ types-setuptools pymongo==4.3.3 # optional test requirements coverage -flake8==5.0.4 pytest-cov==4.0.0 pytest-doctestplus==0.12.1 pytest-timeout==2.1.0 From 482dfe531b6b180b60a734cec22a0dd7615732d3 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 21 Dec 2022 11:29:43 +0100 Subject: [PATCH 028/213] Bump lmdb from 1.3.0 to 1.4.0 (#1288) Bumps [lmdb](https://github.com/jnwatson/py-lmdb) from 1.3.0 to 1.4.0. 
- [Release notes](https://github.com/jnwatson/py-lmdb/releases) - [Changelog](https://github.com/jnwatson/py-lmdb/blob/py-lmdb_1.4.0/ChangeLog) - [Commits](https://github.com/jnwatson/py-lmdb/compare/py-lmdb_1.3.0...py-lmdb_1.4.0) --- updated-dependencies: - dependency-name: lmdb dependency-type: direct:development update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- requirements_dev_optional.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements_dev_optional.txt b/requirements_dev_optional.txt index 25df1e19cc..05cc2b8730 100644 --- a/requirements_dev_optional.txt +++ b/requirements_dev_optional.txt @@ -1,6 +1,6 @@ # optional library requirements # bsddb3==6.2.6; sys_platform != 'win32' -lmdb==1.3.0; sys_platform != 'win32' +lmdb==1.4.0; sys_platform != 'win32' # optional library requirements for Jupyter ipytree==0.2.2 ipywidgets==8.0.3 From 4e633ad9aa434304296900790c4c65e0fa0dfa12 Mon Sep 17 00:00:00 2001 From: Rafal Wojdyla Date: Thu, 22 Dec 2022 09:33:45 +0900 Subject: [PATCH 029/213] Handle fsspec.FSMap using FSStore store (#1304) --- docs/release.rst | 2 ++ zarr/_storage/v3.py | 10 ++++++++++ zarr/storage.py | 19 +++++++++++++++++++ zarr/tests/test_storage.py | 5 +++++ zarr/tests/test_storage_v3.py | 5 +++++ 5 files changed, 41 insertions(+) diff --git a/docs/release.rst b/docs/release.rst index e15132b60b..50eb8316bf 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -17,6 +17,8 @@ Unreleased * Fix bug that caused double counting of groups in ``groups()`` and ``group_keys()`` methods with V3 stores. By :user:`Ryan Abernathey ` :issue:`1228`. +* Handle fsspec.FSMap using FSStore store + By :user:`Rafal Wojdyla ` :issue:`1304`. .. 
_release_2.13.2: diff --git a/zarr/_storage/v3.py b/zarr/_storage/v3.py index 515e6f5aaa..a0a1870ffc 100644 --- a/zarr/_storage/v3.py +++ b/zarr/_storage/v3.py @@ -567,6 +567,16 @@ def _normalize_store_arg_v3(store: Any, storage_options=None, mode="r") -> BaseS return store if isinstance(store, os.PathLike): store = os.fspath(store) + if FSStore._fsspec_installed(): + import fsspec + if isinstance(store, fsspec.FSMap): + return FSStoreV3(store.root, + fs=store.fs, + mode=mode, + check=store.check, + create=store.create, + missing_exceptions=store.missing_exceptions, + **(storage_options or {})) if isinstance(store, str): if "://" in store or "::" in store: store = FSStoreV3(store, mode=mode, **(storage_options or {})) diff --git a/zarr/storage.py b/zarr/storage.py index 4acf637330..a2a8919d0b 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -139,6 +139,16 @@ def _normalize_store_arg_v2(store: Any, storage_options=None, mode="r") -> BaseS return store if isinstance(store, os.PathLike): store = os.fspath(store) + if FSStore._fsspec_installed(): + import fsspec + if isinstance(store, fsspec.FSMap): + return FSStore(store.root, + fs=store.fs, + mode=mode, + check=store.check, + create=store.create, + missing_exceptions=store.missing_exceptions, + **(storage_options or {})) if isinstance(store, str): if "://" in store or "::" in store: return FSStore(store, mode=mode, **(storage_options or {})) @@ -1308,6 +1318,8 @@ def __init__(self, url, normalize_keys=False, key_separator=None, create=False, missing_exceptions=None, **storage_options): + if not self._fsspec_installed(): # pragma: no cover + raise ImportError("`fsspec` is required to use zarr's FSStore") import fsspec mapper_options = {"check": check, "create": create} @@ -1479,6 +1491,13 @@ def clear(self): raise ReadOnlyError() self.map.clear() + @classmethod + def _fsspec_installed(cls): + """Returns true if fsspec is installed""" + import importlib.util + + return importlib.util.find_spec("fsspec") is not None + class TempStore(DirectoryStore): """Directory store using a temporary directory for storage. 
diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py
index 39d4b5988d..7c23735f36 100644
--- a/zarr/tests/test_storage.py
+++ b/zarr/tests/test_storage.py
@@ -2556,10 +2556,15 @@ def test_normalize_store_arg(tmpdir):
         assert isinstance(store, Class)

     if have_fsspec:
+        import fsspec
+
         path = tempfile.mkdtemp()
         store = normalize_store_arg("file://" + path, zarr_version=2, mode='w')
         assert isinstance(store, FSStore)

+        store = normalize_store_arg(fsspec.get_mapper("file://" + path))
+        assert isinstance(store, FSStore)
+

 def test_meta_prefix_6853():

diff --git a/zarr/tests/test_storage_v3.py b/zarr/tests/test_storage_v3.py
index 13b5011676..4f6215135c 100644
--- a/zarr/tests/test_storage_v3.py
+++ b/zarr/tests/test_storage_v3.py
@@ -467,11 +467,16 @@ def test_normalize_store_arg_v3(tmpdir):
         normalize_store_arg(str(fn), zarr_version=3, mode='w', storage_options={"some": "kwargs"})

     if have_fsspec:
+        import fsspec
+
         path = tempfile.mkdtemp()
         store = normalize_store_arg("file://" + path, zarr_version=3, mode='w')
         assert isinstance(store, FSStoreV3)
         assert 'zarr.json' in store

+        store = normalize_store_arg(fsspec.get_mapper("file://" + path), zarr_version=3)
+        assert isinstance(store, FSStoreV3)
+
     fn = tmpdir.join('store.n5')
     with pytest.raises(NotImplementedError):
         normalize_store_arg(str(fn), zarr_version=3, mode='w')
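Patch 029 above lets an ``fsspec.FSMap`` be passed directly wherever a store argument is
accepted; it is detected and wrapped in an ``FSStore`` (``FSStoreV3`` for v3 stores). The
following is a minimal usage sketch of the resulting behaviour, not part of the patch
itself, assuming ``fsspec`` is installed and using a hypothetical local path
(``zarr.open`` is used here because it threads the access mode through; the matching fix
for ``zarr.create`` arrives in patch 036 below)::

    import fsspec
    import zarr

    # Any fsspec-backed mapping works; a local directory is the simplest case.
    store = fsspec.get_mapper("file:///tmp/example.zarr")  # hypothetical path

    # The FSMap is wrapped in an FSStore behind the scenes.
    z = zarr.open(store, mode="w", shape=(100,), chunks=(10,), dtype="i4")
    z[:] = 42
    print(type(z.store).__name__)  # expected: FSStore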
From dcce26e6d4cdecdf7192ae1943127f381b16557c Mon Sep 17 00:00:00 2001
From: Dimitri Papadopoulos Orfanos <3234522+DimitriPapadopoulos@users.noreply.github.com>
Date: Thu, 12 Jan 2023 14:00:51 +0100
Subject: [PATCH 030/213] =?UTF-8?q?http://=20=E2=86=92=20https://=20(#1313?=
 =?UTF-8?q?)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 docs/release.rst                   | 6 ++++--
 notebooks/dask_copy.ipynb          | 2 +-
 notebooks/dask_count_alleles.ipynb | 2 +-
 3 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/docs/release.rst b/docs/release.rst
index 50eb8316bf..e63e7f8c22 100644
--- a/docs/release.rst
+++ b/docs/release.rst
@@ -17,11 +17,10 @@ Unreleased
 * Fix bug that caused double counting of groups in ``groups()`` and ``group_keys()``
   methods with V3 stores.
   By :user:`Ryan Abernathey ` :issue:`1228`.
+
 * Handle fsspec.FSMap using FSStore store
   By :user:`Rafal Wojdyla ` :issue:`1304`.

-.. _release_2.13.2:
-
 Maintenance
 ~~~~~~~~~~~

@@ -55,6 +54,9 @@ Maintenance
 * Delete unused files.
   By :user:`John Kirkham ` :issue:`1251`.

+* Update web links: http:// → https://
+  By :user:`Dimitri Papadopoulos Orfanos ` :issue:`1313`.
+
 .. _release_2.13.3:

diff --git a/notebooks/dask_copy.ipynb b/notebooks/dask_copy.ipynb
index 5cb712508c..ba4391737a 100644
--- a/notebooks/dask_copy.ipynb
+++ b/notebooks/dask_copy.ipynb
@@ -33,7 +33,7 @@
    [hunk in an HTML output cell ("Loading BokehJS ..."): the Bokeh loader script URL changes from http:// to https://]

diff --git a/notebooks/dask_count_alleles.ipynb b/notebooks/dask_count_alleles.ipynb
index 8ca462b232..8b9b7cec6e 100644
--- a/notebooks/dask_count_alleles.ipynb
+++ b/notebooks/dask_count_alleles.ipynb
@@ -26,7 +26,7 @@
    [hunk in an HTML output cell ("Loading BokehJS ..."): the Bokeh loader script URL changes from http:// to https://]

From af2600297d506778c7480b52df6d8efb9ac0bc4b Mon Sep 17 00:00:00 2001
From: Josh Moore
Date: Mon, 16 Jan 2023 14:37:22 +0100
Subject: [PATCH 031/213] 2.13.4/2.14.0 draft release notes (#1316)

* 2.13.4/2.14.0 draft release notes

* Correct typo

* Fix headings

* Fix issue markup

* Fix bullet points

* Re-arrange some PRs

* Update docs/release.rst

Co-authored-by: Sanket Verma

* Update docs/release.rst

Co-authored-by: Sanket Verma

* Apply suggestions from code review

Co-authored-by: Sanket Verma

* Update docs/release.rst

Co-authored-by: Sanket Verma

Co-authored-by: Sanket Verma
---
 docs/release.rst | 120 ++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 113 insertions(+), 7 deletions(-)

diff --git a/docs/release.rst b/docs/release.rst
index e63e7f8c22..7ffd751696 100644
--- a/docs/release.rst
+++ b/docs/release.rst
@@ -6,20 +6,58 @@ Release notes
 #     to document your changes. On releases it will be
 #     re-indented so that it does not show up in the notes.

-.. _unreleased:
+ .. _unreleased:

-Unreleased
-----------
+ Unreleased
+ ----------
 ..
 #    .. warning::
 #       Pre-release! Use :command:`pip install --pre zarr` to evaluate this release.

 .. _release_2.13.4:

 2.13.4
 ------

 Appreciation
 ~~~~~~~~~~~~

 Special thanks to Outreachy participants for contributing to most of the
 maintenance PRs. Please read the blog post summarising the contribution phase
 and welcoming new Outreachy interns:
 https://zarr.dev/blog/welcoming-outreachy-2022-interns/


 Enhancements
 ~~~~~~~~~~~~

 * Handle fsspec.FSMap using FSStore store.
   By :user:`Rafal Wojdyla ` :issue:`1304`.

 Bug fixes
 ~~~~~~~~~

 * Fix bug that caused double counting of groups in ``groups()`` and
   ``group_keys()`` methods with V3 stores.
   By :user:`Ryan Abernathey ` :issue:`1228`.

 * Remove unnecessary calling of `contains_array` for key that ended in
   `.array.json`.
   By :user:`Joe Hamman ` :issue:`1149`.

 Documentation
 ~~~~~~~~~~~~~

 * Fix minor indexing errors in tutorial and specification examples of
   documentation.
   By :user:`Kola Babalola ` :issue:`1277`.

 * Add `requirements_rtfd.txt` in `contributing.rst`.
   By :user:`AWA BRANDON AWA ` :issue:`1243`.

 * Add documentation for find/findall using visit.
   By :user:`Weddy Gikunda ` :issue:`1241`.

 * Refresh of the main landing page.
   By :user:`Josh Moore ` :issue:`1173`.

 Maintenance
 ~~~~~~~~~~~

 * Migrate to ``pyproject.toml`` and remove redundant infrastructure.
   By :user:`Saransh Chopra ` :issue:`1158`.

 * Require ``setuptools`` 64.0.0+.
   By :user:`Saransh Chopra ` :issue:`1193`.

 * Pin action versions (pypi-publish, setup-miniconda) for dependabot.
   By :user:`Saransh Chopra ` :issue:`1205`.

 * Remove ``tox`` support.
   By :user:`Saransh Chopra ` :issue:`1219`.

 * Add workflow to label PRs with "needs release notes".
   By :user:`Saransh Chopra ` :issue:`1239`.

 * Simplify if/else statement.
   By :user:`Dimitri Papadopoulos Orfanos ` :issue:`1227`.

 * Get coverage up to 100%.
   By :user:`John Kirkham ` :issue:`1264`.

 * Migrate coverage to ``pyproject.toml``.
   By :user:`John Kirkham ` :issue:`1250`.

 * Use ``conda-incubator/setup-miniconda@v2.2.0``.
   By :user:`John Kirkham ` :issue:`1263`.

 * Delete unused files.
   By :user:`John Kirkham ` :issue:`1251`.

 * Skip labeler for bot PRs.
   By :user:`Saransh Chopra ` :issue:`1271`.

 * Restore Flake8 configuration.
   By :user:`John Kirkham ` :issue:`1249`.

 * Add missing newline at EOF.
   By :user:`Dimitri Papadopoulos` :issue:`1253`.

 * Add `license_files` to `pyproject.toml`.
   By :user:`John Kirkham ` :issue:`1247`.

 * Adding `pyupgrade` suggestions.
   By :user:`Dimitri Papadopoulos Orfanos ` :issue:`1225`.

 * Fixed some linting errors.
   By :user:`Weddy Gikunda ` :issue:`1226`.

 * Added the link to main website in readthedocs sidebar.
   By :user:`Stephanie_nkwatoh ` :issue:`1216`.

 * Remove redundant wheel dependency in `pyproject.toml`.
   By :user:`Dimitri Papadopoulos Orfanos ` :issue:`1233`.
 * Turned on `isolated_build` in `tox.ini` file.
   By :user:`AWA BRANDON AWA ` :issue:`1210`.

 * Fixed `flake8` alert and avoided duplication of `Zarr Developers`.
   By :user:`Dimitri Papadopoulos Orfanos ` :issue:`1203`.

 * Bump to NumPy 1.20+ in `environment.yml`.
   By :user:`John Kirkham ` :issue:`1201`.

 * Bump to NumPy 1.20 in `pyproject.toml`.
   By :user:`Dimitri Papadopoulos Orfanos ` :issue:`1192`.

 * Remove LGTM (`.lgtm.yml`) configuration file.
   By :user:`Dimitri Papadopoulos Orfanos ` :issue:`1191`.

 * Codespell will skip `fixture` in pre-commit.
   By :user:`Dimitri Papadopoulos Orfanos ` :issue:`1197`.

 * Add msgpack in `requirements_rtfd.txt`.
   By :user:`Emmanuel Bolarinwa ` :issue:`1188`.

 * Added license to docs and fixed a typo from `_spec_v2` to `_spec_v3`.
   By :user:`AWA BRANDON AWA ` :issue:`1182`.

 * Fixed installation link in `README.md`.
   By :user:`AWA BRANDON AWA ` :issue:`1177`.

 * Fixed typos in `installation.rst` and `release.rst`.
   By :user:`Chizoba Nweke ` :issue:`1178`.

 * Set `docs/conf.py` language to `en`.
   By :user:`AWA BRANDON AWA ` :issue:`1174`.

 * Added `installation.rst` to the docs.
   By :user:`AWA BRANDON AWA ` :issue:`1170`.

 * Adjustment of year from `2015-2018` to `2015-2022` in the docs.
   By :user:`Emmanuel Bolarinwa ` :issue:`1165`.

 * Updated `Forking the repository` section in `contributing.rst`.
   By :user:`AWA BRANDON AWA ` :issue:`1171`.

 * Updated GitHub actions.
   By :user:`Dimitri Papadopoulos Orfanos ` :issue:`1134`.

 * Update web links: `http:// → https://`.
   By :user:`Dimitri Papadopoulos Orfanos ` :issue:`1313`.

 .. _release_2.13.3:

From f0beb454f58331e50e7c34e7320e77ae89dd674e Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 16 Jan 2023 14:42:50 +0100
Subject: [PATCH 032/213] Bump pytest from 7.2.0 to 7.2.1 (#1317)

Bumps [pytest](https://github.com/pytest-dev/pytest) from 7.2.0 to 7.2.1.
- [Release notes](https://github.com/pytest-dev/pytest/releases)
- [Changelog](https://github.com/pytest-dev/pytest/blob/main/CHANGELOG.rst)
- [Commits](https://github.com/pytest-dev/pytest/compare/7.2.0...7.2.1)

---
updated-dependencies:
- dependency-name: pytest
  dependency-type: direct:development
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot]

Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 requirements_dev_minimal.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements_dev_minimal.txt b/requirements_dev_minimal.txt
index d5e0798e86..34d7d98e7e 100644
--- a/requirements_dev_minimal.txt
+++ b/requirements_dev_minimal.txt
@@ -5,4 +5,4 @@ numcodecs==0.11.0
 msgpack-python==0.5.6
 setuptools-scm==7.1.0
 # test requirements
-pytest==7.2.0
+pytest==7.2.1

From bf1d37293044535d32cb6655d06cc078e66b6a37 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 16 Jan 2023 14:43:03 +0100
Subject: [PATCH 033/213] Bump redis from 4.4.0 to 4.4.2 (#1315)

Bumps [redis](https://github.com/redis/redis-py) from 4.4.0 to 4.4.2.
- [Release notes](https://github.com/redis/redis-py/releases)
- [Changelog](https://github.com/redis/redis-py/blob/master/CHANGES)
- [Commits](https://github.com/redis/redis-py/compare/v4.4.0...v4.4.2)

---
updated-dependencies:
- dependency-name: redis
  dependency-type: direct:development
  update-type: version-update:semver-patch
...
Signed-off-by: dependabot[bot] Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- requirements_dev_optional.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements_dev_optional.txt b/requirements_dev_optional.txt index 05cc2b8730..e44c60e9c9 100644 --- a/requirements_dev_optional.txt +++ b/requirements_dev_optional.txt @@ -8,7 +8,7 @@ ipywidgets==8.0.3 # don't let pyup change pinning for azure-storage-blob, need to pin to older # version to get compatibility with azure storage emulator on appveyor (FIXME) azure-storage-blob==12.14.1 # pyup: ignore -redis==4.4.0 +redis==4.4.2 types-redis types-setuptools pymongo==4.3.3 From c9fe26273511395e9b295f17d17224f2cde52f3b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 16 Jan 2023 14:43:25 +0100 Subject: [PATCH 034/213] Bump numpy from 1.24.0 to 1.24.1 (#1311) Bumps [numpy](https://github.com/numpy/numpy) from 1.24.0 to 1.24.1. - [Release notes](https://github.com/numpy/numpy/releases) - [Changelog](https://github.com/numpy/numpy/blob/main/doc/RELEASE_WALKTHROUGH.rst) - [Commits](https://github.com/numpy/numpy/compare/v1.24.0...v1.24.1) --- updated-dependencies: - dependency-name: numpy dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- requirements_dev_numpy.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements_dev_numpy.txt b/requirements_dev_numpy.txt index 6b5b91b6c8..7d373a254d 100644 --- a/requirements_dev_numpy.txt +++ b/requirements_dev_numpy.txt @@ -1,4 +1,4 @@ # Break this out into a separate file to allow testing against # different versions of numpy. This file should pin to the latest # numpy version. -numpy==1.24.0 +numpy==1.24.1 From 876ccf2cd66135d53b3962dc6120c98624ba9d3c Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 16 Jan 2023 14:43:38 +0100 Subject: [PATCH 035/213] Bump ipywidgets from 8.0.3 to 8.0.4 (#1307) Bumps [ipywidgets](https://github.com/jupyter-widgets/ipywidgets) from 8.0.3 to 8.0.4. - [Release notes](https://github.com/jupyter-widgets/ipywidgets/releases) - [Commits](https://github.com/jupyter-widgets/ipywidgets/compare/8.0.3...8.0.4) --- updated-dependencies: - dependency-name: ipywidgets dependency-type: direct:development update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- requirements_dev_optional.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements_dev_optional.txt b/requirements_dev_optional.txt index e44c60e9c9..5d7dc3398c 100644 --- a/requirements_dev_optional.txt +++ b/requirements_dev_optional.txt @@ -3,7 +3,7 @@ lmdb==1.4.0; sys_platform != 'win32' # optional library requirements for Jupyter ipytree==0.2.2 -ipywidgets==8.0.3 +ipywidgets==8.0.4 # optional library requirements for services # don't let pyup change pinning for azure-storage-blob, need to pin to older # version to get compatibility with azure storage emulator on appveyor (FIXME) From 1fd607a9fc439545d5ed43305c49a1b42b1c3d37 Mon Sep 17 00:00:00 2001 From: James Bourbeau Date: Mon, 16 Jan 2023 09:53:12 -0600 Subject: [PATCH 036/213] Ensure `zarr.create` uses writeable mode (#1309) * Ensure zarr.create uses writeable mode * Update release.rst Added release notes for [#1309](https://github.com/zarr-developers/zarr-python/pull/1309) * Switch to bug fix Co-authored-by: Josh Moore Co-authored-by: Sanket Verma --- docs/release.rst | 9 +++++++++ zarr/creation.py | 2 +- zarr/tests/test_creation.py | 14 +++++++++++++- 3 files changed, 23 insertions(+), 2 deletions(-) diff --git a/docs/release.rst b/docs/release.rst index 7ffd751696..d6692a01d9 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -16,6 +16,15 @@ Release notes .. _release_2.13.4: +2.13.5 +------ + +Bug fixes +~~~~~~~~~ + +* Ensure ``zarr.create`` uses writeable mode to fix issue with :issue:`1304`. + By :user:`James Bourbeau ` :issue:`1309`. + 2.13.4 ------ diff --git a/zarr/creation.py b/zarr/creation.py index 00d2c40030..cc191e3734 100644 --- a/zarr/creation.py +++ b/zarr/creation.py @@ -145,7 +145,7 @@ def create(shape, chunks=True, dtype=None, compressor='default', zarr_version = getattr(chunk_store, '_store_version', DEFAULT_ZARR_VERSION) # handle polymorphic store arg - store = normalize_store_arg(store, zarr_version=zarr_version) + store = normalize_store_arg(store, zarr_version=zarr_version, mode="w") zarr_version = getattr(store, '_store_version', DEFAULT_ZARR_VERSION) # API compatibility with h5py diff --git a/zarr/tests/test_creation.py b/zarr/tests/test_creation.py index 0f12fc5613..4c9c292734 100644 --- a/zarr/tests/test_creation.py +++ b/zarr/tests/test_creation.py @@ -19,7 +19,7 @@ from zarr._storage.store import v3_api_available from zarr._storage.v3 import DirectoryStoreV3, KVStoreV3 from zarr.sync import ThreadSynchronizer -from zarr.tests.util import mktemp +from zarr.tests.util import mktemp, have_fsspec _VERSIONS = ((None, 2, 3) if v3_api_available else (None, 2)) _VERSIONS2 = ((2, 3) if v3_api_available else (2, )) @@ -429,6 +429,18 @@ def test_create_in_dict(zarr_version, at_root): assert isinstance(a.store, expected_store_type) +@pytest.mark.skipif(have_fsspec is False, reason="needs fsspec") +@pytest.mark.parametrize('zarr_version', _VERSIONS) +@pytest.mark.parametrize('at_root', [False, True]) +def test_create_writeable_mode(zarr_version, at_root, tmp_path): + # Regression test for https://github.com/zarr-developers/zarr-python/issues/1306 + import fsspec + kwargs = _init_creation_kwargs(zarr_version, at_root) + store = fsspec.get_mapper(str(tmp_path)) + z = create(100, store=store, **kwargs) + assert z.store.map == store + + @pytest.mark.parametrize('zarr_version', _VERSIONS) @pytest.mark.parametrize('at_root', [False, True]) 
def test_empty_like(zarr_version, at_root): From df6e07193f6794b25c0f71d5dc631e7caf449321 Mon Sep 17 00:00:00 2001 From: Josh Moore Date: Mon, 16 Jan 2023 17:13:05 +0100 Subject: [PATCH 037/213] Bump pypi action (#1320) 2.13.4 and 2.13.5 were not released due to the following error: ``` Error: Unable to resolve action `pypa/gh-action-pypi-publish@v1`, unable to find version `v1` ``` --- .github/workflows/releases.yml | 2 +- docs/release.rst | 15 ++++++++++++++- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/.github/workflows/releases.yml b/.github/workflows/releases.yml index 1bcf79ef5f..d1479d43e1 100644 --- a/.github/workflows/releases.yml +++ b/.github/workflows/releases.yml @@ -64,7 +64,7 @@ jobs: with: name: releases path: dist - - uses: pypa/gh-action-pypi-publish@v1 + - uses: pypa/gh-action-pypi-publish@v1.6.4 with: user: __token__ password: ${{ secrets.pypi_password }} diff --git a/docs/release.rst b/docs/release.rst index d6692a01d9..817bdc4f37 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -14,7 +14,18 @@ Release notes # .. warning:: # Pre-release! Use :command:`pip install --pre zarr` to evaluate this release. -.. _release_2.13.4: +.. _release_2.13.6: + +2.13.6 +------ + +Maintenance +~~~~~~~~~~~ + +* Bump gh-action-pypi-publish to 1.6.4. + By :user:`Josh Moore ` :issue:`1320`. + +.. _release_2.13.5: 2.13.5 ------ @@ -25,6 +36,8 @@ Bug fixes * Ensure ``zarr.create`` uses writeable mode to fix issue with :issue:`1304`. By :user:`James Bourbeau ` :issue:`1309`. +.. _release_2.13.4: + 2.13.4 ------ From 385b5d3635618e086eb4752f81c652379751a5ad Mon Sep 17 00:00:00 2001 From: Jonathan Striebel Date: Mon, 16 Jan 2023 17:37:33 +0100 Subject: [PATCH 038/213] add storage_transformers and get/set_partial_values (#1096) * add storage_transformers and get/set_partial_values * formatting * add docs and release notes * add test_core testcase * Update zarr/creation.py Co-authored-by: Gregory Lee * apply PR feedback * add comment that storage_transformers=None is the same as storage_transformers=[] * use empty tuple as default for storage_transformers * make mypy happy * better coverage, minor fix, adding rmdir * add missing rmdir to test * increase coverage * improve test coverage * fix TestArrayWithStorageTransformersV3 * Update zarr/creation.py Co-authored-by: Gregory Lee * pick generic storage transformer changes from #1111 * increase coverage * fix order of storage transformers * retrigger CI * minor fixes * make flake8 happy * apply PR feedback Co-authored-by: Gregory Lee Co-authored-by: Josh Moore --- docs/release.rst | 17 ++- zarr/_storage/store.py | 225 +++++++++++++++++++++++++++++++++- zarr/core.py | 26 +++- zarr/creation.py | 12 +- zarr/meta.py | 48 +++++++- zarr/storage.py | 9 +- zarr/tests/test_core.py | 34 ++++- zarr/tests/test_creation.py | 15 +++ zarr/tests/test_storage_v3.py | 125 ++++++++++++++++++- 9 files changed, 493 insertions(+), 18 deletions(-) diff --git a/docs/release.rst b/docs/release.rst index 817bdc4f37..f633aea7cc 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -6,14 +6,20 @@ Release notes # to document your changes. On releases it will be # re-indented so that it does not show up in the notes. - .. _unreleased: +.. _unreleased: - Unreleased - ---------- +Unreleased +---------- .. # .. warning:: # Pre-release! Use :command:`pip install --pre zarr` to evaluate this release. +* Improve Zarr V3 support, adding partial store read/write and storage transformers. 
+ Add two features of the [v3 spec](https://zarr-specs.readthedocs.io/en/latest/core/v3.0.html): + * storage transformers + * `get_partial_values` and `set_partial_values` + By :user:`Jonathan Striebel `; :issue:`1096`. + .. _release_2.13.6: 2.13.6 @@ -44,7 +50,10 @@ Bug fixes Appreciation ~~~~~~~~~~~~~ -Special thanks to Outreachy participants for contributing to most of the maintenance PRs. Please read the blog post summarising the contribution phase and welcoming new Outreachy interns: https://zarr.dev/blog/welcoming-outreachy-2022-interns/ +Special thanks to Outreachy participants for contributing to most of the +maintenance PRs. Please read the blog post summarising the contribution phase +and welcoming new Outreachy interns: +https://zarr.dev/blog/welcoming-outreachy-2022-interns/ Enhancements diff --git a/zarr/_storage/store.py b/zarr/_storage/store.py index 9e265cf383..4d813b8e05 100644 --- a/zarr/_storage/store.py +++ b/zarr/_storage/store.py @@ -1,8 +1,10 @@ import abc import os +from collections import defaultdict from collections.abc import MutableMapping +from copy import copy from string import ascii_letters, digits -from typing import Any, List, Mapping, Optional, Union +from typing import Any, Dict, List, Mapping, Optional, Sequence, Tuple, Union from zarr.meta import Metadata2, Metadata3 from zarr.util import normalize_storage_path @@ -254,6 +256,82 @@ def __setitem__(self, key, value): def __getitem__(self, key): """Get a value.""" + @abc.abstractmethod + def rmdir(self, path=None): + """Remove a data path and all its subkeys and related metadata. + Expects a path without the data or meta root prefix.""" + + @property + def supports_efficient_get_partial_values(self): + return False + + def get_partial_values( + self, + key_ranges: Sequence[Tuple[str, Tuple[int, Optional[int]]]] + ) -> List[Union[bytes, memoryview, bytearray]]: + """Get multiple partial values. + key_ranges can be an iterable of key, range pairs, + where a range specifies two integers range_start and range_length + as a tuple, (range_start, range_length). + range_length may be None to indicate to read until the end. + range_start may be negative to start reading range_start bytes + from the end of the file. + A key may occur multiple times with different ranges. + Inserts None for missing keys into the returned list.""" + results: List[Union[bytes, memoryview, bytearray]] = ( + [None] * len(key_ranges) # type: ignore[list-item] + ) + indexed_ranges_by_key: Dict[str, List[Tuple[int, Tuple[int, Optional[int]]]]] = ( + defaultdict(list) + ) + for i, (key, range_) in enumerate(key_ranges): + indexed_ranges_by_key[key].append((i, range_)) + for key, indexed_ranges in indexed_ranges_by_key.items(): + try: + value = self[key] + except KeyError: # pragma: no cover + continue + for i, (range_from, range_length) in indexed_ranges: + if range_length is None: + results[i] = value[range_from:] + else: + results[i] = value[range_from:range_from + range_length] + return results + + def supports_efficient_set_partial_values(self): + return False + + def set_partial_values(self, key_start_values): + """Set multiple partial values. + key_start_values can be an iterable of key, start and value triplets + as tuples, (key, start, value), where start defines the offset in bytes. + A key may occur multiple times with different starts and non-overlapping values. + Also, start may only be beyond the current value if other values fill the gap. 
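+        For illustration (hypothetical key and values, a sketch of the
+        semantics described above): if ``store['k'] == b'abcdef'``, then
+        ``store.set_partial_values([('k', 2, b'XY')])`` results in
+        ``store['k'] == b'abXYef'``.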
+ start may be negative to start writing start bytes from the current + end of the file, ending the file with the new value.""" + unique_keys = set(next(zip(*key_start_values))) + values = {} + for key in unique_keys: + old_value = self.get(key) + values[key] = None if old_value is None else bytearray(old_value) + for key, start, value in key_start_values: + if values[key] is None: + assert start == 0 + values[key] = value + else: + if start > len(values[key]): # pragma: no cover + raise ValueError( + f"Cannot set value at start {start}, " + + f"since it is beyond the data at key {key}, " + + f"having length {len(values[key])}." + ) + if start < 0: + values[key][start:] = value + else: + values[key][start:start + len(value)] = value + for key, value in values.items(): + self[key] = value + def clear(self): """Remove all items from store.""" self.erase_prefix("/") @@ -303,6 +381,151 @@ def _ensure_store(store): ) +class StorageTransformer(MutableMapping, abc.ABC): + """Base class for storage transformers. The methods simply pass on the data as-is + and should be overwritten by sub-classes.""" + + _store_version = 3 + _metadata_class = Metadata3 + + def __init__(self, _type) -> None: + if _type not in self.valid_types: # pragma: no cover + raise ValueError( + f"Storage transformer cannot be initialized with type {_type}, " + + f"must be one of {list(self.valid_types)}." + ) + self.type = _type + self._inner_store = None + + def _copy_for_array(self, array, inner_store): + transformer_copy = copy(self) + transformer_copy._inner_store = inner_store + return transformer_copy + + @abc.abstractproperty + def extension_uri(self): + pass # pragma: no cover + + @abc.abstractproperty + def valid_types(self): + pass # pragma: no cover + + def get_config(self): + """Return a dictionary holding configuration parameters for this + storage transformer. All values must be compatible with JSON encoding.""" + # Override in sub-class if need special encoding of config values. + # By default, assume all non-private members are configuration + # parameters except for type . + return { + k: v for k, v in self.__dict__.items() + if not k.startswith('_') and k != "type" + } + + @classmethod + def from_config(cls, _type, config): + """Instantiate storage transformer from a configuration object.""" + # override in sub-class if need special decoding of config values + + # by default, assume constructor accepts configuration parameters as + # keyword arguments without any special decoding + return cls(_type, **config) + + @property + def inner_store(self) -> Union["StorageTransformer", StoreV3]: + assert self._inner_store is not None, ( + "inner_store is not initialized, first get a copy via _copy_for_array." 
+ ) + return self._inner_store + + # The following implementations are usually fine to keep as-is: + + def __eq__(self, other): + return ( + type(self) == type(other) and + self._inner_store == other._inner_store and + self.get_config() == other.get_config() + ) + + def erase(self, key): + self.__delitem__(key) + + def list(self): + return list(self.keys()) + + def list_dir(self, prefix): + return StoreV3.list_dir(self, prefix) + + def is_readable(self): + return self.inner_store.is_readable() + + def is_writeable(self): + return self.inner_store.is_writeable() + + def is_listable(self): + return self.inner_store.is_listable() + + def is_erasable(self): + return self.inner_store.is_erasable() + + def clear(self): + return self.inner_store.clear() + + def __enter__(self): + return self.inner_store.__enter__() + + def __exit__(self, exc_type, exc_value, traceback): + return self.inner_store.__exit__(exc_type, exc_value, traceback) + + def close(self) -> None: + return self.inner_store.close() + + # The following implementations might need to be re-implemented + # by subclasses implementing storage transformers: + + def rename(self, src_path: str, dst_path: str) -> None: + return self.inner_store.rename(src_path, dst_path) + + def list_prefix(self, prefix): + return self.inner_store.list_prefix(prefix) + + def erase_prefix(self, prefix): + return self.inner_store.erase_prefix(prefix) + + def rmdir(self, path=None): + return self.inner_store.rmdir(path) + + def __contains__(self, key): + return self.inner_store.__contains__(key) + + def __setitem__(self, key, value): + return self.inner_store.__setitem__(key, value) + + def __getitem__(self, key): + return self.inner_store.__getitem__(key) + + def __delitem__(self, key): + return self.inner_store.__delitem__(key) + + def __iter__(self): + return self.inner_store.__iter__() + + def __len__(self): + return self.inner_store.__len__() + + @property + def supports_efficient_get_partial_values(self): + return self.inner_store.supports_efficient_get_partial_values + + def get_partial_values(self, key_ranges): + return self.inner_store.get_partial_values(key_ranges) + + def supports_efficient_set_partial_values(self): + return self.inner_store.supports_efficient_set_partial_values() + + def set_partial_values(self, key_start_values): + return self.inner_store.set_partial_values(key_start_values) + + # allow MutableMapping for backwards compatibility StoreLike = Union[BaseStore, MutableMapping] diff --git a/zarr/core.py b/zarr/core.py index e5b2045160..5d37570831 100644 --- a/zarr/core.py +++ b/zarr/core.py @@ -189,6 +189,7 @@ def __init__( self._store = store self._chunk_store = chunk_store + self._transformed_chunk_store = None self._path = normalize_storage_path(path) if self._path: self._key_prefix = self._path + '/' @@ -292,6 +293,16 @@ def _load_metadata_nosync(self): filters = [get_codec(config) for config in filters] self._filters = filters + if self._version == 3: + storage_transformers = meta.get('storage_transformers', []) + if storage_transformers: + transformed_store = self._chunk_store or self._store + for storage_transformer in storage_transformers[::-1]: + transformed_store = storage_transformer._copy_for_array( + self, transformed_store + ) + self._transformed_chunk_store = transformed_store + def _refresh_metadata(self): if not self._cache_metadata: self._load_metadata() @@ -371,10 +382,12 @@ def read_only(self, value): @property def chunk_store(self): """A MutableMapping providing the underlying storage for array chunks.""" - if 
self._chunk_store is None: - return self._store - else: + if self._transformed_chunk_store is not None: + return self._transformed_chunk_store + elif self._chunk_store is not None: return self._chunk_store + else: + return self._store @property def shape(self): @@ -1800,7 +1813,7 @@ def _set_selection(self, indexer, value, fields=None): check_array_shape('value', value, sel_shape) # iterate over chunks in range - if not hasattr(self.store, "setitems") or self._synchronizer is not None \ + if not hasattr(self.chunk_store, "setitems") or self._synchronizer is not None \ or any(map(lambda x: x == 0, self.shape)): # iterative approach for chunk_coords, chunk_selection, out_selection in indexer: @@ -2229,7 +2242,10 @@ def _encode_chunk(self, chunk): cdata = chunk # ensure in-memory data is immutable and easy to compare - if isinstance(self.chunk_store, KVStore): + if ( + isinstance(self.chunk_store, KVStore) + or isinstance(self._chunk_store, KVStore) + ): cdata = ensure_bytes(cdata) return cdata diff --git a/zarr/creation.py b/zarr/creation.py index cc191e3734..a6fa8e44cc 100644 --- a/zarr/creation.py +++ b/zarr/creation.py @@ -22,7 +22,7 @@ def create(shape, chunks=True, dtype=None, compressor='default', overwrite=False, path=None, chunk_store=None, filters=None, cache_metadata=True, cache_attrs=True, read_only=False, object_codec=None, dimension_separator=None, write_empty_chunks=True, - *, zarr_version=None, meta_array=None, **kwargs): + *, zarr_version=None, meta_array=None, storage_transformers=(), **kwargs): """Create an array. Parameters @@ -85,6 +85,14 @@ def create(shape, chunks=True, dtype=None, compressor='default', .. versionadded:: 2.11 + storage_transformers : sequence of StorageTransformers, optional + Setting storage transformers, changes the storage structure and behaviour + of data coming from the underlying store. The transformers are applied in the + order of the given sequence. Supplying an empty sequence is the same as omitting + the argument or setting it to None. May only be set when using zarr_version 3. + + .. versionadded:: 2.13 + zarr_version : {None, 2, 3}, optional The zarr protocol version of the created array. 
If None, it will be inferred from ``store`` or ``chunk_store`` if they are provided, @@ -170,7 +178,7 @@ def create(shape, chunks=True, dtype=None, compressor='default', init_array(store, shape=shape, chunks=chunks, dtype=dtype, compressor=compressor, fill_value=fill_value, order=order, overwrite=overwrite, path=path, chunk_store=chunk_store, filters=filters, object_codec=object_codec, - dimension_separator=dimension_separator) + dimension_separator=dimension_separator, storage_transformers=storage_transformers) # instantiate array z = Array(store, path=path, chunk_store=chunk_store, synchronizer=synchronizer, diff --git a/zarr/meta.py b/zarr/meta.py index 77c55b9871..41a90101b5 100644 --- a/zarr/meta.py +++ b/zarr/meta.py @@ -9,7 +9,11 @@ from zarr.errors import MetadataError from zarr.util import json_dumps, json_loads -from typing import cast, Union, Any, List, Mapping as MappingType, Optional +from typing import cast, Union, Any, List, Mapping as MappingType, Optional, TYPE_CHECKING + +if TYPE_CHECKING: # pragma: no cover + from zarr._storage.store import StorageTransformer + ZARR_FORMAT = 2 ZARR_FORMAT_v3 = 3 @@ -459,6 +463,36 @@ def _decode_codec_metadata(cls, meta: Optional[Mapping]) -> Optional[Codec]: return codec + @classmethod + def _encode_storage_transformer_metadata( + cls, + storage_transformer: "StorageTransformer" + ) -> Optional[Mapping]: + return { + "extension": storage_transformer.extension_uri, + "type": storage_transformer.type, + "configuration": storage_transformer.get_config(), + } + + @classmethod + def _decode_storage_transformer_metadata(cls, meta: Mapping) -> "StorageTransformer": + from zarr.tests.test_storage_v3 import DummyStorageTransfomer + + # This might be changed to a proper registry in the future + KNOWN_STORAGE_TRANSFORMERS = [DummyStorageTransfomer] + + conf = meta.get('configuration', {}) + extension_uri = meta['extension'] + transformer_type = meta['type'] + + for StorageTransformerCls in KNOWN_STORAGE_TRANSFORMERS: + if StorageTransformerCls.extension_uri == extension_uri: + break + else: # pragma: no cover + raise NotImplementedError + + return StorageTransformerCls.from_config(transformer_type, conf) + @classmethod def decode_array_metadata(cls, s: Union[MappingType, str]) -> MappingType[str, Any]: meta = cls.parse_metadata(s) @@ -476,6 +510,10 @@ def decode_array_metadata(cls, s: Union[MappingType, str]) -> MappingType[str, A # TODO: remove dimension_separator? 
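            # For illustration (hypothetical values, mirroring the dummy
            # transformer used in the tests): one entry of "storage_transformers",
            # as produced by _encode_storage_transformer_metadata above, looks like
            #
            #     {"extension": "https://purl.org/zarr/spec/storage_transformers/dummy/1.0",
            #      "type": "dummy_type",
            #      "configuration": {"test_value": "test1234"}}
            #
            # _decode_storage_transformer_metadata matches "extension" against the
            # extension_uri of each known transformer class to rebuild the object.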
compressor = cls._decode_codec_metadata(meta.get("compressor", None)) + storage_transformers = meta.get("storage_transformers", ()) + storage_transformers = [ + cls._decode_storage_transformer_metadata(i) for i in storage_transformers + ] extensions = meta.get("extensions", []) meta = dict( shape=tuple(meta["shape"]), @@ -493,6 +531,8 @@ def decode_array_metadata(cls, s: Union[MappingType, str]) -> MappingType[str, A # compressor field should be absent when there is no compression if compressor: meta['compressor'] = compressor + if storage_transformers: + meta['storage_transformers'] = storage_transformers except Exception as e: raise MetadataError("error decoding metadata: %s" % e) @@ -514,6 +554,10 @@ def encode_array_metadata(cls, meta: MappingType[str, Any]) -> bytes: object_codec = None compressor = cls._encode_codec_metadata(meta.get("compressor", None)) + storage_transformers = meta.get("storage_transformers", ()) + storage_transformers = [ + cls._encode_storage_transformer_metadata(i) for i in storage_transformers + ] extensions = meta.get("extensions", []) meta = dict( shape=meta["shape"] + sdshape, @@ -532,6 +576,8 @@ def encode_array_metadata(cls, meta: MappingType[str, Any]) -> bytes: meta["compressor"] = compressor if dimension_separator: meta["dimension_separator"] = dimension_separator + if storage_transformers: + meta["storage_transformers"] = storage_transformers return json_dumps(meta) diff --git a/zarr/storage.py b/zarr/storage.py index a2a8919d0b..db51cca947 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -311,6 +311,7 @@ def init_array( filters=None, object_codec=None, dimension_separator=None, + storage_transformers=(), ): """Initialize an array store with the given configuration. Note that this is a low-level function and there should be no need to call this directly from user code. @@ -438,7 +439,8 @@ def init_array( order=order, overwrite=overwrite, path=path, chunk_store=chunk_store, filters=filters, object_codec=object_codec, - dimension_separator=dimension_separator) + dimension_separator=dimension_separator, + storage_transformers=storage_transformers) def _init_array_metadata( @@ -455,6 +457,7 @@ def _init_array_metadata( filters=None, object_codec=None, dimension_separator=None, + storage_transformers=(), ): store_version = getattr(store, '_store_version', 2) @@ -576,6 +579,7 @@ def _init_array_metadata( if store_version < 3: meta.update(dict(chunks=chunks, dtype=dtype, order=order, filters=filters_config)) + assert not storage_transformers else: if dimension_separator is None: dimension_separator = "/" @@ -589,7 +593,8 @@ def _init_array_metadata( separator=dimension_separator), chunk_memory_layout=order, data_type=dtype, - attributes=attributes) + attributes=attributes, + storage_transformers=storage_transformers) ) key = _prefix_to_array_key(store, _path_to_prefix(path)) diff --git a/zarr/tests/test_core.py b/zarr/tests/test_core.py index e32026e662..ffacefb937 100644 --- a/zarr/tests/test_core.py +++ b/zarr/tests/test_core.py @@ -49,9 +49,11 @@ KVStoreV3, LMDBStoreV3, LRUStoreCacheV3, + RmdirV3, SQLiteStoreV3, StoreV3, ) +from zarr.tests.test_storage_v3 import DummyStorageTransfomer from zarr.util import buffer_size from zarr.tests.util import abs_container, skip_test_env_var, have_fsspec, mktemp @@ -3098,7 +3100,7 @@ def test_nbytes_stored(self): # Note: this custom mapping doesn't actually have all methods in the # v3 spec (e.g. erase), but they aren't needed here. 
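# An aside (illustrative sketch, not part of this patch): unlike the store
# wrapper below, a minimal concrete StorageTransformer only needs the two
# abstract properties, since the base class forwards everything else to
# inner_store. The class name and extension URI here are hypothetical:
#
#     from zarr._storage.store import StorageTransformer
#
#     class NoopTransformer(StorageTransformer):
#         extension_uri = "https://example.com/zarr/noop/1.0"
#         valid_types = ["noop"]
#
# It would be attached at creation time, e.g.
# init_array(store, shape=1000, chunks=100,
#            storage_transformers=[NoopTransformer("noop")]).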
-class CustomMappingV3(StoreV3): +class CustomMappingV3(RmdirV3, StoreV3): def __init__(self): self.inner = KVStoreV3(dict()) @@ -3359,6 +3361,36 @@ def expected(self): ] +@pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") +class TestArrayWithStorageTransformersV3(TestArrayWithChunkStoreV3): + + @staticmethod + def create_array(array_path='arr1', read_only=False, **kwargs): + store = KVStoreV3(dict()) + # separate chunk store + chunk_store = KVStoreV3(dict()) + cache_metadata = kwargs.pop('cache_metadata', True) + cache_attrs = kwargs.pop('cache_attrs', True) + write_empty_chunks = kwargs.pop('write_empty_chunks', True) + dummy_storage_transformer = DummyStorageTransfomer( + "dummy_type", test_value=DummyStorageTransfomer.TEST_CONSTANT + ) + init_array(store, path=array_path, chunk_store=chunk_store, + storage_transformers=[dummy_storage_transformer], **kwargs) + return Array(store, path=array_path, read_only=read_only, + chunk_store=chunk_store, cache_metadata=cache_metadata, + cache_attrs=cache_attrs, write_empty_chunks=write_empty_chunks) + + def expected(self): + return [ + "3fb9a4f8233b09ad02067b6b7fc9fd5caa405c7d", + "89c8eb364beb84919fc9153d2c1ed2696274ec18", + "73307055c3aec095dd1232c38d793ef82a06bd97", + "6152c09255a5efa43b1a115546e35affa00c138c", + "2f8802fc391f67f713302e84fad4fd8f1366d6c2", + ] + + @pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") def test_array_mismatched_store_versions(): store_v3 = KVStoreV3(dict()) diff --git a/zarr/tests/test_creation.py b/zarr/tests/test_creation.py index 4c9c292734..b791bc3952 100644 --- a/zarr/tests/test_creation.py +++ b/zarr/tests/test_creation.py @@ -19,8 +19,10 @@ from zarr._storage.store import v3_api_available from zarr._storage.v3 import DirectoryStoreV3, KVStoreV3 from zarr.sync import ThreadSynchronizer +from zarr.tests.test_storage_v3 import DummyStorageTransfomer from zarr.tests.util import mktemp, have_fsspec + _VERSIONS = ((None, 2, 3) if v3_api_available else (None, 2)) _VERSIONS2 = ((2, 3) if v3_api_available else (2, )) @@ -747,3 +749,16 @@ def test_create_read_only(zarr_version, at_root): def test_json_dumps_chunks_numpy_dtype(): z = zeros((10,), chunks=(np.int64(2),)) assert np.all(z[...] 
== 0) + + +@pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") +@pytest.mark.parametrize('at_root', [False, True]) +def test_create_with_storage_transformers(at_root): + kwargs = _init_creation_kwargs(zarr_version=3, at_root=at_root) + transformer = DummyStorageTransfomer( + "dummy_type", + test_value=DummyStorageTransfomer.TEST_CONSTANT + ) + z = create(1000000000, chunks=True, storage_transformers=[transformer], **kwargs) + assert isinstance(z.chunk_store, DummyStorageTransfomer) + assert z.chunk_store.test_value == DummyStorageTransfomer.TEST_CONSTANT diff --git a/zarr/tests/test_storage_v3.py b/zarr/tests/test_storage_v3.py index 4f6215135c..9f18c89361 100644 --- a/zarr/tests/test_storage_v3.py +++ b/zarr/tests/test_storage_v3.py @@ -1,6 +1,7 @@ import array import atexit import copy +import inspect import os import tempfile @@ -8,7 +9,7 @@ import pytest import zarr -from zarr._storage.store import _get_hierarchy_metadata, v3_api_available +from zarr._storage.store import _get_hierarchy_metadata, v3_api_available, StorageTransformer from zarr.meta import _default_entry_point_metadata_v3 from zarr.storage import (atexit_rmglob, atexit_rmtree, data_root, default_compressor, getsize, init_array, meta_root, @@ -88,6 +89,18 @@ def keys(self): """keys""" +class DummyStorageTransfomer(StorageTransformer): + TEST_CONSTANT = "test1234" + + extension_uri = "https://purl.org/zarr/spec/storage_transformers/dummy/1.0" + valid_types = ["dummy_type"] + + def __init__(self, _type, test_value) -> None: + super().__init__(_type) + assert test_value == self.TEST_CONSTANT + self.test_value = test_value + + def test_ensure_store_v3(): class InvalidStore: pass @@ -190,8 +203,11 @@ def test_init_array(self, dimension_separator_fixture_v3): store = self.create_store() path = 'arr1' + transformer = DummyStorageTransfomer( + "dummy_type", test_value=DummyStorageTransfomer.TEST_CONSTANT + ) init_array(store, path=path, shape=1000, chunks=100, - dimension_separator=pass_dim_sep) + dimension_separator=pass_dim_sep, storage_transformers=[transformer]) # check metadata mkey = meta_root + path + '.array.json' @@ -204,6 +220,9 @@ def test_init_array(self, dimension_separator_fixture_v3): assert meta['fill_value'] is None # Missing MUST be assumed to be "/" assert meta['chunk_grid']['separator'] is want_dim_sep + assert len(meta["storage_transformers"]) == 1 + assert isinstance(meta["storage_transformers"][0], DummyStorageTransfomer) + assert meta["storage_transformers"][0].test_value == DummyStorageTransfomer.TEST_CONSTANT store.close() def test_list_prefix(self): @@ -235,6 +254,67 @@ def test_rename_nonexisting(self): with pytest.raises(NotImplementedError): store.rename('a', 'b') + def test_get_partial_values(self): + store = self.create_store() + store.supports_efficient_get_partial_values in [True, False] + store[data_root + 'foo'] = b'abcdefg' + store[data_root + 'baz'] = b'z' + assert [b'a'] == store.get_partial_values( + [ + (data_root + 'foo', (0, 1)) + ] + ) + assert [ + b'd', b'b', b'z', b'abc', b'defg', b'defg', b'g', b'ef' + ] == store.get_partial_values( + [ + (data_root + 'foo', (3, 1)), + (data_root + 'foo', (1, 1)), + (data_root + 'baz', (0, 1)), + (data_root + 'foo', (0, 3)), + (data_root + 'foo', (3, 4)), + (data_root + 'foo', (3, None)), + (data_root + 'foo', (-1, None)), + (data_root + 'foo', (-3, 2)), + ] + ) + + def test_set_partial_values(self): + store = self.create_store() + store.supports_efficient_set_partial_values() + store[data_root + 'foo'] = b'abcdefg' + 
store.set_partial_values( + [ + (data_root + 'foo', 0, b'hey') + ] + ) + assert store[data_root + 'foo'] == b'heydefg' + + store.set_partial_values( + [ + (data_root + 'baz', 0, b'z') + ] + ) + assert store[data_root + 'baz'] == b'z' + store.set_partial_values( + [ + (data_root + 'foo', 1, b'oo'), + (data_root + 'baz', 1, b'zzz'), + (data_root + 'baz', 4, b'aaaa'), + (data_root + 'foo', 6, b'done'), + ] + ) + assert store[data_root + 'foo'] == b'hoodefdone' + assert store[data_root + 'baz'] == b'zzzzaaaa' + store.set_partial_values( + [ + (data_root + 'foo', -2, b'NE'), + (data_root + 'baz', -5, b'q'), + ] + ) + assert store[data_root + 'foo'] == b'hoodefdoNE' + assert store[data_root + 'baz'] == b'zzzq' + class TestMappingStoreV3(StoreV3Tests): @@ -443,6 +523,31 @@ def create_store(self, **kwargs): return store +class TestStorageTransformerV3(TestMappingStoreV3): + + def create_store(self, **kwargs): + inner_store = super().create_store(**kwargs) + storage_transformer = DummyStorageTransfomer( + "dummy_type", test_value=DummyStorageTransfomer.TEST_CONSTANT + ) + return storage_transformer._copy_for_array(None, inner_store) + + def test_method_forwarding(self): + store = self.create_store() + assert store.list() == store.inner_store.list() + assert store.list_dir(data_root) == store.inner_store.list_dir(data_root) + + assert store.is_readable() + assert store.is_writeable() + assert store.is_listable() + store.inner_store._readable = False + store.inner_store._writeable = False + store.inner_store._listable = False + assert not store.is_readable() + assert not store.is_writeable() + assert not store.is_listable() + + class TestLRUStoreCacheV3(_TestLRUStoreCache, StoreV3Tests): CountingClass = CountingDictV3 @@ -535,3 +640,19 @@ def test_top_level_imports(): assert hasattr(zarr, store_name) # pragma: no cover else: assert not hasattr(zarr, store_name) # pragma: no cover + + +def _get_public_and_dunder_methods(some_class): + return set( + name for name, _ in inspect.getmembers(some_class, predicate=inspect.isfunction) + if not name.startswith("_") or name.startswith("__") + ) + + +def test_storage_transformer_interface(): + store_v3_methods = _get_public_and_dunder_methods(StoreV3) + store_v3_methods.discard("__init__") + storage_transformer_methods = _get_public_and_dunder_methods(StorageTransformer) + storage_transformer_methods.discard("__init__") + storage_transformer_methods.discard("get_config") + assert storage_transformer_methods == store_v3_methods From b9e9f5aaa44ca564835c3c242937236ea04aecb2 Mon Sep 17 00:00:00 2001 From: "Mads R. B. 
Kristensen" Date: Thu, 19 Jan 2023 19:55:53 +0100 Subject: [PATCH 039/213] FSStore: use `ensure_bytes()` (#1285) --- zarr/storage.py | 12 +++++++++--- zarr/tests/test_core.py | 25 +++++++++++++++++++++++++ zarr/util.py | 36 +++++++++++++++++++++++++++++++++--- 3 files changed, 67 insertions(+), 6 deletions(-) diff --git a/zarr/storage.py b/zarr/storage.py index db51cca947..5f7b991aef 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -55,8 +55,8 @@ from zarr.util import (buffer_size, json_loads, nolock, normalize_chunks, normalize_dimension_separator, normalize_dtype, normalize_fill_value, normalize_order, - normalize_shape, normalize_storage_path, retry_call - ) + normalize_shape, normalize_storage_path, retry_call, + ensure_contiguous_ndarray_or_bytes) from zarr._storage.absstore import ABSStore # noqa: F401 from zarr._storage.store import (_get_hierarchy_metadata, # noqa: F401 @@ -1395,13 +1395,19 @@ def __getitem__(self, key): def setitems(self, values): if self.mode == 'r': raise ReadOnlyError() - values = {self._normalize_key(key): val for key, val in values.items()} + + # Normalize keys and make sure the values are bytes + values = { + self._normalize_key(key): ensure_contiguous_ndarray_or_bytes(val) + for key, val in values.items() + } self.map.setitems(values) def __setitem__(self, key, value): if self.mode == 'r': raise ReadOnlyError() key = self._normalize_key(key) + value = ensure_contiguous_ndarray_or_bytes(value) path = self.dir_path(key) try: if self.fs.isdir(path): diff --git a/zarr/tests/test_core.py b/zarr/tests/test_core.py index ffacefb937..a9d674e2d9 100644 --- a/zarr/tests/test_core.py +++ b/zarr/tests/test_core.py @@ -17,6 +17,7 @@ from numpy.testing import assert_array_almost_equal, assert_array_equal from pkg_resources import parse_version +import zarr from zarr._storage.store import ( v3_api_available, ) @@ -3409,3 +3410,27 @@ def test_array_mismatched_store_versions(): Array(store_v3, path='dataset', read_only=False, chunk_store=chunk_store_v2) with pytest.raises(ValueError): Array(store_v2, path='dataset', read_only=False, chunk_store=chunk_store_v3) + + +@pytest.mark.skipif(have_fsspec is False, reason="needs fsspec") +def test_issue_1279(tmpdir): + """See """ + + data = np.arange(25).reshape((5, 5)) + ds = zarr.create( + shape=data.shape, + chunks=(5, 5), + dtype=data.dtype, + compressor=(None), + store=FSStore(url=str(tmpdir), mode="a"), + order="F", + ) + + ds[:] = data + + ds_reopened = zarr.open_array( + store=FSStore(url=str(tmpdir), mode="r") + ) + + written_data = ds_reopened[:] + assert_array_equal(data, written_data) diff --git a/zarr/util.py b/zarr/util.py index 9fcdac9df7..dfbb551651 100644 --- a/zarr/util.py +++ b/zarr/util.py @@ -5,17 +5,22 @@ from textwrap import TextWrapper import mmap import time +from typing import Any, Callable, Dict, Optional, Tuple, Union import numpy as np from asciitree import BoxStyle, LeftAligned from asciitree.traversal import Traversal from collections.abc import Iterable -from numcodecs.compat import ensure_text, ensure_ndarray_like +from numcodecs.compat import ( + ensure_text, + ensure_ndarray_like, + ensure_bytes, + ensure_contiguous_ndarray_like +) +from numcodecs.ndarray_like import NDArrayLike from numcodecs.registry import codec_registry from numcodecs.blosc import cbuffer_sizes, cbuffer_metainfo -from typing import Any, Callable, Dict, Optional, Tuple, Union - def flatten(arg: Iterable) -> Iterable: for element in arg: @@ -696,3 +701,28 @@ def all_equal(value: Any, array: Any): # using == raises warnings 
from numpy deprecated pattern, but
    # using np.equal() raises type errors for structured dtypes...
    return np.all(value == array)
+
+
+def ensure_contiguous_ndarray_or_bytes(buf) -> Union[NDArrayLike, bytes]:
+    """Convenience function to coerce `buf` to ndarray-like array or bytes.
+
+    First check if `buf` can be zero-copy converted to a contiguous array.
+    If not, `buf` will be copied to a newly allocated `bytes` object.
+
+    Parameters
+    ----------
+    buf : ndarray-like, array-like, or bytes-like
+        A numpy array like object such as numpy.ndarray, cupy.ndarray, or
+        any object exporting a buffer interface.
+
+    Returns
+    -------
+    arr : NDArrayLike or bytes
+        A ndarray-like or bytes object
+    """
+
+    try:
+        return ensure_contiguous_ndarray_like(buf)
+    except TypeError:
+        # An error is raised if `buf` couldn't be zero-copy converted
+        return ensure_bytes(buf)

From c45e8709f2f55d5635ff8587f0295e334d8872ee Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Fri, 20 Jan 2023 16:42:32 +0100
Subject: [PATCH 040/213] Bump fsspec from 2022.11.0 to 2023.1.0 (#1327)

* Bump fsspec from 2022.11.0 to 2023.1.0

Bumps [fsspec](https://github.com/fsspec/filesystem_spec) from 2022.11.0 to 2023.1.0.
- [Release notes](https://github.com/fsspec/filesystem_spec/releases)
- [Commits](https://github.com/fsspec/filesystem_spec/compare/2022.11.0...2023.1.0)

---
updated-dependencies:
- dependency-name: fsspec
  dependency-type: direct:development
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot]

* Bump s3fs as well

Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Josh Moore
---
 requirements_dev_optional.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/requirements_dev_optional.txt b/requirements_dev_optional.txt
index 5d7dc3398c..0c7cbf44ca 100644
--- a/requirements_dev_optional.txt
+++ b/requirements_dev_optional.txt
@@ -18,6 +18,6 @@ pytest-cov==4.0.0
 pytest-doctestplus==0.12.1
 pytest-timeout==2.1.0
 h5py==3.7.0
-fsspec==2022.11.0
-s3fs==2022.11.0
+fsspec==2023.1.0
+s3fs==2023.1.0
 moto[server]>=4.0.8

From 1793da01edb9890955a383fc84121334e3aa4cc0 Mon Sep 17 00:00:00 2001
From: jakirkham
Date: Fri, 20 Jan 2023 10:48:28 -0800
Subject: [PATCH 041/213] Add FSStore contiguous bug fix note (#1325)

---
 docs/release.rst | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/docs/release.rst b/docs/release.rst
index f633aea7cc..f82c0730dd 100644
--- a/docs/release.rst
+++ b/docs/release.rst
@@ -14,12 +14,21 @@ Unreleased
 # .. warning::
 #    Pre-release! Use :command:`pip install --pre zarr` to evaluate this release.
 
-* Improve Zarr V3 support, adding partial store read/write and storage transformers.
-  Add two features of the [v3 spec](https://zarr-specs.readthedocs.io/en/latest/core/v3.0.html):
-  * storage transformers
-  * `get_partial_values` and `set_partial_values`
+
+Major changes
+~~~~~~~~~~~~~
+
+* Improve `Zarr V3 support <https://zarr-specs.readthedocs.io/en/latest/core/v3.0.html>`_
+  adding partial store read/write and storage transformers.
   By :user:`Jonathan Striebel `; :issue:`1096`.
 
+
+Bug fixes
+~~~~~~~~~
+
+* Ensure contiguous data is given to ``FSStore``. Only copying if needed.
+  By :user:`Mads R. B. Kristensen ` :issue:`1285`.
+
 .. _release_2.13.6:

From 0bf0b3b3444dd57debc8a6b5eacb6eb5c082c668 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 24 Jan 2023 10:43:17 +0100
Subject: [PATCH 042/213] Bump h5py from 3.7.0 to 3.8.0 (#1330)

Bumps [h5py](https://github.com/h5py/h5py) from 3.7.0 to 3.8.0.
- [Release notes](https://github.com/h5py/h5py/releases)
- [Changelog](https://github.com/h5py/h5py/blob/master/docs/release_guide.rst)
- [Commits](https://github.com/h5py/h5py/compare/3.7.0...3.8.0)

---
updated-dependencies:
- dependency-name: h5py
  dependency-type: direct:development
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot]

Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 requirements_dev_optional.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements_dev_optional.txt b/requirements_dev_optional.txt
index 0c7cbf44ca..0cf6661d1e 100644
--- a/requirements_dev_optional.txt
+++ b/requirements_dev_optional.txt
@@ -17,7 +17,7 @@ coverage
 pytest-cov==4.0.0
 pytest-doctestplus==0.12.1
 pytest-timeout==2.1.0
-h5py==3.7.0
+h5py==3.8.0
 fsspec==2023.1.0
 s3fs==2023.1.0
 moto[server]>=4.0.8

From 6f11ae78b142e242d2ac8bc67019e7528539fe73 Mon Sep 17 00:00:00 2001
From: Brett Graham
Date: Thu, 26 Jan 2023 18:29:08 -0500
Subject: [PATCH 043/213] use store dimension separator in DirectoryStore.listdir (#1335)

* use store dimension separator in DirectoryStore.listdir

NestedDirectoryStore, which inherits from DirectoryStore, only supports '/'
as a dimension separator. However, listdir uses the parent
DirectoryStore.listdir, which produces keys with an incorrect separator '.'

Fixes #1334

* update release note
---
 docs/release.rst           | 2 ++
 zarr/storage.py            | 4 +++-
 zarr/tests/test_storage.py | 7 +++++++
 3 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/docs/release.rst b/docs/release.rst
index f82c0730dd..5ebd77c94f 100644
--- a/docs/release.rst
+++ b/docs/release.rst
@@ -28,6 +28,8 @@ Bug fixes
 * Ensure contiguous data is given to ``FSStore``. Only copying if needed.
   By :user:`Mads R. B. Kristensen ` :issue:`1285`.
 
+* NestedDirectoryStore.listdir now returns chunk keys with the correct '/' dimension_separator.
+  By :user:`Brett Graham ` :issue:`1334`.
 
 ..
_release_2.13.6: diff --git a/zarr/storage.py b/zarr/storage.py index 5f7b991aef..fae9530716 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -1204,7 +1204,9 @@ def _nested_listdir(self, path=None): for file_name in file_names: file_path = os.path.join(dir_path, file_name) rel_path = file_path.split(root_path + os.path.sep)[1] - new_children.append(rel_path.replace(os.path.sep, '.')) + new_children.append(rel_path.replace( + os.path.sep, + self._dimension_separator or '.')) else: new_children.append(entry) return sorted(new_children) diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index 7c23735f36..0b21dfbd88 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -1442,6 +1442,13 @@ def test_chunk_nesting(self): store[self.root + '42'] = b'zzz' assert b'zzz' == store[self.root + '42'] + def test_listdir(self): + store = self.create_store() + z = zarr.zeros((10, 10), chunks=(5, 5), store=store) + z[:] = 1 # write to all chunks + for k in store.listdir(): + assert store.get(k) is not None + class TestNestedDirectoryStoreNone: From c714d2b9d3d27086572090690e5119d76ac5e7fc Mon Sep 17 00:00:00 2001 From: Jonathan Striebel Date: Thu, 2 Feb 2023 15:12:45 +0100 Subject: [PATCH 044/213] Sharding storage transformer for v3 (#1111) * add storage_transformers and get/set_partial_values * formatting * add docs and release notes * add test_core testcase * Update zarr/creation.py Co-authored-by: Gregory Lee * apply PR feedback * add comment that storage_transformers=None is the same as storage_transformers=[] * use empty tuple as default for storage_transformers * make mypy happy * better coverage, minor fix, adding rmdir * add missing rmdir to test * increase coverage * improve test coverage * fix TestArrayWithStorageTransformersV3 * Update zarr/creation.py Co-authored-by: Gregory Lee * add sharding storage transformer * add actual transformer * fixe, and allow partial reads for uncompressed v3 arrays * pick generic storage transformer changes from #1111 * increase coverage * make lgtm happy * add release note * better coverage * fix hexdigest * improve tests * fix order of storage transformers * fix order of storage transformers * retrigger CI * minor test improvement * minor test update * apply PR feedback * minor fixes * make flake8 happy * call ensure_bytes in sharding transformer * minor fixes * apply PR feedback * adapt to supports_efficient_get_partial_values property * add ZARR_V3_SHARDING flag for sharding usage * fix release notes * fix release notes --------- Co-authored-by: Gregory Lee Co-authored-by: Josh Moore --- .github/workflows/minimal.yml | 2 + .github/workflows/python-package.yml | 1 + .github/workflows/windows-testing.yml | 1 + docs/release.rst | 11 +- zarr/_storage/v3.py | 29 ++ zarr/_storage/v3_storage_transformers.py | 383 +++++++++++++++++++++++ zarr/core.py | 57 +++- zarr/meta.py | 3 +- zarr/tests/test_core.py | 113 ++++++- zarr/tests/test_storage_v3.py | 28 +- zarr/util.py | 19 ++ 11 files changed, 623 insertions(+), 24 deletions(-) create mode 100644 zarr/_storage/v3_storage_transformers.py diff --git a/.github/workflows/minimal.yml b/.github/workflows/minimal.yml index 2cde38e081..4de5aca273 100644 --- a/.github/workflows/minimal.yml +++ b/.github/workflows/minimal.yml @@ -24,6 +24,7 @@ jobs: shell: "bash -l {0}" env: ZARR_V3_EXPERIMENTAL_API: 1 + ZARR_V3_SHARDING: 1 run: | conda activate minimal python -m pip install . 
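As the workflow changes around this hunk show, the sharding transformer is
gated behind ZARR_V3_SHARDING in addition to the existing
ZARR_V3_EXPERIMENTAL_API flag. A minimal sketch of opting in locally; both
variables are read at import time (see v3_storage_transformers.py further
down), so they must be set before zarr is first imported:

    import os

    os.environ["ZARR_V3_EXPERIMENTAL_API"] = "1"
    os.environ["ZARR_V3_SHARDING"] = "1"

    import zarr  # noqa: E402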
@@ -32,6 +33,7 @@ jobs: shell: "bash -l {0}" env: ZARR_V3_EXPERIMENTAL_API: 1 + ZARR_V3_SHARDING: 1 run: | conda activate minimal rm -rf fixture/ diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 872ce52343..cee2ca7aef 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -70,6 +70,7 @@ jobs: ZARR_TEST_MONGO: 1 ZARR_TEST_REDIS: 1 ZARR_V3_EXPERIMENTAL_API: 1 + ZARR_V3_SHARDING: 1 run: | conda activate zarr-env mkdir ~/blob_emulator diff --git a/.github/workflows/windows-testing.yml b/.github/workflows/windows-testing.yml index ea1d0f64c9..2f8922b447 100644 --- a/.github/workflows/windows-testing.yml +++ b/.github/workflows/windows-testing.yml @@ -52,6 +52,7 @@ jobs: env: ZARR_TEST_ABS: 1 ZARR_V3_EXPERIMENTAL_API: 1 + ZARR_V3_SHARDING: 1 - name: Conda info shell: bash -l {0} run: conda info diff --git a/docs/release.rst b/docs/release.rst index 5ebd77c94f..dcec2872fb 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -14,13 +14,16 @@ Unreleased # .. warning:: # Pre-release! Use :command:`pip install --pre zarr` to evaluate this release. - Major changes ~~~~~~~~~~~~~ -* Improve `Zarr V3 support `_ - adding partial store read/write and storage transformers. - By :user:`Jonathan Striebel `; :issue:`1096`. +* Improve Zarr V3 support, adding partial store read/write and storage transformers. + Add two features of the [v3 spec](https://zarr-specs.readthedocs.io/en/latest/core/v3.0.html): + * storage transformers + * `get_partial_values` and `set_partial_values` + * efficient `get_partial_values` implementation for `FSStoreV3` + * sharding storage transformer + By :user:`Jonathan Striebel `; :issue:`1096`, :issue:`1111`. Bug fixes diff --git a/zarr/_storage/v3.py b/zarr/_storage/v3.py index a0a1870ffc..5f8964fb5d 100644 --- a/zarr/_storage/v3.py +++ b/zarr/_storage/v3.py @@ -182,6 +182,35 @@ def rmdir(self, path=None): if self.fs.isdir(store_path): self.fs.rm(store_path, recursive=True) + @property + def supports_efficient_get_partial_values(self): + return True + + def get_partial_values(self, key_ranges): + """Get multiple partial values. + key_ranges can be an iterable of key, range pairs, + where a range specifies two integers range_start and range_length + as a tuple, (range_start, range_length). + range_length may be None to indicate to read until the end. + range_start may be negative to start reading range_start bytes + from the end of the file. + A key may occur multiple times with different ranges. 
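+        Each requested range is served by a single ranged read, roughly
+        ``fs.cat_file(path, start=range_start, end=range_start + range_length)``
+        (or ``end=None`` to read to the end of the file), which is why this
+        store reports ``supports_efficient_get_partial_values``.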
+        Inserts None for missing keys into the returned list."""
+        results = []
+        for key, (range_start, range_length) in key_ranges:
+            key = self._normalize_key(key)
+            path = self.dir_path(key)
+            try:
+                if range_start is None or range_length is None:
+                    end = None
+                else:
+                    end = range_start + range_length
+                result = self.fs.cat_file(path, start=range_start, end=end)
+            except self.map.missing_exceptions:
+                result = None
+            results.append(result)
+        return results
+

 class MemoryStoreV3(MemoryStore, StoreV3):

diff --git a/zarr/_storage/v3_storage_transformers.py b/zarr/_storage/v3_storage_transformers.py
new file mode 100644
index 0000000000..3675d42c38
--- /dev/null
+++ b/zarr/_storage/v3_storage_transformers.py
@@ -0,0 +1,383 @@
+import functools
+import itertools
+import os
+from typing import NamedTuple, Tuple, Optional, Union, Iterator
+
+from numcodecs.compat import ensure_bytes
+import numpy as np
+
+from zarr._storage.store import StorageTransformer, StoreV3, _rmdir_from_keys_v3
+from zarr.util import normalize_storage_path
+
+
+MAX_UINT_64 = 2 ** 64 - 1
+
+
+v3_sharding_available = os.environ.get('ZARR_V3_SHARDING', '0').lower() not in ['0', 'false']
+
+
+def assert_zarr_v3_sharding_available():
+    if not v3_sharding_available:
+        raise NotImplementedError(
+            "Using V3 sharding is experimental and not yet finalized! To enable support, set:\n"
+            "ZARR_V3_SHARDING=1"
+        )  # pragma: no cover
+
+
+class _ShardIndex(NamedTuple):
+    store: "ShardingStorageTransformer"
+    # dtype uint64, shape (chunks_per_shard_0, chunks_per_shard_1, ..., 2)
+    offsets_and_lengths: np.ndarray
+
+    def __localize_chunk__(self, chunk: Tuple[int, ...]) -> Tuple[int, ...]:
+        return tuple(
+            chunk_i % shard_i
+            for chunk_i, shard_i in zip(chunk, self.store.chunks_per_shard)
+        )
+
+    def is_all_empty(self) -> bool:
+        return np.array_equiv(self.offsets_and_lengths, MAX_UINT_64)
+
+    def get_chunk_slice(self, chunk: Tuple[int, ...]) -> Optional[slice]:
+        localized_chunk = self.__localize_chunk__(chunk)
+        chunk_start, chunk_len = self.offsets_and_lengths[localized_chunk]
+        if (chunk_start, chunk_len) == (MAX_UINT_64, MAX_UINT_64):
+            return None
+        else:
+            return slice(int(chunk_start), int(chunk_start + chunk_len))
+
+    def set_chunk_slice(
+        self, chunk: Tuple[int, ...], chunk_slice: Optional[slice]
+    ) -> None:
+        localized_chunk = self.__localize_chunk__(chunk)
+        if chunk_slice is None:
+            self.offsets_and_lengths[localized_chunk] = (MAX_UINT_64, MAX_UINT_64)
+        else:
+            self.offsets_and_lengths[localized_chunk] = (
+                chunk_slice.start,
+                chunk_slice.stop - chunk_slice.start,
+            )
+
+    def to_bytes(self) -> bytes:
+        return self.offsets_and_lengths.tobytes(order="C")
+
+    @classmethod
+    def from_bytes(
+        cls, buffer: Union[bytes, bytearray], store: "ShardingStorageTransformer"
+    ) -> "_ShardIndex":
+        try:
+            return cls(
+                store=store,
+                offsets_and_lengths=np.frombuffer(bytearray(buffer), dtype="<u8").reshape(
+                    *store.chunks_per_shard, 2, order="C"
+                ),
+            )
+        except ValueError as e:  # pragma: no cover
+            raise RuntimeError from e
+
+    @classmethod
+    def create_empty(cls, store: "ShardingStorageTransformer"):
+        # reserving 2*64bit per chunk for offset and length:
+        return cls(
+            store=store,
+            offsets_and_lengths=np.full((*store.chunks_per_shard, 2), MAX_UINT_64, dtype="<u8"),
+        )
+
+
+class ShardingStorageTransformer(StorageTransformer):  # lgtm[py/missing-equals]
+    """Implements sharding as a storage transformer, as described in the spec:
+    https://purl.org/zarr/spec/storage_transformers/sharding/1.0
+    """
+
+    extension_uri = "https://purl.org/zarr/spec/storage_transformers/sharding/1.0"
+    valid_types = ["indexed"]
+
+    def __init__(self, _type, chunks_per_shard) -> None:
+        assert_zarr_v3_sharding_available()
+        super().__init__(_type)
+        if isinstance(chunks_per_shard, int):
+            chunks_per_shard = (chunks_per_shard, )
+        else:
+            chunks_per_shard = tuple(int(i) for i in chunks_per_shard)
+            if chunks_per_shard == ():
+                chunks_per_shard = (1, )
+        self.chunks_per_shard = chunks_per_shard
+        self._num_chunks_per_shard = functools.reduce(
+            lambda x, y: x * y, chunks_per_shard, 1
+        )
+        self._dimension_separator = None
+        self._data_key_prefix = None
+
+    def _copy_for_array(self, array, inner_store):
+        transformer_copy = super()._copy_for_array(array, inner_store)
+        transformer_copy._dimension_separator = 
array._dimension_separator + transformer_copy._data_key_prefix = array._data_key_prefix + if len(array._shape) > len(self.chunks_per_shard): + # The array shape might be longer when initialized with subdtypes. + # subdtypes dimensions come last, therefore padding chunks_per_shard + # with ones, effectively disabling sharding on the unlisted dimensions. + transformer_copy.chunks_per_shard += ( + (1, ) * (len(array._shape) - len(self.chunks_per_shard)) + ) + return transformer_copy + + @property + def dimension_separator(self) -> str: + assert self._dimension_separator is not None, ( + "dimension_separator is not initialized, first get a copy via _copy_for_array." + ) + return self._dimension_separator + + def _is_data_key(self, key: str) -> bool: + assert self._data_key_prefix is not None, ( + "data_key_prefix is not initialized, first get a copy via _copy_for_array." + ) + return key.startswith(self._data_key_prefix) + + def _key_to_shard(self, chunk_key: str) -> Tuple[str, Tuple[int, ...]]: + prefix, _, chunk_string = chunk_key.rpartition("c") + chunk_subkeys = tuple( + map(int, chunk_string.split(self.dimension_separator)) + ) if chunk_string else (0, ) + shard_key_tuple = ( + subkey // shard_i + for subkey, shard_i in zip(chunk_subkeys, self.chunks_per_shard) + ) + shard_key = ( + prefix + "c" + self.dimension_separator.join(map(str, shard_key_tuple)) + ) + return shard_key, chunk_subkeys + + def _get_index_from_store(self, shard_key: str) -> _ShardIndex: + # At the end of each shard 2*64bit per chunk for offset and length define the index: + index_bytes = self.inner_store.get_partial_values( + [(shard_key, (-16 * self._num_chunks_per_shard, None))] + )[0] + if index_bytes is None: + raise KeyError(shard_key) + return _ShardIndex.from_bytes( + index_bytes, + self, + ) + + def _get_index_from_buffer(self, buffer: Union[bytes, bytearray]) -> _ShardIndex: + # At the end of each shard 2*64bit per chunk for offset and length define the index: + return _ShardIndex.from_bytes(buffer[-16 * self._num_chunks_per_shard:], self) + + def _get_chunks_in_shard(self, shard_key: str) -> Iterator[Tuple[int, ...]]: + _, _, chunk_string = shard_key.rpartition("c") + shard_key_tuple = tuple( + map(int, chunk_string.split(self.dimension_separator)) + ) if chunk_string else (0, ) + for chunk_offset in itertools.product( + *(range(i) for i in self.chunks_per_shard) + ): + yield tuple( + shard_key_i * shards_i + offset_i + for shard_key_i, offset_i, shards_i in zip( + shard_key_tuple, chunk_offset, self.chunks_per_shard + ) + ) + + def __getitem__(self, key): + if self._is_data_key(key): + if self.supports_efficient_get_partial_values: + # Use the partial implementation, which fetches the index separately + value = self.get_partial_values([(key, (0, None))])[0] + if value is None: + raise KeyError(key) + else: + return value + shard_key, chunk_subkey = self._key_to_shard(key) + try: + full_shard_value = self.inner_store[shard_key] + except KeyError: + raise KeyError(key) + index = self._get_index_from_buffer(full_shard_value) + chunk_slice = index.get_chunk_slice(chunk_subkey) + if chunk_slice is not None: + return full_shard_value[chunk_slice] + else: + raise KeyError(key) + else: + return self.inner_store.__getitem__(key) + + def __setitem__(self, key, value): + value = ensure_bytes(value) + if self._is_data_key(key): + shard_key, chunk_subkey = self._key_to_shard(key) + chunks_to_read = set(self._get_chunks_in_shard(shard_key)) + chunks_to_read.remove(chunk_subkey) + new_content = {chunk_subkey: value} + 
try: + if self.supports_efficient_get_partial_values: + index = self._get_index_from_store(shard_key) + full_shard_value = None + else: + full_shard_value = self.inner_store[shard_key] + index = self._get_index_from_buffer(full_shard_value) + except KeyError: + index = _ShardIndex.create_empty(self) + else: + chunk_slices = [ + (chunk_to_read, index.get_chunk_slice(chunk_to_read)) + for chunk_to_read in chunks_to_read + ] + valid_chunk_slices = [ + (chunk_to_read, chunk_slice) + for chunk_to_read, chunk_slice in chunk_slices + if chunk_slice is not None + ] + # use get_partial_values if less than half of the available chunks must be read: + # (This can be changed when set_partial_values can be used efficiently.) + use_partial_get = ( + self.supports_efficient_get_partial_values + and len(valid_chunk_slices) < len(chunk_slices) / 2 + ) + + if use_partial_get: + chunk_values = self.inner_store.get_partial_values( + [ + ( + shard_key, + ( + chunk_slice.start, + chunk_slice.stop - chunk_slice.start, + ), + ) + for _, chunk_slice in valid_chunk_slices + ] + ) + for chunk_value, (chunk_to_read, _) in zip( + chunk_values, valid_chunk_slices + ): + new_content[chunk_to_read] = chunk_value + else: + if full_shard_value is None: + full_shard_value = self.inner_store[shard_key] + for chunk_to_read, chunk_slice in valid_chunk_slices: + if chunk_slice is not None: + new_content[chunk_to_read] = full_shard_value[chunk_slice] + + shard_content = b"" + for chunk_subkey, chunk_content in new_content.items(): + chunk_slice = slice( + len(shard_content), len(shard_content) + len(chunk_content) + ) + index.set_chunk_slice(chunk_subkey, chunk_slice) + shard_content += chunk_content + # Appending the index at the end of the shard: + shard_content += index.to_bytes() + self.inner_store[shard_key] = shard_content + else: # pragma: no cover + self.inner_store[key] = value + + def __delitem__(self, key): + if self._is_data_key(key): + shard_key, chunk_subkey = self._key_to_shard(key) + try: + index = self._get_index_from_store(shard_key) + except KeyError: + raise KeyError(key) + + index.set_chunk_slice(chunk_subkey, None) + + if index.is_all_empty(): + del self.inner_store[shard_key] + else: + index_bytes = index.to_bytes() + self.inner_store.set_partial_values([(shard_key, -len(index_bytes), index_bytes)]) + else: # pragma: no cover + del self.inner_store[key] + + def _shard_key_to_original_keys(self, key: str) -> Iterator[str]: + if self._is_data_key(key): + index = self._get_index_from_store(key) + prefix, _, _ = key.rpartition("c") + for chunk_tuple in self._get_chunks_in_shard(key): + if index.get_chunk_slice(chunk_tuple) is not None: + yield prefix + "c" + self.dimension_separator.join( + map(str, chunk_tuple) + ) + else: + yield key + + def __iter__(self) -> Iterator[str]: + for key in self.inner_store: + yield from self._shard_key_to_original_keys(key) + + def __len__(self): + return sum(1 for _ in self.keys()) + + def get_partial_values(self, key_ranges): + if self.supports_efficient_get_partial_values: + transformed_key_ranges = [] + cached_indices = {} + none_indices = [] + for i, (key, range_) in enumerate(key_ranges): + if self._is_data_key(key): + shard_key, chunk_subkey = self._key_to_shard(key) + try: + index = cached_indices[shard_key] + except KeyError: + try: + index = self._get_index_from_store(shard_key) + except KeyError: + none_indices.append(i) + continue + cached_indices[shard_key] = index + chunk_slice = index.get_chunk_slice(chunk_subkey) + if chunk_slice is None: + 
none_indices.append(i) + continue + range_start, range_length = range_ + if range_length is None: + range_length = chunk_slice.stop - chunk_slice.start + transformed_key_ranges.append( + (shard_key, (range_start + chunk_slice.start, range_length)) + ) + else: # pragma: no cover + transformed_key_ranges.append((key, range_)) + values = self.inner_store.get_partial_values(transformed_key_ranges) + for i in none_indices: + values.insert(i, None) + return values + else: + return StoreV3.get_partial_values(self, key_ranges) + + def supports_efficient_set_partial_values(self): + return False + + def set_partial_values(self, key_start_values): + # This does not yet implement efficient set_partial_values + StoreV3.set_partial_values(self, key_start_values) + + def rename(self, src_path: str, dst_path: str) -> None: + StoreV3.rename(self, src_path, dst_path) # type: ignore[arg-type] + + def list_prefix(self, prefix): + return StoreV3.list_prefix(self, prefix) + + def erase_prefix(self, prefix): + if self._is_data_key(prefix): + StoreV3.erase_prefix(self, prefix) + else: + self.inner_store.erase_prefix(prefix) + + def rmdir(self, path=None): + path = normalize_storage_path(path) + _rmdir_from_keys_v3(self, path) # type: ignore + + def __contains__(self, key): + if self._is_data_key(key): + shard_key, chunk_subkeys = self._key_to_shard(key) + try: + index = self._get_index_from_store(shard_key) + except KeyError: + return False + chunk_slice = index.get_chunk_slice(chunk_subkeys) + return chunk_slice is not None + else: + return self._inner_store.__contains__(key) diff --git a/zarr/core.py b/zarr/core.py index 5d37570831..b9db6cb2c8 100644 --- a/zarr/core.py +++ b/zarr/core.py @@ -51,7 +51,8 @@ normalize_shape, normalize_storage_path, PartialReadBuffer, - ensure_ndarray_like + UncompressedPartialReadBufferV3, + ensure_ndarray_like, ) @@ -1271,8 +1272,12 @@ def _get_selection(self, indexer, out=None, fields=None): check_array_shape('out', out, out_shape) # iterate over chunks - if not hasattr(self.chunk_store, "getitems") or \ - any(map(lambda x: x == 0, self.shape)): + if ( + not hasattr(self.chunk_store, "getitems") and not ( + hasattr(self.chunk_store, "get_partial_values") and + self.chunk_store.supports_efficient_get_partial_values + ) + ) or any(map(lambda x: x == 0, self.shape)): # sequentially get one key at a time from storage for chunk_coords, chunk_selection, out_selection in indexer: @@ -1898,6 +1903,8 @@ def _process_chunk( cdata = cdata.read_full() self._compressor.decode(cdata, dest) else: + if isinstance(cdata, UncompressedPartialReadBufferV3): + cdata = cdata.read_full() chunk = ensure_ndarray_like(cdata).view(self._dtype) chunk = chunk.reshape(self._chunks, order=self._order) np.copyto(dest, chunk) @@ -1919,13 +1926,21 @@ def _process_chunk( else dim for i, dim in enumerate(self.chunks) ] - cdata.read_part(start, nitems) - chunk_partial = self._decode_chunk( - cdata.buff, - start=start, - nitems=nitems, - expected_shape=expected_shape, - ) + if isinstance(cdata, UncompressedPartialReadBufferV3): + chunk_partial = self._decode_chunk( + cdata.read_part(start, nitems), + start=start, + nitems=nitems, + expected_shape=expected_shape, + ) + else: + cdata.read_part(start, nitems) + chunk_partial = self._decode_chunk( + cdata.buff, + start=start, + nitems=nitems, + expected_shape=expected_shape, + ) tmp[partial_out_selection] = chunk_partial out[out_selection] = tmp[chunk_selection] return @@ -2020,9 +2035,29 @@ def _chunk_getitems(self, lchunk_coords, lchunk_selection, out, lout_selection, 
for ckey in ckeys if ckey in self.chunk_store } + elif ( + self._partial_decompress + and not self._compressor + and not fields + and self.dtype != object + and hasattr(self.chunk_store, "get_partial_values") + and self.chunk_store.supports_efficient_get_partial_values + ): + partial_read_decode = True + cdatas = { + ckey: UncompressedPartialReadBufferV3( + ckey, self.chunk_store, itemsize=self.itemsize + ) + for ckey in ckeys + if ckey in self.chunk_store + } else: partial_read_decode = False - cdatas = self.chunk_store.getitems(ckeys, on_error="omit") + if not hasattr(self.chunk_store, "getitems"): + values = self.chunk_store.get_partial_values([(ckey, (0, None)) for ckey in ckeys]) + cdatas = {key: value for key, value in zip(ckeys, values) if value is not None} + else: + cdatas = self.chunk_store.getitems(ckeys, on_error="omit") for ckey, chunk_select, out_select in zip(ckeys, lchunk_selection, lout_selection): if ckey in cdatas: self._process_chunk( diff --git a/zarr/meta.py b/zarr/meta.py index 41a90101b5..b493e833f0 100644 --- a/zarr/meta.py +++ b/zarr/meta.py @@ -477,9 +477,10 @@ def _encode_storage_transformer_metadata( @classmethod def _decode_storage_transformer_metadata(cls, meta: Mapping) -> "StorageTransformer": from zarr.tests.test_storage_v3 import DummyStorageTransfomer + from zarr._storage.v3_storage_transformers import ShardingStorageTransformer # This might be changed to a proper registry in the future - KNOWN_STORAGE_TRANSFORMERS = [DummyStorageTransfomer] + KNOWN_STORAGE_TRANSFORMERS = [DummyStorageTransfomer, ShardingStorageTransformer] conf = meta.get('configuration', {}) extension_uri = meta['extension'] diff --git a/zarr/tests/test_core.py b/zarr/tests/test_core.py index a9d674e2d9..24d6ebbc49 100644 --- a/zarr/tests/test_core.py +++ b/zarr/tests/test_core.py @@ -21,6 +21,7 @@ from zarr._storage.store import ( v3_api_available, ) +from .._storage.v3_storage_transformers import ShardingStorageTransformer, v3_sharding_available from zarr.core import Array from zarr.errors import ArrayNotFoundError, ContainsGroupError from zarr.meta import json_loads @@ -830,7 +831,6 @@ def test_pickle(self): attrs_cache = z.attrs.cache a = np.random.randint(0, 1000, 1000) z[:] = a - # round trip through pickle dump = pickle.dumps(z) # some stores cannot be opened twice at the same time, need to close @@ -3299,6 +3299,60 @@ def expected(self): ] +@pytest.mark.skipif(have_fsspec is False, reason="needs fsspec") +@pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") +@pytest.mark.skipif(not v3_sharding_available, reason="sharding is disabled") +class TestArrayWithFSStoreV3PartialReadUncompressedSharded( + TestArrayWithPathV3, TestArrayWithFSStorePartialRead +): + + @staticmethod + def create_array(array_path='arr1', read_only=False, **kwargs): + path = mkdtemp() + atexit.register(shutil.rmtree, path) + store = FSStoreV3(path) + cache_metadata = kwargs.pop("cache_metadata", True) + cache_attrs = kwargs.pop("cache_attrs", True) + write_empty_chunks = kwargs.pop('write_empty_chunks', True) + kwargs.setdefault('compressor', None) + num_dims = 1 if isinstance(kwargs["shape"], int) else len(kwargs["shape"]) + sharding_transformer = ShardingStorageTransformer( + "indexed", chunks_per_shard=(2, ) * num_dims + ) + init_array(store, path=array_path, storage_transformers=[sharding_transformer], **kwargs) + return Array( + store, + path=array_path, + read_only=read_only, + cache_metadata=cache_metadata, + cache_attrs=cache_attrs, + partial_decompress=True, + 
write_empty_chunks=write_empty_chunks, + ) + + def test_nbytes_stored(self): + z = self.create_array(shape=1000, chunks=100) + expect_nbytes_stored = sum(buffer_size(v) for k, v in z._store.items() if k != 'zarr.json') + assert expect_nbytes_stored == z.nbytes_stored + z[:] = 42 + expect_nbytes_stored = sum(buffer_size(v) for k, v in z._store.items() if k != 'zarr.json') + assert expect_nbytes_stored == z.nbytes_stored + + def test_supports_efficient_get_set_partial_values(self): + z = self.create_array(shape=100, chunks=10) + assert z.chunk_store.supports_efficient_get_partial_values + assert not z.chunk_store.supports_efficient_set_partial_values() + + def expected(self): + return [ + "90109fc2a4e17efbcb447003ea1c08828b91f71e", + "2b73519f7260dba3ddce0d2b70041888856fec6b", + "bca5798be2ed71d444f3045b05432d937682b7dd", + "9ff1084501e28520e577662a6e3073f1116c76a2", + "882a97cad42417f90f111d0cb916a21579650467", + ] + + @pytest.mark.skipif(have_fsspec is False, reason="needs fsspec") @pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") class TestArrayWithFSStoreV3Nested(TestArrayWithPathV3, TestArrayWithFSStoreNested): @@ -3392,6 +3446,63 @@ def expected(self): ] +@pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") +@pytest.mark.skipif(not v3_sharding_available, reason="sharding is disabled") +class TestArrayWithShardingStorageTransformerV3(TestArrayWithPathV3): + + @staticmethod + def create_array(array_path='arr1', read_only=False, **kwargs): + store = KVStoreV3(dict()) + cache_metadata = kwargs.pop('cache_metadata', True) + cache_attrs = kwargs.pop('cache_attrs', True) + write_empty_chunks = kwargs.pop('write_empty_chunks', True) + kwargs.setdefault('compressor', None) + num_dims = 1 if isinstance(kwargs["shape"], int) else len(kwargs["shape"]) + sharding_transformer = ShardingStorageTransformer( + "indexed", chunks_per_shard=(2, ) * num_dims + ) + init_array(store, path=array_path, storage_transformers=[sharding_transformer], **kwargs) + return Array(store, path=array_path, read_only=read_only, + cache_metadata=cache_metadata, + cache_attrs=cache_attrs, write_empty_chunks=write_empty_chunks) + + def test_nbytes_stored(self): + z = self.create_array(shape=1000, chunks=100) + expect_nbytes_stored = sum(buffer_size(v) for k, v in z._store.items() if k != 'zarr.json') + assert expect_nbytes_stored == z.nbytes_stored + z[:] = 42 + expect_nbytes_stored = sum(buffer_size(v) for k, v in z._store.items() if k != 'zarr.json') + assert expect_nbytes_stored == z.nbytes_stored + + # mess with store + z.store[data_root + z._key_prefix + 'foo'] = list(range(10)) + assert -1 == z.nbytes_stored + + def test_keys_inner_store(self): + z = self.create_array(shape=1000, chunks=100) + assert z.chunk_store.keys() == z._store.keys() + meta_keys = set(z.store.keys()) + z[:] = 42 + assert len(z.chunk_store.keys() - meta_keys) == 10 + # inner store should have half the data keys, + # since chunks_per_shard is 2: + assert len(z._store.keys() - meta_keys) == 5 + + def test_supports_efficient_get_set_partial_values(self): + z = self.create_array(shape=100, chunks=10) + assert not z.chunk_store.supports_efficient_get_partial_values + assert not z.chunk_store.supports_efficient_set_partial_values() + + def expected(self): + return [ + '90109fc2a4e17efbcb447003ea1c08828b91f71e', + '2b73519f7260dba3ddce0d2b70041888856fec6b', + 'bca5798be2ed71d444f3045b05432d937682b7dd', + '9ff1084501e28520e577662a6e3073f1116c76a2', + '882a97cad42417f90f111d0cb916a21579650467', + ] + + @pytest.mark.skipif(not 
v3_api_available, reason="V3 is disabled") def test_array_mismatched_store_versions(): store_v3 = KVStoreV3(dict()) diff --git a/zarr/tests/test_storage_v3.py b/zarr/tests/test_storage_v3.py index 9f18c89361..cc031f0db4 100644 --- a/zarr/tests/test_storage_v3.py +++ b/zarr/tests/test_storage_v3.py @@ -10,6 +10,8 @@ import zarr from zarr._storage.store import _get_hierarchy_metadata, v3_api_available, StorageTransformer +from zarr._storage.v3_storage_transformers import ShardingStorageTransformer, v3_sharding_available +from zarr.core import Array from zarr.meta import _default_entry_point_metadata_v3 from zarr.storage import (atexit_rmglob, atexit_rmtree, data_root, default_compressor, getsize, init_array, meta_root, @@ -523,26 +525,38 @@ def create_store(self, **kwargs): return store +@pytest.mark.skipif(not v3_sharding_available, reason="sharding is disabled") class TestStorageTransformerV3(TestMappingStoreV3): def create_store(self, **kwargs): inner_store = super().create_store(**kwargs) - storage_transformer = DummyStorageTransfomer( + dummy_transformer = DummyStorageTransfomer( "dummy_type", test_value=DummyStorageTransfomer.TEST_CONSTANT ) - return storage_transformer._copy_for_array(None, inner_store) + sharding_transformer = ShardingStorageTransformer( + "indexed", chunks_per_shard=2, + ) + path = 'bla' + init_array(inner_store, path=path, shape=1000, chunks=100, + dimension_separator=".", + storage_transformers=[dummy_transformer, sharding_transformer]) + store = Array(store=inner_store, path=path).chunk_store + store.erase_prefix("data/root/bla/") + store.clear() + return store def test_method_forwarding(self): store = self.create_store() - assert store.list() == store.inner_store.list() - assert store.list_dir(data_root) == store.inner_store.list_dir(data_root) + inner_store = store.inner_store.inner_store + assert store.list() == inner_store.list() + assert store.list_dir(data_root) == inner_store.list_dir(data_root) assert store.is_readable() assert store.is_writeable() assert store.is_listable() - store.inner_store._readable = False - store.inner_store._writeable = False - store.inner_store._listable = False + inner_store._readable = False + inner_store._writeable = False + inner_store._listable = False assert not store.is_readable() assert not store.is_writeable() assert not store.is_listable() diff --git a/zarr/util.py b/zarr/util.py index dfbb551651..5b307b7c5c 100644 --- a/zarr/util.py +++ b/zarr/util.py @@ -644,6 +644,25 @@ def read_full(self): return self.chunk_store[self.store_key] +class UncompressedPartialReadBufferV3: + def __init__(self, store_key, chunk_store, itemsize): + assert chunk_store.supports_efficient_get_partial_values + self.chunk_store = chunk_store + self.store_key = store_key + self.itemsize = itemsize + + def prepare_chunk(self): + pass + + def read_part(self, start, nitems): + return self.chunk_store.get_partial_values( + [(self.store_key, (start * self.itemsize, nitems * self.itemsize))] + )[0] + + def read_full(self): + return self.chunk_store[self.store_key] + + def retry_call(callabl: Callable, args=None, kwargs=None, From e9fb1f33f339d046e41a76fee46be07d8e4f39a4 Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Thu, 2 Feb 2023 09:56:33 -0500 Subject: [PATCH 045/213] remove blosc warnings from n5 compressor handling (#1331) * remove blosc warnings from n5 compressor handling * release notes * remove reference to n5 warnings in pytest ini options * remove blosc from list of warned compressors in tests, and restore lzma warning --- 
docs/release.rst | 4 +++- pyproject.toml | 1 - zarr/n5.py | 6 ------ zarr/tests/test_core.py | 4 +--- 4 files changed, 4 insertions(+), 11 deletions(-) diff --git a/docs/release.rst b/docs/release.rst index dcec2872fb..fdff400266 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -25,7 +25,9 @@ Major changes * sharding storage transformer By :user:`Jonathan Striebel `; :issue:`1096`, :issue:`1111`. - +* Remove warnings emitted when using N5Store or N5FSStore with a blosc-compressed array. + By :user:`Davis Bennett `; :issue:`1331`. + Bug fixes ~~~~~~~~~ diff --git a/pyproject.toml b/pyproject.toml index 1592b9887a..3277e9da7c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -87,7 +87,6 @@ addopts = [ ] filterwarnings = [ "error:::zarr.*", - "ignore:Not all N5 implementations support blosc compression.*:RuntimeWarning", "ignore:PY_SSIZE_T_CLEAN will be required.*:DeprecationWarning", "ignore:The loop argument is deprecated since Python 3.8.*:DeprecationWarning", ] diff --git a/zarr/n5.py b/zarr/n5.py index 978cade1b8..4c93ce4acb 100644 --- a/zarr/n5.py +++ b/zarr/n5.py @@ -735,12 +735,6 @@ def compressor_config_to_n5(compressor_config: Optional[Dict[str, Any]]) -> Dict elif codec_id == 'blosc': - warnings.warn( - "Not all N5 implementations support blosc compression (yet). You " - "might not be able to open the dataset with another N5 library.", - RuntimeWarning - ) - n5_config['cname'] = _compressor_config['cname'] n5_config['clevel'] = _compressor_config['clevel'] n5_config['shuffle'] = _compressor_config['shuffle'] diff --git a/zarr/tests/test_core.py b/zarr/tests/test_core.py index 24d6ebbc49..b54fe3ddf0 100644 --- a/zarr/tests/test_core.py +++ b/zarr/tests/test_core.py @@ -2022,9 +2022,7 @@ def test_compressors(self): a1[:] = 1 assert np.all(a1[:] == 1) - compressors_warn = [ - Blosc() - ] + compressors_warn = [] if LZMA: compressors_warn.append(LZMA(2)) # Try lzma.FORMAT_ALONE, which N5 doesn't support. 
for compressor in compressors_warn: From 4dc6f1f5046708648b5e6b82d0a64a24d1a40566 Mon Sep 17 00:00:00 2001 From: AWA BRANDON AWA Date: Thu, 2 Feb 2023 16:01:58 +0100 Subject: [PATCH 046/213] changed documentation theme to pydata_sphinx_theme (#1242) * changed documentation theme to pydata_sphinx_theme * updated documentation layout * Update docs/index.rst Co-authored-by: Sanket Verma * Updated Acknowledgements section and added twitter icon * Added client-side javascript redirects * Add more redirects * Minor tweaks and added acknowledgments.rst * Added acknowledgments.html to redirect * Add indices from the old main page --------- Co-authored-by: Sanket Verma Co-authored-by: Josh Moore --- docs/_static/custom.css | 133 ++++++++++++--- docs/_static/custom.js | 18 ++ docs/_static/index_api.svg | 97 +++++++++++ docs/_static/index_contribute.svg | 76 +++++++++ docs/_static/index_getting_started.svg | 66 ++++++++ docs/_static/index_user_guide.svg | 67 ++++++++ docs/acknowledgments.rst | 76 +++++++++ docs/api.rst | 7 + docs/conf.py | 28 +++- docs/getting_started.rst | 46 +++++ docs/index.rst | 224 ++++++++++--------------- requirements_rtfd.txt | 2 + 12 files changed, 681 insertions(+), 159 deletions(-) create mode 100644 docs/_static/custom.js create mode 100644 docs/_static/index_api.svg create mode 100644 docs/_static/index_contribute.svg create mode 100644 docs/_static/index_getting_started.svg create mode 100644 docs/_static/index_user_guide.svg create mode 100644 docs/acknowledgments.rst create mode 100644 docs/getting_started.rst diff --git a/docs/_static/custom.css b/docs/_static/custom.css index a0e3929e87..487addfbbd 100644 --- a/docs/_static/custom.css +++ b/docs/_static/custom.css @@ -1,36 +1,123 @@ -/* override text color */ -.wy-menu-vertical a { - color: #000000; +@import url('https://fonts.googleapis.com/css2?family=Lato:ital,wght@0,400;0,700;0,900;1,400;1,700;1,900&family=Open+Sans:ital,wght@0,400;0,600;1,400;1,600&display=swap'); + +.navbar-brand img { + height: 75px; +} +.navbar-brand { + height: 75px; +} + +body { + font-family: 'Open Sans', sans-serif; +} + +pre, code { + font-size: 100%; + line-height: 155%; +} + +/* Style the active version button. + +- dev: orange +- stable: green +- old, PR: red + +Colors from: + +Wong, B. Points of view: Color blindness. +Nat Methods 8, 441 (2011). 
https://doi.org/10.1038/nmeth.1618 +*/ + +/* If the active version has the name "dev", style it orange */ +#version_switcher_button[data-active-version-name*="dev"] { + background-color: #E69F00; + border-color: #E69F00; + color:#000000; +} + +/* green for `stable` */ +#version_switcher_button[data-active-version-name*="stable"] { + background-color: #009E73; + border-color: #009E73; +} + +/* red for `old` */ +#version_switcher_button:not([data-active-version-name*="stable"], [data-active-version-name*="dev"], [data-active-version-name=""]) { + background-color: #980F0F; + border-color: #980F0F; } -/* Sidebar background color */ -.wy-nav-side, div.wy-side-nav-search { - background-color: rgb(198, 197, 213, 0); /* full alpha */ +/* Main page overview cards */ + +.sd-card { + background: #fff; + border-radius: 0; + padding: 30px 10px 20px 10px; + margin: 10px 0px; +} + +.sd-card .sd-card-header { + text-align: center; +} + +.sd-card .sd-card-header .sd-card-text { + margin: 0px; +} + +.sd-card .sd-card-img-top { + height: 52px; + width: 52px; + margin-left: auto; + margin-right: auto; +} + +.sd-card .sd-card-header { + border: none; + background-color: white; + color: #150458 !important; + font-size: var(--pst-font-size-h5); + font-weight: bold; + padding: 2.5rem 0rem 0.5rem 0rem; +} + +.sd-card .sd-card-footer { + border: none; + background-color: white; +} + +.sd-card .sd-card-footer .sd-card-text { + max-width: 220px; + margin-left: auto; + margin-right: auto; +} + +/* Dark theme tweaking */ +html[data-theme=dark] .sd-card img[src*='.svg'] { + filter: invert(0.82) brightness(0.8) contrast(1.2); } -/* Sidebar link click color */ -.wy-menu-vertical .toctree-l1 > a:active { - background-color: rgb(198, 197, 213); - color: rgb(0, 0, 0); +/* Main index page overview cards */ +html[data-theme=dark] .sd-card { + background-color:var(--pst-color-background); } -/* Link color is darker to make hovering more clear */ -.wy-menu-vertical .toctree-l1 > a:hover { - background-color: rgb(198, 197, 213); - color: rgb(0, 0, 0); +html[data-theme=dark] .sd-shadow-sm { + box-shadow: 0 .1rem 1rem rgba(250, 250, 250, .6) !important } -.wy-menu-vertical li.current > a:hover, .wy-menu-vertical li.current > a:active { - color: #404040; - background-color: #F5F5F5; +html[data-theme=dark] .sd-card .sd-card-header { + background-color:var(--pst-color-background); + color: #150458 !important; } -/* On hover over logo */ -.wy-side-nav-search > a:hover, .wy-side-nav-search .wy-dropdown > a:hover { - background: inherit; +html[data-theme=dark] .sd-card .sd-card-footer { + background-color:var(--pst-color-background); } -/* Border around search box */ -.wy-side-nav-search input[type="text"] { - border: 0px; +html[data-theme=dark] h1 { + color: var(--pst-color-primary); } + +html[data-theme=dark] h3 { + color: #0a6774; +} \ No newline at end of file diff --git a/docs/_static/custom.js b/docs/_static/custom.js new file mode 100644 index 0000000000..06b2d019b1 --- /dev/null +++ b/docs/_static/custom.js @@ -0,0 +1,18 @@ +// handle redirects +(() => { + let anchorMap = { + "installation": "installation.html", + "getting-started": "getting_started.html#getting-started", + "highlights": "getting_started.html#highlights", + "contributing": "contributing.html", + "projects-using-zarr": "getting_started.html#projects-using-zarr", + "acknowledgments": "acknowledgments.html", + "contents": "getting_started.html#contents", + "indices-and-tables": "api.html#indices-and-tables" + } + + let hash = window.location.hash.substring(1); + if 
(hash) {
+    window.location.replace(anchorMap[hash]);
+  }
+})();
diff --git a/docs/_static/index_api.svg b/docs/_static/index_api.svg
new file mode 100644
index 0000000000..69f7ba1d2d
--- /dev/null
+++ b/docs/_static/index_api.svg
@@ -0,0 +1,97 @@
+(SVG icon artwork for the "API Reference" card; markup not reproduced)
diff --git a/docs/_static/index_contribute.svg b/docs/_static/index_contribute.svg
new file mode 100644
index 0000000000..de3d902379
--- /dev/null
+++ b/docs/_static/index_contribute.svg
@@ -0,0 +1,76 @@
+(SVG icon artwork for the "Contributor's Guide" card; markup not reproduced)
diff --git a/docs/_static/index_getting_started.svg b/docs/_static/index_getting_started.svg
new file mode 100644
index 0000000000..2d36622cb7
--- /dev/null
+++ b/docs/_static/index_getting_started.svg
@@ -0,0 +1,66 @@
+(SVG icon artwork for the "Getting Started" card; markup not reproduced)
diff --git a/docs/_static/index_user_guide.svg b/docs/_static/index_user_guide.svg
new file mode 100644
index 0000000000..bd17053517
--- /dev/null
+++ b/docs/_static/index_user_guide.svg
@@ -0,0 +1,67 @@
+(SVG icon artwork for the "Tutorial" card; markup not reproduced)
diff --git a/docs/acknowledgments.rst b/docs/acknowledgments.rst
new file mode 100644
index 0000000000..36cd1f5646
--- /dev/null
+++ b/docs/acknowledgments.rst
@@ -0,0 +1,76 @@
+Acknowledgments
+===============
+
+The following people have contributed to the development of Zarr by contributing code,
+documentation, code reviews, comments and/or ideas:
+
+* :user:`Alistair Miles `
+* :user:`Altay Sansal `
+* :user:`Anderson Banihirwe `
+* :user:`Andrew Fulton `
+* :user:`Andrew Thomas `
+* :user:`Anthony Scopatz `
+* :user:`Attila Bergou `
+* :user:`BGCMHou `
+* :user:`Ben Jeffery `
+* :user:`Ben Williams `
+* :user:`Boaz Mohar `
+* :user:`Charles Noyes `
+* :user:`Chris Barnes `
+* :user:`David Baddeley `
+* :user:`Davis Bennett `
+* :user:`Dimitri Papadopoulos Orfanos `
+* :user:`Eduardo Gonzalez `
+* :user:`Elliott Sales de Andrade `
+* :user:`Eric Prestat `
+* :user:`Eric Younkin `
+* :user:`Francesc Alted `
+* :user:`Greggory Lee `
+* :user:`Gregory R. Lee `
+* :user:`Ian Hunt-Isaak `
+* :user:`James Bourbeau `
+* :user:`Jan Funke `
+* :user:`Jerome Kelleher `
+* :user:`Joe Hamman `
+* :user:`Joe Jevnik `
+* :user:`John Kirkham `
+* :user:`Josh Moore `
+* :user:`Juan Nunez-Iglesias `
+* :user:`Justin Swaney `
+* :user:`Mads R. B. 
Kristensen ` +* :user:`Mamy Ratsimbazafy ` +* :user:`Martin Durant ` +* :user:`Matthew Rocklin ` +* :user:`Matthias Bussonnier ` +* :user:`Mattia Almansi ` +* :user:`Noah D Brenowitz ` +* :user:`Oren Watson ` +* :user:`Pavithra Eswaramoorthy ` +* :user:`Poruri Sai Rahul ` +* :user:`Prakhar Goel ` +* :user:`Raphael Dussin ` +* :user:`Ray Bell ` +* :user:`Richard Scott ` +* :user:`Richard Shaw ` +* :user:`Ryan Abernathey ` +* :user:`Ryan Williams ` +* :user:`Saransh Chopra ` +* :user:`Sebastian Grill ` +* :user:`Shikhar Goenka ` +* :user:`Shivank Chaudhary ` +* :user:`Stephan Hoyer ` +* :user:`Stephan Saalfeld ` +* :user:`Tarik Onalan ` +* :user:`Tim Crone ` +* :user:`Tobias Kölling ` +* :user:`Tom Augspurger ` +* :user:`Tom White ` +* :user:`Tommy Tran ` +* :user:`Trevor Manz ` +* :user:`Vincent Schut ` +* :user:`Vyas Ramasubramani ` +* :user:`Zain Patel ` +* :user:`gsakkis` +* :user:`hailiangzhang ` +* :user:`pmav99 ` +* :user:`sbalmer ` \ No newline at end of file diff --git a/docs/api.rst b/docs/api.rst index 8162ada965..2b6e7ea516 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -13,3 +13,10 @@ API reference api/codecs api/attrs api/sync + +Indices and tables +------------------ + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` diff --git a/docs/conf.py b/docs/conf.py index 2639f765ee..413d648732 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -45,6 +45,7 @@ 'numpydoc', 'sphinx_issues', "sphinx_copybutton", + "sphinx_design" ] numpydoc_show_class_members = False @@ -124,12 +125,26 @@ # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. -html_theme = 'sphinx_rtd_theme' +html_theme = 'pydata_sphinx_theme' + +html_favicon = '_static/logo1.png' # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. -html_theme_options = {'logo_only': True} +html_theme_options = { + "github_url": "https://github.com/zarr-developers/zarr-python", + "twitter_url": "https://twitter.com/zarr_dev", + "icon_links": [ + { + "name": "Zarr Dev", + "url": "https://zarr.dev/", + "icon": "_static/logo1.png", + "type": "local" + }, + ], + "collapse_navigation": True +} # Add any paths that contain custom themes here, relative to this directory. #html_theme_path = [] @@ -160,6 +175,9 @@ def setup(app): # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". html_static_path = ['_static'] +html_js_files = [ + 'custom.js', +] # Add any extra paths that contain custom files (such as robots.txt or # .htaccess) here, relative to this directory. These files are copied @@ -246,7 +264,7 @@ def setup(app): # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). latex_documents = [ - (main_doc, 'zarr.tex', 'zarr Documentation', + (main_doc, 'zarr.tex', 'Zarr-Python', author, 'manual'), ] @@ -276,7 +294,7 @@ def setup(app): # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). 
man_pages = [ - (main_doc, 'zarr', 'zarr Documentation', + (main_doc, 'zarr', 'Zarr-Python', [author], 1) ] @@ -290,7 +308,7 @@ def setup(app): # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ - (main_doc, 'zarr', 'zarr Documentation', + (main_doc, 'zarr', 'Zarr-Python', author, 'zarr', 'One line description of project.', 'Miscellaneous'), ] diff --git a/docs/getting_started.rst b/docs/getting_started.rst new file mode 100644 index 0000000000..77d45325e4 --- /dev/null +++ b/docs/getting_started.rst @@ -0,0 +1,46 @@ +Getting Started +=============== + +Zarr is a format for the storage of chunked, compressed, N-dimensional arrays +inspired by `HDF5 `_, `h5py +`_ and `bcolz `_. + +The project is fiscally sponsored by `NumFOCUS `_, a US +501(c)(3) public charity, and development is supported by the +`MRC Centre for Genomics and Global Health `_ +and the `Chan Zuckerberg Initiative `_. + +These documents describe the Zarr Python implementation. More information +about the Zarr format can be found on the `main website `_. + +Highlights +---------- + +* Create N-dimensional arrays with any NumPy dtype. +* Chunk arrays along any dimension. +* Compress and/or filter chunks using any NumCodecs_ codec. +* Store arrays in memory, on disk, inside a Zip file, on S3, ... +* Read an array concurrently from multiple threads or processes. +* Write to an array concurrently from multiple threads or processes. +* Organize arrays into hierarchies via groups. + +Contributing +------------ + +Feedback and bug reports are very welcome, please get in touch via +the `GitHub issue tracker `_. See +:doc:`contributing` for further information about contributing to Zarr. + +Projects using Zarr +------------------- + +If you are using Zarr, we would `love to hear about it +`_. + +.. toctree:: + :caption: Getting Started + :hidden: + + installation + +.. _NumCodecs: https://numcodecs.readthedocs.io/ diff --git a/docs/index.rst b/docs/index.rst index dd6abc1862..50060d10cc 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,142 +1,104 @@ -.. zarr documentation main file, created by - sphinx-quickstart on Mon May 2 21:40:09 2016. - -Zarr -==== - -Zarr is a format for the storage of chunked, compressed, N-dimensional arrays -inspired by `HDF5 `_, `h5py -`_ and `bcolz `_. - -The project is fiscally sponsored by `NumFOCUS `_, a US -501(c)(3) public charity, and development is supported by the -`MRC Centre for Genomics and Global Health `_ -and the `Chan Zuckerberg Initiative `_. - -These documents describe the Zarr Python implementation. More information -about the Zarr format can be found on the `main website `_. - -Highlights ----------- - - * Create N-dimensional arrays with any NumPy dtype. - * Chunk arrays along any dimension. - * Compress and/or filter chunks using any NumCodecs_ codec. - * Store arrays in memory, on disk, inside a Zip file, on S3, ... - * Read an array concurrently from multiple threads or processes. - * Write to an array concurrently from multiple threads or processes. - * Organize arrays into hierarchies via groups. - -Contributing ------------- - -Feedback and bug reports are very welcome, please get in touch via -the `GitHub issue tracker `_. See -:doc:`contributing` for further information about contributing to Zarr. - -Projects using Zarr -------------------- - -If you are using Zarr, we would `love to hear about it -`_. 
- -Acknowledgments ---------------- - -The following people have contributed to the development of Zarr by contributing code, -documentation, code reviews, comments and/or ideas: - -:user:`Alistair Miles ` -:user:`Altay Sansal ` -:user:`Anderson Banihirwe ` -:user:`Andrew Fulton ` -:user:`Andrew Thomas ` -:user:`Anthony Scopatz ` -:user:`Attila Bergou ` -:user:`BGCMHou ` -:user:`Ben Jeffery ` -:user:`Ben Williams ` -:user:`Boaz Mohar ` -:user:`Charles Noyes ` -:user:`Chris Barnes ` -:user:`David Baddeley ` -:user:`Davis Bennett ` -:user:`Dimitri Papadopoulos Orfanos ` -:user:`Eduardo Gonzalez ` -:user:`Elliott Sales de Andrade ` -:user:`Eric Prestat ` -:user:`Eric Younkin ` -:user:`Francesc Alted ` -:user:`Greggory Lee ` -:user:`Gregory R. Lee ` -:user:`Ian Hunt-Isaak ` -:user:`James Bourbeau ` -:user:`Jan Funke ` -:user:`Jerome Kelleher ` -:user:`Joe Hamman ` -:user:`Joe Jevnik ` -:user:`John Kirkham ` -:user:`Josh Moore ` -:user:`Juan Nunez-Iglesias ` -:user:`Justin Swaney ` -:user:`Mads R. B. Kristensen ` -:user:`Mamy Ratsimbazafy ` -:user:`Martin Durant ` -:user:`Matthew Rocklin ` -:user:`Matthias Bussonnier ` -:user:`Mattia Almansi ` -:user:`Noah D Brenowitz ` -:user:`Oren Watson ` -:user:`Pavithra Eswaramoorthy ` -:user:`Poruri Sai Rahul ` -:user:`Prakhar Goel ` -:user:`Raphael Dussin ` -:user:`Ray Bell ` -:user:`Richard Scott ` -:user:`Richard Shaw ` -:user:`Ryan Abernathey ` -:user:`Ryan Williams ` -:user:`Saransh Chopra ` -:user:`Sebastian Grill ` -:user:`Shikhar Goenka ` -:user:`Shivank Chaudhary ` -:user:`Stephan Hoyer ` -:user:`Stephan Saalfeld ` -:user:`Tarik Onalan ` -:user:`Tim Crone ` -:user:`Tobias Kölling ` -:user:`Tom Augspurger ` -:user:`Tom White ` -:user:`Tommy Tran ` -:user:`Trevor Manz ` -:user:`Vincent Schut ` -:user:`Vyas Ramasubramani ` -:user:`Zain Patel ` -:user:`gsakkis` -:user:`hailiangzhang ` -:user:`pmav99 ` -:user:`sbalmer ` - -Contents --------- +.. _zarr_docs_mainpage: + +*********** +Zarr-Python +*********** .. toctree:: - :maxdepth: 2 + :maxdepth: 1 + :hidden: - installation + getting_started tutorial api spec - contributing release license - View homepage + acknowledgments + contributing + +**Version**: |version| + +**Download documentation**: `Zipped HTML `_ + +**Useful links**: +`Installation `_ | +`Source Repository `_ | +`Issue Tracker `_ | +`Gitter `_ + +Zarr is a file storage format for chunked, compressed, N-dimensional arrays based on an open-source specification. + +.. grid:: 2 + + .. grid-item-card:: + :img-top: _static/index_getting_started.svg + + Getting Started + ^^^^^^^^^^^^^^^ + + New to Zarr? Check out the getting started guide. It contains an + introduction to Zarr's main concepts and links to additional tutorials. + + +++ + + .. button-ref:: getting_started + :expand: + :color: dark + :click-parent: + + To the getting started guide + + .. grid-item-card:: + :img-top: _static/index_user_guide.svg + + Tutorial + ^^^^^^^^ + + The tutorial provides working examples of Zarr classes and functions. + + +++ + + .. button-ref:: tutorial + :expand: + :color: dark + :click-parent: + + To the Tutorial + + .. grid-item-card:: + :img-top: _static/index_api.svg + + API Reference + ^^^^^^^^^^^^^ + + The reference guide contains a detailed description of the functions, + modules, and objects included in Zarr. The reference describes how the + methods work and which parameters can be used. It assumes that you have an + understanding of the key concepts. + + +++ + + .. 
button-ref:: api + :expand: + :color: dark + :click-parent: + + To the api reference guide + + .. grid-item-card:: + :img-top: _static/index_contribute.svg + + Contributor's Guide + ^^^^^^^^^^^^^^^^^^^ + + Want to contribute to Zarr? We welcome contributions in the form of bug reports, bug fixes, documentation, enhancement proposals and more. The contributing guidelines will guide you through the process of improving Zarr. -Indices and tables ------------------- + +++ -* :ref:`genindex` -* :ref:`modindex` -* :ref:`search` + .. button-ref:: contributing + :expand: + :color: dark + :click-parent: -.. _NumCodecs: https://numcodecs.readthedocs.io/ + To the contributor's guide \ No newline at end of file diff --git a/requirements_rtfd.txt b/requirements_rtfd.txt index 553384e0bd..5d7fec369a 100644 --- a/requirements_rtfd.txt +++ b/requirements_rtfd.txt @@ -2,9 +2,11 @@ asciitree setuptools setuptools_scm sphinx +sphinx_design sphinx-issues sphinx-copybutton sphinx-rtd-theme +pydata-sphinx-theme numpydoc numpy!=1.21.0 msgpack-python==0.5.6 From 280d9695990b73153127083dd640bcb5a69e8f8f Mon Sep 17 00:00:00 2001 From: Nathan Zimmerberg <39104088+nhz2@users.noreply.github.com> Date: Thu, 9 Feb 2023 05:43:03 -0500 Subject: [PATCH 047/213] Allow reading utf-8 encoded json files (#1312) * read utf-8 in json * update release * Update zarr/util.py Co-authored-by: jakirkham * allow str --------- Co-authored-by: jakirkham --- docs/release.rst | 3 +++ fixture/utf8attrs/.zattrs | 1 + fixture/utf8attrs/.zgroup | 3 +++ zarr/meta.py | 12 ++++++------ zarr/tests/test_attrs.py | 11 +++++++++-- zarr/util.py | 4 ++-- 6 files changed, 24 insertions(+), 10 deletions(-) create mode 100644 fixture/utf8attrs/.zattrs create mode 100644 fixture/utf8attrs/.zgroup diff --git a/docs/release.rst b/docs/release.rst index fdff400266..905ccd2ebb 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -31,6 +31,9 @@ Major changes Bug fixes ~~~~~~~~~ +* Allow reading utf-8 encoded json files + By :user:`Nathan Zimmerberg ` :issue:`1308`. + * Ensure contiguous data is give to ``FSStore``. Only copying if needed. By :user:`Mads R. B. Kristensen ` :issue:`1285`. * NestedDirectoryStore.listdir now returns chunk keys with the correct '/' dimension_separator. diff --git a/fixture/utf8attrs/.zattrs b/fixture/utf8attrs/.zattrs new file mode 100644 index 0000000000..7f85af5d3a --- /dev/null +++ b/fixture/utf8attrs/.zattrs @@ -0,0 +1 @@ +{"foo": "た"} \ No newline at end of file diff --git a/fixture/utf8attrs/.zgroup b/fixture/utf8attrs/.zgroup new file mode 100644 index 0000000000..3b7daf227c --- /dev/null +++ b/fixture/utf8attrs/.zgroup @@ -0,0 +1,3 @@ +{ + "zarr_format": 2 +} \ No newline at end of file diff --git a/zarr/meta.py b/zarr/meta.py index b493e833f0..59c56abf3d 100644 --- a/zarr/meta.py +++ b/zarr/meta.py @@ -92,7 +92,7 @@ class Metadata2: ZARR_FORMAT = ZARR_FORMAT @classmethod - def parse_metadata(cls, s: Union[MappingType, str]) -> MappingType[str, Any]: + def parse_metadata(cls, s: Union[MappingType, bytes, str]) -> MappingType[str, Any]: # Here we allow that a store may return an already-parsed metadata object, # or a string of JSON that we will parse here. 
We allow for an already-parsed @@ -110,7 +110,7 @@ def parse_metadata(cls, s: Union[MappingType, str]) -> MappingType[str, Any]: return meta @classmethod - def decode_array_metadata(cls, s: Union[MappingType, str]) -> MappingType[str, Any]: + def decode_array_metadata(cls, s: Union[MappingType, bytes, str]) -> MappingType[str, Any]: meta = cls.parse_metadata(s) # check metadata format @@ -198,7 +198,7 @@ def decode_dtype(cls, d) -> np.dtype: return np.dtype(d) @classmethod - def decode_group_metadata(cls, s: Union[MappingType, str]) -> MappingType[str, Any]: + def decode_group_metadata(cls, s: Union[MappingType, bytes, str]) -> MappingType[str, Any]: meta = cls.parse_metadata(s) # check metadata format version @@ -351,7 +351,7 @@ def encode_dtype(cls, d): return get_extended_dtype_info(np.dtype(d)) @classmethod - def decode_group_metadata(cls, s: Union[MappingType, str]) -> MappingType[str, Any]: + def decode_group_metadata(cls, s: Union[MappingType, bytes, str]) -> MappingType[str, Any]: meta = cls.parse_metadata(s) # 1 / 0 # # check metadata format version @@ -390,7 +390,7 @@ def encode_hierarchy_metadata(cls, meta=None) -> bytes: @classmethod def decode_hierarchy_metadata( - cls, s: Union[MappingType, str] + cls, s: Union[MappingType, bytes, str] ) -> MappingType[str, Any]: meta = cls.parse_metadata(s) # check metadata format @@ -495,7 +495,7 @@ def _decode_storage_transformer_metadata(cls, meta: Mapping) -> "StorageTransfor return StorageTransformerCls.from_config(transformer_type, conf) @classmethod - def decode_array_metadata(cls, s: Union[MappingType, str]) -> MappingType[str, Any]: + def decode_array_metadata(cls, s: Union[MappingType, bytes, str]) -> MappingType[str, Any]: meta = cls.parse_metadata(s) # extract array metadata fields diff --git a/zarr/tests/test_attrs.py b/zarr/tests/test_attrs.py index e4baf182b2..27ec8fea8d 100644 --- a/zarr/tests/test_attrs.py +++ b/zarr/tests/test_attrs.py @@ -4,9 +4,10 @@ from zarr._storage.store import meta_root from zarr.attrs import Attributes -from zarr.storage import KVStore +from zarr.storage import KVStore, DirectoryStore from zarr._storage.v3 import KVStoreV3 from zarr.tests.util import CountingDict, CountingDictV3 +from zarr.hierarchy import group @pytest.fixture(params=[2, 3]) @@ -42,11 +43,17 @@ def test_storage(self, zarr_version): a['baz'] = 42 assert attrs_key in store assert isinstance(store[attrs_key], bytes) - d = json.loads(str(store[attrs_key], 'ascii')) + d = json.loads(str(store[attrs_key], 'utf-8')) if zarr_version == 3: d = d['attributes'] assert dict(foo='bar', baz=42) == d + def test_utf8_encoding(self, zarr_version): + + # fixture data + fixture = group(store=DirectoryStore('fixture')) + assert fixture['utf8attrs'].attrs.asdict() == dict(foo='た') + def test_get_set_del_contains(self, zarr_version): store = _init_store(zarr_version) diff --git a/zarr/util.py b/zarr/util.py index 5b307b7c5c..be5f174aab 100644 --- a/zarr/util.py +++ b/zarr/util.py @@ -56,9 +56,9 @@ def json_dumps(o: Any) -> bytes: separators=(',', ': '), cls=NumberEncoder).encode('ascii') -def json_loads(s: str) -> Dict[str, Any]: +def json_loads(s: Union[bytes, str]) -> Dict[str, Any]: """Read JSON in a consistent way.""" - return json.loads(ensure_text(s, 'ascii')) + return json.loads(ensure_text(s, 'utf-8')) def normalize_shape(shape) -> Tuple[int]: From 277e4b200edd275e991b321a4d735859ffd555a9 Mon Sep 17 00:00:00 2001 From: Josh Moore Date: Fri, 10 Feb 2023 09:28:01 +0100 Subject: [PATCH 048/213] Prepare 2.14 changelog (#1337) * Prepare 2.14 
changelog
* .rst fixes
---
 docs/release.rst | 28 ++++++++++++++++++----------
 1 file changed, 18 insertions(+), 10 deletions(-)

diff --git a/docs/release.rst b/docs/release.rst
index 905ccd2ebb..0965109935 100644
--- a/docs/release.rst
+++ b/docs/release.rst
@@ -6,28 +6,35 @@ Release notes
    # to document your changes. On releases it will be
    # re-indented so that it does not show up in the notes.
 
-.. _unreleased:
+   .. _unreleased:
+
+   Unreleased
+   ----------
 
-Unreleased
-----------
 .. # .. warning::
    #    Pre-release! Use :command:`pip install --pre zarr` to evaluate this release.
 
+.. _release_2.14.0:
+
+2.14.0
+------
+
 Major changes
 ~~~~~~~~~~~~~
 
 * Improve Zarr V3 support, adding partial store read/write and storage transformers.
-  Add two features of the [v3 spec](https://zarr-specs.readthedocs.io/en/latest/core/v3.0.html):
-  * storage transformers
-  * `get_partial_values` and `set_partial_values`
-  * efficient `get_partial_values` implementation for `FSStoreV3`
-  * sharding storage transformer
+  Add new features from the `v3 spec <https://zarr-specs.readthedocs.io/en/latest/core/v3.0.html>`_:
+  * storage transformers
+  * `get_partial_values` and `set_partial_values`
+  * efficient `get_partial_values` implementation for `FSStoreV3`
+  * sharding storage transformer
   By :user:`Jonathan Striebel `; :issue:`1096`, :issue:`1111`.
 
-* Remove warnings emitted when using N5Store or N5FSStore with a blosc-compressed array.
+* N5 now supports Blosc.
+  Remove warnings emitted when using N5Store or N5FSStore with a blosc-compressed array.
   By :user:`Davis Bennett `; :issue:`1331`.
-
+
 Bug fixes
 ~~~~~~~~~
 
@@ -36,6 +43,7 @@ Bug fixes
 * Allow reading utf-8 encoded json files.
   By :user:`Nathan Zimmerberg ` :issue:`1308`.
 * Ensure contiguous data is given to ``FSStore``. Only copying if needed.
   By :user:`Mads R. B. Kristensen ` :issue:`1285`.
+
 * NestedDirectoryStore.listdir now returns chunk keys with the correct '/' dimension_separator.
   By :user:`Brett Graham ` :issue:`1334`.

From 4e8b84b46937ba14f9cf818065740e95a2a2b554 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Fri, 10 Feb 2023 09:31:02 +0100
Subject: [PATCH 049/213] Bump redis from 4.4.2 to 4.5.1 (#1344)

Bumps [redis](https://github.com/redis/redis-py) from 4.4.2 to 4.5.1.
- [Release notes](https://github.com/redis/redis-py/releases)
- [Changelog](https://github.com/redis/redis-py/blob/master/CHANGES)
- [Commits](https://github.com/redis/redis-py/compare/v4.4.2...v4.5.1)

---
updated-dependencies:
- dependency-name: redis
  dependency-type: direct:development
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 requirements_dev_optional.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements_dev_optional.txt b/requirements_dev_optional.txt
index 0cf6661d1e..07ca6d743d 100644
--- a/requirements_dev_optional.txt
+++ b/requirements_dev_optional.txt
@@ -8,7 +8,7 @@ ipywidgets==8.0.4
 # don't let pyup change pinning for azure-storage-blob, need to pin to older
 # version to get compatibility with azure storage emulator on appveyor (FIXME)
 azure-storage-blob==12.14.1 # pyup: ignore
-redis==4.4.2
+redis==4.5.1
 types-redis
 types-setuptools
 pymongo==4.3.3

From 76fce142174b4b57e4fb0fd8141d7515aae81dcf Mon Sep 17 00:00:00 2001
From: Josh Moore
Date: Sun, 12 Feb 2023 09:26:05 +0100
Subject: [PATCH 050/213] Generate fixture for #1312 if it is missing (#1348)

This is a temporary fix for the larger issue of out-of-tree testing
described in #1347, but this should allow a release of 2.14.1 which
passes on conda.
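
For context, a minimal sketch of the round trip this fixture exercises
(illustrative only, not part of the patch; the on-disk paths mirror the
test below):

    import pathlib
    import zarr

    # Hand-write a v2 group whose .zattrs is utf-8 encoded rather than
    # ascii-escaped, mimicking files produced by other Zarr implementations.
    root = pathlib.Path("fixture/utf8attrs")
    root.mkdir(parents=True, exist_ok=True)
    (root / ".zgroup").write_text('{\n    "zarr_format": 2\n}', encoding="utf-8")
    (root / ".zattrs").write_text('{"foo": "た"}', encoding="utf-8")

    # Reading the attributes back decodes the utf-8 bytes; before the fix
    # in #1312 this step raised UnicodeDecodeError.
    fixture = zarr.open_group("fixture")
    assert fixture["utf8attrs"].attrs.asdict() == {"foo": "た"}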
--- zarr/tests/test_attrs.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/zarr/tests/test_attrs.py b/zarr/tests/test_attrs.py index 27ec8fea8d..a329f463f0 100644 --- a/zarr/tests/test_attrs.py +++ b/zarr/tests/test_attrs.py @@ -1,7 +1,9 @@ import json +import pathlib import pytest +import zarr from zarr._storage.store import meta_root from zarr.attrs import Attributes from zarr.storage import KVStore, DirectoryStore @@ -50,6 +52,16 @@ def test_storage(self, zarr_version): def test_utf8_encoding(self, zarr_version): + project_root = pathlib.Path(zarr.__file__).resolve().parent.parent + fixdir = project_root / "fixture" / "utf8attrs" + if not fixdir.exists(): # pragma: no cover + # store the data - should be one-time operation + fixdir.mkdir() + with (fixdir / ".zattrs").open("w", encoding="utf-8") as f: + f.write('{"foo": "た"}') + with (fixdir / ".zgroup").open("w", encoding="utf-8") as f: + f.write("""{\n "zarr_format": 2\n}""") + # fixture data fixture = group(store=DirectoryStore('fixture')) assert fixture['utf8attrs'].attrs.asdict() == dict(foo='た') From 87c48d80c33ebe92661a05e981cce48b11b5c66f Mon Sep 17 00:00:00 2001 From: Josh Moore Date: Sun, 12 Feb 2023 09:43:35 +0100 Subject: [PATCH 051/213] Fix 2.14.0 redirects (#1346) * Fix 2.14.0 redirects Check for hash in anchormap * Add release notes * Add note about conda-forge release --- docs/_static/custom.js | 4 ++-- docs/release.rst | 14 ++++++++++++++ 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/docs/_static/custom.js b/docs/_static/custom.js index 06b2d019b1..dcb584ecd5 100644 --- a/docs/_static/custom.js +++ b/docs/_static/custom.js @@ -12,7 +12,7 @@ } let hash = window.location.hash.substring(1); - if (hash) { - window.location.replace(anchorMap[hash]); + if (hash && hash in anchorMap) { + window.location.replace(anchorMap[hash]); } })(); diff --git a/docs/release.rst b/docs/release.rst index 0965109935..e7802fecbb 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -15,6 +15,20 @@ Release notes # .. warning:: # Pre-release! Use :command:`pip install --pre zarr` to evaluate this release. +.. _release_2.14.1: + +2.14.1 +------ + +Documentation +~~~~~~~~~~~~~ + +* Fix API links. + By :user:`Josh Moore ` :issue:`1346`. + +* Fix unit tests which prevented the conda-forge release. + By :user:`Josh Moore ` :issue:`1348`. + .. _release_2.14.0: 2.14.0 From c3750302f71ebb0b1506db05815b38c2c097c3de Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 13 Feb 2023 08:40:10 +0100 Subject: [PATCH 052/213] Bump numpy from 1.24.1 to 1.24.2 (#1350) Bumps [numpy](https://github.com/numpy/numpy) from 1.24.1 to 1.24.2. - [Release notes](https://github.com/numpy/numpy/releases) - [Changelog](https://github.com/numpy/numpy/blob/main/doc/RELEASE_WALKTHROUGH.rst) - [Commits](https://github.com/numpy/numpy/compare/v1.24.1...v1.24.2) --- updated-dependencies: - dependency-name: numpy dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- requirements_dev_numpy.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements_dev_numpy.txt b/requirements_dev_numpy.txt index 7d373a254d..e094d4fcd4 100644 --- a/requirements_dev_numpy.txt +++ b/requirements_dev_numpy.txt @@ -1,4 +1,4 @@ # Break this out into a separate file to allow testing against # different versions of numpy. 
This file should pin to the latest # numpy version. -numpy==1.24.1 +numpy==1.24.2 From d7d88158c5f4e61d926675f2af9df66e7848bb68 Mon Sep 17 00:00:00 2001 From: Marwan Zouinkhi Date: Mon, 20 Feb 2023 03:00:12 -0500 Subject: [PATCH 053/213] Fix N5Store dtype wrong behavior (#1340) * create a test case for the bug * fix dtype bug * fix formatting * optimize code by using create function * use at exit delete * optimize import * update n5 test hexdigest * skip dtype decode if no fsspec * add contribution --- docs/release.rst | 3 +++ zarr/n5.py | 1 + zarr/tests/test_core.py | 13 ++++++------- zarr/tests/test_n5.py | 21 +++++++++++++++++++-- 4 files changed, 29 insertions(+), 9 deletions(-) diff --git a/docs/release.rst b/docs/release.rst index e7802fecbb..0098d2e50b 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -61,6 +61,9 @@ Bug fixes * NestedDirectoryStore.listdir now returns chunk keys with the correct '/' dimension_separator. By :user:`Brett Graham ` :issue:`1334`. +* N5Store/N5FSStore dtype returns zarr Stores readable dtype. + By :user:`Marwan Zouinkhi ` :issue:`1339`. + .. _release_2.13.6: 2.13.6 diff --git a/zarr/n5.py b/zarr/n5.py index 4c93ce4acb..1eb6ef2b33 100644 --- a/zarr/n5.py +++ b/zarr/n5.py @@ -689,6 +689,7 @@ def array_metadata_to_zarr(array_metadata: Dict[str, Any], array_metadata['order'] = 'C' array_metadata['filters'] = [] array_metadata['dimension_separator'] = '.' + array_metadata['dtype'] = np.dtype(array_metadata['dtype']).str compressor_config = array_metadata['compressor'] compressor_config = compressor_config_to_zarr(compressor_config) diff --git a/zarr/tests/test_core.py b/zarr/tests/test_core.py index b54fe3ddf0..ba89db3b06 100644 --- a/zarr/tests/test_core.py +++ b/zarr/tests/test_core.py @@ -2034,13 +2034,12 @@ def test_compressors(self): assert np.all(a2[:] == 1) def expected(self): - return [ - '4e9cf910000506455f82a70938a272a3fce932e5', - 'f9d4cbf1402901f63dea7acf764d2546e4b6aa38', - '1d8199f5f7b70d61aa0d29cc375212c3df07d50a', - '874880f91aa6736825584509144afe6b06b0c05c', - 'e2258fedc74752196a8c8383db49e27193c995e2', - ] + return ['8811a77d54caaa1901d5cc4452d946ae433c8d90', + 'd880b007d9779db5f2cdbe13274eb1cbac4a425a', + 'd80eb66d5521744f051e816ab368d8ccfc2e3edf', + '568f9f837e4b682a3819cb122988e2eebeb6572b', + '4fdf4475d786d6694110db5619acd30c80dfc372' + ] @pytest.mark.skipif(have_fsspec is False, reason="needs fsspec") diff --git a/zarr/tests/test_n5.py b/zarr/tests/test_n5.py index a1a0a83e36..8f6d97dd51 100644 --- a/zarr/tests/test_n5.py +++ b/zarr/tests/test_n5.py @@ -1,10 +1,15 @@ - import pytest -from zarr.n5 import N5ChunkWrapper +from zarr.n5 import N5ChunkWrapper, N5FSStore +from zarr.creation import create +from zarr.storage import atexit_rmtree from numcodecs import GZip import numpy as np from typing import Tuple +import json +import atexit + +from zarr.tests.util import have_fsspec def test_make_n5_chunk_wrapper(): @@ -35,3 +40,15 @@ def test_partial_chunk_decode(chunk_shape: Tuple[int, ...]): chunk[subslices] = 1 subchunk = np.ascontiguousarray(chunk[subslices]) assert np.array_equal(codec_wrapped.decode(codec_wrapped.encode(subchunk)), chunk) + + +@pytest.mark.skipif(have_fsspec is False, reason="needs fsspec") +def test_dtype_decode(): + path = 'data/array.n5' + atexit_rmtree(path) + atexit.register(atexit_rmtree, path) + n5_store = N5FSStore(path) + create(100, store=n5_store) + dtype_n5 = json.loads(n5_store[".zarray"])["dtype"] + dtype_zarr = json.loads(create(100).store[".zarray"])["dtype"] + assert dtype_n5 == dtype_zarr From 
5ece3e6971595feec5fce37ce801dd54e961c728 Mon Sep 17 00:00:00 2001 From: Brandur Thorgrimsson <11929039+Swordcat@users.noreply.github.com> Date: Thu, 23 Feb 2023 15:23:23 +0000 Subject: [PATCH 054/213] Ensure `zarr.group` uses writable mode (#1354) * Fix creating a group with fsmap per issue #1353, regression test added * Update release notes --- docs/release.rst | 11 +++++++++++ zarr/hierarchy.py | 2 +- zarr/tests/test_hierarchy.py | 11 +++++++++++ 3 files changed, 23 insertions(+), 1 deletion(-) diff --git a/docs/release.rst b/docs/release.rst index 0098d2e50b..a6c32100ba 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -15,6 +15,17 @@ Release notes # .. warning:: # Pre-release! Use :command:`pip install --pre zarr` to evaluate this release. +.. _release_2.14.2: + +2.14.2 +------ + +Bug fixes +~~~~~~~~~ + +* Ensure ``zarr.group`` uses writeable mode to fix issue with :issue:`1304`. + By :user:`Brandur Thorgrimsson ` :issue:`1354`. + .. _release_2.14.1: 2.14.1 diff --git a/zarr/hierarchy.py b/zarr/hierarchy.py index 0dae921500..18e7ac7863 100644 --- a/zarr/hierarchy.py +++ b/zarr/hierarchy.py @@ -1336,7 +1336,7 @@ def group(store=None, overwrite=False, chunk_store=None, """ # handle polymorphic store arg - store = _normalize_store_arg(store, zarr_version=zarr_version) + store = _normalize_store_arg(store, zarr_version=zarr_version, mode='w') if zarr_version is None: zarr_version = getattr(store, '_store_version', DEFAULT_ZARR_VERSION) diff --git a/zarr/tests/test_hierarchy.py b/zarr/tests/test_hierarchy.py index 7d87b6d404..d0833457fb 100644 --- a/zarr/tests/test_hierarchy.py +++ b/zarr/tests/test_hierarchy.py @@ -1591,6 +1591,17 @@ def test_group(zarr_version): assert store is g.store +@pytest.mark.skipif(have_fsspec is False, reason='needs fsspec') +@pytest.mark.parametrize('zarr_version', _VERSIONS) +def test_group_writeable_mode(zarr_version, tmp_path): + # Regression test for https://github.com/zarr-developers/zarr-python/issues/1353 + import fsspec + + store = fsspec.get_mapper(str(tmp_path)) + zg = group(store=store) + assert zg.store.map == store + + @pytest.mark.parametrize('zarr_version', _VERSIONS) def test_open_group(zarr_version): # test the open_group() convenience function From 7018cf1127b14e9b21a8a0169bd7da95405c4577 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 7 Mar 2023 14:24:00 +0100 Subject: [PATCH 055/213] Bump pytest from 7.2.1 to 7.2.2 (#1361) Bumps [pytest](https://github.com/pytest-dev/pytest) from 7.2.1 to 7.2.2. - [Release notes](https://github.com/pytest-dev/pytest/releases) - [Changelog](https://github.com/pytest-dev/pytest/blob/main/CHANGELOG.rst) - [Commits](https://github.com/pytest-dev/pytest/compare/7.2.1...7.2.2) --- updated-dependencies: - dependency-name: pytest dependency-type: direct:development update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- requirements_dev_minimal.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements_dev_minimal.txt b/requirements_dev_minimal.txt index 34d7d98e7e..1217ee620e 100644 --- a/requirements_dev_minimal.txt +++ b/requirements_dev_minimal.txt @@ -5,4 +5,4 @@ numcodecs==0.11.0 msgpack-python==0.5.6 setuptools-scm==7.1.0 # test requirements -pytest==7.2.1 +pytest==7.2.2 From 55a875d16a7cb9f67dd652a5e921d5fe4bc37ac0 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 7 Mar 2023 15:46:49 +0100 Subject: [PATCH 056/213] Bump fsspec from 2023.1.0 to 2023.3.0 (#1360) * Bump fsspec from 2023.1.0 to 2023.3.0 Bumps [fsspec](https://github.com/fsspec/filesystem_spec) from 2023.1.0 to 2023.3.0. - [Release notes](https://github.com/fsspec/filesystem_spec/releases) - [Commits](https://github.com/fsspec/filesystem_spec/compare/2023.1.0...2023.3.0) --- updated-dependencies: - dependency-name: fsspec dependency-type: direct:development update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] * Update s3fs as well --------- Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Josh Moore --- requirements_dev_optional.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements_dev_optional.txt b/requirements_dev_optional.txt index 07ca6d743d..0599ef05ff 100644 --- a/requirements_dev_optional.txt +++ b/requirements_dev_optional.txt @@ -18,6 +18,6 @@ pytest-cov==4.0.0 pytest-doctestplus==0.12.1 pytest-timeout==2.1.0 h5py==3.8.0 -fsspec==2023.1.0 -s3fs==2023.1.0 +fsspec==2023.3.0 +s3fs==2023.3.0 moto[server]>=4.0.8 From c66b35b8c1f6839ec596f4b7c87bf7b76b0c0818 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 9 Mar 2023 08:52:46 +0100 Subject: [PATCH 057/213] chore: update pre-commit hooks (#1351) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/pre-commit/mirrors-mypy: v0.991 → v1.0.1](https://github.com/pre-commit/mirrors-mypy/compare/v0.991...v1.0.1) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 1f629ccf76..cd1bc44361 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -24,7 +24,7 @@ repos: hooks: - id: check-yaml - repo: https://github.com/pre-commit/mirrors-mypy - rev: v0.991 + rev: v1.0.1 hooks: - id: mypy files: zarr From 0195567038308a3674ae94ae1dceadf136ab34c6 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 9 Mar 2023 08:53:00 +0100 Subject: [PATCH 058/213] Bump azure-storage-blob from 12.14.1 to 12.15.0 (#1357) Bumps [azure-storage-blob](https://github.com/Azure/azure-sdk-for-python) from 12.14.1 to 12.15.0. - [Release notes](https://github.com/Azure/azure-sdk-for-python/releases) - [Commits](https://github.com/Azure/azure-sdk-for-python/compare/azure-storage-blob_12.14.1...azure-storage-blob_12.15.0) --- updated-dependencies: - dependency-name: azure-storage-blob dependency-type: direct:development update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- requirements_dev_optional.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements_dev_optional.txt b/requirements_dev_optional.txt index 0599ef05ff..8a59af3e17 100644 --- a/requirements_dev_optional.txt +++ b/requirements_dev_optional.txt @@ -7,7 +7,7 @@ ipywidgets==8.0.4 # optional library requirements for services # don't let pyup change pinning for azure-storage-blob, need to pin to older # version to get compatibility with azure storage emulator on appveyor (FIXME) -azure-storage-blob==12.14.1 # pyup: ignore +azure-storage-blob==12.15.0 # pyup: ignore redis==4.5.1 types-redis types-setuptools From c77f9cd6fb29bdbc1c03b7b645dac946eed6b577 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 9 Mar 2023 10:10:39 +0100 Subject: [PATCH 059/213] Bump actions/setup-python from 4.3.0 to 4.5.0 (#1318) * Bump actions/setup-python from 4.3.0 to 4.5.0 Bumps [actions/setup-python](https://github.com/actions/setup-python) from 4.3.0 to 4.5.0. - [Release notes](https://github.com/actions/setup-python/releases) - [Commits](https://github.com/actions/setup-python/compare/v4.3.0...v4.5.0) --- updated-dependencies: - dependency-name: actions/setup-python dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] * Get the username * Test `push` too * Revert changes to GHA --------- Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: jakirkham Co-authored-by: Josh Moore --- .github/workflows/releases.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/releases.yml b/.github/workflows/releases.yml index d1479d43e1..ea388b24b6 100644 --- a/.github/workflows/releases.yml +++ b/.github/workflows/releases.yml @@ -16,7 +16,7 @@ jobs: submodules: true fetch-depth: 0 - - uses: actions/setup-python@v4.3.0 + - uses: actions/setup-python@v4.5.0 name: Install Python with: python-version: '3.8' From eacda8dc205c4b1e1785f5cebef0862ef46e69f7 Mon Sep 17 00:00:00 2001 From: Andreas Albert <103571926+AndreasAlbertQC@users.noreply.github.com> Date: Fri, 10 Mar 2023 14:10:44 +0100 Subject: [PATCH 060/213] More extensive orthogonal indexing in get/setitem (#1333) * More extensive orthogonal indexing in get/setitem Added pass-through to orthogonal indexing for the following cases: * index is iterable of integers * index is iterable of length ndim, with each element being a slice, integer, or list. Maximum one list. * Add test cases for indexing with single integer iterable --------- Co-authored-by: Josh Moore --- docs/release.rst | 9 +- docs/tutorial.rst | 7 ++ zarr/core.py | 5 + zarr/indexing.py | 20 ++++ zarr/tests/test_indexing.py | 214 +++++++++++++++++++++++++++++++++--- 5 files changed, 237 insertions(+), 18 deletions(-) diff --git a/docs/release.rst b/docs/release.rst index a6c32100ba..f056f621bf 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -6,15 +6,18 @@ Release notes # to document your changes. On releases it will be # re-indented so that it does not show up in the notes. - .. _unreleased: +.. _unreleased: - Unreleased - ---------- +Unreleased +---------- .. # .. warning:: # Pre-release! Use :command:`pip install --pre zarr` to evaluate this release. +* Implement more extensive fallback of getitem/setitem for orthogonal indexing. 
+ By :user:`Andreas Albert ` :issue:`1029`. + .. _release_2.14.2: 2.14.2 diff --git a/docs/tutorial.rst b/docs/tutorial.rst index 43e42faf6b..0f2e1c7345 100644 --- a/docs/tutorial.rst +++ b/docs/tutorial.rst @@ -634,6 +634,13 @@ For convenience, the orthogonal indexing functionality is also available via the Any combination of integer, slice, 1D integer array and/or 1D Boolean array can be used for orthogonal indexing. +If the index contains at most one iterable, and otherwise contains only slices and integers, +orthogonal indexing is also available directly on the array: + + >>> z = zarr.array(np.arange(15).reshape(3, 5)) + >>> all(z.oindex[[0, 2], :] == z[[0, 2], :]) + True + Indexing fields in structured arrays ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/zarr/core.py b/zarr/core.py index b9db6cb2c8..521de80e17 100644 --- a/zarr/core.py +++ b/zarr/core.py @@ -28,6 +28,7 @@ err_too_many_indices, is_contiguous_selection, is_pure_fancy_indexing, + is_pure_orthogonal_indexing, is_scalar, pop_fields, ) @@ -817,6 +818,8 @@ def __getitem__(self, selection): fields, pure_selection = pop_fields(selection) if is_pure_fancy_indexing(pure_selection, self.ndim): result = self.vindex[selection] + elif is_pure_orthogonal_indexing(pure_selection, self.ndim): + result = self.get_orthogonal_selection(pure_selection, fields=fields) else: result = self.get_basic_selection(pure_selection, fields=fields) return result @@ -1387,6 +1390,8 @@ def __setitem__(self, selection, value): fields, pure_selection = pop_fields(selection) if is_pure_fancy_indexing(pure_selection, self.ndim): self.vindex[selection] = value + elif is_pure_orthogonal_indexing(pure_selection, self.ndim): + self.set_orthogonal_selection(pure_selection, value, fields=fields) else: self.set_basic_selection(pure_selection, value, fields=fields) diff --git a/zarr/indexing.py b/zarr/indexing.py index 268b487105..2f8144fd08 100644 --- a/zarr/indexing.py +++ b/zarr/indexing.py @@ -101,6 +101,26 @@ def is_pure_fancy_indexing(selection, ndim): ) +def is_pure_orthogonal_indexing(selection, ndim): + if not ndim: + return False + + # Case 1: Selection is a single iterable of integers + if is_integer_list(selection) or is_integer_array(selection, ndim=1): + return True + + # Case two: selection contains either zero or one integer iterables. 
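+    # (e.g. ([0, 1], slice(None), 0) has one integer list, whereas
+    # ([0, 1], [2, 3]) has two and is handled as fancy indexing instead).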
+ # All other selection elements are slices or integers + return ( + isinstance(selection, tuple) and len(selection) == ndim and + sum(is_integer_list(elem) or is_integer_array(elem) for elem in selection) <= 1 and + all( + is_integer_list(elem) or is_integer_array(elem) + or isinstance(elem, slice) or isinstance(elem, int) for + elem in selection) + ) + + def normalize_integer_selection(dim_sel, dim_len): # normalize type to int diff --git a/zarr/tests/test_indexing.py b/zarr/tests/test_indexing.py index 5c4c580636..f5f57be010 100644 --- a/zarr/tests/test_indexing.py +++ b/zarr/tests/test_indexing.py @@ -283,8 +283,6 @@ def test_get_basic_selection_2d(): for selection in bad_selections: with pytest.raises(IndexError): z.get_basic_selection(selection) - with pytest.raises(IndexError): - z[selection] # check fallback on fancy indexing fancy_selection = ([0, 1], [0, 1]) np.testing.assert_array_equal(z[fancy_selection], [0, 11]) @@ -317,14 +315,179 @@ def test_fancy_indexing_fallback_on_get_setitem(): ) -def test_fancy_indexing_doesnt_mix_with_slicing(): - z = zarr.zeros((20, 20)) - with pytest.raises(IndexError): - z[[1, 2, 3], :] = 2 - with pytest.raises(IndexError): - np.testing.assert_array_equal( - z[[1, 2, 3], :], 0 +@pytest.mark.parametrize("index,expected_result", + [ + # Single iterable of integers + ( + [0, 1], + [[0, 1, 2], + [3, 4, 5]] + ), + # List first, then slice + ( + ([0, 1], slice(None)), + [[0, 1, 2], + [3, 4, 5]] + ), + # List first, then slice + ( + ([0, 1], slice(1, None)), + [[1, 2], + [4, 5]] + ), + # Slice first, then list + ( + (slice(0, 2), [0, 2]), + [[0, 2], + [3, 5]] + ), + # Slices only + ( + (slice(0, 2), slice(0, 2)), + [[0, 1], + [3, 4]] + ), + # List with repeated index + ( + ([1, 0, 1], slice(1, None)), + [[4, 5], + [1, 2], + [4, 5]] + ), + # 1D indexing + ( + ([1, 0, 1]), + [ + [3, 4, 5], + [0, 1, 2], + [3, 4, 5] + ] + ) + + ]) +def test_orthogonal_indexing_fallback_on_getitem_2d(index, expected_result): + """ + Tests the orthogonal indexing fallback on __getitem__ for a 2D matrix. + + In addition to checking expected behavior, all indexing + is also checked against numpy. + """ + # [0, 1, 2], + # [3, 4, 5], + # [6, 7, 8] + a = np.arange(9).reshape(3, 3) + z = zarr.array(a) + + np.testing.assert_array_equal(z[index], a[index], err_msg="Indexing disagrees with numpy") + np.testing.assert_array_equal(z[index], expected_result) + + +@pytest.mark.parametrize("index,expected_result", + [ + # Single iterable of integers + ( + [0, 1], + [[[0, 1, 2], + [3, 4, 5], + [6, 7, 8]], + [[9, 10, 11], + [12, 13, 14], + [15, 16, 17]]] + ), + # One slice, two integers + ( + (slice(0, 2), 1, 1), + [4, 13] + ), + # One integer, two slices + ( + (slice(0, 2), 1, slice(0, 2)), + [[3, 4], [12, 13]] + ), + # Two slices and a list + ( + (slice(0, 2), [1, 2], slice(0, 2)), + [[[3, 4], [6, 7]], [[12, 13], [15, 16]]] + ), + ]) +def test_orthogonal_indexing_fallback_on_getitem_3d(index, expected_result): + """ + Tests the orthogonal indexing fallback on __getitem__ for a 3D matrix. + + In addition to checking expected behavior, all indexing + is also checked against numpy. 
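+
+    With at most one integer iterable in the index, orthogonal (outer)
+    indexing coincides with NumPy's mixed basic/advanced indexing, which
+    is why the zarr and numpy results can be compared directly here.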
+ """ + # [[[ 0, 1, 2], + # [ 3, 4, 5], + # [ 6, 7, 8]], + + # [[ 9, 10, 11], + # [12, 13, 14], + # [15, 16, 17]], + + # [[18, 19, 20], + # [21, 22, 23], + # [24, 25, 26]]] + a = np.arange(27).reshape(3, 3, 3) + z = zarr.array(a) + + np.testing.assert_array_equal(z[index], a[index], err_msg="Indexing disagrees with numpy") + np.testing.assert_array_equal(z[index], expected_result) + + +@pytest.mark.parametrize( + "index,expected_result", + [ + # Single iterable of integers + ( + [0, 1], + [ + [1, 1, 1], + [1, 1, 1], + [0, 0, 0] + ] + ), + # List and slice combined + ( + ([0, 1], slice(1, 3)), + [[0, 1, 1], + [0, 1, 1], + [0, 0, 0]] + ), + # Index repetition is ignored on setitem + ( + ([0, 1, 1, 1, 1, 1, 1], slice(1, 3)), + [[0, 1, 1], + [0, 1, 1], + [0, 0, 0]] + ), + # Slice with step + ( + ([0, 2], slice(None, None, 2)), + [[1, 0, 1], + [0, 0, 0], + [1, 0, 1]] ) + ] +) +def test_orthogonal_indexing_fallback_on_setitem_2d(index, expected_result): + """ + Tests the orthogonal indexing fallback on __setitem__ for a 3D matrix. + + In addition to checking expected behavior, all indexing + is also checked against numpy. + """ + # Slice + fancy index + a = np.zeros((3, 3)) + z = zarr.array(a) + z[index] = 1 + a[index] = 1 + np.testing.assert_array_equal( + z, expected_result + ) + np.testing.assert_array_equal( + z, a, err_msg="Indexing disagrees with numpy" + ) def test_fancy_indexing_doesnt_mix_with_implicit_slicing(): @@ -335,12 +498,6 @@ def test_fancy_indexing_doesnt_mix_with_implicit_slicing(): np.testing.assert_array_equal( z2[[1, 2, 3], [1, 2, 3]], 0 ) - with pytest.raises(IndexError): - z2[[1, 2, 3]] = 2 - with pytest.raises(IndexError): - np.testing.assert_array_equal( - z2[[1, 2, 3]], 0 - ) with pytest.raises(IndexError): z2[..., [1, 2, 3]] = 2 with pytest.raises(IndexError): @@ -770,6 +927,33 @@ def test_set_orthogonal_selection_3d(): _test_set_orthogonal_selection_3d(v, a, z, ix0, ix1, ix2) +def test_orthogonal_indexing_fallback_on_get_setitem(): + z = zarr.zeros((20, 20)) + z[[1, 2, 3], [1, 2, 3]] = 1 + np.testing.assert_array_equal( + z[:4, :4], + [ + [0, 0, 0, 0], + [0, 1, 0, 0], + [0, 0, 1, 0], + [0, 0, 0, 1], + ], + ) + np.testing.assert_array_equal( + z[[1, 2, 3], [1, 2, 3]], 1 + ) + # test broadcasting + np.testing.assert_array_equal( + z[1, [1, 2, 3]], [1, 0, 0] + ) + # test 1D fancy indexing + z2 = zarr.zeros(5) + z2[[1, 2, 3]] = 1 + np.testing.assert_array_equal( + z2, [0, 1, 1, 1, 0] + ) + + def _test_get_coordinate_selection(a, z, selection): expect = a[selection] actual = z.get_coordinate_selection(selection) From 6e63fe93abdbf984093a83a00a673405e259e023 Mon Sep 17 00:00:00 2001 From: Josh Moore Date: Fri, 10 Mar 2023 16:46:10 +0100 Subject: [PATCH 061/213] Don't use relative fixture path (#1364) see: #1312 see: #1347 see: #1348 --- zarr/tests/test_attrs.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/zarr/tests/test_attrs.py b/zarr/tests/test_attrs.py index a329f463f0..d741c17837 100644 --- a/zarr/tests/test_attrs.py +++ b/zarr/tests/test_attrs.py @@ -53,17 +53,18 @@ def test_storage(self, zarr_version): def test_utf8_encoding(self, zarr_version): project_root = pathlib.Path(zarr.__file__).resolve().parent.parent - fixdir = project_root / "fixture" / "utf8attrs" - if not fixdir.exists(): # pragma: no cover + fixdir = project_root / "fixture" + testdir = fixdir / "utf8attrs" + if not testdir.exists(): # pragma: no cover # store the data - should be one-time operation - fixdir.mkdir() - with (fixdir / ".zattrs").open("w", 
encoding="utf-8") as f: + testdir.mkdir() + with (testdir / ".zattrs").open("w", encoding="utf-8") as f: f.write('{"foo": "た"}') - with (fixdir / ".zgroup").open("w", encoding="utf-8") as f: + with (testdir / ".zgroup").open("w", encoding="utf-8") as f: f.write("""{\n "zarr_format": 2\n}""") # fixture data - fixture = group(store=DirectoryStore('fixture')) + fixture = group(store=DirectoryStore(str(fixdir))) assert fixture['utf8attrs'].attrs.asdict() == dict(foo='た') def test_get_set_del_contains(self, zarr_version): From d17d8d9b03924d9ec0baa296b6d9f8aa5f935341 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 13 Mar 2023 12:06:47 +0100 Subject: [PATCH 062/213] Bump pypa/gh-action-pypi-publish from 1.6.4 to 1.7.1 (#1365) Bumps [pypa/gh-action-pypi-publish](https://github.com/pypa/gh-action-pypi-publish) from 1.6.4 to 1.7.1. - [Release notes](https://github.com/pypa/gh-action-pypi-publish/releases) - [Commits](https://github.com/pypa/gh-action-pypi-publish/compare/v1.6.4...v1.7.1) --- updated-dependencies: - dependency-name: pypa/gh-action-pypi-publish dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/releases.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/releases.yml b/.github/workflows/releases.yml index ea388b24b6..4d3b03ed25 100644 --- a/.github/workflows/releases.yml +++ b/.github/workflows/releases.yml @@ -64,7 +64,7 @@ jobs: with: name: releases path: dist - - uses: pypa/gh-action-pypi-publish@v1.6.4 + - uses: pypa/gh-action-pypi-publish@v1.7.1 with: user: __token__ password: ${{ secrets.pypi_password }} From f7ef424a65a7274e4aa0dfbb75795143db609857 Mon Sep 17 00:00:00 2001 From: Josh Moore Date: Thu, 16 Mar 2023 17:18:58 +0100 Subject: [PATCH 063/213] One more fix for missing directories (#1367) --- zarr/tests/test_attrs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zarr/tests/test_attrs.py b/zarr/tests/test_attrs.py index d741c17837..d6151b4f29 100644 --- a/zarr/tests/test_attrs.py +++ b/zarr/tests/test_attrs.py @@ -57,7 +57,7 @@ def test_utf8_encoding(self, zarr_version): testdir = fixdir / "utf8attrs" if not testdir.exists(): # pragma: no cover # store the data - should be one-time operation - testdir.mkdir() + testdir.mkdir(parents=True, exist_ok=True) with (testdir / ".zattrs").open("w", encoding="utf-8") as f: f.write('{"foo": "た"}') with (testdir / ".zgroup").open("w", encoding="utf-8") as f: From 2ff887548496855706c69a3c5983b00f17025af6 Mon Sep 17 00:00:00 2001 From: Sanket Verma Date: Fri, 17 Mar 2023 17:25:06 +0530 Subject: [PATCH 064/213] Add API reference for V3 Implementation in the docs (#1345) * Add API reference for V3 Implementation in the docs * Minor fix * Minor fix * Minor indentation fix * Update docs/api/v3.rst Co-authored-by: Jonathan Striebel * Update docs/api/v3.rst Co-authored-by: Jonathan Striebel * Update docs/api/v3.rst Co-authored-by: Jonathan Striebel * Fix broken links --------- Co-authored-by: Jonathan Striebel --- docs/api.rst | 1 + docs/api/v3.rst | 77 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 78 insertions(+) create mode 100644 docs/api/v3.rst diff --git a/docs/api.rst b/docs/api.rst index 2b6e7ea516..e200dd908d 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -13,6 +13,7 @@ API reference api/codecs api/attrs api/sync + api/v3 Indices 
and tables ------------------ diff --git a/docs/api/v3.rst b/docs/api/v3.rst new file mode 100644 index 0000000000..7665b2ddd1 --- /dev/null +++ b/docs/api/v3.rst @@ -0,0 +1,77 @@ +V3 Specification Implementation(``zarr._storage.v3``) +===================================================== + +This module contains the implementation of the `Zarr V3 Specification `_. + +.. warning:: + Since Zarr Python 2.12 release, this module provides experimental infrastructure for reading and + writing the upcoming V3 spec of the Zarr format. Users wishing to prepare for the migration can set + the environment variable ``ZARR_V3_EXPERIMENTAL_API=1`` to begin experimenting, however data + written with this API should be expected to become stale, as the implementation will still change. + +The new ``zarr._store.v3`` package has the necessary classes and functions for evaluating Zarr V3. +Since the design is not finalised, the classes and functions are not automatically imported into +the regular Zarr namespace. + +Code snippet for creating Zarr V3 arrays:: + + >>> import zarr + >>> z = zarr.create((10000, 10000), + >>> chunks=(100, 100), + >>> dtype='f8', + >>> compressor='default', + >>> path='path-where-you-want-zarr-v3-array', + >>> zarr_version=3) + +Further, you can use `z.info` to see details about the array you just created:: + + >>> z.info + Name : path-where-you-want-zarr-v3-array + Type : zarr.core.Array + Data type : float64 + Shape : (10000, 10000) + Chunk shape : (100, 100) + Order : C + Read-only : False + Compressor : Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0) + Store type : zarr._storage.v3.KVStoreV3 + No. bytes : 800000000 (762.9M) + No. bytes stored : 557 + Storage ratio : 1436265.7 + Chunks initialized : 0/10000 + +You can also check ``Store type`` here (which indicates Zarr V3). + +.. module:: zarr._storage.v3 + +.. autoclass:: RmdirV3 +.. autoclass:: KVStoreV3 +.. autoclass:: FSStoreV3 +.. autoclass:: MemoryStoreV3 +.. autoclass:: DirectoryStoreV3 +.. autoclass:: ZipStoreV3 +.. autoclass:: RedisStoreV3 +.. autoclass:: MongoDBStoreV3 +.. autoclass:: DBMStoreV3 +.. autoclass:: LMDBStoreV3 +.. autoclass:: SQLiteStoreV3 +.. autoclass:: LRUStoreCacheV3 +.. autoclass:: ConsolidatedMetadataStoreV3 + +In v3 `storage transformers `_ +can be set via ``zarr.create(…, storage_transformers=[…])``. +The experimental sharding storage transformer can be tested by setting +the environment variable ``ZARR_V3_SHARDING=1``. Data written with this flag +enabled should be expected to become stale until +`ZEP 2 `_ is approved +and fully implemented. + +.. module:: zarr._storage.v3_storage_transformers + +.. autoclass:: ShardingStorageTransformer + +The abstract base class for storage transformers is + +.. module:: zarr._storage.store + +.. autoclass:: StorageTransformer From fe8ef26ab0bf644c52c116403916bc14aa0c17fc Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 20 Mar 2023 15:05:49 +0100 Subject: [PATCH 065/213] Bump pypa/gh-action-pypi-publish from 1.7.1 to 1.8.1 (#1369) Bumps [pypa/gh-action-pypi-publish](https://github.com/pypa/gh-action-pypi-publish) from 1.7.1 to 1.8.1. - [Release notes](https://github.com/pypa/gh-action-pypi-publish/releases) - [Commits](https://github.com/pypa/gh-action-pypi-publish/compare/v1.7.1...v1.8.1) --- updated-dependencies: - dependency-name: pypa/gh-action-pypi-publish dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/releases.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/releases.yml b/.github/workflows/releases.yml index 4d3b03ed25..2f561dc512 100644 --- a/.github/workflows/releases.yml +++ b/.github/workflows/releases.yml @@ -64,7 +64,7 @@ jobs: with: name: releases path: dist - - uses: pypa/gh-action-pypi-publish@v1.7.1 + - uses: pypa/gh-action-pypi-publish@v1.8.1 with: user: __token__ password: ${{ secrets.pypi_password }} From 7c0113a489a75278724a7581fa91136d1b1f0251 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 22 Mar 2023 22:55:52 -0700 Subject: [PATCH 066/213] Bump redis from 4.5.1 to 4.5.3 (#1373) --- requirements_dev_optional.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements_dev_optional.txt b/requirements_dev_optional.txt index 8a59af3e17..a5081b3c57 100644 --- a/requirements_dev_optional.txt +++ b/requirements_dev_optional.txt @@ -8,7 +8,7 @@ ipywidgets==8.0.4 # don't let pyup change pinning for azure-storage-blob, need to pin to older # version to get compatibility with azure storage emulator on appveyor (FIXME) azure-storage-blob==12.15.0 # pyup: ignore -redis==4.5.1 +redis==4.5.3 types-redis types-setuptools pymongo==4.3.3 From 0a6dcee998106f9468846480d8560a3bbf31a210 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 29 Mar 2023 02:05:08 -0700 Subject: [PATCH 067/213] Bump ipywidgets from 8.0.4 to 8.0.6 (#1379) Bumps [ipywidgets](https://github.com/jupyter-widgets/ipywidgets) from 8.0.4 to 8.0.6. - [Release notes](https://github.com/jupyter-widgets/ipywidgets/releases) - [Commits](https://github.com/jupyter-widgets/ipywidgets/compare/8.0.4...8.0.6) --- updated-dependencies: - dependency-name: ipywidgets dependency-type: direct:development update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- requirements_dev_optional.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements_dev_optional.txt b/requirements_dev_optional.txt index a5081b3c57..3ee7cbe5d3 100644 --- a/requirements_dev_optional.txt +++ b/requirements_dev_optional.txt @@ -3,7 +3,7 @@ lmdb==1.4.0; sys_platform != 'win32' # optional library requirements for Jupyter ipytree==0.2.2 -ipywidgets==8.0.4 +ipywidgets==8.0.6 # optional library requirements for services # don't let pyup change pinning for azure-storage-blob, need to pin to older # version to get compatibility with azure storage emulator on appveyor (FIXME) From 4b0705c6d85cdfc20a33cbb39a90c8c9d11006c1 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 29 Mar 2023 02:05:37 -0700 Subject: [PATCH 068/213] Bump pypa/gh-action-pypi-publish from 1.8.1 to 1.8.3 (#1376) Bumps [pypa/gh-action-pypi-publish](https://github.com/pypa/gh-action-pypi-publish) from 1.8.1 to 1.8.3. - [Release notes](https://github.com/pypa/gh-action-pypi-publish/releases) - [Commits](https://github.com/pypa/gh-action-pypi-publish/compare/v1.8.1...v1.8.3) --- updated-dependencies: - dependency-name: pypa/gh-action-pypi-publish dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/releases.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/releases.yml b/.github/workflows/releases.yml index 2f561dc512..be7a3b19cc 100644 --- a/.github/workflows/releases.yml +++ b/.github/workflows/releases.yml @@ -64,7 +64,7 @@ jobs: with: name: releases path: dist - - uses: pypa/gh-action-pypi-publish@v1.8.1 + - uses: pypa/gh-action-pypi-publish@v1.8.3 with: user: __token__ password: ${{ secrets.pypi_password }} From b14f15ff300aeb78973dd9468558527c14d8db69 Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Thu, 13 Apr 2023 18:26:45 +0200 Subject: [PATCH 069/213] Getitems: support `meta_array` (#1131) * Use _chunk_getitems() always * Implement getitems() always * FSStore.getitems(): accept meta_array and on_error * getitems(): handle on_error="omit" * Removed the `on_error argument` * remove redundant check * getitems(): use Sequence instead of Iterable * Typo Co-authored-by: Josh Moore * Introduce a contexts argument * CountingDict: impl. getitems() * added test_getitems() * Introduce Context * doc * support the new get_partial_values() method * Resolve conflict with get_partial_values() * make contexts keyword-only * Introduce ConstantMap * use typing.Mapping * test_constant_map --------- Co-authored-by: jakirkham Co-authored-by: Josh Moore --- zarr/_storage/store.py | 28 +++++++++++ zarr/context.py | 19 ++++++++ zarr/core.py | 92 +++++++++++------------------------ zarr/storage.py | 8 ++- zarr/tests/test_storage.py | 35 ++++++++++++- zarr/tests/test_storage_v3.py | 2 + zarr/tests/test_util.py | 15 +++++- zarr/tests/util.py | 9 ++++ zarr/util.py | 52 +++++++++++++++++++- 9 files changed, 190 insertions(+), 70 deletions(-) create mode 100644 zarr/context.py diff --git a/zarr/_storage/store.py b/zarr/_storage/store.py index 4d813b8e05..0594dc22de 100644 --- a/zarr/_storage/store.py +++ b/zarr/_storage/store.py @@ -8,6 +8,7 @@ from zarr.meta import Metadata2, Metadata3 from zarr.util import normalize_storage_path +from zarr.context import Context # v2 store keys array_meta_key = '.zarray' @@ -131,6 +132,33 @@ def _ensure_store(store: Any): f"wrap it in Zarr.storage.KVStore. Got {store}" ) + def getitems( + self, keys: Sequence[str], *, contexts: Mapping[str, Context] + ) -> Mapping[str, Any]: + """Retrieve data from multiple keys. + + Parameters + ---------- + keys : Iterable[str] + The keys to retrieve + contexts: Mapping[str, Context] + A mapping of keys to their context. Each context is a mapping of store + specific information. E.g. a context could be a dict telling the store + the preferred output array type: `{"meta_array": cupy.empty(())}` + + Returns + ------- + Mapping + A collection mapping the input keys to their results. + + Notes + ----- + This default implementation uses __getitem__() to read each key sequentially and + ignores contexts. Overwrite this method to implement concurrent reads of multiple + keys and/or to utilize the contexts. + """ + return {k: self[k] for k in keys if k in self} + class Store(BaseStore): """Abstract store class used by implementations following the Zarr v2 spec. 
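As a minimal sketch (not part of the patch itself) of the hook introduced above, a store may override ``getitems()`` to act on the per-key ``Context`` hints. The class name and the ``seen_meta_arrays`` attribute below are hypothetical; the fallback read mirrors the sequential default shown in the diff::

    from typing import Any, Mapping, Sequence

    from zarr.context import Context
    from zarr.storage import KVStore

    class InstrumentedStore(KVStore):
        """Hypothetical KVStore subclass that inspects per-key contexts."""

        def getitems(
            self, keys: Sequence[str], *, contexts: Mapping[str, Context]
        ) -> Mapping[str, Any]:
            # Record the preferred output array type hinted for each key; a
            # real store could use it to fetch straight into e.g. GPU memory,
            # or retrieve the requested keys concurrently.
            self.seen_meta_arrays = {
                key: ctx.get("meta_array") for key, ctx in contexts.items()
            }
            # Fall back to the sequential default from BaseStore.getitems().
            return {key: self[key] for key in keys if key in self}

When an array is created with a non-NumPy ``meta_array``, ``Array._chunk_getitems()`` builds the ``contexts`` argument as a ``ConstantMap`` that maps every requested chunk key to the same ``Context``, which is what ``test_getitems_contexts`` in this patch asserts.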
diff --git a/zarr/context.py b/zarr/context.py new file mode 100644 index 0000000000..83fbaafa9b --- /dev/null +++ b/zarr/context.py @@ -0,0 +1,19 @@ + +from typing import TypedDict + +from numcodecs.compat import NDArrayLike + + +class Context(TypedDict, total=False): + """ A context for component specific information + + All keys are optional. Any component reading the context must provide + a default implementation in the case a key cannot be found. + + Items + ----- + meta_array : array-like, optional + An array-like instance to use for determining the preferred output + array type. + """ + meta_array: NDArrayLike diff --git a/zarr/core.py b/zarr/core.py index 521de80e17..5537733b4b 100644 --- a/zarr/core.py +++ b/zarr/core.py @@ -13,6 +13,7 @@ from zarr._storage.store import _prefix_to_attrs_key, assert_zarr_v3_api_available from zarr.attrs import Attributes from zarr.codecs import AsType, get_codec +from zarr.context import Context from zarr.errors import ArrayNotFoundError, ReadOnlyError, ArrayIndexError from zarr.indexing import ( BasicIndexer, @@ -41,6 +42,7 @@ normalize_store_arg, ) from zarr.util import ( + ConstantMap, all_equal, InfoReporter, check_array_shape, @@ -1275,24 +1277,14 @@ def _get_selection(self, indexer, out=None, fields=None): check_array_shape('out', out, out_shape) # iterate over chunks - if ( - not hasattr(self.chunk_store, "getitems") and not ( - hasattr(self.chunk_store, "get_partial_values") and - self.chunk_store.supports_efficient_get_partial_values - ) - ) or any(map(lambda x: x == 0, self.shape)): - # sequentially get one key at a time from storage - for chunk_coords, chunk_selection, out_selection in indexer: - # load chunk selection into output array - self._chunk_getitem(chunk_coords, chunk_selection, out, out_selection, - drop_axes=indexer.drop_axes, fields=fields) - else: + if math.prod(out_shape) > 0: # allow storage to get multiple items at once lchunk_coords, lchunk_selection, lout_selection = zip(*indexer) - self._chunk_getitems(lchunk_coords, lchunk_selection, out, lout_selection, - drop_axes=indexer.drop_axes, fields=fields) - + self._chunk_getitems( + lchunk_coords, lchunk_selection, out, lout_selection, + drop_axes=indexer.drop_axes, fields=fields + ) if out.shape: return out else: @@ -1963,68 +1955,36 @@ def _process_chunk( # store selected data in output out[out_selection] = tmp - def _chunk_getitem(self, chunk_coords, chunk_selection, out, out_selection, - drop_axes=None, fields=None): - """Obtain part or whole of a chunk. + def _chunk_getitems(self, lchunk_coords, lchunk_selection, out, lout_selection, + drop_axes=None, fields=None): + """Obtain part or whole of chunks. Parameters ---------- - chunk_coords : tuple of ints - Indices of the chunk. - chunk_selection : selection - Location of region within the chunk to extract. + chunk_coords : list of tuple of ints + Indices of the chunks. + chunk_selection : list of selections + Location of region within the chunks to extract. out : ndarray Array to store result in. - out_selection : selection - Location of region within output array to store results in. + out_selection : list of selections + Location of regions within output array to store results in. drop_axes : tuple of ints Axes to squeeze out of the chunk. 
fields TODO - """ - out_is_ndarray = True - try: - out = ensure_ndarray_like(out) - except TypeError: - out_is_ndarray = False - - assert len(chunk_coords) == len(self._cdata_shape) - - # obtain key for chunk - ckey = self._chunk_key(chunk_coords) - try: - # obtain compressed data for chunk - cdata = self.chunk_store[ckey] - - except KeyError: - # chunk not initialized - if self._fill_value is not None: - if fields: - fill_value = self._fill_value[fields] - else: - fill_value = self._fill_value - out[out_selection] = fill_value - - else: - self._process_chunk(out, cdata, chunk_selection, drop_axes, - out_is_ndarray, fields, out_selection) - - def _chunk_getitems(self, lchunk_coords, lchunk_selection, out, lout_selection, - drop_axes=None, fields=None): - """As _chunk_getitem, but for lists of chunks - - This gets called where the storage supports ``getitems``, so that - it can decide how to fetch the keys, allowing concurrency. - """ out_is_ndarray = True try: out = ensure_ndarray_like(out) except TypeError: # pragma: no cover out_is_ndarray = False + # Keys to retrieve ckeys = [self._chunk_key(ch) for ch in lchunk_coords] + + # Check if we can do a partial read if ( self._partial_decompress and self._compressor @@ -2056,13 +2016,17 @@ def _chunk_getitems(self, lchunk_coords, lchunk_selection, out, lout_selection, for ckey in ckeys if ckey in self.chunk_store } + elif hasattr(self.chunk_store, "get_partial_values"): + partial_read_decode = False + values = self.chunk_store.get_partial_values([(ckey, (0, None)) for ckey in ckeys]) + cdatas = {key: value for key, value in zip(ckeys, values) if value is not None} else: partial_read_decode = False - if not hasattr(self.chunk_store, "getitems"): - values = self.chunk_store.get_partial_values([(ckey, (0, None)) for ckey in ckeys]) - cdatas = {key: value for key, value in zip(ckeys, values) if value is not None} - else: - cdatas = self.chunk_store.getitems(ckeys, on_error="omit") + contexts = {} + if not isinstance(self._meta_array, np.ndarray): + contexts = ConstantMap(ckeys, constant=Context(meta_array=self._meta_array)) + cdatas = self.chunk_store.getitems(ckeys, contexts=contexts) + for ckey, chunk_select, out_select in zip(ckeys, lchunk_selection, lout_selection): if ckey in cdatas: self._process_chunk( diff --git a/zarr/storage.py b/zarr/storage.py index fae9530716..e6c3f62faf 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -31,7 +31,7 @@ from os import scandir from pickle import PicklingError from threading import Lock, RLock -from typing import Optional, Union, List, Tuple, Dict, Any +from typing import Sequence, Mapping, Optional, Union, List, Tuple, Dict, Any import uuid import time @@ -42,6 +42,7 @@ ensure_contiguous_ndarray_like ) from numcodecs.registry import codec_registry +from zarr.context import Context from zarr.errors import ( MetadataError, @@ -1380,7 +1381,10 @@ def _normalize_key(self, key): return key.lower() if self.normalize_keys else key - def getitems(self, keys, **kwargs): + def getitems( + self, keys: Sequence[str], *, contexts: Mapping[str, Context] + ) -> Mapping[str, Any]: + keys_transformed = [self._normalize_key(key) for key in keys] results = self.map.getitems(keys_transformed, on_error="omit") # The function calling this method may not recognize the transformed keys diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index 0b21dfbd88..f157e2a3d2 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -20,6 +20,7 @@ import zarr from zarr._storage.store import 
_get_hierarchy_metadata from zarr.codecs import BZ2, AsType, Blosc, Zlib +from zarr.context import Context from zarr.convenience import consolidate_metadata from zarr.errors import ContainsArrayError, ContainsGroupError, MetadataError from zarr.hierarchy import group @@ -37,7 +38,7 @@ from zarr.storage import FSStore, rename, listdir from zarr._storage.v3 import KVStoreV3 from zarr.tests.util import CountingDict, have_fsspec, skip_test_env_var, abs_container, mktemp -from zarr.util import json_dumps +from zarr.util import ConstantMap, json_dumps @contextmanager @@ -2584,3 +2585,35 @@ def test_meta_prefix_6853(): fixtures = group(store=DirectoryStore(str(fixture))) assert list(fixtures.arrays()) + + +def test_getitems_contexts(): + + class MyStore(CountingDict): + def __init__(self): + super().__init__() + self.last_contexts = None + + def getitems(self, keys, *, contexts): + self.last_contexts = contexts + return super().getitems(keys, contexts=contexts) + + store = MyStore() + z = zarr.create(shape=(10,), chunks=1, store=store) + + # By default, not contexts are given to the store's getitems() + z[0] + assert len(store.last_contexts) == 0 + + # Setting a non-default meta_array, will create contexts for the store's getitems() + z._meta_array = "my_meta_array" + z[0] + assert store.last_contexts == {'0': {'meta_array': 'my_meta_array'}} + assert isinstance(store.last_contexts, ConstantMap) + # Accseeing different chunks should trigger different key request + z[1] + assert store.last_contexts == {'1': {'meta_array': 'my_meta_array'}} + assert isinstance(store.last_contexts, ConstantMap) + z[2:4] + assert store.last_contexts == ConstantMap(['2', '3'], Context({'meta_array': 'my_meta_array'})) + assert isinstance(store.last_contexts, ConstantMap) diff --git a/zarr/tests/test_storage_v3.py b/zarr/tests/test_storage_v3.py index cc031f0db4..418f7d506b 100644 --- a/zarr/tests/test_storage_v3.py +++ b/zarr/tests/test_storage_v3.py @@ -666,6 +666,8 @@ def _get_public_and_dunder_methods(some_class): def test_storage_transformer_interface(): store_v3_methods = _get_public_and_dunder_methods(StoreV3) store_v3_methods.discard("__init__") + # Note, getitems() isn't mandatory when get_partial_values() is available + store_v3_methods.discard("getitems") storage_transformer_methods = _get_public_and_dunder_methods(StorageTransformer) storage_transformer_methods.discard("__init__") storage_transformer_methods.discard("get_config") diff --git a/zarr/tests/test_util.py b/zarr/tests/test_util.py index e9e1786abe..0a717b8f28 100644 --- a/zarr/tests/test_util.py +++ b/zarr/tests/test_util.py @@ -5,7 +5,7 @@ import pytest from zarr.core import Array -from zarr.util import (all_equal, flatten, guess_chunks, human_readable_size, +from zarr.util import (ConstantMap, all_equal, flatten, guess_chunks, human_readable_size, info_html_report, info_text_report, is_total_slice, json_dumps, normalize_chunks, normalize_dimension_separator, @@ -248,3 +248,16 @@ def test_json_dumps_numpy_dtype(): # Check that we raise the error of the superclass for unsupported object with pytest.raises(TypeError): json_dumps(Array) + + +def test_constant_map(): + val = object() + m = ConstantMap(keys=[1, 2], constant=val) + assert len(m) == 2 + assert m[1] is val + assert m[2] is val + assert 1 in m + assert 0 not in m + with pytest.raises(KeyError): + m[0] + assert repr(m) == repr({1: val, 2: val}) diff --git a/zarr/tests/util.py b/zarr/tests/util.py index faa2f35d25..19ac8c0bfa 100644 --- a/zarr/tests/util.py +++ b/zarr/tests/util.py @@ -1,6 
+1,8 @@ import collections import os import tempfile +from typing import Any, Mapping, Sequence +from zarr.context import Context from zarr.storage import Store from zarr._storage.v3 import StoreV3 @@ -42,6 +44,13 @@ def __delitem__(self, key): self.counter['__delitem__', key] += 1 del self.wrapped[key] + def getitems( + self, keys: Sequence[str], *, contexts: Mapping[str, Context] + ) -> Mapping[str, Any]: + for key in keys: + self.counter['__getitem__', key] += 1 + return {k: self.wrapped[k] for k in keys if k in self.wrapped} + class CountingDictV3(CountingDict, StoreV3): pass diff --git a/zarr/util.py b/zarr/util.py index be5f174aab..68a238fbe4 100644 --- a/zarr/util.py +++ b/zarr/util.py @@ -5,12 +5,22 @@ from textwrap import TextWrapper import mmap import time -from typing import Any, Callable, Dict, Optional, Tuple, Union +from typing import ( + Any, + Callable, + Dict, + Iterator, + Mapping, + Optional, + Tuple, + TypeVar, + Union, + Iterable +) import numpy as np from asciitree import BoxStyle, LeftAligned from asciitree.traversal import Traversal -from collections.abc import Iterable from numcodecs.compat import ( ensure_text, ensure_ndarray_like, @@ -21,6 +31,9 @@ from numcodecs.registry import codec_registry from numcodecs.blosc import cbuffer_sizes, cbuffer_metainfo +KeyType = TypeVar('KeyType') +ValueType = TypeVar('ValueType') + def flatten(arg: Iterable) -> Iterable: for element in arg: @@ -745,3 +758,38 @@ def ensure_contiguous_ndarray_or_bytes(buf) -> Union[NDArrayLike, bytes]: except TypeError: # An error is raised if `buf` couldn't be zero-copy converted return ensure_bytes(buf) + + +class ConstantMap(Mapping[KeyType, ValueType]): + """A read-only map that maps all keys to the same constant value + + Useful if you want to call `getitems()` with the same context for all keys. + + Parameters + ---------- + keys + The keys of the map. Will be copied to a frozenset if it isn't already. + constant + The constant that all keys are mapping to. + """ + + def __init__(self, keys: Iterable[KeyType], constant: ValueType) -> None: + self._keys = keys if isinstance(keys, frozenset) else frozenset(keys) + self._constant = constant + + def __getitem__(self, key: KeyType) -> ValueType: + if key not in self._keys: + raise KeyError(repr(key)) + return self._constant + + def __iter__(self) -> Iterator[KeyType]: + return iter(self._keys) + + def __len__(self) -> int: + return len(self._keys) + + def __contains__(self, key: object) -> bool: + return key in self._keys + + def __repr__(self) -> str: + return repr({k: v for k, v in self.items()}) From 80fc1fe57062e0b6be9f2712d88373aaae1c6b18 Mon Sep 17 00:00:00 2001 From: Sanket Verma Date: Thu, 13 Apr 2023 22:02:58 +0530 Subject: [PATCH 070/213] Update release.rst for 2.15.0 (#1378) --- docs/release.rst | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/docs/release.rst b/docs/release.rst index f056f621bf..b79d0a5456 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -15,9 +15,29 @@ Unreleased # .. warning:: # Pre-release! Use :command:`pip install --pre zarr` to evaluate this release. +.. _release_2.15.0: + +2.15.0 +------ + +Enhancements +~~~~~~~~~~~~ + * Implement more extensive fallback of getitem/setitem for orthogonal indexing. By :user:`Andreas Albert ` :issue:`1029`. +Documentation +~~~~~~~~~~~~~ + +* Add API reference for V3 Implementation in the docs. + By :user:`Sanket Verma ` :issue:`1345`. + +Bug fixes +~~~~~~~~~ + +* Fix the conda-forge error. Read :issue:`1347` for detailed info. 
+ By :user:`Josh Moore ` :issue:`1364` and :issue:`1367`. + .. _release_2.14.2: 2.14.2 From a66f40bb7f3013bec4c4a768df82ae2e2652c720 Mon Sep 17 00:00:00 2001 From: jakirkham Date: Thu, 13 Apr 2023 09:49:33 -0700 Subject: [PATCH 071/213] Add release note for #1131 --- docs/release.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/release.rst b/docs/release.rst index b79d0a5456..01c1a2f895 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -26,6 +26,9 @@ Enhancements * Implement more extensive fallback of getitem/setitem for orthogonal indexing. By :user:`Andreas Albert ` :issue:`1029`. +* Getitems supports ``meta_array``. + By :user: 'Mads R. B. Kristensen ' :issue:`1131`. + Documentation ~~~~~~~~~~~~~ From e1e556108e5cac6edd8a5ea511f5c15b3ac12363 Mon Sep 17 00:00:00 2001 From: jakirkham Date: Thu, 13 Apr 2023 13:52:08 -0700 Subject: [PATCH 072/213] Remove `codecov (#1391) --- .github/workflows/python-package.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index cee2ca7aef..fb410762be 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -58,7 +58,7 @@ jobs: run: | conda activate zarr-env python -m pip install --upgrade pip - python -m pip install -U pip setuptools wheel codecov line_profiler + python -m pip install -U pip setuptools wheel line_profiler python -m pip install -rrequirements_dev_minimal.txt numpy${{matrix.numpy_version}} -rrequirements_dev_optional.txt pymongo redis python -m pip install . python -m pip freeze From edd8a680b51f41c831bc223ce3178bc295817fcb Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Thu, 13 Apr 2023 14:23:49 -0700 Subject: [PATCH 073/213] fix for readonly error in _normalize_store_arg_v3 (#1383) * fix for readonly error in _normalize_store_arg_v3 * add test * add release note --------- Co-authored-by: Ryan Abernathey --- .pre-commit-config.yaml | 2 +- docs/release.rst | 3 +++ zarr/_storage/v3.py | 10 ++++------ zarr/tests/test_storage_v3.py | 10 ++++++++++ 4 files changed, 18 insertions(+), 7 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index cd1bc44361..a420662b5b 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -18,7 +18,7 @@ repos: rev: v2.2.2 hooks: - id: codespell - args: ["-L", "ba,ihs,kake,nd,noe,nwo,te", "-S", "fixture"] + args: ["-L", "ba,ihs,kake,nd,noe,nwo,te,fo", "-S", "fixture"] - repo: https://github.com/pre-commit/pre-commit-hooks rev: v4.4.0 hooks: diff --git a/docs/release.rst b/docs/release.rst index 01c1a2f895..3105e2c67f 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -41,6 +41,9 @@ Bug fixes * Fix the conda-forge error. Read :issue:`1347` for detailed info. By :user:`Josh Moore ` :issue:`1364` and :issue:`1367`. +* Fix ``ReadOnlyError`` when opening V3 store via fsspec reference file system. + By :user:`Joe Hamman ` :issue:`1383`. + .. 
_release_2.14.2: 2.14.2 diff --git a/zarr/_storage/v3.py b/zarr/_storage/v3.py index 5f8964fb5d..094deed02e 100644 --- a/zarr/_storage/v3.py +++ b/zarr/_storage/v3.py @@ -618,12 +618,10 @@ def _normalize_store_arg_v3(store: Any, storage_options=None, mode="r") -> BaseS # return N5StoreV3(store) else: store = DirectoryStoreV3(store) - # add default zarr.json metadata - store['zarr.json'] = store._metadata_class.encode_hierarchy_metadata(None) - return store else: store = StoreV3._ensure_store(store) - if 'zarr.json' not in store: - # add default zarr.json metadata - store['zarr.json'] = store._metadata_class.encode_hierarchy_metadata(None) + + if 'zarr.json' not in store: + # add default zarr.json metadata + store['zarr.json'] = store._metadata_class.encode_hierarchy_metadata(None) return store diff --git a/zarr/tests/test_storage_v3.py b/zarr/tests/test_storage_v3.py index 418f7d506b..3e9c0a05f7 100644 --- a/zarr/tests/test_storage_v3.py +++ b/zarr/tests/test_storage_v3.py @@ -596,6 +596,16 @@ def test_normalize_store_arg_v3(tmpdir): store = normalize_store_arg(fsspec.get_mapper("file://" + path), zarr_version=3) assert isinstance(store, FSStoreV3) + # regression for https://github.com/zarr-developers/zarr-python/issues/1382 + # contents of zarr.json are not important for this test + out = {"version": 1, "refs": {"zarr.json": "{...}"}} + store = normalize_store_arg( + "reference://", + storage_options={"fo": out, "remote_protocol": "memory"}, + zarr_version=3 + ) + assert isinstance(store, FSStoreV3) + fn = tmpdir.join('store.n5') with pytest.raises(NotImplementedError): normalize_store_arg(str(fn), zarr_version=3, mode='w') From 8f11656959c920099d8a6dec5c0abf4663a862b5 Mon Sep 17 00:00:00 2001 From: Sanket Verma Date: Fri, 14 Apr 2023 05:27:49 +0530 Subject: [PATCH 074/213] Update release.rst for 2.15.0 (#1392) --- docs/release.rst | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/docs/release.rst b/docs/release.rst index 3105e2c67f..06d656fa46 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -27,7 +27,13 @@ Enhancements By :user:`Andreas Albert ` :issue:`1029`. * Getitems supports ``meta_array``. - By :user: 'Mads R. B. Kristensen ' :issue:`1131`. + By :user:`Mads R. B. Kristensen ` :issue:`1131`. + +Maintenance +~~~~~~~~~~~ + +* Remove ``codecov`` from GitHub actions. + By :user:`John A. Kirkham ` :issue:`1391`. Documentation ~~~~~~~~~~~~~ From bb35962953fdacfbcf0a0dfa43ec6fddba0433d3 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 28 Apr 2023 16:20:12 +0200 Subject: [PATCH 075/213] Bump actions/setup-python from 4.5.0 to 4.6.0 (#1399) Bumps [actions/setup-python](https://github.com/actions/setup-python) from 4.5.0 to 4.6.0. - [Release notes](https://github.com/actions/setup-python/releases) - [Commits](https://github.com/actions/setup-python/compare/v4.5.0...v4.6.0) --- updated-dependencies: - dependency-name: actions/setup-python dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/releases.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/releases.yml b/.github/workflows/releases.yml index be7a3b19cc..97547491f8 100644 --- a/.github/workflows/releases.yml +++ b/.github/workflows/releases.yml @@ -16,7 +16,7 @@ jobs: submodules: true fetch-depth: 0 - - uses: actions/setup-python@v4.5.0 + - uses: actions/setup-python@v4.6.0 name: Install Python with: python-version: '3.8' From 86a54d17e114542b34dae10e5eda1b594d40bf99 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 28 Apr 2023 16:20:26 +0200 Subject: [PATCH 076/213] Bump pytest from 7.2.2 to 7.3.1 (#1393) Bumps [pytest](https://github.com/pytest-dev/pytest) from 7.2.2 to 7.3.1. - [Release notes](https://github.com/pytest-dev/pytest/releases) - [Changelog](https://github.com/pytest-dev/pytest/blob/main/CHANGELOG.rst) - [Commits](https://github.com/pytest-dev/pytest/compare/7.2.2...7.3.1) --- updated-dependencies: - dependency-name: pytest dependency-type: direct:development update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- requirements_dev_minimal.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements_dev_minimal.txt b/requirements_dev_minimal.txt index 1217ee620e..e4ada6385b 100644 --- a/requirements_dev_minimal.txt +++ b/requirements_dev_minimal.txt @@ -5,4 +5,4 @@ numcodecs==0.11.0 msgpack-python==0.5.6 setuptools-scm==7.1.0 # test requirements -pytest==7.2.2 +pytest==7.3.1 From a392e30e1d3ba525c28ad6d1131bae761de6f4aa Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 2 May 2023 09:24:20 +0200 Subject: [PATCH 077/213] Bump azure-storage-blob from 12.15.0 to 12.16.0 (#1403) Bumps [azure-storage-blob](https://github.com/Azure/azure-sdk-for-python) from 12.15.0 to 12.16.0. - [Release notes](https://github.com/Azure/azure-sdk-for-python/releases) - [Changelog](https://github.com/Azure/azure-sdk-for-python/blob/main/doc/esrp_release.md) - [Commits](https://github.com/Azure/azure-sdk-for-python/compare/azure-storage-blob_12.15.0...azure-storage-blob_12.16.0) --- updated-dependencies: - dependency-name: azure-storage-blob dependency-type: direct:development update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- requirements_dev_optional.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements_dev_optional.txt b/requirements_dev_optional.txt index 3ee7cbe5d3..f24fb8dc40 100644 --- a/requirements_dev_optional.txt +++ b/requirements_dev_optional.txt @@ -7,7 +7,7 @@ ipywidgets==8.0.6 # optional library requirements for services # don't let pyup change pinning for azure-storage-blob, need to pin to older # version to get compatibility with azure storage emulator on appveyor (FIXME) -azure-storage-blob==12.15.0 # pyup: ignore +azure-storage-blob==12.16.0 # pyup: ignore redis==4.5.3 types-redis types-setuptools From c12ee031767827a4d8f3c92d98476257fb2b3707 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 2 May 2023 09:24:33 +0200 Subject: [PATCH 078/213] Bump numpy from 1.24.2 to 1.24.3 (#1402) Bumps [numpy](https://github.com/numpy/numpy) from 1.24.2 to 1.24.3. - [Release notes](https://github.com/numpy/numpy/releases) - [Changelog](https://github.com/numpy/numpy/blob/main/doc/RELEASE_WALKTHROUGH.rst) - [Commits](https://github.com/numpy/numpy/compare/v1.24.2...v1.24.3) --- updated-dependencies: - dependency-name: numpy dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- requirements_dev_numpy.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements_dev_numpy.txt b/requirements_dev_numpy.txt index e094d4fcd4..a6135bd831 100644 --- a/requirements_dev_numpy.txt +++ b/requirements_dev_numpy.txt @@ -1,4 +1,4 @@ # Break this out into a separate file to allow testing against # different versions of numpy. This file should pin to the latest # numpy version. -numpy==1.24.2 +numpy==1.24.3 From 1deee736f8be537027bbc0c867dd50952ff40979 Mon Sep 17 00:00:00 2001 From: Josh Moore Date: Tue, 2 May 2023 09:47:53 +0200 Subject: [PATCH 079/213] Mark for 2.15.0a1 pre-release (#1404) * Mark for 2.15.0a1 pre-release * Move warning down --- docs/release.rst | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/docs/release.rst b/docs/release.rst index 06d656fa46..cdf4622fe5 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -6,20 +6,19 @@ Release notes # to document your changes. On releases it will be # re-indented so that it does not show up in the notes. -.. _unreleased: + .. _unreleased: -Unreleased ----------- - -.. - # .. warning:: - # Pre-release! Use :command:`pip install --pre zarr` to evaluate this release. + Unreleased + ---------- .. _release_2.15.0: 2.15.0 ------ +.. warning:: + Pre-release! Use :command:`pip install --pre zarr` to evaluate this release. + Enhancements ~~~~~~~~~~~~ From a2e864c14dbf2f7ab2ddba61f90fed8e38406b72 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 2 May 2023 11:01:15 +0200 Subject: [PATCH 080/213] Bump fsspec from 2023.3.0 to 2023.4.0 (#1387) * Bump fsspec from 2023.3.0 to 2023.4.0 Bumps [fsspec](https://github.com/fsspec/filesystem_spec) from 2023.3.0 to 2023.4.0. 
- [Release notes](https://github.com/fsspec/filesystem_spec/releases) - [Commits](https://github.com/fsspec/filesystem_spec/compare/2023.3.0...2023.4.0) --- updated-dependencies: - dependency-name: fsspec dependency-type: direct:development update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] * Update s3fs as well --------- Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Josh Moore --- requirements_dev_optional.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements_dev_optional.txt b/requirements_dev_optional.txt index f24fb8dc40..306225ea99 100644 --- a/requirements_dev_optional.txt +++ b/requirements_dev_optional.txt @@ -18,6 +18,6 @@ pytest-cov==4.0.0 pytest-doctestplus==0.12.1 pytest-timeout==2.1.0 h5py==3.8.0 -fsspec==2023.3.0 -s3fs==2023.3.0 +fsspec==2023.4.0 +s3fs==2023.4.0 moto[server]>=4.0.8 From d54f25c460f8835a0ec9a7b4bc3482159a5608f9 Mon Sep 17 00:00:00 2001 From: James Bourbeau Date: Wed, 3 May 2023 12:24:54 -0500 Subject: [PATCH 081/213] Avoid deprecated `product` in ``numpy=1.25`` (#1405) * Avoid deprecated product in numpy=1.25 * Add changelog for np.prod --------- Co-authored-by: Josh Moore --- docs/release.rst | 3 +++ zarr/tests/test_core.py | 4 ++-- zarr/util.py | 6 +++--- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/docs/release.rst b/docs/release.rst index cdf4622fe5..7442e519e8 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -34,6 +34,9 @@ Maintenance * Remove ``codecov`` from GitHub actions. By :user:`John A. Kirkham ` :issue:`1391`. +* Replace ``np.product`` with ``np.prod`` due to deprecation. + By :user:`James Bourbeau ` :issue:`1405`. + Documentation ~~~~~~~~~~~~~ diff --git a/zarr/tests/test_core.py b/zarr/tests/test_core.py index ba89db3b06..1cac51ba0d 100644 --- a/zarr/tests/test_core.py +++ b/zarr/tests/test_core.py @@ -1479,7 +1479,7 @@ def test_iter(self): ) for shape, chunks in params: z = self.create_array(shape=shape, chunks=chunks, dtype=int) - a = np.arange(np.product(shape)).reshape(shape) + a = np.arange(np.prod(shape)).reshape(shape) z[:] = a for expect, actual in zip_longest(a, z): assert_array_equal(expect, actual) @@ -1500,7 +1500,7 @@ def test_islice(self): ) for shape, chunks, start, end in params: z = self.create_array(shape=shape, chunks=chunks, dtype=int) - a = np.arange(np.product(shape)).reshape(shape) + a = np.arange(np.prod(shape)).reshape(shape) z[:] = a end_array = min(end, a.shape[0]) for expect, actual in zip_longest(a[start:end_array], diff --git a/zarr/util.py b/zarr/util.py index 68a238fbe4..b661f5f6b4 100644 --- a/zarr/util.py +++ b/zarr/util.py @@ -111,7 +111,7 @@ def guess_chunks(shape: Tuple[int, ...], typesize: int) -> Tuple[int, ...]: # Determine the optimal chunk size in bytes using a PyTables expression. # This is kept as a float. - dset_size = np.product(chunks)*typesize + dset_size = np.prod(chunks)*typesize target_size = CHUNK_BASE * (2**np.log10(dset_size/(1024.*1024))) if target_size > CHUNK_MAX: @@ -126,14 +126,14 @@ def guess_chunks(shape: Tuple[int, ...], typesize: int) -> Tuple[int, ...]: # 1b. We're within 50% of the target chunk size, AND # 2. 
The chunk is smaller than the maximum chunk size - chunk_bytes = np.product(chunks)*typesize + chunk_bytes = np.prod(chunks)*typesize if (chunk_bytes < target_size or abs(chunk_bytes-target_size)/target_size < 0.5) and \ chunk_bytes < CHUNK_MAX: break - if np.product(chunks) == 1: + if np.prod(chunks) == 1: break # Element size larger than CHUNK_MAX chunks[idx % ndims] = math.ceil(chunks[idx % ndims] / 2.0) From 25e6036414070d78ea6b7186427f6336b1d89c5e Mon Sep 17 00:00:00 2001 From: Alan Du Date: Thu, 4 May 2023 12:27:24 -0400 Subject: [PATCH 082/213] Fix `normalize_fill_value` for structured arrays (#1397) * Add failing test case for normalize_fill_value * Fix normalize_fill_value for structured arrays * Add changelog --------- Co-authored-by: Josh Moore --- docs/release.rst | 3 +++ zarr/tests/test_util.py | 1 + zarr/util.py | 2 +- 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/docs/release.rst b/docs/release.rst index 7442e519e8..83588bb3d7 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -52,6 +52,9 @@ Bug fixes * Fix ``ReadOnlyError`` when opening V3 store via fsspec reference file system. By :user:`Joe Hamman ` :issue:`1383`. +* Fix ``normalize_fill_value`` for structured arrays. + By :user:`Alan Du ` :issue:`1397`. + .. _release_2.14.2: 2.14.2 diff --git a/zarr/tests/test_util.py b/zarr/tests/test_util.py index 0a717b8f28..e01aa6711a 100644 --- a/zarr/tests/test_util.py +++ b/zarr/tests/test_util.py @@ -119,6 +119,7 @@ def test_normalize_fill_value(): structured_dtype = np.dtype([('foo', 'S3'), ('bar', 'i4'), ('baz', 'f8')]) expect = np.array((b'', 0, 0.), dtype=structured_dtype)[()] assert expect == normalize_fill_value(0, dtype=structured_dtype) + assert expect == normalize_fill_value(expect, dtype=structured_dtype) assert '' == normalize_fill_value(0, dtype=np.dtype('U1')) diff --git a/zarr/util.py b/zarr/util.py index b661f5f6b4..6ba20b96c2 100644 --- a/zarr/util.py +++ b/zarr/util.py @@ -295,7 +295,7 @@ def normalize_fill_value(fill_value, dtype: np.dtype): if fill_value is None or dtype.hasobject: # no fill value pass - elif fill_value == 0: + elif not isinstance(fill_value, np.void) and fill_value == 0: # this should be compatible across numpy versions for any array type, including # structured arrays fill_value = np.zeros((), dtype=dtype)[()] From 88f68a5320a13e6eb744fd30478c93fc41610409 Mon Sep 17 00:00:00 2001 From: Dimitri Papadopoulos Orfanos <3234522+DimitriPapadopoulos@users.noreply.github.com> Date: Sun, 7 May 2023 12:42:43 +0200 Subject: [PATCH 083/213] Merge isinstance calls (#1409) --- zarr/indexing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zarr/indexing.py b/zarr/indexing.py index 2f8144fd08..3fb3e2f204 100644 --- a/zarr/indexing.py +++ b/zarr/indexing.py @@ -116,7 +116,7 @@ def is_pure_orthogonal_indexing(selection, ndim): sum(is_integer_list(elem) or is_integer_array(elem) for elem in selection) <= 1 and all( is_integer_list(elem) or is_integer_array(elem) - or isinstance(elem, slice) or isinstance(elem, int) for + or isinstance(elem, (int, slice)) for elem in selection) ) From b8db120c2743d2c188e0531a0f9be93de77396e6 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 8 May 2023 09:28:07 +0200 Subject: [PATCH 084/213] Bump pypa/gh-action-pypi-publish from 1.8.3 to 1.8.6 (#1412) Bumps [pypa/gh-action-pypi-publish](https://github.com/pypa/gh-action-pypi-publish) from 1.8.3 to 1.8.6. 
- [Release notes](https://github.com/pypa/gh-action-pypi-publish/releases) - [Commits](https://github.com/pypa/gh-action-pypi-publish/compare/v1.8.3...v1.8.6) --- updated-dependencies: - dependency-name: pypa/gh-action-pypi-publish dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/releases.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/releases.yml b/.github/workflows/releases.yml index 97547491f8..8b1d5ccb83 100644 --- a/.github/workflows/releases.yml +++ b/.github/workflows/releases.yml @@ -64,7 +64,7 @@ jobs: with: name: releases path: dist - - uses: pypa/gh-action-pypi-publish@v1.8.3 + - uses: pypa/gh-action-pypi-publish@v1.8.6 with: user: __token__ password: ${{ secrets.pypi_password }} From e1e290f0efe42444845c78c47d4b4db8f6907e96 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 8 May 2023 09:38:58 +0200 Subject: [PATCH 085/213] Bump lmdb from 1.4.0 to 1.4.1 (#1384) Bumps [lmdb](https://github.com/jnwatson/py-lmdb) from 1.4.0 to 1.4.1. - [Release notes](https://github.com/jnwatson/py-lmdb/releases) - [Changelog](https://github.com/jnwatson/py-lmdb/blob/master/ChangeLog) - [Commits](https://github.com/jnwatson/py-lmdb/compare/py-lmdb_1.4.0...py-lmdb_1.4.1) --- updated-dependencies: - dependency-name: lmdb dependency-type: direct:development update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- requirements_dev_optional.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements_dev_optional.txt b/requirements_dev_optional.txt index 306225ea99..a6a79a499a 100644 --- a/requirements_dev_optional.txt +++ b/requirements_dev_optional.txt @@ -1,6 +1,6 @@ # optional library requirements # bsddb3==6.2.6; sys_platform != 'win32' -lmdb==1.4.0; sys_platform != 'win32' +lmdb==1.4.1; sys_platform != 'win32' # optional library requirements for Jupyter ipytree==0.2.2 ipywidgets==8.0.6 From 43fdc3dde64972c1a08b3d6d5b71433a05a7a5c2 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 8 May 2023 09:39:39 +0200 Subject: [PATCH 086/213] chore: update pre-commit hooks (#1366) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/codespell-project/codespell: v2.2.2 → v2.2.4](https://github.com/codespell-project/codespell/compare/v2.2.2...v2.2.4) - [github.com/pre-commit/mirrors-mypy: v1.0.1 → v1.2.0](https://github.com/pre-commit/mirrors-mypy/compare/v1.0.1...v1.2.0) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a420662b5b..e090ddd6d8 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -15,7 +15,7 @@ repos: exclude: ^(venv/|docs/) types: ['python'] - repo: https://github.com/codespell-project/codespell - rev: v2.2.2 + rev: v2.2.4 hooks: - id: codespell args: ["-L", "ba,ihs,kake,nd,noe,nwo,te,fo", "-S", "fixture"] @@ -24,7 +24,7 @@ repos: hooks: - id: check-yaml - repo: https://github.com/pre-commit/mirrors-mypy - rev: v1.0.1 + rev: v1.2.0 hooks: - id: mypy files: zarr From 
90da1f35526083d6d605511a75a85cd150bd8afd Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 8 May 2023 11:30:37 +0200 Subject: [PATCH 087/213] Bump redis from 4.5.3 to 4.5.4 (#1380) Bumps [redis](https://github.com/redis/redis-py) from 4.5.3 to 4.5.4. - [Release notes](https://github.com/redis/redis-py/releases) - [Changelog](https://github.com/redis/redis-py/blob/master/CHANGES) - [Commits](https://github.com/redis/redis-py/compare/v4.5.3...v4.5.4) --- updated-dependencies: - dependency-name: redis dependency-type: direct:development update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Josh Moore --- requirements_dev_optional.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements_dev_optional.txt b/requirements_dev_optional.txt index a6a79a499a..91db666cae 100644 --- a/requirements_dev_optional.txt +++ b/requirements_dev_optional.txt @@ -8,7 +8,7 @@ ipywidgets==8.0.6 # don't let pyup change pinning for azure-storage-blob, need to pin to older # version to get compatibility with azure storage emulator on appveyor (FIXME) azure-storage-blob==12.16.0 # pyup: ignore -redis==4.5.3 +redis==4.5.4 types-redis types-setuptools pymongo==4.3.3 From e2ab17f03e2d4e5ceb1a0ba32a62019a4fb55721 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 8 May 2023 22:00:41 +0200 Subject: [PATCH 088/213] Bump fsspec from 2023.4.0 to 2023.5.0 (#1411) * Bump fsspec from 2023.4.0 to 2023.5.0 Bumps [fsspec](https://github.com/fsspec/filesystem_spec) from 2023.4.0 to 2023.5.0. - [Commits](https://github.com/fsspec/filesystem_spec/compare/2023.4.0...2023.5.0) --- updated-dependencies: - dependency-name: fsspec dependency-type: direct:development update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] * Bumping s3fs as well --------- Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Josh Moore --- requirements_dev_optional.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements_dev_optional.txt b/requirements_dev_optional.txt index 91db666cae..f5125e0c3f 100644 --- a/requirements_dev_optional.txt +++ b/requirements_dev_optional.txt @@ -18,6 +18,6 @@ pytest-cov==4.0.0 pytest-doctestplus==0.12.1 pytest-timeout==2.1.0 h5py==3.8.0 -fsspec==2023.4.0 -s3fs==2023.4.0 +fsspec==2023.5.0 +s3fs==2023.5.0 moto[server]>=4.0.8 From 0891bf9c8049631000dab5934be77598c41c31f1 Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Thu, 11 May 2023 03:21:09 -0400 Subject: [PATCH 089/213] test(ci): Run tests against python 3.11 (#1415) * test(ci): Run tests against python 3.11 * exclude numpy 1.20 for 3.11 * update release notes --- .github/workflows/python-package.yml | 4 +++- .github/workflows/windows-testing.yml | 2 +- docs/release.rst | 3 +++ pyproject.toml | 1 + 4 files changed, 8 insertions(+), 2 deletions(-) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index fb410762be..f8fe9ab379 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -15,11 +15,13 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ['3.8', '3.9', '3.10'] + python-version: ['3.8', '3.9', '3.10', '3.11'] numpy_version: ['>=1.22.0', '==1.20.*'] exclude: - python-version: '3.10' numpy_version: '==1.20.*' + - python-version: '3.11' + numpy_version: '==1.20.*' services: redis: image: redis diff --git a/.github/workflows/windows-testing.yml b/.github/workflows/windows-testing.yml index 2f8922b447..b17eece058 100644 --- a/.github/workflows/windows-testing.yml +++ b/.github/workflows/windows-testing.yml @@ -16,7 +16,7 @@ jobs: strategy: fail-fast: True matrix: - python-version: ['3.8', '3.9', '3.10'] + python-version: ['3.8', '3.9', '3.10', '3.11'] steps: - uses: actions/checkout@v3 with: diff --git a/docs/release.rst b/docs/release.rst index 83588bb3d7..f6e6e614ae 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -37,6 +37,9 @@ Maintenance * Replace ``np.product`` with ``np.prod`` due to deprecation. By :user:`James Bourbeau ` :issue:`1405`. +* Activate Py 3.11 builds. + By :user:`Joe Hamman ` :issue:`1415`. + Documentation ~~~~~~~~~~~~~ diff --git a/pyproject.toml b/pyproject.toml index 3277e9da7c..4beb357bb0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,6 +33,7 @@ classifiers = [ 'Programming Language :: Python :: 3.8', 'Programming Language :: Python :: 3.9', 'Programming Language :: Python :: 3.10', + 'Programming Language :: Python :: 3.11', ] license = { text = "MIT" } From 4fe109175845b4bbcb243583fc63e2548a53033e Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 12 May 2023 17:03:36 +0200 Subject: [PATCH 090/213] Bump redis from 4.5.4 to 4.5.5 (#1413) Bumps [redis](https://github.com/redis/redis-py) from 4.5.4 to 4.5.5. - [Release notes](https://github.com/redis/redis-py/releases) - [Changelog](https://github.com/redis/redis-py/blob/master/CHANGES) - [Commits](https://github.com/redis/redis-py/compare/v4.5.4...v4.5.5) --- updated-dependencies: - dependency-name: redis dependency-type: direct:development update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- requirements_dev_optional.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements_dev_optional.txt b/requirements_dev_optional.txt index f5125e0c3f..5c4b6ac266 100644 --- a/requirements_dev_optional.txt +++ b/requirements_dev_optional.txt @@ -8,7 +8,7 @@ ipywidgets==8.0.6 # don't let pyup change pinning for azure-storage-blob, need to pin to older # version to get compatibility with azure storage emulator on appveyor (FIXME) azure-storage-blob==12.16.0 # pyup: ignore -redis==4.5.4 +redis==4.5.5 types-redis types-setuptools pymongo==4.3.3 From cc16d8c8cbafc2dd736f11b9c1ad28a58529e40d Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 16 May 2023 13:10:10 +0200 Subject: [PATCH 091/213] chore: update pre-commit hooks (#1416) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/pre-commit/mirrors-mypy: v1.2.0 → v1.3.0](https://github.com/pre-commit/mirrors-mypy/compare/v1.2.0...v1.3.0) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index e090ddd6d8..194e8b1d5f 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -24,7 +24,7 @@ repos: hooks: - id: check-yaml - repo: https://github.com/pre-commit/mirrors-mypy - rev: v1.2.0 + rev: v1.3.0 hooks: - id: mypy files: zarr From 3649b9b23d87cfd7141045bdf8684ecc1cdfbd90 Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Thu, 25 May 2023 21:23:49 +0200 Subject: [PATCH 092/213] `open_array()`: adding the `meta_array` argument (#1396) * open_array(): adding the meta_array argument * updated release.txt * doc: fixed versionadded --- docs/release.rst | 5 ++++- zarr/creation.py | 8 +++++++- zarr/tests/test_meta_array.py | 19 ++++++++++++++++++- 3 files changed, 29 insertions(+), 3 deletions(-) diff --git a/docs/release.rst b/docs/release.rst index f6e6e614ae..833bfbf7ba 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -28,6 +28,9 @@ Enhancements * Getitems supports ``meta_array``. By :user:`Mads R. B. Kristensen ` :issue:`1131`. +* ``open_array()`` now takes the ``meta_array`` argument. + By :user:`Mads R. B. Kristensen ` :issue:`1396`. + Maintenance ~~~~~~~~~~~ @@ -176,7 +179,7 @@ Bug fixes Documentation ~~~~~~~~~~~~~ -* Fix minor indexing errors in tutorial and specification examples of documentation. +* Fix minor indexing errors in tutorial and specification examples of documentation. By :user:`Kola Babalola ` :issue:`1277`. * Add `requirements_rtfd.txt` in `contributing.rst`. diff --git a/zarr/creation.py b/zarr/creation.py index a6fa8e44cc..dc8b8a157d 100644 --- a/zarr/creation.py +++ b/zarr/creation.py @@ -424,6 +424,7 @@ def open_array( *, zarr_version=None, dimension_separator=None, + meta_array=None, **kwargs ): """Open an array using file-mode-like semantics. @@ -498,6 +499,11 @@ def open_array( ('/') format. If None, the appropriate value will be read from `store` when present. Otherwise, defaults to '.' when ``zarr_version == 2`` and `/` otherwise. + meta_array : array-like, optional + An array instance to use for determining arrays to create and return + to users. Use `numpy.empty(())` by default. + + .. 
versionadded:: 2.15 Returns ------- @@ -607,7 +613,7 @@ def open_array( # instantiate array z = Array(store, read_only=read_only, synchronizer=synchronizer, cache_metadata=cache_metadata, cache_attrs=cache_attrs, path=path, - chunk_store=chunk_store, write_empty_chunks=write_empty_chunks) + chunk_store=chunk_store, write_empty_chunks=write_empty_chunks, meta_array=meta_array) return z diff --git a/zarr/tests/test_meta_array.py b/zarr/tests/test_meta_array.py index 6172af3be9..5ff6fae3f3 100644 --- a/zarr/tests/test_meta_array.py +++ b/zarr/tests/test_meta_array.py @@ -8,7 +8,7 @@ import zarr.codecs from zarr.core import Array -from zarr.creation import array, empty, full, ones, zeros +from zarr.creation import array, empty, full, ones, open_array, zeros from zarr.hierarchy import open_group from zarr.storage import DirectoryStore, MemoryStore, Store, ZipStore @@ -148,6 +148,23 @@ def test_array(tmp_path, module, compressor, store_type): assert z.dtype == z2.dtype xp.testing.assert_array_equal(z[:], z2[:]) + store = init_store(tmp_path / "open_array", store_type) + a = xp.arange(100) + z = open_array( + store, + shape=a.shape, + dtype=a.dtype, + chunks=10, + compressor=compressor, + meta_array=xp.empty(()) + ) + z[:] = a + assert a.shape == z.shape + assert a.dtype == z.dtype + assert isinstance(a, type(z[:])) + assert isinstance(z.meta_array, type(xp.empty(()))) + xp.testing.assert_array_equal(a, z[:]) + @pytest.mark.parametrize("module, compressor", param_module_and_compressor) def test_empty(module, compressor): From 4132f360616a4c8bfa3dd4e979a4793c5d84cdfc Mon Sep 17 00:00:00 2001 From: Dimitri Papadopoulos Orfanos <3234522+DimitriPapadopoulos@users.noreply.github.com> Date: Mon, 29 May 2023 09:18:08 +0200 Subject: [PATCH 093/213] Fix typo introduced by dcce26e (#1420) --- docs/release.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/release.rst b/docs/release.rst index 833bfbf7ba..2f64454c97 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -293,7 +293,7 @@ Maintenance * Updated GitHub actions. By :user:`Dimitri Papadopoulos Orfanos ` :issue:`1134`. -* Uopdate web links: `http:// → https://`. +* Update web links: `http:// → https://`. By :user:`Dimitri Papadopoulos Orfanos ` :issue:`1313`. .. _release_2.13.3: From 5d7e287d23145b1f0f355b14e9111aa81c79cd22 Mon Sep 17 00:00:00 2001 From: Josh Moore Date: Wed, 14 Jun 2023 16:40:02 +0200 Subject: [PATCH 094/213] 2.15.0: Remove pre-release warning (#1419) --- docs/release.rst | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/docs/release.rst b/docs/release.rst index 2f64454c97..67f33a8770 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -1,6 +1,13 @@ Release notes ============= +.. + # Copy the warning statement _under_ the latest release version + # and unindent for pre-releases. + + .. warning:: + Pre-release! Use :command:`pip install --pre zarr` to evaluate this release. + .. # Unindent the section between releases in order # to document your changes. On releases it will be @@ -16,9 +23,6 @@ Release notes 2.15.0 ------ -.. warning:: - Pre-release! Use :command:`pip install --pre zarr` to evaluate this release. 
- Enhancements ~~~~~~~~~~~~ From 77e985e888c17c2b52d11fba330a735b48e4733e Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 15 Jun 2023 08:45:19 +0200 Subject: [PATCH 095/213] Bump pytest-doctestplus from 0.12.1 to 0.13.0 (#1429) Bumps [pytest-doctestplus](https://github.com/astropy/pytest-doctestplus) from 0.12.1 to 0.13.0. - [Release notes](https://github.com/astropy/pytest-doctestplus/releases) - [Changelog](https://github.com/scientific-python/pytest-doctestplus/blob/main/CHANGES.rst) - [Commits](https://github.com/astropy/pytest-doctestplus/compare/v0.12.1...v0.13.0) --- updated-dependencies: - dependency-name: pytest-doctestplus dependency-type: direct:development update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- requirements_dev_optional.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements_dev_optional.txt b/requirements_dev_optional.txt index 5c4b6ac266..bb2fe63a86 100644 --- a/requirements_dev_optional.txt +++ b/requirements_dev_optional.txt @@ -15,7 +15,7 @@ pymongo==4.3.3 # optional test requirements coverage pytest-cov==4.0.0 -pytest-doctestplus==0.12.1 +pytest-doctestplus==0.13.0 pytest-timeout==2.1.0 h5py==3.8.0 fsspec==2023.5.0 From 9081cc7f8e0b4f051a32b80999c7d011816790bf Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 15 Jun 2023 08:45:31 +0200 Subject: [PATCH 096/213] Bump pytest from 7.3.1 to 7.3.2 (#1432) Bumps [pytest](https://github.com/pytest-dev/pytest) from 7.3.1 to 7.3.2. - [Release notes](https://github.com/pytest-dev/pytest/releases) - [Changelog](https://github.com/pytest-dev/pytest/blob/main/CHANGELOG.rst) - [Commits](https://github.com/pytest-dev/pytest/compare/7.3.1...7.3.2) --- updated-dependencies: - dependency-name: pytest dependency-type: direct:development update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- requirements_dev_minimal.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements_dev_minimal.txt b/requirements_dev_minimal.txt index e4ada6385b..df1ca11677 100644 --- a/requirements_dev_minimal.txt +++ b/requirements_dev_minimal.txt @@ -5,4 +5,4 @@ numcodecs==0.11.0 msgpack-python==0.5.6 setuptools-scm==7.1.0 # test requirements -pytest==7.3.1 +pytest==7.3.2 From 139e1c465ba159b4fb41f793c49b6d5b02bb241d Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 15 Jun 2023 10:54:15 +0200 Subject: [PATCH 097/213] Bump fsspec from 2023.5.0 to 2023.6.0 (#1433) * Bump fsspec from 2023.5.0 to 2023.6.0 Bumps [fsspec](https://github.com/fsspec/filesystem_spec) from 2023.5.0 to 2023.6.0. - [Commits](https://github.com/fsspec/filesystem_spec/compare/2023.5.0...2023.6.0) --- updated-dependencies: - dependency-name: fsspec dependency-type: direct:development update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] * Also change s3fs --------- Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Josh Moore --- requirements_dev_optional.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements_dev_optional.txt b/requirements_dev_optional.txt index bb2fe63a86..2c909177dc 100644 --- a/requirements_dev_optional.txt +++ b/requirements_dev_optional.txt @@ -18,6 +18,6 @@ pytest-cov==4.0.0 pytest-doctestplus==0.13.0 pytest-timeout==2.1.0 h5py==3.8.0 -fsspec==2023.5.0 -s3fs==2023.5.0 +fsspec==2023.6.0 +s3fs==2023.6.0 moto[server]>=4.0.8 From 67f25efc93306bf760807abde9f5e185c59b5219 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 21 Jun 2023 17:53:52 +0200 Subject: [PATCH 098/213] Bump h5py from 3.8.0 to 3.9.0 (#1440) Bumps [h5py](https://github.com/h5py/h5py) from 3.8.0 to 3.9.0. - [Release notes](https://github.com/h5py/h5py/releases) - [Changelog](https://github.com/h5py/h5py/blob/master/docs/release_guide.rst) - [Commits](https://github.com/h5py/h5py/compare/3.8.0...3.9.0) --- updated-dependencies: - dependency-name: h5py dependency-type: direct:development update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- requirements_dev_optional.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements_dev_optional.txt b/requirements_dev_optional.txt index 2c909177dc..60c2fffaac 100644 --- a/requirements_dev_optional.txt +++ b/requirements_dev_optional.txt @@ -17,7 +17,7 @@ coverage pytest-cov==4.0.0 pytest-doctestplus==0.13.0 pytest-timeout==2.1.0 -h5py==3.8.0 +h5py==3.9.0 fsspec==2023.6.0 s3fs==2023.6.0 moto[server]>=4.0.8 From b3cafa96b88c362e7843df8e2a07499588b7fa49 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 21 Jun 2023 17:54:04 +0200 Subject: [PATCH 099/213] chore: update pre-commit hooks (#1438) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/codespell-project/codespell: v2.2.4 → v2.2.5](https://github.com/codespell-project/codespell/compare/v2.2.4...v2.2.5) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 194e8b1d5f..583a2b0184 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -15,7 +15,7 @@ repos: exclude: ^(venv/|docs/) types: ['python'] - repo: https://github.com/codespell-project/codespell - rev: v2.2.4 + rev: v2.2.5 hooks: - id: codespell args: ["-L", "ba,ihs,kake,nd,noe,nwo,te,fo", "-S", "fixture"] From 2713a89c13107a83843a76d5ead7f53351f8f973 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sun, 25 Jun 2023 16:37:48 +0200 Subject: [PATCH 100/213] Bump pymongo from 4.3.3 to 4.4.0 (#1441) Bumps [pymongo](https://github.com/mongodb/mongo-python-driver) from 4.3.3 to 4.4.0. 
- [Release notes](https://github.com/mongodb/mongo-python-driver/releases) - [Changelog](https://github.com/mongodb/mongo-python-driver/blob/master/doc/changelog.rst) - [Commits](https://github.com/mongodb/mongo-python-driver/compare/4.3.3...4.4.0) --- updated-dependencies: - dependency-name: pymongo dependency-type: direct:development update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- requirements_dev_optional.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements_dev_optional.txt b/requirements_dev_optional.txt index 60c2fffaac..29bb9b9905 100644 --- a/requirements_dev_optional.txt +++ b/requirements_dev_optional.txt @@ -11,7 +11,7 @@ azure-storage-blob==12.16.0 # pyup: ignore redis==4.5.5 types-redis types-setuptools -pymongo==4.3.3 +pymongo==4.4.0 # optional test requirements coverage pytest-cov==4.0.0 From 327b6942284283d0435d2513514f4712c71376b4 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sun, 25 Jun 2023 16:38:01 +0200 Subject: [PATCH 101/213] Bump pytest-cov from 4.0.0 to 4.1.0 (#1418) Bumps [pytest-cov](https://github.com/pytest-dev/pytest-cov) from 4.0.0 to 4.1.0. - [Changelog](https://github.com/pytest-dev/pytest-cov/blob/master/CHANGELOG.rst) - [Commits](https://github.com/pytest-dev/pytest-cov/compare/v4.0.0...v4.1.0) --- updated-dependencies: - dependency-name: pytest-cov dependency-type: direct:development update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- requirements_dev_optional.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements_dev_optional.txt b/requirements_dev_optional.txt index 29bb9b9905..8e7b5c94ab 100644 --- a/requirements_dev_optional.txt +++ b/requirements_dev_optional.txt @@ -14,7 +14,7 @@ types-setuptools pymongo==4.4.0 # optional test requirements coverage -pytest-cov==4.0.0 +pytest-cov==4.1.0 pytest-doctestplus==0.13.0 pytest-timeout==2.1.0 h5py==3.9.0 From 2169c4345d65f0fcbe1ae190212110d1d67ec8ab Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Wed, 28 Jun 2023 06:01:59 -0700 Subject: [PATCH 102/213] test: replace pkg_resources with packaging.version for version parsing/comparison (#1450) --- zarr/tests/test_core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/zarr/tests/test_core.py b/zarr/tests/test_core.py index 1cac51ba0d..1541943d22 100644 --- a/zarr/tests/test_core.py +++ b/zarr/tests/test_core.py @@ -8,6 +8,7 @@ from tempfile import mkdtemp import numpy as np +import packaging.version import pytest from numcodecs import (BZ2, JSON, LZ4, Blosc, Categorize, Delta, FixedScaleOffset, GZip, MsgPack, Pickle, VLenArray, @@ -15,7 +16,6 @@ from numcodecs.compat import ensure_bytes, ensure_ndarray from numcodecs.tests.common import greetings from numpy.testing import assert_array_almost_equal, assert_array_equal -from pkg_resources import parse_version import zarr from zarr._storage.store import ( @@ -1389,7 +1389,7 @@ def test_object_codec_warnings(self): z = self.create_array(shape=10, chunks=5, dtype="i4", object_codec=JSON()) z.store.close() - @unittest.skipIf(parse_version(np.__version__) < parse_version('1.14.0'), + @unittest.skipIf(packaging.version.parse(np.__version__) < packaging.version.parse('1.14.0'), "unsupported numpy version") def test_structured_array_contain_object(self): From 
cc2bd4122988b6bc58676df9a717834c09926ffc Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 5 Jul 2023 09:21:49 +0200 Subject: [PATCH 103/213] Bump pypa/gh-action-pypi-publish from 1.8.6 to 1.8.7 (#1451) Bumps [pypa/gh-action-pypi-publish](https://github.com/pypa/gh-action-pypi-publish) from 1.8.6 to 1.8.7. - [Release notes](https://github.com/pypa/gh-action-pypi-publish/releases) - [Commits](https://github.com/pypa/gh-action-pypi-publish/compare/v1.8.6...v1.8.7) --- updated-dependencies: - dependency-name: pypa/gh-action-pypi-publish dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/releases.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/releases.yml b/.github/workflows/releases.yml index 8b1d5ccb83..a00096bb18 100644 --- a/.github/workflows/releases.yml +++ b/.github/workflows/releases.yml @@ -64,7 +64,7 @@ jobs: with: name: releases path: dist - - uses: pypa/gh-action-pypi-publish@v1.8.6 + - uses: pypa/gh-action-pypi-publish@v1.8.7 with: user: __token__ password: ${{ secrets.pypi_password }} From 8c98f4518ea20251c8ef3258276860a3cbef9eeb Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 5 Jul 2023 17:04:57 +0200 Subject: [PATCH 104/213] Bump ipywidgets from 8.0.6 to 8.0.7 (#1452) --- requirements_dev_optional.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements_dev_optional.txt b/requirements_dev_optional.txt index 8e7b5c94ab..0398d8f494 100644 --- a/requirements_dev_optional.txt +++ b/requirements_dev_optional.txt @@ -3,7 +3,7 @@ lmdb==1.4.1; sys_platform != 'win32' # optional library requirements for Jupyter ipytree==0.2.2 -ipywidgets==8.0.6 +ipywidgets==8.0.7 # optional library requirements for services # don't let pyup change pinning for azure-storage-blob, need to pin to older # version to get compatibility with azure storage emulator on appveyor (FIXME) From ac897822bdf1ec95fe0c34f621ece67f02e07e1e Mon Sep 17 00:00:00 2001 From: Christoph Gohlke Date: Mon, 10 Jul 2023 15:03:47 -0700 Subject: [PATCH 105/213] Add __contains__ method to KVStore (#1454) * Add __contains__ method to KVStore * Update release notes --- docs/release.rst | 11 ++++++++--- zarr/storage.py | 3 +++ 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/docs/release.rst b/docs/release.rst index 67f33a8770..156ba7229c 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -13,10 +13,15 @@ Release notes # to document your changes. On releases it will be # re-indented so that it does not show up in the notes. - .. _unreleased: +.. _unreleased: - Unreleased - ---------- +Unreleased +---------- + +Bug fixes +~~~~~~~~~ + +* Add ``__contains__`` method to ``KVStore``. By :user:`Christoph Gohlke ` :issue:`1454`. .. 
_release_2.15.0: diff --git a/zarr/storage.py b/zarr/storage.py index e6c3f62faf..ef1bd64955 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -744,6 +744,9 @@ def __setitem__(self, key, value): def __delitem__(self, key): del self._mutable_mapping[key] + def __contains__(self, key): + return key in self._mutable_mapping + def get(self, key, default=None): return self._mutable_mapping.get(key, default) From 98f74d5e362eff88d1022749e82d7bed9d62b7a2 Mon Sep 17 00:00:00 2001 From: Altay Sansal Date: Mon, 10 Jul 2023 17:05:55 -0500 Subject: [PATCH 106/213] Support Block (Chunk) Indexing (#1428) * add .venv to `.gitignore` * add block indexing capabilities * add release notes * fix docstrings * update tutorial * fix missing codecov hit for read-only arrays * add block selection to array tests * lint * move release notes to unreleased section * update block indexing "as of" to 2.16 --------- Co-authored-by: Altay Sansal --- .gitignore | 1 + docs/api/core.rst | 2 + docs/release.rst | 3 + docs/tutorial.rst | 78 ++++++++++++++ zarr/core.py | 210 ++++++++++++++++++++++++++++++++++-- zarr/indexing.py | 88 +++++++++++++++ zarr/tests/test_core.py | 9 +- zarr/tests/test_indexing.py | 180 ++++++++++++++++++++++++++++++- 8 files changed, 558 insertions(+), 13 deletions(-) diff --git a/.gitignore b/.gitignore index 28e5544286..4f0d523785 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,7 @@ __pycache__/ # Distribution / packaging .Python env/ +.venv/ build/ develop-eggs/ dist/ diff --git a/docs/api/core.rst b/docs/api/core.rst index 5789fb996b..c4075fdb30 100644 --- a/docs/api/core.rst +++ b/docs/api/core.rst @@ -10,6 +10,8 @@ The Array class (``zarr.core``) .. automethod:: set_basic_selection .. automethod:: get_mask_selection .. automethod:: set_mask_selection + .. automethod:: get_block_selection + .. automethod:: set_block_selection .. automethod:: get_coordinate_selection .. automethod:: set_coordinate_selection .. automethod:: get_orthogonal_selection diff --git a/docs/release.rst b/docs/release.rst index 156ba7229c..e8d1c440d1 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -23,6 +23,9 @@ Bug fixes * Add ``__contains__`` method to ``KVStore``. By :user:`Christoph Gohlke ` :issue:`1454`. + * **Block Indexing**: Implemented blockwise (chunk blocks) indexing to ``zarr.Array``. + By :user:`Altay Sansal ` :issue:`1428` + .. _release_2.15.0: 2.15.0 diff --git a/docs/tutorial.rst b/docs/tutorial.rst index 0f2e1c7345..e3155acfae 100644 --- a/docs/tutorial.rst +++ b/docs/tutorial.rst @@ -641,6 +641,84 @@ orthogonal indexing is also available directly on the array: >>> all(z.oindex[[0, 2], :] == z[[0, 2], :]) True +Block Indexing +~~~~~~~~~~~~~~ + +As of version 2.16.0, Zarr also supports block indexing, which allows +selections of whole chunks based on their logical indices along each dimension +of an array. For example, this allows selecting a subset of chunk-aligned rows and/or +columns from a 2-dimensional array.
E.g.:: + + >>> import zarr + >>> import numpy as np + >>> z = zarr.array(np.arange(100).reshape(10, 10), chunks=(3, 3)) + +Retrieve items by specifying their block coordinates:: + + >>> z.get_block_selection(1) + array([[30, 31, 32, 33, 34, 35, 36, 37, 38, 39], + [40, 41, 42, 43, 44, 45, 46, 47, 48, 49], + [50, 51, 52, 53, 54, 55, 56, 57, 58, 59]]) + +Equivalent slicing:: + + >>> z[3:6] + array([[30, 31, 32, 33, 34, 35, 36, 37, 38, 39], + [40, 41, 42, 43, 44, 45, 46, 47, 48, 49], + [50, 51, 52, 53, 54, 55, 56, 57, 58, 59]]) + + +For conveni ence, the block selection functionality is also available via the +``blocks`` property, e.g.:: + + >>> z.blocks[1] + array([[30, 31, 32, 33, 34, 35, 36, 37, 38, 39], + [40, 41, 42, 43, 44, 45, 46, 47, 48, 49], + [50, 51, 52, 53, 54, 55, 56, 57, 58, 59]]) + +Block index arrays may be multidimensional to index multidimensional arrays. +For example:: + + >>> z.blocks[0, 1:3] + array([[ 3, 4, 5, 6, 7, 8], + [13, 14, 15, 16, 17, 18], + [23, 24, 25, 26, 27, 28]]) + +Data can also be modified. Let's start with a simple 2D array:: + + >>> import zarr + >>> import numpy as np + >>> z = zarr.zeros((6, 6), dtype=int, chunks=2) + +Set data for a selection of items:: + + >>> z.set_block_selection((1, 0), 1) + >>> z[...] + array([[0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [1, 1, 0, 0, 0, 0], + [1, 1, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0]]) + +For convenience, this functionality is also available via the ``blocks`` property. +E.g.:: + + >>> z.blocks[:, 2] = 7 + >>> z[...] + array([[0, 0, 0, 0, 7, 7], + [0, 0, 0, 0, 7, 7], + [1, 1, 0, 0, 7, 7], + [1, 1, 0, 0, 7, 7], + [0, 0, 0, 0, 7, 7], + [0, 0, 0, 0, 7, 7]]) + +Any combination of integer and slice can be used for block indexing:: + + >>> z.blocks[2, 1:3] + array([[0, 0, 7, 7], + [0, 0, 7, 7]]) + Indexing fields in structured arrays ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/zarr/core.py b/zarr/core.py index 5537733b4b..80f424bafc 100644 --- a/zarr/core.py +++ b/zarr/core.py @@ -22,6 +22,8 @@ OIndex, OrthogonalIndexer, VIndex, + BlockIndex, + BlockIndexer, PartialChunkIterator, check_fields, check_no_multi_fields, @@ -139,6 +141,7 @@ class Array: info vindex oindex + blocks write_empty_chunks meta_array @@ -154,6 +157,8 @@ class Array: set_mask_selection get_coordinate_selection set_coordinate_selection + get_block_selection + set_block_selection digest hexdigest resize @@ -230,6 +235,7 @@ def __init__( # initialize indexing helpers self._oindex = OIndex(self) self._vindex = VIndex(self) + self._blocks = BlockIndex(self) def _load_metadata(self): """(Re)load metadata from store.""" @@ -577,6 +583,12 @@ def vindex(self): :func:`set_mask_selection` for documentation and examples.""" return self._vindex + @property + def blocks(self): + """Shortcut for blocked chunked indexing, see :func:`get_block_selection` and + :func:`set_block_selection` for documentation and examples.""" + return self._blocks + @property def write_empty_chunks(self) -> bool: """A Boolean, True if chunks composed of the array's fill value @@ -814,7 +826,8 @@ def __getitem__(self, selection): -------- get_basic_selection, set_basic_selection, get_mask_selection, set_mask_selection, get_coordinate_selection, set_coordinate_selection, get_orthogonal_selection, - set_orthogonal_selection, vindex, oindex, __setitem__ + set_orthogonal_selection, get_block_selection, set_block_selection, + vindex, oindex, blocks, __setitem__ """ fields, pure_selection = pop_fields(selection) @@ -933,7 +946,8 @@ def get_basic_selection(self, 
selection=Ellipsis, out=None, fields=None): -------- set_basic_selection, get_mask_selection, set_mask_selection, get_coordinate_selection, set_coordinate_selection, get_orthogonal_selection, - set_orthogonal_selection, vindex, oindex, __getitem__, __setitem__ + set_orthogonal_selection, get_block_selection, set_block_selection, + vindex, oindex, blocks, __getitem__, __setitem__ """ @@ -1089,7 +1103,8 @@ def get_orthogonal_selection(self, selection, out=None, fields=None): -------- get_basic_selection, set_basic_selection, get_mask_selection, set_mask_selection, get_coordinate_selection, set_coordinate_selection, set_orthogonal_selection, - vindex, oindex, __getitem__, __setitem__ + get_block_selection, set_block_selection, + vindex, oindex, blocks, __getitem__, __setitem__ """ @@ -1160,7 +1175,8 @@ def get_coordinate_selection(self, selection, out=None, fields=None): -------- get_basic_selection, set_basic_selection, get_mask_selection, set_mask_selection, get_orthogonal_selection, set_orthogonal_selection, set_coordinate_selection, - vindex, oindex, __getitem__, __setitem__ + get_block_selection, set_block_selection, + vindex, oindex, blocks, __getitem__, __setitem__ """ @@ -1185,6 +1201,90 @@ def get_coordinate_selection(self, selection, out=None, fields=None): return out + def get_block_selection(self, selection, out=None, fields=None): + """Retrieve a selection of individual chunk blocks, by providing the indices + (coordinates) for each chunk block. + + Parameters + ---------- + selection : tuple + An integer (coordinate) or slice for each dimension of the array. + out : ndarray, optional + If given, load the selected data directly into this array. + fields : str or sequence of str, optional + For arrays with a structured dtype, one or more fields can be specified to + extract data for. + + Returns + ------- + out : ndarray + A NumPy array containing the data for the requested selection. + + Examples + -------- + Setup a 2-dimensional array:: + + >>> import zarr + >>> import numpy as np + >>> z = zarr.array(np.arange(100).reshape(10, 10), chunks=(3, 3)) + + Retrieve items by specifying their block coordinates:: + + >>> z.get_block_selection((1, slice(None))) + array([[30, 31, 32, 33, 34, 35, 36, 37, 38, 39], + [40, 41, 42, 43, 44, 45, 46, 47, 48, 49], + [50, 51, 52, 53, 54, 55, 56, 57, 58, 59]]) + + Which is equivalent to:: + + >>> z[3:6, :] + array([[30, 31, 32, 33, 34, 35, 36, 37, 38, 39], + [40, 41, 42, 43, 44, 45, 46, 47, 48, 49], + [50, 51, 52, 53, 54, 55, 56, 57, 58, 59]]) + + For convenience, the block selection functionality is also available via the + `blocks` property, e.g.:: + + >>> z.blocks[1] + array([[30, 31, 32, 33, 34, 35, 36, 37, 38, 39], + [40, 41, 42, 43, 44, 45, 46, 47, 48, 49], + [50, 51, 52, 53, 54, 55, 56, 57, 58, 59]]) + + Notes + ----- + Block indexing is a convenience indexing method to work on individual chunks + with chunk index slicing. It has the same concept as Dask's `Array.blocks` + indexing. + + Slices are supported. However, only with a step size of one. + + Block index arrays may be multidimensional to index multidimensional arrays. 
+ For example:: + + >>> z.blocks[0, 1:3] + array([[ 3, 4, 5, 6, 7, 8], + [13, 14, 15, 16, 17, 18], + [23, 24, 25, 26, 27, 28]]) + + See Also + -------- + get_basic_selection, set_basic_selection, get_mask_selection, set_mask_selection, + get_orthogonal_selection, set_orthogonal_selection, get_coordinate_selection, + set_coordinate_selection, set_block_selection, + vindex, oindex, blocks, __getitem__, __setitem__ + + """ + if not self._cache_metadata: + self._load_metadata() + + # check args + check_fields(fields, self._dtype) + + # setup indexer + indexer = BlockIndexer(selection, self) + + return self._get_selection(indexer=indexer, out=out, fields=fields) + def get_mask_selection(self, selection, out=None, fields=None): """Retrieve a selection of individual items, by providing a Boolean array of the same shape as the array against which the selection is being made, where True @@ -1238,8 +1338,8 @@ def get_mask_selection(self, selection, out=None, fields=None): -------- get_basic_selection, set_basic_selection, set_mask_selection, get_orthogonal_selection, set_orthogonal_selection, get_coordinate_selection, - set_coordinate_selection, vindex, oindex, __getitem__, __setitem__ - + set_coordinate_selection, get_block_selection, set_block_selection, + vindex, oindex, blocks, __getitem__, __setitem__ """ # refresh metadata @@ -1376,7 +1476,8 @@ def __setitem__(self, selection, value): -------- get_basic_selection, set_basic_selection, get_mask_selection, set_mask_selection, get_coordinate_selection, set_coordinate_selection, get_orthogonal_selection, - set_orthogonal_selection, vindex, oindex, __getitem__ + set_orthogonal_selection, get_block_selection, set_block_selection, + vindex, oindex, blocks, __getitem__ """ fields, pure_selection = pop_fields(selection) @@ -1464,7 +1565,8 @@ def set_basic_selection(self, selection, value, fields=None): -------- get_basic_selection, get_mask_selection, set_mask_selection, get_coordinate_selection, set_coordinate_selection, get_orthogonal_selection, - set_orthogonal_selection, vindex, oindex, __getitem__, __setitem__ + set_orthogonal_selection, get_block_selection, set_block_selection, + vindex, oindex, blocks, __getitem__, __setitem__ """ @@ -1555,7 +1657,8 @@ def set_orthogonal_selection(self, selection, value, fields=None): -------- get_basic_selection, set_basic_selection, get_mask_selection, set_mask_selection, get_coordinate_selection, set_coordinate_selection, get_orthogonal_selection, - vindex, oindex, __getitem__, __setitem__ + get_block_selection, set_block_selection, + vindex, oindex, blocks, __getitem__, __setitem__ """ @@ -1627,7 +1730,8 @@ def set_coordinate_selection(self, selection, value, fields=None): -------- get_basic_selection, set_basic_selection, get_mask_selection, set_mask_selection, get_orthogonal_selection, set_orthogonal_selection, get_coordinate_selection, - vindex, oindex, __getitem__, __setitem__ + get_block_selection, set_block_selection, + vindex, oindex, blocks, __getitem__, __setitem__ """ @@ -1654,6 +1758,89 @@ def set_coordinate_selection(self, selection, value, fields=None): self._set_selection(indexer, value, fields=fields) + def set_block_selection(self, selection, value, fields=None): + """Modify a selection of individual blocks, by providing the chunk indices + (coordinates) for each block to be modified. + + Parameters + ---------- + selection : tuple + An integer (coordinate) or slice for each dimension of the array. + value : scalar or array-like + Value to be stored into the array. 
+ fields : str or sequence of str, optional + For arrays with a structured dtype, one or more fields can be specified to set + data for. + + Examples + -------- + Set up a 2-dimensional array:: + + >>> import zarr + >>> import numpy as np + >>> z = zarr.zeros((6, 6), dtype=int, chunks=2) + + Set data for a selection of items:: + + >>> z.set_block_selection((1, 0), 1) + >>> z[...] + array([[0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [1, 1, 0, 0, 0, 0], + [1, 1, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0]]) + + For convenience, this functionality is also available via the `blocks` property. + E.g.:: + + >>> z.blocks[2, 1] = 4 + >>> z[...] + array([[0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [1, 1, 0, 0, 0, 0], + [1, 1, 0, 0, 0, 0], + [0, 0, 4, 4, 0, 0], + [0, 0, 4, 4, 0, 0]]) + + >>> z.blocks[:, 2] = 7 + >>> z[...] + array([[0, 0, 0, 0, 7, 7], + [0, 0, 0, 0, 7, 7], + [1, 1, 0, 0, 7, 7], + [1, 1, 0, 0, 7, 7], + [0, 0, 4, 4, 7, 7], + [0, 0, 4, 4, 7, 7]]) + + Notes + ----- + Block indexing is a convenience indexing method to work on individual chunks + with chunk index slicing. It has the same concept as Dask's `Array.blocks` + indexing. + + Slices are supported. However, only with a step size of one. + + See Also + -------- + get_basic_selection, set_basic_selection, get_mask_selection, set_mask_selection, + get_orthogonal_selection, set_orthogonal_selection, get_coordinate_selection, + get_block_selection, set_block_selection, + vindex, oindex, blocks, __getitem__, __setitem__ + + """ + # guard conditions + if self._read_only: + raise ReadOnlyError() + + # refresh metadata + if not self._cache_metadata: + self._load_metadata_nosync() + + # setup indexer + indexer = BlockIndexer(selection, self) + + self._set_selection(indexer, value, fields=fields) + def set_mask_selection(self, selection, value, fields=None): """Modify a selection of individual items, by providing a Boolean array of the same shape as the array against which the selection is being made, where True @@ -1712,7 +1899,8 @@ def set_mask_selection(self, selection, value, fields=None): -------- get_basic_selection, set_basic_selection, get_mask_selection, get_orthogonal_selection, set_orthogonal_selection, get_coordinate_selection, - set_coordinate_selection, vindex, oindex, __getitem__, __setitem__ + set_coordinate_selection, get_block_selection, set_block_selection, + vindex, oindex, blocks, __getitem__, __setitem__ """ diff --git a/zarr/indexing.py b/zarr/indexing.py index 3fb3e2f204..bc2afba992 100644 --- a/zarr/indexing.py +++ b/zarr/indexing.py @@ -695,6 +695,94 @@ def __setitem__(self, selection, value): return self.array.set_orthogonal_selection(selection, value, fields=fields) +# noinspection PyProtectedMember +class BlockIndexer: + + def __init__(self, selection, array): + + # handle ellipsis + selection = replace_ellipsis(selection, array._shape) + + # normalize list to array + selection = replace_lists(selection) + + # setup per-dimension indexers + dim_indexers = [] + for dim_sel, dim_len, dim_chunk_size in \ + zip(selection, array._shape, array._chunks): + dim_numchunks = int(np.ceil(dim_len / dim_chunk_size)) + + if is_integer(dim_sel): + if dim_sel < 0: + dim_sel = dim_numchunks + dim_sel + + start = dim_sel * dim_chunk_size + stop = start + dim_chunk_size + slice_ = slice(start, stop) + + elif is_slice(dim_sel): + start = dim_sel.start if dim_sel.start is not None else 0 + stop = dim_sel.stop if dim_sel.stop is not None else dim_numchunks + + if dim_sel.step not in {1, None}: + raise 
IndexError('unsupported selection item for block indexing; ' + 'expected integer or slice with step=1, got {!r}' + .format(type(dim_sel))) + + # Can't reuse wraparound_indices because it expects a numpy array + # We have integers here. + if start < 0: + start = dim_numchunks + start + if stop < 0: + stop = dim_numchunks + stop + + start = start * dim_chunk_size + stop = stop * dim_chunk_size + slice_ = slice(start, stop) + + else: + raise IndexError('unsupported selection item for block indexing; ' + 'expected integer or slice, got {!r}' + .format(type(dim_sel))) + + dim_indexer = SliceDimIndexer(slice_, dim_len, dim_chunk_size) + dim_indexers.append(dim_indexer) + + if start >= dim_len or start < 0: + raise BoundsCheckError(dim_len) + + self.dim_indexers = dim_indexers + self.shape = tuple(s.nitems for s in self.dim_indexers) + self.drop_axes = None + + def __iter__(self): + for dim_projections in itertools.product(*self.dim_indexers): + chunk_coords = tuple(p.dim_chunk_ix for p in dim_projections) + chunk_selection = tuple(p.dim_chunk_sel for p in dim_projections) + out_selection = tuple(p.dim_out_sel for p in dim_projections + if p.dim_out_sel is not None) + + yield ChunkProjection(chunk_coords, chunk_selection, out_selection) + + +class BlockIndex: + + def __init__(self, array): + self.array = array + + def __getitem__(self, selection): + fields, selection = pop_fields(selection) + selection = ensure_tuple(selection) + selection = replace_lists(selection) + return self.array.get_block_selection(selection, fields=fields) + + def __setitem__(self, selection, value): + fields, selection = pop_fields(selection) + selection = ensure_tuple(selection) + selection = replace_lists(selection) + return self.array.set_block_selection(selection, value, fields=fields) + + # noinspection PyProtectedMember def is_coordinate_selection(selection, array): return ( diff --git a/zarr/tests/test_core.py b/zarr/tests/test_core.py index 1541943d22..ab1a6e8aa7 100644 --- a/zarr/tests/test_core.py +++ b/zarr/tests/test_core.py @@ -338,6 +338,8 @@ def test_array_1d_selections(self): assert_array_equal(a[bix], z.get_mask_selection(bix)) assert_array_equal(a[bix], z.oindex[bix]) assert_array_equal(a[bix], z.vindex[bix]) + assert_array_equal(a[200:400], z.get_block_selection(slice(2, 4))) + assert_array_equal(a[200:400], z.blocks[2:4]) # set z.set_orthogonal_selection(slice(50, 150), 1) @@ -358,7 +360,10 @@ def test_array_1d_selections(self): assert_array_equal(8, z.vindex[bix]) z.oindex[bix] = 9 assert_array_equal(9, z.oindex[bix]) - + z.set_block_selection(slice(2, 4), 10) + assert_array_equal(10, z[200:400]) + z.blocks[2:4] = 11 + assert_array_equal(11, z[200:400]) z.store.close() # noinspection PyStatementEffect @@ -810,6 +815,8 @@ def test_read_only(self): z.set_coordinate_selection([0, 1, 2], 42) with pytest.raises(PermissionError): z.vindex[[0, 1, 2]] = 42 + with pytest.raises(PermissionError): + z.blocks[...] 
= 42 with pytest.raises(PermissionError): z.set_mask_selection(np.ones(z.shape, dtype=bool), 42) diff --git a/zarr/tests/test_indexing.py b/zarr/tests/test_indexing.py index f5f57be010..61e76c63da 100644 --- a/zarr/tests/test_indexing.py +++ b/zarr/tests/test_indexing.py @@ -1096,7 +1096,6 @@ def _test_set_coordinate_selection(v, a, z, selection): def test_set_coordinate_selection_1d(): - # setup v = np.arange(1050, dtype=int) a = np.empty(v.shape, dtype=v.dtype) @@ -1154,6 +1153,185 @@ def test_set_coordinate_selection_2d(): _test_set_coordinate_selection(v, a, z, (ix0, ix1)) +def _test_get_block_selection(a, z, selection, expected_idx): + expect = a[expected_idx] + actual = z.get_block_selection(selection) + assert_array_equal(expect, actual) + actual = z.blocks[selection] + assert_array_equal(expect, actual) + + +block_selections_1d = [ + # test single item + 0, + 5, + # test wraparound + -1, + -4, + # test slice + slice(5), + slice(None, 3), + slice(5, 6), + slice(-3, -1), + slice(None), # Full slice +] + +block_selections_1d_array_projection = [ + # test single item + slice(100), + slice(500, 600), + # test wraparound + slice(1000, None), + slice(700, 800), + # test slice + slice(500), + slice(None, 300), + slice(500, 600), + slice(800, 1000), + slice(None), +] + +block_selections_1d_bad = [ + # slice not supported + slice(3, 8, 2), + # bad stuff + 2.3, + 'foo', + b'xxx', + None, + (0, 0), + (slice(None), slice(None)), + [0, 5, 3] +] + + +def test_get_block_selection_1d(): + # setup + a = np.arange(1050, dtype=int) + z = zarr.create(shape=a.shape, chunks=100, dtype=a.dtype) + z[:] = a + + for selection, expected_idx in \ + zip(block_selections_1d, block_selections_1d_array_projection): + _test_get_block_selection(a, z, selection, expected_idx) + + bad_selections = block_selections_1d_bad + [ + z.nchunks + 1, # out of bounds + -(z.nchunks + 1), # out of bounds + ] + + for selection in bad_selections: + with pytest.raises(IndexError): + z.get_block_selection(selection) + with pytest.raises(IndexError): + z.blocks[selection] + + +block_selections_2d = [ + # test single item + (0, 0), + (1, 2), + # test wraparound + (-1, -1), + (-3, -2), + # test slice + (slice(1), slice(2)), + (slice(None, 2), slice(-2, -1)), + (slice(2, 3), slice(-2, None)), + (slice(-3, -1), slice(-3, -2)), + (slice(None), slice(None)), # Full slice +] + +block_selections_2d_array_projection = [ + # test single item + (slice(300), slice(3)), + (slice(300, 600), slice(6, 9)), + # test wraparound + (slice(900, None), slice(9, None)), + (slice(300, 600), slice(6, 9)), + # test slice + (slice(300), slice(6)), + (slice(None, 600), slice(6, 9)), + (slice(600, 900), slice(6, None)), + (slice(300, 900), slice(3, 6)), + (slice(None), slice(None)), # Full slice +] + + +def test_get_block_selection_2d(): + # setup + a = np.arange(10000, dtype=int).reshape(1000, 10) + z = zarr.create(shape=a.shape, chunks=(300, 3), dtype=a.dtype) + z[:] = a + + for selection, expected_idx in \ + zip(block_selections_2d, block_selections_2d_array_projection): + _test_get_block_selection(a, z, selection, expected_idx) + + with pytest.raises(IndexError): + selection = slice(5, 15), [1, 2, 3] + z.get_block_selection(selection) + with pytest.raises(IndexError): + selection = Ellipsis, [1, 2, 3] + z.get_block_selection(selection) + with pytest.raises(IndexError): # out of bounds + selection = slice(15, 20), slice(None) + z.get_block_selection(selection) + + +def _test_set_block_selection(v: np.ndarray, a: np.ndarray, z: zarr.Array, selection, 
expected_idx): + for value in 42, v[expected_idx], v[expected_idx].tolist(): + # setup expectation + a[:] = 0 + a[expected_idx] = value + # test long-form API + z[:] = 0 + z.set_block_selection(selection, value) + assert_array_equal(a, z[:]) + # test short-form API + z[:] = 0 + z.blocks[selection] = value + assert_array_equal(a, z[:]) + + +def test_set_block_selection_1d(): + # setup + v = np.arange(1050, dtype=int) + a = np.empty(v.shape, dtype=v.dtype) + z = zarr.create(shape=a.shape, chunks=100, dtype=a.dtype) + + for selection, expected_idx in \ + zip(block_selections_1d, block_selections_1d_array_projection): + _test_set_block_selection(v, a, z, selection, expected_idx) + + for selection in block_selections_1d_bad: + with pytest.raises(IndexError): + z.set_block_selection(selection, 42) + with pytest.raises(IndexError): + z.blocks[selection] = 42 + + +def test_set_block_selection_2d(): + # setup + v = np.arange(10000, dtype=int).reshape(1000, 10) + a = np.empty(v.shape, dtype=v.dtype) + z = zarr.create(shape=a.shape, chunks=(300, 3), dtype=a.dtype) + + for selection, expected_idx in \ + zip(block_selections_2d, block_selections_2d_array_projection): + _test_set_block_selection(v, a, z, selection, expected_idx) + + with pytest.raises(IndexError): + selection = slice(5, 15), [1, 2, 3] + z.set_block_selection(selection, 42) + with pytest.raises(IndexError): + selection = Ellipsis, [1, 2, 3] + z.set_block_selection(selection, 42) + with pytest.raises(IndexError): # out of bounds + selection = slice(15, 20), slice(None) + z.set_block_selection(selection, 42) + + def _test_get_mask_selection(a, z, selection): expect = a[selection] actual = z.get_mask_selection(selection) From aa5db9f7ea688c392bd2036fd3f64516059a695b Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Mon, 10 Jul 2023 15:51:10 -0700 Subject: [PATCH 107/213] [V3] Allow for incomplete codec metadata using numcodecs.get_codec (#1447) * refactor(v3): Allow for incomplete codec metadata using numcodecs.get_codec * add test * lint * add release note --------- Co-authored-by: Ryan Abernathey --- docs/release.rst | 10 +++++++--- zarr/meta.py | 20 ++++++++------------ zarr/tests/test_meta.py | 28 ++++++++++++++++++++++++++++ 3 files changed, 43 insertions(+), 15 deletions(-) diff --git a/docs/release.rst b/docs/release.rst index e8d1c440d1..46bd1f025d 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -18,10 +18,14 @@ Release notes Unreleased ---------- -Bug fixes -~~~~~~~~~ +Enhancements +~~~~~~~~~~~~ + +* Allow for partial codec specification in V3 array metadata. + By :user:`Joe Hamman ` :issue:`1443`. -* Add ``__contains__`` method to ``KVStore``. By :user:`Christoph Gohlke ` :issue:`1454`. +* Add ``__contains__`` method to ``KVStore``. + By :user:`Christoph Gohlke ` :issue:`1454`. * **Block Indexing**: Implemented blockwise (chunk blocks) indexing to ``zarr.Array``. 
By :user:`Altay Sansal ` :issue:`1428`

diff --git a/zarr/meta.py b/zarr/meta.py
index 59c56abf3d..aacffd7f77 100644
--- a/zarr/meta.py
+++ b/zarr/meta.py
@@ -441,26 +441,22 @@ def _decode_codec_metadata(cls, meta: Optional[Mapping]) -> Optional[Codec]:
         uri = 'https://purl.org/zarr/spec/codec/'
         conf = meta['configuration']
         if meta['codec'].startswith(uri + 'gzip/'):
-            codec = numcodecs.GZip(level=conf['level'])
+            conf["id"] = "gzip"
         elif meta['codec'].startswith(uri + 'zlib/'):
-            codec = numcodecs.Zlib(level=conf['level'])
+            conf["id"] = "zlib"
         elif meta['codec'].startswith(uri + 'blosc/'):
-            codec = numcodecs.Blosc(clevel=conf['clevel'],
-                                    shuffle=conf['shuffle'],
-                                    blocksize=conf['blocksize'],
-                                    cname=conf['cname'])
+            conf["id"] = "blosc"
         elif meta['codec'].startswith(uri + 'bz2/'):
-            codec = numcodecs.BZ2(level=conf['level'])
+            conf["id"] = "bz2"
         elif meta['codec'].startswith(uri + 'lz4/'):
-            codec = numcodecs.LZ4(acceleration=conf['acceleration'])
+            conf["id"] = "lz4"
         elif meta['codec'].startswith(uri + 'lzma/'):
-            codec = numcodecs.LZMA(format=conf['format'],
-                                   check=conf['check'],
-                                   preset=conf['preset'],
-                                   filters=conf['filters'])
+            conf["id"] = "lzma"
         else:
             raise NotImplementedError

+        codec = numcodecs.get_codec(conf)
+
         return codec

     @classmethod
diff --git a/zarr/tests/test_meta.py b/zarr/tests/test_meta.py
index 8acd634a13..a78375986e 100644
--- a/zarr/tests/test_meta.py
+++ b/zarr/tests/test_meta.py
@@ -314,6 +314,34 @@ def test_encode_decode_array_dtype_shape_v3():
     assert 'filters' not in meta_dec


+@pytest.mark.parametrize("comp_id", ["gzip", "zlib", "blosc", "bz2", "lz4", "lzma"])
+def test_decode_metadata_implicit_compressor_config_v3(comp_id):
+    meta = {
+        "attributes": {},
+        "chunk_grid": {
+            "chunk_shape": [10],
+            "separator": "/",
+            "type": "regular"
+        },
+        "chunk_memory_layout": "C",
+        "compressor": {
+            "codec": f"https://purl.org/zarr/spec/codec/{comp_id}/1.0",
+            "configuration": {
+                # intentionally left empty
+            }
+        },
+        "data_type": "<f8",
+        "extensions": [],
+        "fill_value": 0.0,
+        "shape": [100]
+    }
+
+    codec = Metadata3._decode_codec_metadata(meta["compressor"])
+    # the decoder should fall back to the codec's default configuration
+    assert codec is not None
+    assert codec.codec_id == comp_id

From: Davis Vann Bennett
Date: Wed, 12 Jul 2023 09:54:21 -0400
Subject: [PATCH 108/213] style: add ruff and black to pre-commit

---
 .pre-commit-config.yaml | 19 +++++++++++--------
 pyproject.toml          | 36 ++++++++++++++++++++++++++++++++++++
 2 files changed, 47 insertions(+), 8 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 583a2b0184..55e0fc617a 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -5,15 +5,18 @@ default_stages: [commit, push]
 default_language_version:
   python: python3
 repos:
-  - repo: https://github.com/PyCQA/flake8
-    rev: 6.0.0
+  - repo: https://github.com/charliermarsh/ruff-pre-commit
+    # Ruff version.
+    rev: 'v0.0.224'
     hooks:
-      - id: flake8
-        args: [
-            --max-line-length=100
-        ]
-        exclude: ^(venv/|docs/)
-        types: ['python']
+      - id: ruff
+        # Respect `exclude` and `extend-exclude` settings.
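+        # --force-exclude makes ruff apply its `exclude`/`extend-exclude`
+        # settings even to filenames that pre-commit passes explicitly.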
+ args: ["--force-exclude"] + - repo: https://github.com/psf/black + rev: 22.12.0 + hooks: + - id: black + language_version: python3.8 - repo: https://github.com/codespell-project/codespell rev: v2.2.5 hooks: diff --git a/pyproject.toml b/pyproject.toml index 4beb357bb0..4b293b90e4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -72,6 +72,42 @@ version_scheme = "guess-next-dev" local_scheme = "dirty-tag" write_to = "zarr/version.py" +[tool.ruff] +line-length = 100 +exclude = [ + ".bzr", + ".direnv", + ".eggs", + ".git", + ".mypy_cache", + ".nox", + ".pants.d", + ".ruff_cache", + ".venv", + "__pypackages__", + "_build", + "buck-out", + "build", + "dist", + "venv", + "docs" +] + +[tool.black] +line-length = 100 +exclude = ''' +/( + \.git + | \.mypy_cache + | \.venv + | _build + | buck-out + | build + | dist + | docs +)/ +''' + [tool.mypy] python_version = "3.8" ignore_missing_imports = true From 94cdd1ab492416783a525b5f2d119be59537ab4a Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Wed, 12 Jul 2023 22:15:34 -0400 Subject: [PATCH 109/213] style: tweak codespell config to avoid a false positive --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 55e0fc617a..c46115342d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -21,7 +21,7 @@ repos: rev: v2.2.5 hooks: - id: codespell - args: ["-L", "ba,ihs,kake,nd,noe,nwo,te,fo", "-S", "fixture"] + args: ["-L", "ba,ihs,kake,nd,noe,nwo,te,fo,zar", "-S", "fixture"] - repo: https://github.com/pre-commit/pre-commit-hooks rev: v4.4.0 hooks: From 4e348d6b80c96da461fd866576c971b8a659ba15 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Wed, 12 Jul 2023 22:16:05 -0400 Subject: [PATCH 110/213] style: lint the codebase --- bench/compress_normal.py | 25 +- docs/conf.py | 197 ++- zarr/__init__.py | 69 +- zarr/_storage/absstore.py | 87 +- zarr/_storage/store.py | 110 +- zarr/_storage/v3.py | 197 ++- zarr/_storage/v3_storage_transformers.py | 75 +- zarr/attrs.py | 31 +- zarr/context.py | 4 +- zarr/convenience.py | 338 +++-- zarr/core.py | 311 ++-- zarr/creation.py | 266 ++-- zarr/errors.py | 5 +- zarr/hierarchy.py | 487 ++++--- zarr/indexing.py | 230 ++- zarr/meta.py | 59 +- zarr/meta_v1.py | 40 +- zarr/n5.py | 345 +++-- zarr/storage.py | 661 +++++---- zarr/tests/test_attrs.py | 277 ++-- zarr/tests/test_convenience.py | 664 +++++---- zarr/tests/test_core.py | 1702 ++++++++++++---------- zarr/tests/test_creation.py | 287 ++-- zarr/tests/test_dim_separator.py | 41 +- zarr/tests/test_filters.py | 57 +- zarr/tests/test_hierarchy.py | 1142 ++++++++------- zarr/tests/test_indexing.py | 425 +++--- zarr/tests/test_info.py | 53 +- zarr/tests/test_meta.py | 333 +++-- zarr/tests/test_meta_array.py | 2 +- zarr/tests/test_n5.py | 15 +- zarr/tests/test_storage.py | 1639 ++++++++++----------- zarr/tests/test_storage_v3.py | 379 ++--- zarr/tests/test_sync.py | 184 ++- zarr/tests/test_util.py | 116 +- zarr/tests/util.py | 24 +- zarr/util.py | 269 ++-- 37 files changed, 5856 insertions(+), 5290 deletions(-) diff --git a/bench/compress_normal.py b/bench/compress_normal.py index ce0a05b9ec..9f1655541c 100644 --- a/bench/compress_normal.py +++ b/bench/compress_normal.py @@ -9,36 +9,39 @@ if __name__ == "__main__": - sys.path.insert(0, '..') + sys.path.insert(0, "..") # setup - a = np.random.normal(2000, 1000, size=200000000).astype('u2') - z = zarr.empty_like(a, chunks=1000000, - compression='blosc', - compression_opts=dict(cname='lz4', clevel=5, 
shuffle=2)) + a = np.random.normal(2000, 1000, size=200000000).astype("u2") + z = zarr.empty_like( + a, + chunks=1000000, + compression="blosc", + compression_opts=dict(cname="lz4", clevel=5, shuffle=2), + ) print(z) - print('*' * 79) + print("*" * 79) # time - t = timeit.repeat('z[:] = a', repeat=10, number=1, globals=globals()) + t = timeit.repeat("z[:] = a", repeat=10, number=1, globals=globals()) print(t) print(min(t)) print(z) # profile profile = line_profiler.LineProfiler(blosc.compress) - profile.run('z[:] = a') + profile.run("z[:] = a") profile.print_stats() - print('*' * 79) + print("*" * 79) # time - t = timeit.repeat('z[:]', repeat=10, number=1, globals=globals()) + t = timeit.repeat("z[:]", repeat=10, number=1, globals=globals()) print(t) print(min(t)) # profile profile = line_profiler.LineProfiler(blosc.decompress) - profile.run('z[:]') + profile.run("z[:]") profile.print_stats() diff --git a/docs/conf.py b/docs/conf.py index 413d648732..f85ecb7454 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -26,50 +26,50 @@ # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. -sys.path.append(os.path.abspath('..')) +sys.path.append(os.path.abspath("..")) # -- General configuration ------------------------------------------------ # If your documentation needs a minimal Sphinx version, state it here. -#needs_sphinx = '1.0' +# needs_sphinx = '1.0' # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ - 'sphinx.ext.autodoc', - 'sphinx.ext.autosummary', - 'sphinx.ext.viewcode', - 'sphinx.ext.intersphinx', - 'numpydoc', - 'sphinx_issues', + "sphinx.ext.autodoc", + "sphinx.ext.autosummary", + "sphinx.ext.viewcode", + "sphinx.ext.intersphinx", + "numpydoc", + "sphinx_issues", "sphinx_copybutton", - "sphinx_design" + "sphinx_design", ] numpydoc_show_class_members = False numpydoc_class_members_toctree = False -issues_github_path = 'zarr-developers/zarr-python' +issues_github_path = "zarr-developers/zarr-python" # Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] +templates_path = ["_templates"] # The suffix(es) of source filenames. # You can specify multiple suffix as a list of string: # source_suffix = ['.rst', '.md'] -source_suffix = '.rst' +source_suffix = ".rst" # The encoding of source files. -#source_encoding = 'utf-8-sig' +# source_encoding = 'utf-8-sig' # The main toctree document. -main_doc = 'index' +main_doc = "index" # General information about the project. -project = 'zarr' -copyright = '2022, Zarr Developers' -author = 'Zarr Developers' +project = "zarr" +copyright = "2022, Zarr Developers" +author = "Zarr Developers" version = zarr.__version__ # The full version, including alpha/beta/rc tags. @@ -80,42 +80,42 @@ # # This is also used if you do content translation via gettext catalogs. # Usually you set "language" from the command line for these cases. -language = 'en' +language = "en" # There are two options for replacing |today|: either, you set today to some # non-false value, then it is used: -#today = '' +# today = '' # Else, today_fmt is used as the format for a strftime call. 
-#today_fmt = '%B %d, %Y' +# today_fmt = '%B %d, %Y' # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This patterns also effect to html_static_path and html_extra_path -exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store', 'talks'] +exclude_patterns = ["_build", "Thumbs.db", ".DS_Store", "talks"] # The reST default role (used for this markup: `text`) to use for all # documents. -#default_role = None +# default_role = None # If true, '()' will be appended to :func: etc. cross-reference text. -#add_function_parentheses = True +# add_function_parentheses = True # If true, the current module name will be prepended to all description # unit titles (such as .. function::). -#add_module_names = True +# add_module_names = True # If true, sectionauthor and moduleauthor directives will be shown in the # output. They are ignored by default. -#show_authors = False +# show_authors = False # The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' +pygments_style = "sphinx" # A list of ignored prefixes for module index sorting. -#modindex_common_prefix = [] +# modindex_common_prefix = [] # If true, keep warnings as "system message" paragraphs in the built documents. -#keep_warnings = False +# keep_warnings = False # If true, `todo` and `todoList` produce output, else they produce nothing. todo_include_todos = False @@ -125,181 +125,174 @@ # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. -html_theme = 'pydata_sphinx_theme' +html_theme = "pydata_sphinx_theme" -html_favicon = '_static/logo1.png' +html_favicon = "_static/logo1.png" # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. html_theme_options = { - "github_url": "https://github.com/zarr-developers/zarr-python", - "twitter_url": "https://twitter.com/zarr_dev", - "icon_links": [ - { - "name": "Zarr Dev", - "url": "https://zarr.dev/", - "icon": "_static/logo1.png", - "type": "local" - }, - ], - "collapse_navigation": True + "github_url": "https://github.com/zarr-developers/zarr-python", + "twitter_url": "https://twitter.com/zarr_dev", + "icon_links": [ + { + "name": "Zarr Dev", + "url": "https://zarr.dev/", + "icon": "_static/logo1.png", + "type": "local", + }, + ], + "collapse_navigation": True, } # Add any paths that contain custom themes here, relative to this directory. -#html_theme_path = [] +# html_theme_path = [] # The name for this set of Sphinx documents. # " v documentation" by default. -#html_title = 'zarr v@@' +# html_title = 'zarr v@@' # A shorter title for the navigation bar. Default is the same as html_title. -#html_short_title = None +# html_short_title = None # The name of an image file (relative to this directory) to place at the top # of the sidebar. -html_logo = '_static/logo1.png' +html_logo = "_static/logo1.png" # Add custom css def setup(app): - app.add_css_file('custom.css') + app.add_css_file("custom.css") # The name of an image file (relative to this directory) to use as a favicon of # the docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 # pixels large. -#html_favicon = None +# html_favicon = None # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". 
-html_static_path = ['_static'] +html_static_path = ["_static"] html_js_files = [ - 'custom.js', + "custom.js", ] # Add any extra paths that contain custom files (such as robots.txt or # .htaccess) here, relative to this directory. These files are copied # directly to the root of the documentation. -#html_extra_path = [] +# html_extra_path = [] # If not None, a 'Last updated on:' timestamp is inserted at every page # bottom, using the given strftime format. # The empty string is equivalent to '%b %d, %Y'. -#html_last_updated_fmt = None +# html_last_updated_fmt = None # If true, SmartyPants will be used to convert quotes and dashes to # typographically correct entities. -#html_use_smartypants = True +# html_use_smartypants = True # Custom sidebar templates, maps document names to template names. -#html_sidebars = {} +# html_sidebars = {} # Additional templates that should be rendered to pages, maps page names to # template names. -#html_additional_pages = {} +# html_additional_pages = {} # If false, no module index is generated. -#html_domain_indices = True +# html_domain_indices = True # If false, no index is generated. -#html_use_index = True +# html_use_index = True # If true, the index is split into individual pages for each letter. -#html_split_index = False +# html_split_index = False # If true, links to the reST sources are added to the pages. -#html_show_sourcelink = True +# html_show_sourcelink = True # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. -#html_show_sphinx = True +# html_show_sphinx = True # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. -#html_show_copyright = True +# html_show_copyright = True # If true, an OpenSearch description file will be output, and all pages will # contain a tag referring to it. The value of this option must be the # base URL from which the finished HTML is served. -#html_use_opensearch = '' +# html_use_opensearch = '' # This is the file name suffix for HTML files (e.g. ".xhtml"). -#html_file_suffix = None +# html_file_suffix = None # Language to be used for generating the HTML full-text search index. # Sphinx supports the following languages: # 'da', 'de', 'en', 'es', 'fi', 'fr', 'h', 'it', 'ja' # 'nl', 'no', 'pt', 'ro', 'r', 'sv', 'tr', 'zh' -#html_search_language = 'en' +# html_search_language = 'en' # A dictionary with options for the search language support, empty by default. # 'ja' uses this config value. # 'zh' user can custom change `jieba` dictionary path. -#html_search_options = {'type': 'default'} +# html_search_options = {'type': 'default'} # The name of a javascript file (relative to the configuration directory) that # implements a search results scorer. If empty, the default will be used. -#html_search_scorer = 'scorer.js' +# html_search_scorer = 'scorer.js' # Output file base name for HTML help builder. -htmlhelp_basename = 'zarrdoc' +htmlhelp_basename = "zarrdoc" # -- Options for LaTeX output --------------------------------------------- latex_elements = { -# The paper size ('letterpaper' or 'a4paper'). -#'papersize': 'letterpaper', - -# The font size ('10pt', '11pt' or '12pt'). -#'pointsize': '10pt', - -# Additional stuff for the LaTeX preamble. -#'preamble': '', - -# Latex figure (float) alignment -#'figure_align': 'htbp', + # The paper size ('letterpaper' or 'a4paper'). + #'papersize': 'letterpaper', + # The font size ('10pt', '11pt' or '12pt'). + #'pointsize': '10pt', + # Additional stuff for the LaTeX preamble. 
+ #'preamble': '', + # Latex figure (float) alignment + #'figure_align': 'htbp', } # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). latex_documents = [ - (main_doc, 'zarr.tex', 'Zarr-Python', - author, 'manual'), + (main_doc, "zarr.tex", "Zarr-Python", author, "manual"), ] # The name of an image file (relative to this directory) to place at the top of # the title page. -#latex_logo = None +# latex_logo = None # For "manual" documents, if this is true, then toplevel headings are parts, # not chapters. -#latex_use_parts = False +# latex_use_parts = False # If true, show page references after internal links. -#latex_show_pagerefs = False +# latex_show_pagerefs = False # If true, show URL addresses after external links. -#latex_show_urls = False +# latex_show_urls = False # Documents to append as an appendix to all manuals. -#latex_appendices = [] +# latex_appendices = [] # If false, no module index is generated. -#latex_domain_indices = True +# latex_domain_indices = True # -- Options for manual page output --------------------------------------- # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). -man_pages = [ - (main_doc, 'zarr', 'Zarr-Python', - [author], 1) -] +man_pages = [(main_doc, "zarr", "Zarr-Python", [author], 1)] # If true, show URL addresses after external links. -#man_show_urls = False +# man_show_urls = False # -- Options for Texinfo output ------------------------------------------- @@ -308,30 +301,36 @@ def setup(app): # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ - (main_doc, 'zarr', 'Zarr-Python', - author, 'zarr', 'One line description of project.', - 'Miscellaneous'), + ( + main_doc, + "zarr", + "Zarr-Python", + author, + "zarr", + "One line description of project.", + "Miscellaneous", + ), ] # Documents to append as an appendix to all manuals. -#texinfo_appendices = [] +# texinfo_appendices = [] # If false, no module index is generated. -#texinfo_domain_indices = True +# texinfo_domain_indices = True # How to display URL addresses: 'footnote', 'no', or 'inline'. -#texinfo_show_urls = 'footnote' +# texinfo_show_urls = 'footnote' # If true, do not generate a @detailmenu in the "Top" node's menu. -#texinfo_no_detailmenu = False +# texinfo_no_detailmenu = False # Example configuration for intersphinx: refer to the Python standard library. 
# use in refs e.g: # :ref:`comparison manual ` intersphinx_mapping = { - 'python': ('https://docs.python.org/', None), - 'numpy': ('https://numpy.org/doc/stable/', None), + "python": ("https://docs.python.org/", None), + "numpy": ("https://numpy.org/doc/stable/", None), } diff --git a/zarr/__init__.py b/zarr/__init__.py index 4d2c992dbf..6cecb40af8 100644 --- a/zarr/__init__.py +++ b/zarr/__init__.py @@ -1,20 +1,53 @@ # flake8: noqa from zarr.codecs import * -from zarr.convenience import (consolidate_metadata, copy, copy_all, copy_store, - load, open, open_consolidated, save, save_array, - save_group, tree) +from zarr.convenience import ( + consolidate_metadata, + copy, + copy_all, + copy_store, + load, + open, + open_consolidated, + save, + save_array, + save_group, + tree, +) from zarr.core import Array -from zarr.creation import (array, create, empty, empty_like, full, full_like, - ones, ones_like, open_array, open_like, zeros, - zeros_like) +from zarr.creation import ( + array, + create, + empty, + empty_like, + full, + full_like, + ones, + ones_like, + open_array, + open_like, + zeros, + zeros_like, +) from zarr.errors import CopyError, MetadataError from zarr.hierarchy import Group, group, open_group from zarr.n5 import N5Store, N5FSStore from zarr._storage.store import v3_api_available -from zarr.storage import (ABSStore, DBMStore, DictStore, DirectoryStore, - KVStore, LMDBStore, LRUStoreCache, MemoryStore, MongoDBStore, - NestedDirectoryStore, RedisStore, SQLiteStore, - TempStore, ZipStore) +from zarr.storage import ( + ABSStore, + DBMStore, + DictStore, + DirectoryStore, + KVStore, + LMDBStore, + LRUStoreCache, + MemoryStore, + MongoDBStore, + NestedDirectoryStore, + RedisStore, + SQLiteStore, + TempStore, + ZipStore, +) from zarr.sync import ProcessSynchronizer, ThreadSynchronizer from zarr.version import version as __version__ @@ -22,6 +55,16 @@ assert not __version__.startswith("0.0.0") if v3_api_available: - from zarr._storage.v3 import (ABSStoreV3, DBMStoreV3, KVStoreV3, DirectoryStoreV3, - LMDBStoreV3, LRUStoreCacheV3, MemoryStoreV3, MongoDBStoreV3, - RedisStoreV3, SQLiteStoreV3, ZipStoreV3) + from zarr._storage.v3 import ( + ABSStoreV3, + DBMStoreV3, + KVStoreV3, + DirectoryStoreV3, + LMDBStoreV3, + LRUStoreCacheV3, + MemoryStoreV3, + MongoDBStoreV3, + RedisStoreV3, + SQLiteStoreV3, + ZipStoreV3, + ) diff --git a/zarr/_storage/absstore.py b/zarr/_storage/absstore.py index cc41018f9e..f62529f096 100644 --- a/zarr/_storage/absstore.py +++ b/zarr/_storage/absstore.py @@ -6,7 +6,7 @@ from zarr._storage.store import _get_metadata_suffix, data_root, meta_root, Store, StoreV3 __doctest_requires__ = { - ('ABSStore', 'ABSStore.*'): ['azure.storage.blob'], + ("ABSStore", "ABSStore.*"): ["azure.storage.blob"], } @@ -58,12 +58,18 @@ class ABSStore(Store): ----- In order to use this store, you must install the Microsoft Azure Storage SDK for Python, ``azure-storage-blob>=12.5.0``. 
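
    For orientation, a hypothetical construction sketch for this class (the
    connection string, container name, and prefix below are placeholders, not
    values from this changeset)::

        >>> import zarr
        >>> from zarr.storage import ABSStore
        >>> from azure.storage.blob import ContainerClient
        >>> client = ContainerClient.from_connection_string(
        ...     "<connection-string>", container_name="zarr-demo")
        >>> store = ABSStore(client=client, prefix="data")
        >>> root = zarr.group(store=store)
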
- """ - - def __init__(self, container=None, prefix='', account_name=None, account_key=None, - blob_service_kwargs=None, dimension_separator=None, - client=None, - ): + """ # noqa: E501 + + def __init__( + self, + container=None, + prefix="", + account_name=None, + account_key=None, + blob_service_kwargs=None, + dimension_separator=None, + client=None, + ): self._dimension_separator = dimension_separator self.prefix = normalize_storage_path(prefix) if client is None: @@ -75,11 +81,14 @@ def __init__(self, container=None, prefix='', account_name=None, account_key=Non ) warnings.warn(msg, FutureWarning, stacklevel=2) from azure.storage.blob import ContainerClient + blob_service_kwargs = blob_service_kwargs or {} client = ContainerClient( - "https://{}.blob.core.windows.net/".format(account_name), container, - credential=account_key, **blob_service_kwargs - ) + "https://{}.blob.core.windows.net/".format(account_name), + container, + credential=account_key, + **blob_service_kwargs + ) self.client = client self._container = container @@ -88,8 +97,10 @@ def __init__(self, container=None, prefix='', account_name=None, account_key=Non @staticmethod def _warn_deprecated(property_): - msg = ("The {} property is deprecated and will be removed in a future " - "version. Get the property from 'ABSStore.client' instead.") + msg = ( + "The {} property is deprecated and will be removed in a future " + "version. Get the property from 'ABSStore.client' instead." + ) warnings.warn(msg.format(property_), FutureWarning, stacklevel=3) @property @@ -108,10 +119,10 @@ def account_key(self): return self._account_key def _append_path_to_prefix(self, path): - if self.prefix == '': + if self.prefix == "": return normalize_storage_path(path) else: - return '/'.join([self.prefix, normalize_storage_path(path)]) + return "/".join([self.prefix, normalize_storage_path(path)]) @staticmethod def _strip_prefix_from_path(path, prefix): @@ -119,17 +130,18 @@ def _strip_prefix_from_path(path, prefix): path_norm = normalize_storage_path(path) prefix_norm = normalize_storage_path(prefix) if prefix: - return path_norm[(len(prefix_norm)+1):] + return path_norm[(len(prefix_norm) + 1) :] else: return path_norm def __getitem__(self, key): from azure.core.exceptions import ResourceNotFoundError + blob_name = self._append_path_to_prefix(key) try: return self.client.download_blob(blob_name).readall() except ResourceNotFoundError: - raise KeyError('Blob %s not found' % blob_name) + raise KeyError("Blob %s not found" % blob_name) def __setitem__(self, key, value): value = ensure_bytes(value) @@ -138,16 +150,17 @@ def __setitem__(self, key, value): def __delitem__(self, key): from azure.core.exceptions import ResourceNotFoundError + try: self.client.delete_blob(self._append_path_to_prefix(key)) except ResourceNotFoundError: - raise KeyError('Blob %s not found' % key) + raise KeyError("Blob %s not found" % key) def __eq__(self, other): return ( - isinstance(other, ABSStore) and - self.client == other.client and - self.prefix == other.prefix + isinstance(other, ABSStore) + and self.client == other.client + and self.prefix == other.prefix ) def keys(self): @@ -155,7 +168,7 @@ def keys(self): def __iter__(self): if self.prefix: - list_blobs_prefix = self.prefix + '/' + list_blobs_prefix = self.prefix + "/" else: list_blobs_prefix = None for blob in self.client.list_blobs(list_blobs_prefix): @@ -171,17 +184,17 @@ def __contains__(self, key): def listdir(self, path=None): dir_path = normalize_storage_path(self._append_path_to_prefix(path)) if 
dir_path: - dir_path += '/' + dir_path += "/" items = [ self._strip_prefix_from_path(blob.name, dir_path) - for blob in self.client.walk_blobs(name_starts_with=dir_path, delimiter='/') + for blob in self.client.walk_blobs(name_starts_with=dir_path, delimiter="/") ] return items def rmdir(self, path=None): dir_path = normalize_storage_path(self._append_path_to_prefix(path)) if dir_path: - dir_path += '/' + dir_path += "/" for blob in self.client.list_blobs(name_starts_with=dir_path): self.client.delete_blob(blob) @@ -197,11 +210,11 @@ def getsize(self, path=None): return blob_client.get_blob_properties().size else: size = 0 - if fs_path == '': + if fs_path == "": fs_path = None - elif not fs_path.endswith('/'): - fs_path += '/' - for blob in self.client.walk_blobs(name_starts_with=fs_path, delimiter='/'): + elif not fs_path.endswith("/"): + fs_path += "/" + for blob in self.client.walk_blobs(name_starts_with=fs_path, delimiter="/"): blob_client = self.client.get_blob_client(blob) if blob_client.exists(): size += blob_client.get_blob_properties().size @@ -212,15 +225,14 @@ def clear(self): class ABSStoreV3(ABSStore, StoreV3): - def list(self): return list(self.keys()) def __eq__(self, other): return ( - isinstance(other, ABSStoreV3) and - self.client == other.client and - self.prefix == other.prefix + isinstance(other, ABSStoreV3) + and self.client == other.client + and self.prefix == other.prefix ) def __setitem__(self, key, value): @@ -234,24 +246,24 @@ def rmdir(self, path=None): # If we disallow an empty path then we will need to modify # TestABSStoreV3 to have the create_store method use a prefix. - ABSStore.rmdir(self, '') + ABSStore.rmdir(self, "") return meta_dir = meta_root + path - meta_dir = meta_dir.rstrip('/') + meta_dir = meta_dir.rstrip("/") ABSStore.rmdir(self, meta_dir) # remove data folder data_dir = data_root + path - data_dir = data_dir.rstrip('/') + data_dir = data_dir.rstrip("/") ABSStore.rmdir(self, data_dir) # remove metadata files sfx = _get_metadata_suffix(self) - array_meta_file = meta_dir + '.array' + sfx + array_meta_file = meta_dir + ".array" + sfx if array_meta_file in self: del self[array_meta_file] - group_meta_file = meta_dir + '.group' + sfx + group_meta_file = meta_dir + ".group" + sfx if group_meta_file in self: del self[group_meta_file] @@ -259,6 +271,7 @@ def rmdir(self, path=None): # For now, calling the generic keys-based _getsize def getsize(self, path=None): from zarr.storage import _getsize # avoid circular import + return _getsize(self, path) diff --git a/zarr/_storage/store.py b/zarr/_storage/store.py index 0594dc22de..8daedae48f 100644 --- a/zarr/_storage/store.py +++ b/zarr/_storage/store.py @@ -11,17 +11,17 @@ from zarr.context import Context # v2 store keys -array_meta_key = '.zarray' -group_meta_key = '.zgroup' -attrs_key = '.zattrs' +array_meta_key = ".zarray" +group_meta_key = ".zgroup" +attrs_key = ".zattrs" # v3 paths -meta_root = 'meta/root/' -data_root = 'data/root/' +meta_root = "meta/root/" +data_root = "data/root/" DEFAULT_ZARR_VERSION = 2 -v3_api_available = os.environ.get('ZARR_V3_EXPERIMENTAL_API', '0').lower() not in ['0', 'false'] +v3_api_available = os.environ.get("ZARR_V3_EXPERIMENTAL_API", "0").lower() not in ["0", "false"] def assert_zarr_v3_api_available(): @@ -229,11 +229,11 @@ def _validate_key(self, key: str): ): raise ValueError("keys starts with unexpected value: `{}`".format(key)) - if key.endswith('/'): + if key.endswith("/"): raise ValueError("keys may not end in /") def list_prefix(self, prefix): - if 
prefix.startswith('/'): + if prefix.startswith("/"): raise ValueError("prefix must not begin with /") # TODO: force prefix to end with /? return [k for k in self.list() if k.startswith(prefix)] @@ -294,8 +294,7 @@ def supports_efficient_get_partial_values(self): return False def get_partial_values( - self, - key_ranges: Sequence[Tuple[str, Tuple[int, Optional[int]]]] + self, key_ranges: Sequence[Tuple[str, Tuple[int, Optional[int]]]] ) -> List[Union[bytes, memoryview, bytearray]]: """Get multiple partial values. key_ranges can be an iterable of key, range pairs, @@ -306,11 +305,9 @@ def get_partial_values( from the end of the file. A key may occur multiple times with different ranges. Inserts None for missing keys into the returned list.""" - results: List[Union[bytes, memoryview, bytearray]] = ( - [None] * len(key_ranges) # type: ignore[list-item] - ) - indexed_ranges_by_key: Dict[str, List[Tuple[int, Tuple[int, Optional[int]]]]] = ( - defaultdict(list) + results: List[Union[bytes, memoryview, bytearray]] = [None] * len(key_ranges) # type: ignore[list-item] # noqa: E501 + indexed_ranges_by_key: Dict[str, List[Tuple[int, Tuple[int, Optional[int]]]]] = defaultdict( + list ) for i, (key, range_) in enumerate(key_ranges): indexed_ranges_by_key[key].append((i, range_)) @@ -323,7 +320,7 @@ def get_partial_values( if range_length is None: results[i] = value[range_from:] else: - results[i] = value[range_from:range_from + range_length] + results[i] = value[range_from : range_from + range_length] return results def supports_efficient_set_partial_values(self): @@ -356,7 +353,7 @@ def set_partial_values(self, key_start_values): if start < 0: values[key][start:] = value else: - values[key][start:start + len(value)] = value + values[key][start : start + len(value)] = value for key, value in values.items(): self[key] = value @@ -377,14 +374,13 @@ def _ensure_store(store): We'll do this conversion in a few places automatically """ from zarr._storage.v3 import KVStoreV3 # avoid circular import + if store is None: return None elif isinstance(store, StoreV3): return store elif isinstance(store, Store): - raise ValueError( - f"cannot initialize a v3 store with a v{store._store_version} store" - ) + raise ValueError(f"cannot initialize a v3 store with a v{store._store_version} store") elif isinstance(store, MutableMapping): return KVStoreV3(store) else: @@ -444,10 +440,7 @@ def get_config(self): # Override in sub-class if need special encoding of config values. # By default, assume all non-private members are configuration # parameters except for type . - return { - k: v for k, v in self.__dict__.items() - if not k.startswith('_') and k != "type" - } + return {k: v for k, v in self.__dict__.items() if not k.startswith("_") and k != "type"} @classmethod def from_config(cls, _type, config): @@ -460,18 +453,18 @@ def from_config(cls, _type, config): @property def inner_store(self) -> Union["StorageTransformer", StoreV3]: - assert self._inner_store is not None, ( - "inner_store is not initialized, first get a copy via _copy_for_array." - ) + assert ( + self._inner_store is not None + ), "inner_store is not initialized, first get a copy via _copy_for_array." 
return self._inner_store # The following implementations are usually fine to keep as-is: def __eq__(self, other): return ( - type(self) == type(other) and - self._inner_store == other._inner_store and - self.get_config() == other.get_config() + type(self) == type(other) + and self._inner_store == other._inner_store + and self.get_config() == other.get_config() ) def erase(self, key): @@ -561,42 +554,41 @@ def set_partial_values(self, key_start_values): def _path_to_prefix(path: Optional[str]) -> str: # assume path already normalized if path: - prefix = path + '/' + prefix = path + "/" else: - prefix = '' + prefix = "" return prefix def _get_hierarchy_metadata(store: StoreV3) -> Mapping[str, Any]: - version = getattr(store, '_store_version', 2) + version = getattr(store, "_store_version", 2) if version < 3: - raise ValueError("zarr.json hierarchy metadata not stored for " - f"zarr v{version} stores") - if 'zarr.json' not in store: + raise ValueError("zarr.json hierarchy metadata not stored for " f"zarr v{version} stores") + if "zarr.json" not in store: raise ValueError("zarr.json metadata not found in store") - return store._metadata_class.decode_hierarchy_metadata(store['zarr.json']) + return store._metadata_class.decode_hierarchy_metadata(store["zarr.json"]) def _get_metadata_suffix(store: StoreV3) -> str: - if 'zarr.json' in store: - return _get_hierarchy_metadata(store)['metadata_key_suffix'] - return '.json' + if "zarr.json" in store: + return _get_hierarchy_metadata(store)["metadata_key_suffix"] + return ".json" def _rename_metadata_v3(store: StoreV3, src_path: str, dst_path: str) -> bool: """Rename source or group metadata file associated with src_path.""" any_renamed = False sfx = _get_metadata_suffix(store) - src_path = src_path.rstrip('/') - dst_path = dst_path.rstrip('/') - _src_array_json = meta_root + src_path + '.array' + sfx + src_path = src_path.rstrip("/") + dst_path = dst_path.rstrip("/") + _src_array_json = meta_root + src_path + ".array" + sfx if _src_array_json in store: - new_key = meta_root + dst_path + '.array' + sfx + new_key = meta_root + dst_path + ".array" + sfx store[new_key] = store.pop(_src_array_json) any_renamed = True - _src_group_json = meta_root + src_path + '.group' + sfx + _src_group_json = meta_root + src_path + ".group" + sfx if _src_group_json in store: - new_key = meta_root + dst_path + '.group' + sfx + new_key = meta_root + dst_path + ".group" + sfx store[new_key] = store.pop(_src_group_json) any_renamed = True return any_renamed @@ -606,7 +598,7 @@ def _rename_from_keys(store: BaseStore, src_path: str, dst_path: str) -> None: # assume path already normalized src_prefix = _path_to_prefix(src_path) dst_prefix = _path_to_prefix(dst_path) - version = getattr(store, '_store_version', 2) + version = getattr(store, "_store_version", 2) if version == 2: for key in list(store.keys()): if key.startswith(src_prefix): @@ -618,7 +610,7 @@ def _rename_from_keys(store: BaseStore, src_path: str, dst_path: str) -> None: _src_prefix = root_prefix + src_prefix _dst_prefix = root_prefix + dst_prefix for key in store.list_prefix(_src_prefix): # type: ignore - new_key = _dst_prefix + key[len(_src_prefix):] + new_key = _dst_prefix + key[len(_src_prefix) :] store[new_key] = store.pop(key) any_renamed = True any_meta_renamed = _rename_metadata_v3(store, src_path, dst_path) # type: ignore @@ -639,20 +631,20 @@ def _rmdir_from_keys(store: StoreLike, path: Optional[str] = None) -> None: def _rmdir_from_keys_v3(store: StoreV3, path: str = "") -> None: meta_dir = meta_root + 
path - meta_dir = meta_dir.rstrip('/') + meta_dir = meta_dir.rstrip("/") _rmdir_from_keys(store, meta_dir) # remove data folder data_dir = data_root + path - data_dir = data_dir.rstrip('/') + data_dir = data_dir.rstrip("/") _rmdir_from_keys(store, data_dir) # remove metadata files sfx = _get_metadata_suffix(store) - array_meta_file = meta_dir + '.array' + sfx + array_meta_file = meta_dir + ".array" + sfx if array_meta_file in store: store.erase(array_meta_file) # type: ignore - group_meta_file = meta_dir + '.group' + sfx + group_meta_file = meta_dir + ".group" + sfx if group_meta_file in store: store.erase(group_meta_file) # type: ignore @@ -663,8 +655,8 @@ def _listdir_from_keys(store: BaseStore, path: Optional[str] = None) -> List[str children = set() for key in list(store.keys()): if key.startswith(prefix) and len(key) > len(prefix): - suffix = key[len(prefix):] - child = suffix.split('/')[0] + suffix = key[len(prefix) :] + child = suffix.split("/")[0] children.add(child) return sorted(children) @@ -675,7 +667,7 @@ def _prefix_to_array_key(store: StoreLike, prefix: str) -> str: if prefix: key = meta_root + prefix.rstrip("/") + ".array" + sfx else: - key = meta_root[:-1] + '.array' + sfx + key = meta_root[:-1] + ".array" + sfx else: key = prefix + array_meta_key return key @@ -685,9 +677,9 @@ def _prefix_to_group_key(store: StoreLike, prefix: str) -> str: if getattr(store, "_store_version", 2) == 3: sfx = _get_metadata_suffix(store) # type: ignore if prefix: - key = meta_root + prefix.rstrip('/') + ".group" + sfx + key = meta_root + prefix.rstrip("/") + ".group" + sfx else: - key = meta_root[:-1] + '.group' + sfx + key = meta_root[:-1] + ".group" + sfx else: key = prefix + group_meta_key return key @@ -698,9 +690,9 @@ def _prefix_to_attrs_key(store: StoreLike, prefix: str) -> str: # for v3, attributes are stored in the array metadata sfx = _get_metadata_suffix(store) # type: ignore if prefix: - key = meta_root + prefix.rstrip('/') + ".array" + sfx + key = meta_root + prefix.rstrip("/") + ".array" + sfx else: - key = meta_root[:-1] + '.array' + sfx + key = meta_root[:-1] + ".array" + sfx else: key = prefix + attrs_key return key diff --git a/zarr/_storage/v3.py b/zarr/_storage/v3.py index 094deed02e..1a50265c11 100644 --- a/zarr/_storage/v3.py +++ b/zarr/_storage/v3.py @@ -9,44 +9,60 @@ MetadataError, ReadOnlyError, ) -from zarr.util import (buffer_size, json_loads, normalize_storage_path) +from zarr.util import buffer_size, json_loads, normalize_storage_path from zarr._storage.absstore import ABSStoreV3 # noqa: F401 -from zarr._storage.store import (_get_hierarchy_metadata, # noqa: F401 - _get_metadata_suffix, - _listdir_from_keys, - _rename_from_keys, - _rename_metadata_v3, - _rmdir_from_keys, - _rmdir_from_keys_v3, - _path_to_prefix, - _prefix_to_array_key, - _prefix_to_group_key, - array_meta_key, - attrs_key, - data_root, - group_meta_key, - meta_root, - BaseStore, - Store, - StoreV3) -from zarr.storage import (DBMStore, ConsolidatedMetadataStore, DirectoryStore, FSStore, KVStore, - LMDBStore, LRUStoreCache, MemoryStore, MongoDBStore, RedisStore, - SQLiteStore, ZipStore, _getsize) +from zarr._storage.store import ( # noqa: F401 + _get_hierarchy_metadata, + _get_metadata_suffix, + _listdir_from_keys, + _rename_from_keys, + _rename_metadata_v3, + _rmdir_from_keys, + _rmdir_from_keys_v3, + _path_to_prefix, + _prefix_to_array_key, + _prefix_to_group_key, + array_meta_key, + attrs_key, + data_root, + group_meta_key, + meta_root, + BaseStore, + Store, + StoreV3, +) +from zarr.storage 
import ( + DBMStore, + ConsolidatedMetadataStore, + DirectoryStore, + FSStore, + KVStore, + LMDBStore, + LRUStoreCache, + MemoryStore, + MongoDBStore, + RedisStore, + SQLiteStore, + ZipStore, + _getsize, +) __doctest_requires__ = { - ('RedisStore', 'RedisStore.*'): ['redis'], - ('MongoDBStore', 'MongoDBStore.*'): ['pymongo'], - ('LRUStoreCache', 'LRUStoreCache.*'): ['s3fs'], + ("RedisStore", "RedisStore.*"): ["redis"], + ("MongoDBStore", "MongoDBStore.*"): ["pymongo"], + ("LRUStoreCache", "LRUStoreCache.*"): ["s3fs"], } try: # noinspection PyUnresolvedReferences from zarr.codecs import Blosc + default_compressor = Blosc() except ImportError: # pragma: no cover from zarr.codecs import Zlib + default_compressor = Zlib() @@ -55,7 +71,7 @@ StoreLike = Union[BaseStore, MutableMapping] -class RmdirV3(): +class RmdirV3: """Mixin class that can be used to ensure override of any existing v2 rmdir class.""" def rmdir(self, path: str = "") -> None: @@ -64,7 +80,6 @@ def rmdir(self, path: str = "") -> None: class KVStoreV3(RmdirV3, KVStore, StoreV3): - def list(self): return list(self._mutable_mapping.keys()) @@ -73,10 +88,7 @@ def __setitem__(self, key, value): super().__setitem__(key, value) def __eq__(self, other): - return ( - isinstance(other, KVStoreV3) and - self._mutable_mapping == other._mutable_mapping - ) + return isinstance(other, KVStoreV3) and self._mutable_mapping == other._mutable_mapping KVStoreV3.__doc__ = KVStore.__doc__ @@ -122,15 +134,15 @@ def list(self): return list(self.keys()) def _normalize_key(self, key): - key = normalize_storage_path(key).lstrip('/') + key = normalize_storage_path(key).lstrip("/") return key.lower() if self.normalize_keys else key def getsize(self, path=None): size = 0 - if path is None or path == '': + if path is None or path == "": # size of both the data and meta subdirs dirs = [] - for d in ['data/root', 'meta/root']: + for d in ["data/root", "meta/root"]: dir_path = os.path.join(self.path, d) if os.path.exists(dir_path): dirs.append(dir_path) @@ -146,7 +158,7 @@ def getsize(self, path=None): return size def setitems(self, values): - if self.mode == 'r': + if self.mode == "r": raise ReadOnlyError() values = {self._normalize_key(key): val for key, val in values.items()} @@ -162,7 +174,7 @@ def setitems(self, values): self.map.setitems(values) def rmdir(self, path=None): - if self.mode == 'r': + if self.mode == "r": raise ReadOnlyError() if path: for base in [meta_root, data_root]: @@ -172,10 +184,10 @@ def rmdir(self, path=None): # remove any associated metadata files sfx = _get_metadata_suffix(self) - meta_dir = (meta_root + path).rstrip('/') - array_meta_file = meta_dir + '.array' + sfx + meta_dir = (meta_root + path).rstrip("/") + array_meta_file = meta_dir + ".array" + sfx self.pop(array_meta_file, None) - group_meta_file = meta_dir + '.group' + sfx + group_meta_file = meta_dir + ".group" + sfx self.pop(group_meta_file, None) else: store_path = self.dir_path(path) @@ -213,7 +225,6 @@ def get_partial_values(self, key_ranges): class MemoryStoreV3(MemoryStore, StoreV3): - def __init__(self, root=None, cls=dict, dimension_separator=None): if root is None: self.root = cls() @@ -225,9 +236,7 @@ def __init__(self, root=None, cls=dict, dimension_separator=None): def __eq__(self, other): return ( - isinstance(other, MemoryStoreV3) and - self.root == other.root and - self.cls == other.cls + isinstance(other, MemoryStoreV3) and self.root == other.root and self.cls == other.cls ) def __setitem__(self, key, value): @@ -256,13 +265,13 @@ def rename(self, 
src_path: Path, dst_path: Path): if base == meta_root: # check for and move corresponding metadata sfx = _get_metadata_suffix(self) - src_meta = src_key + '.array' + sfx + src_meta = src_key + ".array" + sfx if src_meta in src_parent: - dst_meta = dst_key + '.array' + sfx + dst_meta = dst_key + ".array" + sfx dst_parent[dst_meta] = src_parent.pop(src_meta) - src_meta = src_key + '.group' + sfx + src_meta = src_key + ".group" + sfx if src_meta in src_parent: - dst_meta = dst_key + '.group' + sfx + dst_meta = dst_key + ".group" + sfx dst_parent[dst_meta] = src_parent.pop(src_meta) any_renamed = True any_renamed = _rename_metadata_v3(self, src_path, dst_path) or any_renamed @@ -284,10 +293,10 @@ def rmdir(self, path: Path = None): # remove any associated metadata files sfx = _get_metadata_suffix(self) - meta_dir = (meta_root + path).rstrip('/') - array_meta_file = meta_dir + '.array' + sfx + meta_dir = (meta_root + path).rstrip("/") + array_meta_file = meta_dir + ".array" + sfx self.pop(array_meta_file, None) - group_meta_file = meta_dir + '.group' + sfx + group_meta_file = meta_dir + ".group" + sfx self.pop(group_meta_file, None) else: # clear out root @@ -298,15 +307,11 @@ def rmdir(self, path: Path = None): class DirectoryStoreV3(DirectoryStore, StoreV3): - def list(self): return list(self.keys()) def __eq__(self, other): - return ( - isinstance(other, DirectoryStoreV3) and - self.path == other.path - ) + return isinstance(other, DirectoryStoreV3) and self.path == other.path def __setitem__(self, key, value): self._validate_key(key) @@ -315,25 +320,24 @@ def __setitem__(self, key, value): def getsize(self, path: Path = None): return _getsize(self, path) - def rename(self, src_path, dst_path, metadata_key_suffix='.json'): + def rename(self, src_path, dst_path, metadata_key_suffix=".json"): store_src_path = normalize_storage_path(src_path) store_dst_path = normalize_storage_path(dst_path) dir_path = self.path any_existed = False - for root_prefix in ['meta', 'data']: - src_path = os.path.join(dir_path, root_prefix, 'root', store_src_path) + for root_prefix in ["meta", "data"]: + src_path = os.path.join(dir_path, root_prefix, "root", store_src_path) if os.path.exists(src_path): any_existed = True - dst_path = os.path.join(dir_path, root_prefix, 'root', store_dst_path) + dst_path = os.path.join(dir_path, root_prefix, "root", store_dst_path) os.renames(src_path, dst_path) - for suffix in ['.array' + metadata_key_suffix, - '.group' + metadata_key_suffix]: - src_meta = os.path.join(dir_path, 'meta', 'root', store_src_path + suffix) + for suffix in [".array" + metadata_key_suffix, ".group" + metadata_key_suffix]: + src_meta = os.path.join(dir_path, "meta", "root", store_src_path + suffix) if os.path.exists(src_meta): any_existed = True - dst_meta = os.path.join(dir_path, 'meta', 'root', store_dst_path + suffix) + dst_meta = os.path.join(dir_path, "meta", "root", store_dst_path + suffix) dst_dir = os.path.dirname(dst_meta) if not os.path.exists(dst_dir): os.makedirs(dst_dir) @@ -352,10 +356,10 @@ def rmdir(self, path=None): # remove any associated metadata files sfx = _get_metadata_suffix(self) - meta_dir = (meta_root + path).rstrip('/') - array_meta_file = meta_dir + '.array' + sfx + meta_dir = (meta_root + path).rstrip("/") + array_meta_file = meta_dir + ".array" + sfx self.pop(array_meta_file, None) - group_meta_file = meta_dir + '.group' + sfx + group_meta_file = meta_dir + ".group" + sfx self.pop(group_meta_file, None) elif os.path.isdir(dir_path): @@ -366,16 +370,15 @@ def rmdir(self, 
path=None): class ZipStoreV3(ZipStore, StoreV3): - def list(self): return list(self.keys()) def __eq__(self, other): return ( - isinstance(other, ZipStore) and - self.path == other.path and - self.compression == other.compression and - self.allowZip64 == other.allowZip64 + isinstance(other, ZipStore) + and self.path == other.path + and self.compression == other.compression + and self.allowZip64 == other.allowZip64 ) def __setitem__(self, key, value): @@ -405,7 +408,6 @@ def getsize(self, path=None): class RedisStoreV3(RmdirV3, RedisStore, StoreV3): - def list(self): return list(self.keys()) @@ -418,7 +420,6 @@ def __setitem__(self, key, value): class MongoDBStoreV3(RmdirV3, MongoDBStore, StoreV3): - def list(self): return list(self.keys()) @@ -431,7 +432,6 @@ def __setitem__(self, key, value): class DBMStoreV3(RmdirV3, DBMStore, StoreV3): - def list(self): return list(self.keys()) @@ -444,7 +444,6 @@ def __setitem__(self, key, value): class LMDBStoreV3(RmdirV3, LMDBStore, StoreV3): - def list(self): return list(self.keys()) @@ -457,7 +456,6 @@ def __setitem__(self, key, value): class SQLiteStoreV3(SQLiteStore, StoreV3): - def list(self): return list(self.keys()) @@ -490,15 +488,13 @@ def rmdir(self, path=None): if path: for base in [meta_root, data_root]: with self.lock: - self.cursor.execute( - 'DELETE FROM zarr WHERE k LIKE (? || "/%")', (base + path,) - ) + self.cursor.execute('DELETE FROM zarr WHERE k LIKE (? || "/%")', (base + path,)) # remove any associated metadata files sfx = _get_metadata_suffix(self) - meta_dir = (meta_root + path).rstrip('/') - array_meta_file = meta_dir + '.array' + sfx + meta_dir = (meta_root + path).rstrip("/") + array_meta_file = meta_dir + ".array" + sfx self.pop(array_meta_file, None) - group_meta_file = meta_dir + '.group' + sfx + group_meta_file = meta_dir + ".group" + sfx self.pop(group_meta_file, None) else: self.clear() @@ -508,7 +504,6 @@ def rmdir(self, path=None): class LRUStoreCacheV3(RmdirV3, LRUStoreCache, StoreV3): - def __init__(self, store, max_size: int): self._store = StoreV3._ensure_store(store) self._max_size = max_size @@ -572,10 +567,11 @@ def __init__(self, store: StoreLike, metadata_key=meta_root + "consolidated/.zme meta = json_loads(self.store[metadata_key]) # check format of consolidated metadata - consolidated_format = meta.get('zarr_consolidated_format', None) + consolidated_format = meta.get("zarr_consolidated_format", None) if consolidated_format != 1: - raise MetadataError('unsupported zarr consolidated metadata format: %s' % - consolidated_format) + raise MetadataError( + "unsupported zarr consolidated metadata format: %s" % consolidated_format + ) # decode metadata self.meta_store: Store = KVStoreV3(meta["metadata"]) @@ -586,34 +582,37 @@ def rmdir(self, key): def _normalize_store_arg_v3(store: Any, storage_options=None, mode="r") -> BaseStore: # default to v2 store for backward compatibility - zarr_version = getattr(store, '_store_version', 3) + zarr_version = getattr(store, "_store_version", 3) if zarr_version != 3: raise ValueError("store must be a version 3 store") if store is None: store = KVStoreV3(dict()) # add default zarr.json metadata - store['zarr.json'] = store._metadata_class.encode_hierarchy_metadata(None) + store["zarr.json"] = store._metadata_class.encode_hierarchy_metadata(None) return store if isinstance(store, os.PathLike): store = os.fspath(store) if FSStore._fsspec_installed(): import fsspec + if isinstance(store, fsspec.FSMap): - return FSStoreV3(store.root, - fs=store.fs, - mode=mode, - 
check=store.check, - create=store.create, - missing_exceptions=store.missing_exceptions, - **(storage_options or {})) + return FSStoreV3( + store.root, + fs=store.fs, + mode=mode, + check=store.check, + create=store.create, + missing_exceptions=store.missing_exceptions, + **(storage_options or {}), + ) if isinstance(store, str): if "://" in store or "::" in store: store = FSStoreV3(store, mode=mode, **(storage_options or {})) elif storage_options: raise ValueError("storage_options passed with non-fsspec path") - elif store.endswith('.zip'): + elif store.endswith(".zip"): store = ZipStoreV3(store, mode=mode) - elif store.endswith('.n5'): + elif store.endswith(".n5"): raise NotImplementedError("N5Store not yet implemented for V3") # return N5StoreV3(store) else: @@ -621,7 +620,7 @@ def _normalize_store_arg_v3(store: Any, storage_options=None, mode="r") -> BaseS else: store = StoreV3._ensure_store(store) - if 'zarr.json' not in store: + if "zarr.json" not in store: # add default zarr.json metadata - store['zarr.json'] = store._metadata_class.encode_hierarchy_metadata(None) + store["zarr.json"] = store._metadata_class.encode_hierarchy_metadata(None) return store diff --git a/zarr/_storage/v3_storage_transformers.py b/zarr/_storage/v3_storage_transformers.py index 3675d42c38..ff31a7281c 100644 --- a/zarr/_storage/v3_storage_transformers.py +++ b/zarr/_storage/v3_storage_transformers.py @@ -10,10 +10,10 @@ from zarr.util import normalize_storage_path -MAX_UINT_64 = 2 ** 64 - 1 +MAX_UINT_64 = 2**64 - 1 -v3_sharding_available = os.environ.get('ZARR_V3_SHARDING', '0').lower() not in ['0', 'false'] +v3_sharding_available = os.environ.get("ZARR_V3_SHARDING", "0").lower() not in ["0", "false"] def assert_zarr_v3_sharding_available(): @@ -31,8 +31,7 @@ class _ShardIndex(NamedTuple): def __localize_chunk__(self, chunk: Tuple[int, ...]) -> Tuple[int, ...]: return tuple( - chunk_i % shard_i - for chunk_i, shard_i in zip(chunk, self.store.chunks_per_shard) + chunk_i % shard_i for chunk_i, shard_i in zip(chunk, self.store.chunks_per_shard) ) def is_all_empty(self) -> bool: @@ -46,9 +45,7 @@ def get_chunk_slice(self, chunk: Tuple[int, ...]) -> Optional[slice]: else: return slice(int(chunk_start), int(chunk_start + chunk_len)) - def set_chunk_slice( - self, chunk: Tuple[int, ...], chunk_slice: Optional[slice] - ) -> None: + def set_chunk_slice(self, chunk: Tuple[int, ...], chunk_slice: Optional[slice]) -> None: localized_chunk = self.__localize_chunk__(chunk) if chunk_slice is None: self.offsets_and_lengths[localized_chunk] = (MAX_UINT_64, MAX_UINT_64) @@ -79,8 +76,7 @@ def from_bytes( def create_empty(cls, store: "ShardingStorageTransformer"): # reserving 2*64bit per chunk for offset and length: return cls.from_bytes( - MAX_UINT_64.to_bytes(8, byteorder="little") - * (2 * store._num_chunks_per_shard), + MAX_UINT_64.to_bytes(8, byteorder="little") * (2 * store._num_chunks_per_shard), store=store, ) @@ -98,15 +94,13 @@ def __init__(self, _type, chunks_per_shard) -> None: assert_zarr_v3_sharding_available() super().__init__(_type) if isinstance(chunks_per_shard, int): - chunks_per_shard = (chunks_per_shard, ) + chunks_per_shard = (chunks_per_shard,) else: chunks_per_shard = tuple(int(i) for i in chunks_per_shard) if chunks_per_shard == (): - chunks_per_shard = (1, ) + chunks_per_shard = (1,) self.chunks_per_shard = chunks_per_shard - self._num_chunks_per_shard = functools.reduce( - lambda x, y: x * y, chunks_per_shard, 1 - ) + self._num_chunks_per_shard = functools.reduce(lambda x, y: x * y, chunks_per_shard, 
1) self._dimension_separator = None self._data_key_prefix = None @@ -118,36 +112,33 @@ def _copy_for_array(self, array, inner_store): # The array shape might be longer when initialized with subdtypes. # subdtypes dimensions come last, therefore padding chunks_per_shard # with ones, effectively disabling sharding on the unlisted dimensions. - transformer_copy.chunks_per_shard += ( - (1, ) * (len(array._shape) - len(self.chunks_per_shard)) + transformer_copy.chunks_per_shard += (1,) * ( + len(array._shape) - len(self.chunks_per_shard) ) return transformer_copy @property def dimension_separator(self) -> str: - assert self._dimension_separator is not None, ( - "dimension_separator is not initialized, first get a copy via _copy_for_array." - ) + assert ( + self._dimension_separator is not None + ), "dimension_separator is not initialized, first get a copy via _copy_for_array." return self._dimension_separator def _is_data_key(self, key: str) -> bool: - assert self._data_key_prefix is not None, ( - "data_key_prefix is not initialized, first get a copy via _copy_for_array." - ) + assert ( + self._data_key_prefix is not None + ), "data_key_prefix is not initialized, first get a copy via _copy_for_array." return key.startswith(self._data_key_prefix) def _key_to_shard(self, chunk_key: str) -> Tuple[str, Tuple[int, ...]]: prefix, _, chunk_string = chunk_key.rpartition("c") - chunk_subkeys = tuple( - map(int, chunk_string.split(self.dimension_separator)) - ) if chunk_string else (0, ) - shard_key_tuple = ( - subkey // shard_i - for subkey, shard_i in zip(chunk_subkeys, self.chunks_per_shard) + chunk_subkeys = ( + tuple(map(int, chunk_string.split(self.dimension_separator))) if chunk_string else (0,) ) - shard_key = ( - prefix + "c" + self.dimension_separator.join(map(str, shard_key_tuple)) + shard_key_tuple = ( + subkey // shard_i for subkey, shard_i in zip(chunk_subkeys, self.chunks_per_shard) ) + shard_key = prefix + "c" + self.dimension_separator.join(map(str, shard_key_tuple)) return shard_key, chunk_subkeys def _get_index_from_store(self, shard_key: str) -> _ShardIndex: @@ -164,16 +155,14 @@ def _get_index_from_store(self, shard_key: str) -> _ShardIndex: def _get_index_from_buffer(self, buffer: Union[bytes, bytearray]) -> _ShardIndex: # At the end of each shard 2*64bit per chunk for offset and length define the index: - return _ShardIndex.from_bytes(buffer[-16 * self._num_chunks_per_shard:], self) + return _ShardIndex.from_bytes(buffer[-16 * self._num_chunks_per_shard :], self) def _get_chunks_in_shard(self, shard_key: str) -> Iterator[Tuple[int, ...]]: _, _, chunk_string = shard_key.rpartition("c") - shard_key_tuple = tuple( - map(int, chunk_string.split(self.dimension_separator)) - ) if chunk_string else (0, ) - for chunk_offset in itertools.product( - *(range(i) for i in self.chunks_per_shard) - ): + shard_key_tuple = ( + tuple(map(int, chunk_string.split(self.dimension_separator))) if chunk_string else (0,) + ) + for chunk_offset in itertools.product(*(range(i) for i in self.chunks_per_shard)): yield tuple( shard_key_i * shards_i + offset_i for shard_key_i, offset_i, shards_i in zip( @@ -250,9 +239,7 @@ def __setitem__(self, key, value): for _, chunk_slice in valid_chunk_slices ] ) - for chunk_value, (chunk_to_read, _) in zip( - chunk_values, valid_chunk_slices - ): + for chunk_value, (chunk_to_read, _) in zip(chunk_values, valid_chunk_slices): new_content[chunk_to_read] = chunk_value else: if full_shard_value is None: @@ -263,9 +250,7 @@ def __setitem__(self, key, value): shard_content = b"" 
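         # rebuild the shard buffer: append each chunk's bytes in turn and
         # record its (offset, length) slice in the shard index as it goes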
for chunk_subkey, chunk_content in new_content.items(): - chunk_slice = slice( - len(shard_content), len(shard_content) + len(chunk_content) - ) + chunk_slice = slice(len(shard_content), len(shard_content) + len(chunk_content)) index.set_chunk_slice(chunk_subkey, chunk_slice) shard_content += chunk_content # Appending the index at the end of the shard: @@ -298,9 +283,7 @@ def _shard_key_to_original_keys(self, key: str) -> Iterator[str]: prefix, _, _ = key.rpartition("c") for chunk_tuple in self._get_chunks_in_shard(key): if index.get_chunk_slice(chunk_tuple) is not None: - yield prefix + "c" + self.dimension_separator.join( - map(str, chunk_tuple) - ) + yield prefix + "c" + self.dimension_separator.join(map(str, chunk_tuple)) else: yield key diff --git a/zarr/attrs.py b/zarr/attrs.py index 60dd7f1d79..01fc617b3c 100644 --- a/zarr/attrs.py +++ b/zarr/attrs.py @@ -25,10 +25,9 @@ class Attributes(MutableMapping): """ - def __init__(self, store, key='.zattrs', read_only=False, cache=True, - synchronizer=None): + def __init__(self, store, key=".zattrs", read_only=False, cache=True, synchronizer=None): - self._version = getattr(store, '_store_version', 2) + self._version = getattr(store, "_store_version", 2) _Store = Store if self._version == 2 else StoreV3 self.store = _Store._ensure_store(store) self.key = key @@ -43,7 +42,7 @@ def _get_nosync(self): except KeyError: d = dict() if self._version > 2: - d['attributes'] = {} + d["attributes"] = {} else: d = self.store._metadata_class.parse_metadata(data) return d @@ -54,7 +53,7 @@ def asdict(self): return self._cached_asdict d = self._get_nosync() if self._version == 3: - d = d['attributes'] + d = d["attributes"] if self.cache: self._cached_asdict = d return d @@ -65,7 +64,7 @@ def refresh(self): if self._version == 2: self._cached_asdict = self._get_nosync() else: - self._cached_asdict = self._get_nosync()['attributes'] + self._cached_asdict = self._get_nosync()["attributes"] def __contains__(self, x): return x in self.asdict() @@ -77,7 +76,7 @@ def _write_op(self, f, *args, **kwargs): # guard condition if self.read_only: - raise PermissionError('attributes are read-only') + raise PermissionError("attributes are read-only") # synchronization if self.synchronizer is None: @@ -98,7 +97,7 @@ def _setitem_nosync(self, item, value): if self._version == 2: d[item] = value else: - d['attributes'][item] = value + d["attributes"][item] = value # _put modified data self._put_nosync(d) @@ -115,7 +114,7 @@ def _delitem_nosync(self, key): if self._version == 2: del d[key] else: - del d['attributes'][key] + del d["attributes"][key] # _put modified data self._put_nosync(d) @@ -137,8 +136,8 @@ def _put_nosync(self, d): warnings.warn( "only attribute keys of type 'string' will be allowed in the future", DeprecationWarning, - stacklevel=2 - ) + stacklevel=2, + ) try: d_to_check = {str(k): v for k, v in d_to_check.items()} @@ -163,15 +162,15 @@ def _put_nosync(self, d): # Note: this changes the store.counter result in test_caching_on! 
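# For orientation, a minimal usage sketch of this Attributes class
# (v2 semantics, in-memory array assumed): every assignment funnels
# through _put_nosync and re-serialises the whole attributes document.
#
#     >>> import zarr
#     >>> z = zarr.zeros((4,))
#     >>> z.attrs["foo"] = "bar"
#     >>> dict(z.attrs)
#     {'foo': 'bar'}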
meta = self.store._metadata_class.parse_metadata(self.store[self.key]) - if 'attributes' in meta and 'filters' in meta['attributes']: + if "attributes" in meta and "filters" in meta["attributes"]: # need to preserve any existing "filters" attribute - d['attributes']['filters'] = meta['attributes']['filters'] - meta['attributes'] = d['attributes'] + d["attributes"]["filters"] = meta["attributes"]["filters"] + meta["attributes"] = d["attributes"] else: meta = d self.store[self.key] = json_dumps(meta) if self.cache: - self._cached_asdict = d['attributes'] + self._cached_asdict = d["attributes"] # noinspection PyMethodOverriding def update(self, *args, **kwargs): @@ -187,7 +186,7 @@ def _update_nosync(self, *args, **kwargs): if self._version == 2: d.update(*args, **kwargs) else: - d['attributes'].update(*args, **kwargs) + d["attributes"].update(*args, **kwargs) # _put modified data self._put_nosync(d) diff --git a/zarr/context.py b/zarr/context.py index 83fbaafa9b..3dd7dda4ac 100644 --- a/zarr/context.py +++ b/zarr/context.py @@ -1,11 +1,10 @@ - from typing import TypedDict from numcodecs.compat import NDArrayLike class Context(TypedDict, total=False): - """ A context for component specific information + """A context for component specific information All keys are optional. Any component reading the context must provide a default implementation in the case a key cannot be found. @@ -16,4 +15,5 @@ class Context(TypedDict, total=False): An array-like instance to use for determining the preferred output array type. """ + meta_array: NDArrayLike diff --git a/zarr/convenience.py b/zarr/convenience.py index 9a0eae20a3..ff236d0df2 100644 --- a/zarr/convenience.py +++ b/zarr/convenience.py @@ -14,8 +14,14 @@ from zarr.hierarchy import group as _create_group from zarr.hierarchy import open_group from zarr.meta import json_dumps, json_loads -from zarr.storage import (_get_metadata_suffix, contains_array, contains_group, - normalize_store_arg, BaseStore, ConsolidatedMetadataStore) +from zarr.storage import ( + _get_metadata_suffix, + contains_array, + contains_group, + normalize_store_arg, + BaseStore, + ConsolidatedMetadataStore, +) from zarr._storage.v3 import ConsolidatedMetadataStoreV3 from zarr.util import TreeViewer, buffer_size, normalize_storage_path @@ -25,7 +31,7 @@ def _check_and_update_path(store: BaseStore, path): - if getattr(store, '_store_version', 2) > 2 and not path: + if getattr(store, "_store_version", 2) > 2 and not path: raise ValueError("path must be provided for v3 stores") return normalize_storage_path(path) @@ -94,15 +100,17 @@ def open(store: StoreLike = None, mode: str = "a", *, zarr_version=None, path=No # we pass storage options explicitly, since normalize_store_arg might construct # a store if the input is a fsspec-compatible URL _store: BaseStore = normalize_store_arg( - store, storage_options=kwargs.pop("storage_options", {}), mode=mode, + store, + storage_options=kwargs.pop("storage_options", {}), + mode=mode, zarr_version=zarr_version, ) # path = _check_and_update_path(_store, path) path = normalize_storage_path(path) - kwargs['path'] = path + kwargs["path"] = path - if mode in {'w', 'w-', 'x'}: - if 'shape' in kwargs: + if mode in {"w", "w-", "x"}: + if "shape" in kwargs: return open_array(_store, mode=mode, **kwargs) else: return open_group(_store, mode=mode, **kwargs) @@ -167,8 +175,9 @@ def save_array(store: StoreLike, arr, *, zarr_version=None, path=None, **kwargs) _store: BaseStore = normalize_store_arg(store, mode="w", zarr_version=zarr_version) path = 
_check_and_update_path(_store, path) try: - _create_array(arr, store=_store, overwrite=True, zarr_version=zarr_version, path=path, - **kwargs) + _create_array( + arr, store=_store, overwrite=True, zarr_version=zarr_version, path=path, **kwargs + ) finally: if may_need_closing: # needed to ensure zip file records are written @@ -240,7 +249,7 @@ def save_group(store: StoreLike, *args, zarr_version=None, path=None, **kwargs): """ if len(args) == 0 and len(kwargs) == 0: - raise ValueError('at least one array must be provided') + raise ValueError("at least one array must be provided") # handle polymorphic store arg may_need_closing = _might_close(store) _store: BaseStore = normalize_store_arg(store, mode="w", zarr_version=zarr_version) @@ -248,7 +257,7 @@ def save_group(store: StoreLike, *args, zarr_version=None, path=None, **kwargs): try: grp = _create_group(_store, path=path, overwrite=True, zarr_version=zarr_version) for i, arr in enumerate(args): - k = 'arr_{}'.format(i) + k = "arr_{}".format(i) grp.create_dataset(k, data=arr, overwrite=True, zarr_version=zarr_version) for k, arr in kwargs.items(): grp.create_dataset(k, data=arr, overwrite=True, zarr_version=zarr_version) @@ -337,16 +346,14 @@ def save(store: StoreLike, *args, zarr_version=None, path=None, **kwargs): """ if len(args) == 0 and len(kwargs) == 0: - raise ValueError('at least one array must be provided') + raise ValueError("at least one array must be provided") if len(args) == 1 and len(kwargs) == 0: save_array(store, args[0], zarr_version=zarr_version, path=path) else: - save_group(store, *args, zarr_version=zarr_version, path=path, - **kwargs) + save_group(store, *args, zarr_version=zarr_version, path=path, **kwargs) class LazyLoader(Mapping): - def __init__(self, grp): self.grp = grp self.cache = dict() @@ -369,9 +376,9 @@ def __contains__(self, item): return item in self.grp def __repr__(self): - r = ' ' + dest_key + descr = descr + " -> " + dest_key # decide what to do do_copy = True - if if_exists != 'replace': + if if_exists != "replace": if dest_key in dest: - if if_exists == 'raise': - raise CopyError('key {!r} exists in destination' - .format(dest_key)) - elif if_exists == 'skip': + if if_exists == "raise": + raise CopyError("key {!r} exists in destination".format(dest_key)) + elif if_exists == "skip": do_copy = False # take action if do_copy: - log('copy {}'.format(descr)) + log("copy {}".format(descr)) if not dry_run: data = source[source_key] n_bytes_copied += buffer_size(data) dest[dest_key] = data n_copied += 1 else: - log('skip {}'.format(descr)) + log("skip {}".format(descr)) n_skipped += 1 # log a final message with a summary of what happened @@ -727,12 +743,21 @@ def copy_store(source, dest, source_path='', dest_path='', excludes=None, def _check_dest_is_group(dest): - if not hasattr(dest, 'create_dataset'): - raise ValueError('dest must be a group, got {!r}'.format(dest)) - - -def copy(source, dest, name=None, shallow=False, without_attrs=False, log=None, - if_exists='raise', dry_run=False, **create_kws): + if not hasattr(dest, "create_dataset"): + raise ValueError("dest must be a group, got {!r}".format(dest)) + + +def copy( + source, + dest, + name=None, + shallow=False, + without_attrs=False, + log=None, + if_exists="raise", + dry_run=False, + **create_kws +): """Copy the `source` array or group into the `dest` group. 
Parameters @@ -855,8 +880,15 @@ def copy(source, dest, name=None, shallow=False, without_attrs=False, log=None, # do the copying n_copied, n_skipped, n_bytes_copied = _copy( - log, source, dest, name=name, root=True, shallow=shallow, - without_attrs=without_attrs, if_exists=if_exists, dry_run=dry_run, + log, + source, + dest, + name=name, + root=True, + shallow=shallow, + without_attrs=without_attrs, + if_exists=if_exists, + dry_run=dry_run, **create_kws ) @@ -866,47 +898,49 @@ def copy(source, dest, name=None, shallow=False, without_attrs=False, log=None, return n_copied, n_skipped, n_bytes_copied -def _copy(log, source, dest, name, root, shallow, without_attrs, if_exists, - dry_run, **create_kws): +def _copy(log, source, dest, name, root, shallow, without_attrs, if_exists, dry_run, **create_kws): # N.B., if this is a dry run, dest may be None # setup counting variables n_copied = n_skipped = n_bytes_copied = 0 # are we copying to/from h5py? - source_h5py = source.__module__.startswith('h5py.') - dest_h5py = dest is not None and dest.__module__.startswith('h5py.') + source_h5py = source.__module__.startswith("h5py.") + dest_h5py = dest is not None and dest.__module__.startswith("h5py.") # check if_exists parameter - valid_if_exists = ['raise', 'replace', 'skip', 'skip_initialized'] + valid_if_exists = ["raise", "replace", "skip", "skip_initialized"] if if_exists not in valid_if_exists: - raise ValueError('if_exists must be one of {!r}; found {!r}' - .format(valid_if_exists, if_exists)) - if dest_h5py and if_exists == 'skip_initialized': - raise ValueError('{!r} can only be used when copying to zarr' - .format(if_exists)) + raise ValueError( + "if_exists must be one of {!r}; found {!r}".format(valid_if_exists, if_exists) + ) + if dest_h5py and if_exists == "skip_initialized": + raise ValueError("{!r} can only be used when copying to zarr".format(if_exists)) # determine name to copy to if name is None: - name = source.name.split('/')[-1] + name = source.name.split("/")[-1] if not name: # this can happen if source is the root group - raise TypeError('source has no name, please provide the `name` ' - 'parameter to indicate a name to copy to') + raise TypeError( + "source has no name, please provide the `name` " + "parameter to indicate a name to copy to" + ) - if hasattr(source, 'shape'): + if hasattr(source, "shape"): # copy a dataset/array # check if already exists, decide what to do do_copy = True exists = dest is not None and name in dest if exists: - if if_exists == 'raise': - raise CopyError('an object {!r} already exists in destination ' - '{!r}'.format(name, dest.name)) - elif if_exists == 'skip': + if if_exists == "raise": + raise CopyError( + "an object {!r} already exists in destination " "{!r}".format(name, dest.name) + ) + elif if_exists == "skip": do_copy = False - elif if_exists == 'skip_initialized': + elif if_exists == "skip_initialized": ds = dest[name] if ds.nchunks_initialized == ds.nchunks: do_copy = False @@ -915,7 +949,7 @@ def _copy(log, source, dest, name, root, shallow, without_attrs, if_exists, if do_copy: # log a message about what we're going to do - log('copy {} {} {}'.format(source.name, source.shape, source.dtype)) + log("copy {} {} {}".format(source.name, source.shape, source.dtype)) if not dry_run: @@ -927,38 +961,37 @@ def _copy(log, source, dest, name, root, shallow, without_attrs, if_exists, kws = create_kws.copy() # setup chunks option, preserve by default - kws.setdefault('chunks', source.chunks) + kws.setdefault("chunks", source.chunks) # setup 
compression options if source_h5py: if dest_h5py: # h5py -> h5py; preserve compression options by default - kws.setdefault('compression', source.compression) - kws.setdefault('compression_opts', source.compression_opts) - kws.setdefault('shuffle', source.shuffle) - kws.setdefault('fletcher32', source.fletcher32) - kws.setdefault('fillvalue', source.fillvalue) + kws.setdefault("compression", source.compression) + kws.setdefault("compression_opts", source.compression_opts) + kws.setdefault("shuffle", source.shuffle) + kws.setdefault("fletcher32", source.fletcher32) + kws.setdefault("fillvalue", source.fillvalue) else: # h5py -> zarr; use zarr default compression options - kws.setdefault('fill_value', source.fillvalue) + kws.setdefault("fill_value", source.fillvalue) else: if dest_h5py: # zarr -> h5py; use some vaguely sensible defaults - kws.setdefault('chunks', True) - kws.setdefault('compression', 'gzip') - kws.setdefault('compression_opts', 1) - kws.setdefault('shuffle', False) - kws.setdefault('fillvalue', source.fill_value) + kws.setdefault("chunks", True) + kws.setdefault("compression", "gzip") + kws.setdefault("compression_opts", 1) + kws.setdefault("shuffle", False) + kws.setdefault("fillvalue", source.fill_value) else: # zarr -> zarr; preserve compression options by default - kws.setdefault('compressor', source.compressor) - kws.setdefault('filters', source.filters) - kws.setdefault('order', source.order) - kws.setdefault('fill_value', source.fill_value) + kws.setdefault("compressor", source.compressor) + kws.setdefault("filters", source.filters) + kws.setdefault("order", source.order) + kws.setdefault("fill_value", source.fill_value) # create new dataset in destination - ds = dest.create_dataset(name, shape=source.shape, - dtype=source.dtype, **kws) + ds = dest.create_dataset(name, shape=source.shape, dtype=source.dtype, **kws) # copy data - N.B., go chunk by chunk to avoid loading # everything into memory @@ -966,19 +999,18 @@ def _copy(log, source, dest, name, root, shallow, without_attrs, if_exists, chunks = ds.chunks chunk_offsets = [range(0, s, c) for s, c in zip(shape, chunks)] for offset in itertools.product(*chunk_offsets): - sel = tuple(slice(o, min(s, o + c)) - for o, s, c in zip(offset, shape, chunks)) + sel = tuple(slice(o, min(s, o + c)) for o, s, c in zip(offset, shape, chunks)) ds[sel] = source[sel] n_bytes_copied += ds.size * ds.dtype.itemsize # copy attributes if not without_attrs: - if dest_h5py and 'filters' in source.attrs: + if dest_h5py and "filters" in source.attrs: # No filters key in v3 metadata so it was stored in the # attributes instead. We cannot copy this key to # HDF5 attrs, though! 
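# (descriptive note: on v3 the "filters" codec configs live inside the
# attributes document as a workaround, so they must be stripped before
# mirroring attributes onto an HDF5 dataset, which is what the pop
# below does)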
source_attrs = source.attrs.asdict().copy() - source_attrs.pop('filters', None) + source_attrs.pop("filters", None) else: source_attrs = source.attrs ds.attrs.update(source_attrs) @@ -986,7 +1018,7 @@ def _copy(log, source, dest, name, root, shallow, without_attrs, if_exists, n_copied += 1 else: - log('skip {} {} {}'.format(source.name, source.shape, source.dtype)) + log("skip {} {} {}".format(source.name, source.shape, source.dtype)) n_skipped += 1 elif root or not shallow: @@ -994,21 +1026,20 @@ def _copy(log, source, dest, name, root, shallow, without_attrs, if_exists, # check if an array is in the way do_copy = True - exists_array = (dest is not None and - name in dest and - hasattr(dest[name], 'shape')) + exists_array = dest is not None and name in dest and hasattr(dest[name], "shape") if exists_array: - if if_exists == 'raise': - raise CopyError('an array {!r} already exists in destination ' - '{!r}'.format(name, dest.name)) - elif if_exists == 'skip': + if if_exists == "raise": + raise CopyError( + "an array {!r} already exists in destination " "{!r}".format(name, dest.name) + ) + elif if_exists == "skip": do_copy = False # take action if do_copy: # log action - log('copy {}'.format(source.name)) + log("copy {}".format(source.name)) if not dry_run: @@ -1035,9 +1066,17 @@ def _copy(log, source, dest, name, root, shallow, without_attrs, if_exists, # recurse for k in source.keys(): c, s, b = _copy( - log, source[k], grp, name=k, root=False, shallow=shallow, - without_attrs=without_attrs, if_exists=if_exists, - dry_run=dry_run, **create_kws) + log, + source[k], + grp, + name=k, + root=False, + shallow=shallow, + without_attrs=without_attrs, + if_exists=if_exists, + dry_run=dry_run, + **create_kws + ) n_copied += c n_skipped += s n_bytes_copied += b @@ -1045,14 +1084,22 @@ def _copy(log, source, dest, name, root, shallow, without_attrs, if_exists, n_copied += 1 else: - log('skip {}'.format(source.name)) + log("skip {}".format(source.name)) n_skipped += 1 return n_copied, n_skipped, n_bytes_copied -def copy_all(source, dest, shallow=False, without_attrs=False, log=None, - if_exists='raise', dry_run=False, **create_kws): +def copy_all( + source, + dest, + shallow=False, + without_attrs=False, + log=None, + if_exists="raise", + dry_run=False, + **create_kws +): """Copy all children of the `source` group into the `dest` group. Parameters @@ -1137,16 +1184,24 @@ def copy_all(source, dest, shallow=False, without_attrs=False, log=None, # setup counting variables n_copied = n_skipped = n_bytes_copied = 0 - zarr_version = getattr(source, '_version', 2) + zarr_version = getattr(source, "_version", 2) # setup logging with _LogWriter(log) as log: for k in source.keys(): c, s, b = _copy( - log, source[k], dest, name=k, root=False, shallow=shallow, - without_attrs=without_attrs, if_exists=if_exists, - dry_run=dry_run, **create_kws) + log, + source[k], + dest, + name=k, + root=False, + shallow=shallow, + without_attrs=without_attrs, + if_exists=if_exists, + dry_run=dry_run, + **create_kws + ) n_copied += c n_skipped += s n_bytes_copied += b @@ -1159,7 +1214,7 @@ def copy_all(source, dest, shallow=False, without_attrs=False, log=None, return n_copied, n_skipped, n_bytes_copied -def consolidate_metadata(store: BaseStore, metadata_key=".zmetadata", *, path=''): +def consolidate_metadata(store: BaseStore, metadata_key=".zmetadata", *, path=""): """ Consolidate all metadata for groups and arrays within the given store into a single resource and put it under the given key. 
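As a point of reference for the hunks below, a minimal sketch of the v2
consolidation round trip (hypothetical in-memory store; on v3 the key is
additionally nested under ``meta/root/consolidated/``):

    >>> import zarr
    >>> store = zarr.MemoryStore()
    >>> root = zarr.group(store)
    >>> _ = root.create_dataset("foo", shape=(10,), chunks=(5,))
    >>> _ = zarr.consolidate_metadata(store)
    >>> g = zarr.open_consolidated(store)
    >>> "foo" in g
    True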
@@ -1203,8 +1258,7 @@ def consolidate_metadata(store: BaseStore, metadata_key=".zmetadata", *, path='' if version == 2: def is_zarr_key(key): - return (key.endswith('.zarray') or key.endswith('.zgroup') or - key.endswith('.zattrs')) + return key.endswith(".zarray") or key.endswith(".zgroup") or key.endswith(".zattrs") else: @@ -1213,23 +1267,21 @@ def is_zarr_key(key): sfx = _get_metadata_suffix(store) # type: ignore def is_zarr_key(key): - return (key.endswith('.array' + sfx) or key.endswith('.group' + sfx) or - key == 'zarr.json') + return ( + key.endswith(".array" + sfx) or key.endswith(".group" + sfx) or key == "zarr.json" + ) # cannot create a group without a path in v3 # so create /meta/root/consolidated group to store the metadata - if 'consolidated' not in store: - _create_group(store, path='consolidated') - if not metadata_key.startswith('meta/root/'): - metadata_key = 'meta/root/consolidated/' + metadata_key + if "consolidated" not in store: + _create_group(store, path="consolidated") + if not metadata_key.startswith("meta/root/"): + metadata_key = "meta/root/consolidated/" + metadata_key # path = 'consolidated' out = { - 'zarr_consolidated_format': 1, - 'metadata': { - key: json_loads(store[key]) - for key in store if is_zarr_key(key) - } + "zarr_consolidated_format": 1, + "metadata": {key: json_loads(store[key]) for key in store if is_zarr_key(key)}, } store[metadata_key] = json_dumps(out) return open_consolidated(store, metadata_key=metadata_key, path=path) @@ -1278,26 +1330,26 @@ def open_consolidated(store: StoreLike, metadata_key=".zmetadata", mode="r+", ** """ # normalize parameters - zarr_version = kwargs.get('zarr_version') - store = normalize_store_arg(store, storage_options=kwargs.get("storage_options"), mode=mode, - zarr_version=zarr_version) - if mode not in {'r', 'r+'}: - raise ValueError("invalid mode, expected either 'r' or 'r+'; found {!r}" - .format(mode)) - - path = kwargs.pop('path', None) + zarr_version = kwargs.get("zarr_version") + store = normalize_store_arg( + store, storage_options=kwargs.get("storage_options"), mode=mode, zarr_version=zarr_version + ) + if mode not in {"r", "r+"}: + raise ValueError("invalid mode, expected either 'r' or 'r+'; found {!r}".format(mode)) + + path = kwargs.pop("path", None) if store._store_version == 2: ConsolidatedStoreClass = ConsolidatedMetadataStore else: assert_zarr_v3_api_available() ConsolidatedStoreClass = ConsolidatedMetadataStoreV3 # default is to store within 'consolidated' group on v3 - if not metadata_key.startswith('meta/root/'): - metadata_key = 'meta/root/consolidated/' + metadata_key + if not metadata_key.startswith("meta/root/"): + metadata_key = "meta/root/consolidated/" + metadata_key # setup metadata store meta_store = ConsolidatedStoreClass(store, metadata_key=metadata_key) # pass through - chunk_store = kwargs.pop('chunk_store', None) or store + chunk_store = kwargs.pop("chunk_store", None) or store return open(store=meta_store, chunk_store=chunk_store, mode=mode, path=path, **kwargs) diff --git a/zarr/core.py b/zarr/core.py index 80f424bafc..43ccdbaf7d 100644 --- a/zarr/core.py +++ b/zarr/core.py @@ -193,17 +193,16 @@ def __init__( assert_zarr_v3_api_available() if chunk_store is not None: - chunk_store = normalize_store_arg(chunk_store, - zarr_version=zarr_version) + chunk_store = normalize_store_arg(chunk_store, zarr_version=zarr_version) self._store = store self._chunk_store = chunk_store self._transformed_chunk_store = None self._path = normalize_storage_path(path) if self._path: - 
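# (descriptive note: a non-empty path gains a trailing "/" so that chunk
# and metadata keys can be built by plain string concatenation on
# self._key_prefix)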
self._key_prefix = self._path + '/' + self._key_prefix = self._path + "/" else: - self._key_prefix = '' + self._key_prefix = "" self._read_only = bool(read_only) self._synchronizer = synchronizer self._cache_metadata = cache_metadata @@ -216,18 +215,19 @@ def __init__( self._meta_array = np.empty(()) self._version = zarr_version if self._version == 3: - self._data_key_prefix = 'data/root/' + self._key_prefix - self._data_path = 'data/root/' + self._path + self._data_key_prefix = "data/root/" + self._key_prefix + self._data_path = "data/root/" + self._path self._hierarchy_metadata = _get_hierarchy_metadata(store=self._store) - self._metadata_key_suffix = self._hierarchy_metadata['metadata_key_suffix'] + self._metadata_key_suffix = self._hierarchy_metadata["metadata_key_suffix"] # initialize metadata self._load_metadata() # initialize attributes akey = _prefix_to_attrs_key(self._store, self._key_prefix) - self._attrs = Attributes(store, key=akey, read_only=read_only, - synchronizer=synchronizer, cache=cache_attrs) + self._attrs = Attributes( + store, key=akey, read_only=read_only, synchronizer=synchronizer, cache=cache_attrs + ) # initialize info reporter self._info_reporter = InfoReporter(self) @@ -257,13 +257,13 @@ def _load_metadata_nosync(self): # decode and store metadata as instance members meta = self._store._metadata_class.decode_array_metadata(meta_bytes) self._meta = meta - self._shape = meta['shape'] - self._fill_value = meta['fill_value'] - dimension_separator = meta.get('dimension_separator', None) + self._shape = meta["shape"] + self._fill_value = meta["fill_value"] + dimension_separator = meta.get("dimension_separator", None) if self._version == 2: - self._chunks = meta['chunks'] - self._dtype = meta['dtype'] - self._order = meta['order'] + self._chunks = meta["chunks"] + self._dtype = meta["dtype"] + self._order = meta["order"] if dimension_separator is None: try: dimension_separator = self._store._dimension_separator @@ -274,17 +274,17 @@ def _load_metadata_nosync(self): if dimension_separator is None: dimension_separator = "." else: - self._chunks = meta['chunk_grid']['chunk_shape'] - self._dtype = meta['data_type'] - self._order = meta['chunk_memory_layout'] - chunk_separator = meta['chunk_grid']['separator'] + self._chunks = meta["chunk_grid"]["chunk_shape"] + self._dtype = meta["data_type"] + self._order = meta["chunk_memory_layout"] + chunk_separator = meta["chunk_grid"]["separator"] if dimension_separator is None: - dimension_separator = meta.get('dimension_separator', chunk_separator) + dimension_separator = meta.get("dimension_separator", chunk_separator) self._dimension_separator = dimension_separator # setup compressor - compressor = meta.get('compressor', None) + compressor = meta.get("compressor", None) if compressor is None: self._compressor = None elif self._version == 2: @@ -294,17 +294,17 @@ def _load_metadata_nosync(self): # setup filters if self._version == 2: - filters = meta.get('filters', []) + filters = meta.get("filters", []) else: # TODO: storing filters under attributes for now since the v3 # array metadata does not have a 'filters' attribute. 
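# (descriptive note: on v3 the codec configs therefore sit under
# meta["attributes"]["filters"], which is what gets read back
# immediately below)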
- filters = meta['attributes'].get('filters', []) + filters = meta["attributes"].get("filters", []) if filters: filters = [get_codec(config) for config in filters] self._filters = filters if self._version == 3: - storage_transformers = meta.get('storage_transformers', []) + storage_transformers = meta.get("storage_transformers", []) if storage_transformers: transformed_store = self._chunk_store or self._store for storage_transformer in storage_transformers[::-1]: @@ -323,7 +323,7 @@ def _refresh_metadata_nosync(self): def _flush_metadata_nosync(self): if self._is_view: - raise PermissionError('operation not permitted for views') + raise PermissionError("operation not permitted for views") if self._compressor: compressor_config = self._compressor.get_config() @@ -334,20 +334,26 @@ def _flush_metadata_nosync(self): else: filters_config = None _compressor = compressor_config if self._version == 2 else self._compressor - meta = dict(shape=self._shape, compressor=_compressor, - fill_value=self._fill_value, filters=filters_config) - if getattr(self._store, '_store_version', 2) == 2: - meta.update( - dict(chunks=self._chunks, dtype=self._dtype, order=self._order) - ) + meta = dict( + shape=self._shape, + compressor=_compressor, + fill_value=self._fill_value, + filters=filters_config, + ) + if getattr(self._store, "_store_version", 2) == 2: + meta.update(dict(chunks=self._chunks, dtype=self._dtype, order=self._order)) else: meta.update( - dict(chunk_grid=dict(type='regular', - chunk_shape=self._chunks, - separator=self._dimension_separator), - data_type=self._dtype, - chunk_memory_layout=self._order, - attributes=self.attrs.asdict()) + dict( + chunk_grid=dict( + type="regular", + chunk_shape=self._chunks, + separator=self._dimension_separator, + ), + data_type=self._dtype, + chunk_memory_layout=self._order, + attributes=self.attrs.asdict(), + ) ) mkey = _prefix_to_array_key(self._store, self._key_prefix) self._store[mkey] = self._store._metadata_class.encode_array_metadata(meta) @@ -368,8 +374,8 @@ def name(self): if self.path: # follow h5py convention: add leading slash name = self.path - if name[0] != '/': - name = '/' + name + if name[0] != "/": + name = "/" + name return name return None @@ -377,7 +383,7 @@ def name(self): def basename(self): """Final component of name.""" if self.name is not None: - return self.name.split('/')[-1] + return self.name.split("/")[-1] return None @property @@ -513,10 +519,9 @@ def nbytes_stored(self): @property def _cdata_shape(self): if self._shape == (): - return 1, + return (1,) else: - return tuple(math.ceil(s / c) - for s, c in zip(self._shape, self._chunks)) + return tuple(math.ceil(s / c) for s, c in zip(self._shape, self._chunks)) @property def cdata_shape(self): @@ -550,14 +555,14 @@ def nchunks_initialized(self): # return sum(1 for k in members if prog.match(k)) # key pattern for chunk keys - prog = re.compile(self._data_key_prefix + r'c\d+') # TODO: ndim == 0 case? + prog = re.compile(self._data_key_prefix + r"c\d+") # TODO: ndim == 0 case? 
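# For reference, the key layouts being counted here (illustrative): v3
# chunk keys look like "data/root/<path>/c0/1", hence the
# prefix + r"c\d+" pattern, while v2 keys are bare coordinate strings
# such as "0.1" under the array's key prefix.
#
#     >>> import re
#     >>> prog = re.compile("data/root/arr/" + r"c\d+")
#     >>> bool(prog.match("data/root/arr/c0/1"))
#     True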
# get chunk keys, excluding the prefix members = self.chunk_store.list_prefix(self._data_path) # count the chunk keys return sum(1 for k in members if prog.match(k)) else: # key pattern for chunk keys - prog = re.compile(r'\.'.join([r'\d+'] * min(1, self.ndim))) + prog = re.compile(r"\.".join([r"\d+"] * min(1, self.ndim))) # count chunk keys return sum(1 for k in listdir(self.chunk_store, self._path) if prog.match(k)) @@ -605,11 +610,11 @@ def meta_array(self): def __eq__(self, other): return ( - isinstance(other, Array) and - self.store == other.store and - self.read_only == other.read_only and - self.path == other.path and - not self._is_view + isinstance(other, Array) + and self.store == other.store + and self.read_only == other.read_only + and self.path == other.path + and not self._is_view # N.B., no need to compare other properties, should be covered by # store comparison ) @@ -664,10 +669,10 @@ def islice(self, start=None, end=None): end = self.shape[0] if not isinstance(start, int) or start < 0: - raise ValueError('start must be a nonnegative integer') + raise ValueError("start must be a nonnegative integer") if not isinstance(end, int) or end < 0: - raise ValueError('end must be a nonnegative integer') + raise ValueError("end must be a nonnegative integer") # Avoid repeatedly decompressing chunks by iterating over the chunks # in the first dimension. @@ -675,7 +680,7 @@ def islice(self, start=None, end=None): chunk = None for j in range(start, end): if j % chunk_size == 0: - chunk = self[j: j + chunk_size] + chunk = self[j : j + chunk_size] # init chunk if we start offset of chunk borders elif chunk is None: chunk_start = j - j % chunk_size @@ -691,7 +696,7 @@ def __len__(self): return self.shape[0] else: # 0-dimensional array, same error message as numpy - raise TypeError('len() of unsized object') + raise TypeError("len() of unsized object") def __getitem__(self, selection): """Retrieve data for an item or region of the array. 
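A quick sketch of the selection and iteration behaviour touched in these
hunks (small in-memory array assumed; ``islice`` decompresses one chunk
at a time along the first dimension rather than one element per step):

    >>> import numpy as np
    >>> import zarr
    >>> z = zarr.array(np.arange(10), chunks=2)
    >>> z[3:7]
    array([3, 4, 5, 6])
    >>> [int(i) for i in z.islice(3, 6)]
    [3, 4, 5]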
@@ -960,11 +965,9 @@ def get_basic_selection(self, selection=Ellipsis, out=None, fields=None): # handle zero-dimensional arrays if self._shape == (): - return self._get_basic_selection_zd(selection=selection, out=out, - fields=fields) + return self._get_basic_selection_zd(selection=selection, out=out, fields=fields) else: - return self._get_basic_selection_nd(selection=selection, out=out, - fields=fields) + return self._get_basic_selection_nd(selection=selection, out=out, fields=fields) def _get_basic_selection_zd(self, selection, out=None, fields=None): # special case basic selection for zero-dimensional array @@ -1371,10 +1374,11 @@ def _get_selection(self, indexer, out=None, fields=None): # setup output array if out is None: - out = np.empty_like(self._meta_array, shape=out_shape, - dtype=out_dtype, order=self._order) + out = np.empty_like( + self._meta_array, shape=out_shape, dtype=out_dtype, order=self._order + ) else: - check_array_shape('out', out, out_shape) + check_array_shape("out", out, out_shape) # iterate over chunks @@ -1382,8 +1386,12 @@ def _get_selection(self, indexer, out=None, fields=None): # allow storage to get multiple items at once lchunk_coords, lchunk_selection, lout_selection = zip(*indexer) self._chunk_getitems( - lchunk_coords, lchunk_selection, out, lout_selection, - drop_axes=indexer.drop_axes, fields=fields + lchunk_coords, + lchunk_selection, + out, + lout_selection, + drop_axes=indexer.drop_axes, + fields=fields, ) if out.shape: return out @@ -1753,7 +1761,7 @@ def set_coordinate_selection(self, selection, value, fields=None): except TypeError: # Handle types like `list` or `tuple` value = np.array(value, like=self._meta_array) - if hasattr(value, 'shape') and len(value.shape) > 1: + if hasattr(value, "shape") and len(value.shape) > 1: value = value.reshape(-1) self._set_selection(indexer, value, fields=fields) @@ -1998,13 +2006,16 @@ def _set_selection(self, indexer, value, fields=None): # setting a scalar value pass else: - if not hasattr(value, 'shape'): + if not hasattr(value, "shape"): value = np.asanyarray(value, like=self._meta_array) - check_array_shape('value', value, sel_shape) + check_array_shape("value", value, sel_shape) # iterate over chunks in range - if not hasattr(self.chunk_store, "setitems") or self._synchronizer is not None \ - or any(map(lambda x: x == 0, self.shape)): + if ( + not hasattr(self.chunk_store, "setitems") + or self._synchronizer is not None + or any(map(lambda x: x == 0, self.shape)) + ): # iterative approach for chunk_coords, chunk_selection, out_selection in indexer: @@ -2044,8 +2055,7 @@ def _set_selection(self, indexer, value, fields=None): cv = chunk_value[item] chunk_values.append(cv) - self._chunk_setitems(lchunk_coords, lchunk_selection, chunk_values, - fields=fields) + self._chunk_setitems(lchunk_coords, lchunk_selection, chunk_values, fields=fields) def _process_chunk( self, @@ -2059,23 +2069,22 @@ def _process_chunk( partial_read_decode=False, ): """Take binary data from storage and fill output array""" - if (out_is_ndarray and - not fields and - is_contiguous_selection(out_selection) and - is_total_slice(chunk_selection, self._chunks) and - not self._filters and - self._dtype != object): + if ( + out_is_ndarray + and not fields + and is_contiguous_selection(out_selection) + and is_total_slice(chunk_selection, self._chunks) + and not self._filters + and self._dtype != object + ): dest = out[out_selection] # Assume that array-like objects that doesn't have a # `writeable` flag is writable. 
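# (descriptive note: getattr defaults to True so that duck arrays which
# do not expose a ``writeable`` attribute are assumed writable, matching
# the comment above; contiguity is then vetted separately via the flags
# checks in the write_direct condition below)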
dest_is_writable = getattr(dest, "writeable", True) - write_direct = ( - dest_is_writable and - ( - (self._order == 'C' and dest.flags.c_contiguous) or - (self._order == 'F' and dest.flags.f_contiguous) - ) + write_direct = dest_is_writable and ( + (self._order == "C" and dest.flags.c_contiguous) + or (self._order == "F" and dest.flags.f_contiguous) ) if write_direct: @@ -2104,9 +2113,7 @@ def _process_chunk( index_selection = PartialChunkIterator(chunk_selection, self.chunks) for start, nitems, partial_out_selection in index_selection: expected_shape = [ - len( - range(*partial_out_selection[i].indices(self.chunks[0] + 1)) - ) + len(range(*partial_out_selection[i].indices(self.chunks[0] + 1))) if i < len(partial_out_selection) else dim for i, dim in enumerate(self.chunks) @@ -2143,8 +2150,9 @@ def _process_chunk( # store selected data in output out[out_selection] = tmp - def _chunk_getitems(self, lchunk_coords, lchunk_selection, out, lout_selection, - drop_axes=None, fields=None): + def _chunk_getitems( + self, lchunk_coords, lchunk_selection, out, lout_selection, drop_axes=None, fields=None + ): """Obtain part or whole of chunks. Parameters @@ -2238,8 +2246,10 @@ def _chunk_getitems(self, lchunk_coords, lchunk_selection, out, lout_selection, def _chunk_setitems(self, lchunk_coords, lchunk_selection, values, fields=None): ckeys = map(self._chunk_key, lchunk_coords) - cdatas = {key: self._process_for_setitem(key, sel, val, fields=fields) - for key, sel, val in zip(ckeys, lchunk_selection, values)} + cdatas = { + key: self._process_for_setitem(key, sel, val, fields=fields) + for key, sel, val in zip(ckeys, lchunk_selection, values) + } to_store = {} if not self.write_empty_chunks: empty_chunks = {k: v for k, v in cdatas.items() if all_equal(self.fill_value, v)} @@ -2291,8 +2301,7 @@ def _chunk_setitem(self, chunk_coords, chunk_selection, value, fields=None): lock = self._synchronizer[ckey] with lock: - self._chunk_setitem_nosync(chunk_coords, chunk_selection, value, - fields=fields) + self._chunk_setitem_nosync(chunk_coords, chunk_selection, value, fields=fields) def _chunk_setitem_nosync(self, chunk_coords, chunk_selection, value, fields=None): ckey = self._chunk_key(chunk_coords) @@ -2354,7 +2363,7 @@ def _process_for_setitem(self, ckey, chunk_selection, value, fields=None): # decode chunk chunk = self._decode_chunk(cdata) if not chunk.flags.writeable: - chunk = chunk.copy(order='K') + chunk = chunk.copy(order="K") # modify if fields: @@ -2372,8 +2381,12 @@ def _chunk_key(self, chunk_coords): # where P = self._key_prefix, i, j, ... = chunk_coords # e.g. 
c0/2/3 for 3d array with chunk index (0, 2, 3) # https://zarr-specs.readthedocs.io/en/core-protocol-v3.0-dev/protocol/core/v3.0.html#regular-grids - return ("data/root/" + self._key_prefix + - "c" + self._dimension_separator.join(map(str, chunk_coords))) + return ( + "data/root/" + + self._key_prefix + + "c" + + self._dimension_separator.join(map(str, chunk_coords)) + ) else: return self._key_prefix + self._dimension_separator.join(map(str, chunk_coords)) @@ -2382,8 +2395,7 @@ def _decode_chunk(self, cdata, start=None, nitems=None, expected_shape=None): if self._compressor: # only decode requested items if ( - all(x is not None for x in [start, nitems]) - and self._compressor.codec_id == "blosc" + all(x is not None for x in [start, nitems]) and self._compressor.codec_id == "blosc" ) and hasattr(self._compressor, "decode_partial"): chunk = self._compressor.decode_partial(cdata, start, nitems) else: @@ -2408,10 +2420,10 @@ def _decode_chunk(self, cdata, start=None, nitems=None, expected_shape=None): # codec in the filter chain, i.e., a filter that converts from object # array to something else during encoding, and converts back to object # array during decoding. - raise RuntimeError('cannot read object array without object codec') + raise RuntimeError("cannot read object array without object codec") # ensure correct chunk shape - chunk = chunk.reshape(-1, order='A') + chunk = chunk.reshape(-1, order="A") chunk = chunk.reshape(expected_shape or self._chunks, order=self._order) return chunk @@ -2425,7 +2437,7 @@ def _encode_chunk(self, chunk): # check object encoding if ensure_ndarray_like(chunk).dtype == object: - raise RuntimeError('cannot write object array without object codec') + raise RuntimeError("cannot write object array without object codec") # compress if self._compressor: @@ -2434,24 +2446,21 @@ def _encode_chunk(self, chunk): cdata = chunk # ensure in-memory data is immutable and easy to compare - if ( - isinstance(self.chunk_store, KVStore) - or isinstance(self._chunk_store, KVStore) - ): + if isinstance(self.chunk_store, KVStore) or isinstance(self._chunk_store, KVStore): cdata = ensure_bytes(cdata) return cdata def __repr__(self): t = type(self) - r = '<{}.{}'.format(t.__module__, t.__name__) + r = "<{}.{}".format(t.__module__, t.__name__) if self.name: - r += ' %r' % self.name - r += ' %s' % str(self.shape) - r += ' %s' % self.dtype + r += " %r" % self.name + r += " %s" % str(self.shape) + r += " %s" % self.dtype if self._read_only: - r += ' read-only' - r += '>' + r += " read-only" + r += ">" return r @property @@ -2483,13 +2492,12 @@ def info_items(self): return self._synchronized_op(self._info_items_nosync) def _info_items_nosync(self): - def typestr(o): - return '{}.{}'.format(type(o).__module__, type(o).__name__) + return "{}.{}".format(type(o).__module__, type(o).__name__) def bytestr(n): if n > 2**10: - return '{} ({})'.format(n, human_readable_size(n)) + return "{} ({})".format(n, human_readable_size(n)) else: return str(n) @@ -2497,41 +2505,39 @@ def bytestr(n): # basic info if self.name is not None: - items += [('Name', self.name)] + items += [("Name", self.name)] items += [ - ('Type', typestr(self)), - ('Data type', '%s' % self.dtype), - ('Shape', str(self.shape)), - ('Chunk shape', str(self.chunks)), - ('Order', self.order), - ('Read-only', str(self.read_only)), + ("Type", typestr(self)), + ("Data type", "%s" % self.dtype), + ("Shape", str(self.shape)), + ("Chunk shape", str(self.chunks)), + ("Order", self.order), + ("Read-only", str(self.read_only)), ] # filters 
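# (descriptive note: one "Filter [i]" row is emitted per configured
# codec below, in filter-chain order)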
if self.filters: for i, f in enumerate(self.filters): - items += [('Filter [%s]' % i, repr(f))] + items += [("Filter [%s]" % i, repr(f))] # compressor - items += [('Compressor', repr(self.compressor))] + items += [("Compressor", repr(self.compressor))] # synchronizer if self._synchronizer is not None: - items += [('Synchronizer type', typestr(self._synchronizer))] + items += [("Synchronizer type", typestr(self._synchronizer))] # storage info - items += [('Store type', typestr(self._store))] + items += [("Store type", typestr(self._store))] if self._chunk_store is not None: - items += [('Chunk store type', typestr(self._chunk_store))] - items += [('No. bytes', bytestr(self.nbytes))] + items += [("Chunk store type", typestr(self._chunk_store))] + items += [("No. bytes", bytestr(self.nbytes))] if self.nbytes_stored > 0: items += [ - ('No. bytes stored', bytestr(self.nbytes_stored)), - ('Storage ratio', '%.1f' % (self.nbytes / self.nbytes_stored)), + ("No. bytes stored", bytestr(self.nbytes_stored)), + ("Storage ratio", "%.1f" % (self.nbytes / self.nbytes_stored)), ] - items += [ - ('Chunks initialized', '{}/{}'.format(self.nchunks_initialized, self.nchunks)) - ] + items += [("Chunks initialized", "{}/{}".format(self.nchunks_initialized, self.nchunks))] return items @@ -2590,7 +2596,7 @@ def hexdigest(self, hashname="sha1"): # This is a bytes object on Python 3 and we want a str. if type(checksum) is not str: - checksum = checksum.decode('utf8') + checksum = checksum.decode("utf8") return checksum @@ -2682,8 +2688,7 @@ def _resize_nosync(self, *args): # determine the new number and arrangement of chunks chunks = self._chunks - new_cdata_shape = tuple(math.ceil(s / c) - for s, c in zip(new_shape, chunks)) + new_cdata_shape = tuple(math.ceil(s / c) for s, c in zip(new_shape, chunks)) # remove any chunks not within range # The idea is that, along each dimension, @@ -2752,18 +2757,18 @@ def append(self, data, axis=0): def _append_nosync(self, data, axis=0): # ensure data is array-like - if not hasattr(data, 'shape'): + if not hasattr(data, "shape"): data = np.asanyarray(data, like=self._meta_array) # ensure shapes are compatible for non-append dimensions - self_shape_preserved = tuple(s for i, s in enumerate(self._shape) - if i != axis) - data_shape_preserved = tuple(s for i, s in enumerate(data.shape) - if i != axis) + self_shape_preserved = tuple(s for i, s in enumerate(self._shape) if i != axis) + data_shape_preserved = tuple(s for i, s in enumerate(data.shape) if i != axis) if self_shape_preserved != data_shape_preserved: - raise ValueError('shape of data to append is not compatible with the array; ' - 'all dimensions must match except for the dimension being ' - 'appended') + raise ValueError( + "shape of data to append is not compatible with the array; " + "all dimensions must match except for the dimension being " + "appended" + ) # remember old shape old_shape = self._shape @@ -2787,9 +2792,16 @@ def _append_nosync(self, data, axis=0): return new_shape - def view(self, shape=None, chunks=None, dtype=None, - fill_value=None, filters=None, read_only=None, - synchronizer=None): + def view( + self, + shape=None, + chunks=None, + dtype=None, + fill_value=None, + filters=None, + read_only=None, + synchronizer=None, + ): """Return an array sharing the same data. 
Parameters @@ -2904,8 +2916,15 @@ def view(self, shape=None, chunks=None, dtype=None, read_only = self._read_only if synchronizer is None: synchronizer = self._synchronizer - a = Array(store=store, path=path, chunk_store=chunk_store, read_only=read_only, - synchronizer=synchronizer, cache_metadata=True, zarr_version=self._version) + a = Array( + store=store, + path=path, + chunk_store=chunk_store, + read_only=read_only, + synchronizer=synchronizer, + cache_metadata=True, + zarr_version=self._version, + ) a._is_view = True # allow override of some properties diff --git a/zarr/creation.py b/zarr/creation.py index dc8b8a157d..726d0b5932 100644 --- a/zarr/creation.py +++ b/zarr/creation.py @@ -11,18 +11,42 @@ ContainsArrayError, ContainsGroupError, ) -from zarr.storage import (contains_array, contains_group, default_compressor, - init_array, normalize_storage_path, - normalize_store_arg) +from zarr.storage import ( + contains_array, + contains_group, + default_compressor, + init_array, + normalize_storage_path, + normalize_store_arg, +) from zarr.util import normalize_dimension_separator -def create(shape, chunks=True, dtype=None, compressor='default', - fill_value: Optional[int] = 0, order='C', store=None, synchronizer=None, - overwrite=False, path=None, chunk_store=None, filters=None, - cache_metadata=True, cache_attrs=True, read_only=False, - object_codec=None, dimension_separator=None, write_empty_chunks=True, - *, zarr_version=None, meta_array=None, storage_transformers=(), **kwargs): +def create( + shape, + chunks=True, + dtype=None, + compressor="default", + fill_value: Optional[int] = 0, + order="C", + store=None, + synchronizer=None, + overwrite=False, + path=None, + chunk_store=None, + filters=None, + cache_metadata=True, + cache_attrs=True, + read_only=False, + object_codec=None, + dimension_separator=None, + write_empty_chunks=True, + *, + zarr_version=None, + meta_array=None, + storage_transformers=(), + **kwargs, +): """Create an array. 
Parameters @@ -150,11 +174,11 @@ def create(shape, chunks=True, dtype=None, compressor='default', """ if zarr_version is None and store is None: - zarr_version = getattr(chunk_store, '_store_version', DEFAULT_ZARR_VERSION) + zarr_version = getattr(chunk_store, "_store_version", DEFAULT_ZARR_VERSION) # handle polymorphic store arg store = normalize_store_arg(store, zarr_version=zarr_version, mode="w") - zarr_version = getattr(store, '_store_version', DEFAULT_ZARR_VERSION) + zarr_version = getattr(store, "_store_version", DEFAULT_ZARR_VERSION) # API compatibility with h5py compressor, fill_value = _kwargs_compat(compressor, fill_value, kwargs) @@ -168,22 +192,43 @@ def create(shape, chunks=True, dtype=None, compressor='default', raise ValueError( f"Specified dimension_separator: {dimension_separator}" f"conflicts with store's separator: " - f"{store_separator}") + f"{store_separator}" + ) dimension_separator = normalize_dimension_separator(dimension_separator) if zarr_version > 2 and path is None: - path = '/' + path = "/" # initialize array metadata - init_array(store, shape=shape, chunks=chunks, dtype=dtype, compressor=compressor, - fill_value=fill_value, order=order, overwrite=overwrite, path=path, - chunk_store=chunk_store, filters=filters, object_codec=object_codec, - dimension_separator=dimension_separator, storage_transformers=storage_transformers) + init_array( + store, + shape=shape, + chunks=chunks, + dtype=dtype, + compressor=compressor, + fill_value=fill_value, + order=order, + overwrite=overwrite, + path=path, + chunk_store=chunk_store, + filters=filters, + object_codec=object_codec, + dimension_separator=dimension_separator, + storage_transformers=storage_transformers, + ) # instantiate array - z = Array(store, path=path, chunk_store=chunk_store, synchronizer=synchronizer, - cache_metadata=cache_metadata, cache_attrs=cache_attrs, read_only=read_only, - write_empty_chunks=write_empty_chunks, meta_array=meta_array) + z = Array( + store, + path=path, + chunk_store=chunk_store, + synchronizer=synchronizer, + cache_metadata=cache_metadata, + cache_attrs=cache_attrs, + read_only=read_only, + write_empty_chunks=write_empty_chunks, + meta_array=meta_array, + ) return z @@ -193,7 +238,7 @@ def _kwargs_compat(compressor, fill_value, kwargs): # to be compatible with h5py, as well as backwards-compatible with Zarr # 1.x, accept 'compression' and 'compression_opts' keyword arguments - if compressor != 'default': + if compressor != "default": # 'compressor' overrides 'compression' if "compression" in kwargs: warn( @@ -208,14 +253,14 @@ def _kwargs_compat(compressor, fill_value, kwargs): ) del kwargs["compression_opts"] - elif 'compression' in kwargs: - compression = kwargs.pop('compression') - compression_opts = kwargs.pop('compression_opts', None) + elif "compression" in kwargs: + compression = kwargs.pop("compression") + compression_opts = kwargs.pop("compression_opts", None) - if compression is None or compression == 'none': + if compression is None or compression == "none": compressor = None - elif compression == 'default': + elif compression == "default": compressor = default_compressor elif isinstance(compression, str): @@ -233,21 +278,21 @@ def _kwargs_compat(compressor, fill_value, kwargs): compressor = codec_cls(compression_opts) # be lenient here if user gives compressor as 'compression' - elif hasattr(compression, 'get_config'): + elif hasattr(compression, "get_config"): compressor = compression else: - raise ValueError('bad value for compression: %r' % compression) + raise 
ValueError("bad value for compression: %r" % compression) # handle 'fillvalue' - if 'fillvalue' in kwargs: + if "fillvalue" in kwargs: # to be compatible with h5py, accept 'fillvalue' instead of # 'fill_value' - fill_value = kwargs.pop('fillvalue') + fill_value = kwargs.pop("fillvalue") # ignore other keyword arguments for k in kwargs: - warn('ignoring keyword argument %r' % k) + warn("ignoring keyword argument %r" % k) return compressor, fill_value @@ -334,16 +379,13 @@ def _get_shape_chunks(a): shape = None chunks = None - if hasattr(a, 'shape') and \ - isinstance(a.shape, tuple): + if hasattr(a, "shape") and isinstance(a.shape, tuple): shape = a.shape - if hasattr(a, 'chunks') and \ - isinstance(a.chunks, tuple) and \ - (len(a.chunks) == len(a.shape)): + if hasattr(a, "chunks") and isinstance(a.chunks, tuple) and (len(a.chunks) == len(a.shape)): chunks = a.chunks - elif hasattr(a, 'chunklen'): + elif hasattr(a, "chunklen"): # bcolz carray chunks = (a.chunklen,) + a.shape[1:] @@ -368,27 +410,27 @@ def array(data, **kwargs): """ # ensure data is array-like - if not hasattr(data, 'shape') or not hasattr(data, 'dtype'): + if not hasattr(data, "shape") or not hasattr(data, "dtype"): data = np.asanyarray(data) # setup dtype - kw_dtype = kwargs.get('dtype') + kw_dtype = kwargs.get("dtype") if kw_dtype is None: - kwargs['dtype'] = data.dtype + kwargs["dtype"] = data.dtype else: - kwargs['dtype'] = kw_dtype + kwargs["dtype"] = kw_dtype # setup shape and chunks data_shape, data_chunks = _get_shape_chunks(data) - kwargs['shape'] = data_shape - kw_chunks = kwargs.get('chunks') + kwargs["shape"] = data_shape + kw_chunks = kwargs.get("chunks") if kw_chunks is None: - kwargs['chunks'] = data_chunks + kwargs["chunks"] = data_chunks else: - kwargs['chunks'] = kw_chunks + kwargs["chunks"] = kw_chunks # pop read-only to apply after storing the data - read_only = kwargs.pop('read_only', False) + read_only = kwargs.pop("read_only", False) # instantiate array z = create(**kwargs) @@ -425,7 +467,7 @@ def open_array( zarr_version=None, dimension_separator=None, meta_array=None, - **kwargs + **kwargs, ): """Open an array using file-mode-like semantics. @@ -539,27 +581,27 @@ def open_array( # a : read/write if exists, create otherwise (default) if zarr_version is None and store is None: - zarr_version = getattr(chunk_store, '_store_version', DEFAULT_ZARR_VERSION) + zarr_version = getattr(chunk_store, "_store_version", DEFAULT_ZARR_VERSION) # handle polymorphic store arg - store = normalize_store_arg(store, storage_options=storage_options, - mode=mode, zarr_version=zarr_version) - zarr_version = getattr(store, '_store_version', DEFAULT_ZARR_VERSION) + store = normalize_store_arg( + store, storage_options=storage_options, mode=mode, zarr_version=zarr_version + ) + zarr_version = getattr(store, "_store_version", DEFAULT_ZARR_VERSION) if chunk_store is not None: - chunk_store = normalize_store_arg(chunk_store, - storage_options=storage_options, - mode=mode, - zarr_version=zarr_version) + chunk_store = normalize_store_arg( + chunk_store, storage_options=storage_options, mode=mode, zarr_version=zarr_version + ) # respect the dimension separator specified in a store, if present if dimension_separator is None: - if hasattr(store, '_dimension_separator'): + if hasattr(store, "_dimension_separator"): dimension_separator = store._dimension_separator else: - dimension_separator = '.' if zarr_version == 2 else '/' + dimension_separator = "." 
if zarr_version == 2 else "/" if zarr_version == 3 and path is None: - path = 'array' # TODO: raise ValueError instead? + path = "array" # TODO: raise ValueError instead? path = normalize_storage_path(path) @@ -572,48 +614,84 @@ def open_array( # ensure store is initialized - if mode in ['r', 'r+']: + if mode in ["r", "r+"]: if not contains_array(store, path=path): if contains_group(store, path=path): raise ContainsGroupError(path) raise ArrayNotFoundError(path) - elif mode == 'w': - init_array(store, shape=shape, chunks=chunks, dtype=dtype, - compressor=compressor, fill_value=fill_value, - order=order, filters=filters, overwrite=True, path=path, - object_codec=object_codec, chunk_store=chunk_store, - dimension_separator=dimension_separator) - - elif mode == 'a': + elif mode == "w": + init_array( + store, + shape=shape, + chunks=chunks, + dtype=dtype, + compressor=compressor, + fill_value=fill_value, + order=order, + filters=filters, + overwrite=True, + path=path, + object_codec=object_codec, + chunk_store=chunk_store, + dimension_separator=dimension_separator, + ) + + elif mode == "a": if not contains_array(store, path=path): if contains_group(store, path=path): raise ContainsGroupError(path) - init_array(store, shape=shape, chunks=chunks, dtype=dtype, - compressor=compressor, fill_value=fill_value, - order=order, filters=filters, path=path, - object_codec=object_codec, chunk_store=chunk_store, - dimension_separator=dimension_separator) + init_array( + store, + shape=shape, + chunks=chunks, + dtype=dtype, + compressor=compressor, + fill_value=fill_value, + order=order, + filters=filters, + path=path, + object_codec=object_codec, + chunk_store=chunk_store, + dimension_separator=dimension_separator, + ) - elif mode in ['w-', 'x']: + elif mode in ["w-", "x"]: if contains_group(store, path=path): raise ContainsGroupError(path) elif contains_array(store, path=path): raise ContainsArrayError(path) else: - init_array(store, shape=shape, chunks=chunks, dtype=dtype, - compressor=compressor, fill_value=fill_value, - order=order, filters=filters, path=path, - object_codec=object_codec, chunk_store=chunk_store, - dimension_separator=dimension_separator) + init_array( + store, + shape=shape, + chunks=chunks, + dtype=dtype, + compressor=compressor, + fill_value=fill_value, + order=order, + filters=filters, + path=path, + object_codec=object_codec, + chunk_store=chunk_store, + dimension_separator=dimension_separator, + ) # determine read only status - read_only = mode == 'r' + read_only = mode == "r" # instantiate array - z = Array(store, read_only=read_only, synchronizer=synchronizer, - cache_metadata=cache_metadata, cache_attrs=cache_attrs, path=path, - chunk_store=chunk_store, write_empty_chunks=write_empty_chunks, meta_array=meta_array) + z = Array( + store, + read_only=read_only, + synchronizer=synchronizer, + cache_metadata=cache_metadata, + cache_attrs=cache_attrs, + path=path, + chunk_store=chunk_store, + write_empty_chunks=write_empty_chunks, + meta_array=meta_array, + ) return z @@ -622,21 +700,21 @@ def _like_args(a, kwargs): shape, chunks = _get_shape_chunks(a) if shape is not None: - kwargs.setdefault('shape', shape) + kwargs.setdefault("shape", shape) if chunks is not None: - kwargs.setdefault('chunks', chunks) + kwargs.setdefault("chunks", chunks) - if hasattr(a, 'dtype'): - kwargs.setdefault('dtype', a.dtype) + if hasattr(a, "dtype"): + kwargs.setdefault("dtype", a.dtype) if isinstance(a, Array): - kwargs.setdefault('compressor', a.compressor) - kwargs.setdefault('order', a.order) - 
kwargs.setdefault('filters', a.filters) - kwargs.setdefault('zarr_version', a._version) + kwargs.setdefault("compressor", a.compressor) + kwargs.setdefault("order", a.order) + kwargs.setdefault("filters", a.filters) + kwargs.setdefault("zarr_version", a._version) else: - kwargs.setdefault('compressor', 'default') - kwargs.setdefault('order', 'C') + kwargs.setdefault("compressor", "default") + kwargs.setdefault("order", "C") def empty_like(a, **kwargs): @@ -661,7 +739,7 @@ def full_like(a, **kwargs): """Create a filled array like `a`.""" _like_args(a, kwargs) if isinstance(a, Array): - kwargs.setdefault('fill_value', a.fill_value) + kwargs.setdefault("fill_value", a.fill_value) return full(**kwargs) @@ -669,5 +747,5 @@ def open_like(a, path, **kwargs): """Open a persistent array like `a`.""" _like_args(a, kwargs) if isinstance(a, Array): - kwargs.setdefault('fill_value', a.fill_value) + kwargs.setdefault("fill_value", a.fill_value) return open_array(path, **kwargs) diff --git a/zarr/errors.py b/zarr/errors.py index 808cbe99a4..30c9b13d39 100644 --- a/zarr/errors.py +++ b/zarr/errors.py @@ -67,8 +67,9 @@ def __init__(self): def err_too_many_indices(selection, shape): - raise IndexError('too many indices for array; expected {}, got {}' - .format(len(shape), len(selection))) + raise IndexError( + "too many indices for array; expected {}, got {}".format(len(shape), len(selection)) + ) class VindexInvalidSelectionError(_BaseZarrIndexError): diff --git a/zarr/hierarchy.py b/zarr/hierarchy.py index 18e7ac7863..c7cc5c6fe2 100644 --- a/zarr/hierarchy.py +++ b/zarr/hierarchy.py @@ -3,12 +3,27 @@ import numpy as np -from zarr._storage.store import (_get_metadata_suffix, data_root, meta_root, - DEFAULT_ZARR_VERSION, assert_zarr_v3_api_available) +from zarr._storage.store import ( + _get_metadata_suffix, + data_root, + meta_root, + DEFAULT_ZARR_VERSION, + assert_zarr_v3_api_available, +) from zarr.attrs import Attributes from zarr.core import Array -from zarr.creation import (array, create, empty, empty_like, full, full_like, - ones, ones_like, zeros, zeros_like) +from zarr.creation import ( + array, + create, + empty, + empty_like, + full, + full_like, + ones, + ones_like, + zeros, + zeros_like, +) from zarr.errors import ( ContainsArrayError, ContainsGroupError, @@ -120,12 +135,21 @@ class Group(MutableMapping): """ - def __init__(self, store, path=None, read_only=False, chunk_store=None, - cache_attrs=True, synchronizer=None, zarr_version=None, *, - meta_array=None): + def __init__( + self, + store, + path=None, + read_only=False, + chunk_store=None, + cache_attrs=True, + synchronizer=None, + zarr_version=None, + *, + meta_array=None + ): store: BaseStore = _normalize_store_arg(store, zarr_version=zarr_version) if zarr_version is None: - zarr_version = getattr(store, '_store_version', DEFAULT_ZARR_VERSION) + zarr_version = getattr(store, "_store_version", DEFAULT_ZARR_VERSION) if zarr_version != 2: assert_zarr_v3_api_available() @@ -136,9 +160,9 @@ def __init__(self, store, path=None, read_only=False, chunk_store=None, self._chunk_store = chunk_store self._path = normalize_storage_path(path) if self._path: - self._key_prefix = self._path + '/' + self._key_prefix = self._path + "/" else: - self._key_prefix = '' + self._key_prefix = "" self._read_only = read_only self._synchronizer = synchronizer if meta_array is not None: @@ -182,8 +206,9 @@ def __init__(self, store, path=None, read_only=False, chunk_store=None, # Note: mkey doesn't actually exist for implicit groups, but the # object can still be 
created. akey = mkey - self._attrs = Attributes(store, key=akey, read_only=read_only, - cache=cache_attrs, synchronizer=synchronizer) + self._attrs = Attributes( + store, key=akey, read_only=read_only, cache=cache_attrs, synchronizer=synchronizer + ) # setup info self._info = InfoReporter(self) @@ -204,15 +229,15 @@ def name(self): if self._path: # follow h5py convention: add leading slash name = self._path - if name[0] != '/': - name = '/' + name + if name[0] != "/": + name = "/" + name return name - return '/' + return "/" @property def basename(self): """Final component of name.""" - return self.name.split('/')[-1] + return self.name.split("/")[-1] @property def read_only(self): @@ -252,10 +277,10 @@ def meta_array(self): def __eq__(self, other): return ( - isinstance(other, Group) and - self._store == other.store and - self._read_only == other.read_only and - self._path == other.path + isinstance(other, Group) + and self._store == other.store + and self._read_only == other.read_only + and self._path == other.path # N.B., no need to compare attributes, should be covered by # store comparison ) @@ -279,11 +304,10 @@ def __iter__(self): quux """ - if getattr(self._store, '_store_version', 2) == 2: + if getattr(self._store, "_store_version", 2) == 2: for key in sorted(listdir(self._store, self._path)): path = self._key_prefix + key - if (contains_array(self._store, path) or - contains_group(self._store, path)): + if contains_array(self._store, path) or contains_group(self._store, path): yield key else: # TODO: Should this iterate over data folders and/or metadata @@ -296,15 +320,15 @@ def __iter__(self): # yield any groups or arrays sfx = self._metadata_key_suffix for key in keys: - len_suffix = len('.group') + len(sfx) # same for .array - if key.endswith(('.group' + sfx, '.array' + sfx)): + len_suffix = len(".group") + len(sfx) # same for .array + if key.endswith((".group" + sfx, ".array" + sfx)): yield key[name_start:-len_suffix] # also yield any implicit groups for prefix in prefixes: - prefix = prefix.rstrip('/') + prefix = prefix.rstrip("/") # only implicit if there is no .group.sfx file - if not prefix + '.group' + sfx in self._store: + if prefix + ".group" + sfx not in self._store: yield prefix[name_start:] # Note: omit data/root/ to avoid duplicate listings @@ -316,12 +340,12 @@ def __len__(self): def __repr__(self): t = type(self) - r = '<{}.{}'.format(t.__module__, t.__name__) + r = "<{}.{}".format(t.__module__, t.__name__) if self.name: - r += ' %r' % self.name + r += " %r" % self.name if self._read_only: - r += ' read-only' - r += '>' + r += " read-only" + r += ">" return r def __enter__(self): @@ -333,39 +357,38 @@ def __exit__(self, exc_type, exc_val, exc_tb): self.store.close() def info_items(self): - def typestr(o): - return '{}.{}'.format(type(o).__module__, type(o).__name__) + return "{}.{}".format(type(o).__module__, type(o).__name__) items = [] # basic info if self.name is not None: - items += [('Name', self.name)] + items += [("Name", self.name)] items += [ - ('Type', typestr(self)), - ('Read-only', str(self.read_only)), + ("Type", typestr(self)), + ("Read-only", str(self.read_only)), ] # synchronizer if self._synchronizer is not None: - items += [('Synchronizer type', typestr(self._synchronizer))] + items += [("Synchronizer type", typestr(self._synchronizer))] # storage info - items += [('Store type', typestr(self._store))] + items += [("Store type", typestr(self._store))] if self._chunk_store is not None: - items += [('Chunk store type', typestr(self._chunk_store))] 
+ items += [("Chunk store type", typestr(self._chunk_store))] # members - items += [('No. members', len(self))] + items += [("No. members", len(self))] array_keys = sorted(self.array_keys()) group_keys = sorted(self.group_keys()) - items += [('No. arrays', len(array_keys))] - items += [('No. groups', len(group_keys))] + items += [("No. arrays", len(array_keys))] + items += [("No. groups", len(group_keys))] if array_keys: - items += [('Arrays', ', '.join(array_keys))] + items += [("Arrays", ", ".join(array_keys))] if group_keys: - items += [('Groups', ', '.join(group_keys))] + items += [("Groups", ", ".join(group_keys))] return items @@ -385,7 +408,7 @@ def __setstate__(self, state): self.__init__(**state) def _item_path(self, item): - absolute = isinstance(item, str) and item and item[0] == '/' + absolute = isinstance(item, str) and item and item[0] == "/" path = normalize_storage_path(item) if not absolute and self._path: path = self._key_prefix + path @@ -409,8 +432,9 @@ def __contains__(self, item): """ path = self._item_path(item) - return contains_array(self._store, path) or \ - contains_group(self._store, path, explicit_only=False) + return contains_array(self._store, path) or contains_group( + self._store, path, explicit_only=False + ) def __getitem__(self, item): """Obtain a group member. @@ -435,23 +459,41 @@ def __getitem__(self, item): """ path = self._item_path(item) if contains_array(self._store, path): - return Array(self._store, read_only=self._read_only, path=path, - chunk_store=self._chunk_store, - synchronizer=self._synchronizer, cache_attrs=self.attrs.cache, - zarr_version=self._version, meta_array=self._meta_array) + return Array( + self._store, + read_only=self._read_only, + path=path, + chunk_store=self._chunk_store, + synchronizer=self._synchronizer, + cache_attrs=self.attrs.cache, + zarr_version=self._version, + meta_array=self._meta_array, + ) elif contains_group(self._store, path, explicit_only=True): - return Group(self._store, read_only=self._read_only, path=path, - chunk_store=self._chunk_store, cache_attrs=self.attrs.cache, - synchronizer=self._synchronizer, zarr_version=self._version, - meta_array=self._meta_array) + return Group( + self._store, + read_only=self._read_only, + path=path, + chunk_store=self._chunk_store, + cache_attrs=self.attrs.cache, + synchronizer=self._synchronizer, + zarr_version=self._version, + meta_array=self._meta_array, + ) elif self._version == 3: - implicit_group = meta_root + path + '/' + implicit_group = meta_root + path + "/" # non-empty folder in the metadata path implies an implicit group if self._store.list_prefix(implicit_group): - return Group(self._store, read_only=self._read_only, path=path, - chunk_store=self._chunk_store, cache_attrs=self.attrs.cache, - synchronizer=self._synchronizer, zarr_version=self._version, - meta_array=self._meta_array) + return Group( + self._store, + read_only=self._read_only, + path=path, + chunk_store=self._chunk_store, + cache_attrs=self.attrs.cache, + synchronizer=self._synchronizer, + zarr_version=self._version, + meta_array=self._meta_array, + ) else: raise KeyError(item) else: @@ -465,8 +507,9 @@ def __delitem__(self, item): def _delitem_nosync(self, item): path = self._item_path(item) - if contains_array(self._store, path) or \ - contains_group(self._store, path, explicit_only=False): + if contains_array(self._store, path) or contains_group( + self._store, path, explicit_only=False + ): rmdir(self._store, path) else: raise KeyError(item) @@ -510,13 +553,13 @@ def group_keys(self): yield 
key else: dir_name = meta_root + self._path - group_sfx = '.group' + self._metadata_key_suffix + group_sfx = ".group" + self._metadata_key_suffix # The fact that we call sorted means this can't be a streaming generator. # The keys are already in memory. all_keys = sorted(listdir(self._store, dir_name)) for key in all_keys: if key.endswith(group_sfx): - key = key[:-len(group_sfx)] + key = key[: -len(group_sfx)] if key in all_keys: # otherwise we will double count this group continue @@ -555,7 +598,8 @@ def groups(self): chunk_store=self._chunk_store, cache_attrs=self.attrs.cache, synchronizer=self._synchronizer, - zarr_version=self._version) + zarr_version=self._version, + ) else: for key in self.group_keys(): @@ -567,7 +611,8 @@ def groups(self): chunk_store=self._chunk_store, cache_attrs=self.attrs.cache, synchronizer=self._synchronizer, - zarr_version=self._version) + zarr_version=self._version, + ) def array_keys(self, recurse=False): """Return an iterator over member names for arrays only. @@ -591,9 +636,7 @@ def array_keys(self, recurse=False): ['baz', 'quux'] """ - return self._array_iter(keys_only=True, - method='array_keys', - recurse=recurse) + return self._array_iter(keys_only=True, method="array_keys", recurse=recurse) def arrays(self, recurse=False): """Return an iterator over (name, value) pairs for arrays only. @@ -619,9 +662,7 @@ def arrays(self, recurse=False): quux """ - return self._array_iter(keys_only=False, - method='arrays', - recurse=recurse) + return self._array_iter(keys_only=False, method="arrays", recurse=recurse) def _array_iter(self, keys_only, method, recurse): if self._version == 2: @@ -635,12 +676,12 @@ def _array_iter(self, keys_only, method, recurse): yield from getattr(group, method)(recurse=recurse) else: dir_name = meta_root + self._path - array_sfx = '.array' + self._metadata_key_suffix - group_sfx = '.group' + self._metadata_key_suffix + array_sfx = ".array" + self._metadata_key_suffix + group_sfx = ".group" + self._metadata_key_suffix for key in sorted(listdir(self._store, dir_name)): if key.endswith(array_sfx): - key = key[:-len(array_sfx)] + key = key[: -len(array_sfx)] _key = key.rstrip("/") yield _key if keys_only else (_key, self[key]) @@ -794,8 +835,7 @@ def visit(self, func): return self.visitvalues(lambda o: func(o.name[base_len:].lstrip("/"))) def visitkeys(self, func): - """An alias for :py:meth:`~Group.visit`. 
- """ + """An alias for :py:meth:`~Group.visit`.""" return self.visit(func) @@ -924,12 +964,17 @@ def _create_group_nosync(self, name, overwrite=False): path = self._item_path(name) # create terminal group - init_group(self._store, path=path, chunk_store=self._chunk_store, - overwrite=overwrite) - - return Group(self._store, path=path, read_only=self._read_only, - chunk_store=self._chunk_store, cache_attrs=self.attrs.cache, - synchronizer=self._synchronizer, zarr_version=self._version) + init_group(self._store, path=path, chunk_store=self._chunk_store, overwrite=overwrite) + + return Group( + self._store, + path=path, + read_only=self._read_only, + chunk_store=self._chunk_store, + cache_attrs=self.attrs.cache, + synchronizer=self._synchronizer, + zarr_version=self._version, + ) def create_groups(self, *names, **kwargs): """Convenience method to create multiple groups in a single call.""" @@ -960,20 +1005,26 @@ def require_group(self, name, overwrite=False): """ - return self._write_op(self._require_group_nosync, name, - overwrite=overwrite) + return self._write_op(self._require_group_nosync, name, overwrite=overwrite) def _require_group_nosync(self, name, overwrite=False): path = self._item_path(name) # create terminal group if necessary if not contains_group(self._store, path): - init_group(store=self._store, path=path, chunk_store=self._chunk_store, - overwrite=overwrite) + init_group( + store=self._store, path=path, chunk_store=self._chunk_store, overwrite=overwrite + ) - return Group(self._store, path=path, read_only=self._read_only, - chunk_store=self._chunk_store, cache_attrs=self.attrs.cache, - synchronizer=self._synchronizer, zarr_version=self._version) + return Group( + self._store, + path=path, + read_only=self._read_only, + chunk_store=self._chunk_store, + cache_attrs=self.attrs.cache, + synchronizer=self._synchronizer, + zarr_version=self._version, + ) def require_groups(self, *names): """Convenience method to require multiple groups in a single call.""" @@ -1048,17 +1099,15 @@ def _create_dataset_nosync(self, name, data=None, **kwargs): path = self._item_path(name) # determine synchronizer - kwargs.setdefault('synchronizer', self._synchronizer) - kwargs.setdefault('cache_attrs', self.attrs.cache) + kwargs.setdefault("synchronizer", self._synchronizer) + kwargs.setdefault("cache_attrs", self.attrs.cache) # create array if data is None: - a = create(store=self._store, path=path, chunk_store=self._chunk_store, - **kwargs) + a = create(store=self._store, path=path, chunk_store=self._chunk_store, **kwargs) else: - a = array(data, store=self._store, path=path, chunk_store=self._chunk_store, - **kwargs) + a = array(data, store=self._store, path=path, chunk_store=self._chunk_store, **kwargs) return a @@ -1084,11 +1133,11 @@ def require_dataset(self, name, shape, dtype=None, exact=False, **kwargs): """ - return self._write_op(self._require_dataset_nosync, name, shape=shape, - dtype=dtype, exact=exact, **kwargs) + return self._write_op( + self._require_dataset_nosync, name, shape=shape, dtype=dtype, exact=exact, **kwargs + ) - def _require_dataset_nosync(self, name, shape, dtype=None, exact=False, - **kwargs): + def _require_dataset_nosync(self, name, shape, dtype=None, exact=False, **kwargs): path = self._item_path(name) @@ -1096,31 +1145,37 @@ def _require_dataset_nosync(self, name, shape, dtype=None, exact=False, # array already exists at path, validate that it is the right shape and type - synchronizer = kwargs.get('synchronizer', self._synchronizer) - cache_metadata = 
kwargs.get('cache_metadata', True) - cache_attrs = kwargs.get('cache_attrs', self.attrs.cache) - a = Array(self._store, path=path, read_only=self._read_only, - chunk_store=self._chunk_store, synchronizer=synchronizer, - cache_metadata=cache_metadata, cache_attrs=cache_attrs, - meta_array=self._meta_array) + synchronizer = kwargs.get("synchronizer", self._synchronizer) + cache_metadata = kwargs.get("cache_metadata", True) + cache_attrs = kwargs.get("cache_attrs", self.attrs.cache) + a = Array( + self._store, + path=path, + read_only=self._read_only, + chunk_store=self._chunk_store, + synchronizer=synchronizer, + cache_metadata=cache_metadata, + cache_attrs=cache_attrs, + meta_array=self._meta_array, + ) shape = normalize_shape(shape) if shape != a.shape: - raise TypeError('shape do not match existing array; expected {}, got {}' - .format(a.shape, shape)) + raise TypeError( + "shapes do not match existing array; expected {}, got {}".format(a.shape, shape) + ) dtype = np.dtype(dtype) if exact: if dtype != a.dtype: - raise TypeError('dtypes do not match exactly; expected {}, got {}' - .format(a.dtype, dtype)) + raise TypeError( + "dtypes do not match exactly; expected {}, got {}".format(a.dtype, dtype) + ) else: if not np.can_cast(dtype, a.dtype): - raise TypeError('dtypes ({}, {}) cannot be safely cast' - .format(dtype, a.dtype)) + raise TypeError("dtypes ({}, {}) cannot be safely cast".format(dtype, a.dtype)) return a else: - return self._create_dataset_nosync(name, shape=shape, dtype=dtype, - **kwargs) + return self._create_dataset_nosync(name, shape=shape, dtype=dtype, **kwargs) def create(self, name, **kwargs): """Create an array. Keyword arguments as per @@ -1129,10 +1184,9 @@ def create(self, name, **kwargs): def _create_nosync(self, name, **kwargs): path = self._item_path(name) - kwargs.setdefault('synchronizer', self._synchronizer) - kwargs.setdefault('cache_attrs', self.attrs.cache) - return create(store=self._store, path=path, chunk_store=self._chunk_store, - **kwargs) + kwargs.setdefault("synchronizer", self._synchronizer) + kwargs.setdefault("cache_attrs", self.attrs.cache) + return create(store=self._store, path=path, chunk_store=self._chunk_store, **kwargs) def empty(self, name, **kwargs): """Create an array. Keyword arguments as per @@ -1141,10 +1195,9 @@ def empty(self, name, **kwargs): def _empty_nosync(self, name, **kwargs): path = self._item_path(name) - kwargs.setdefault('synchronizer', self._synchronizer) - kwargs.setdefault('cache_attrs', self.attrs.cache) - return empty(store=self._store, path=path, chunk_store=self._chunk_store, - **kwargs) + kwargs.setdefault("synchronizer", self._synchronizer) + kwargs.setdefault("cache_attrs", self.attrs.cache) + return empty(store=self._store, path=path, chunk_store=self._chunk_store, **kwargs) def zeros(self, name, **kwargs): """Create an array. Keyword arguments as per @@ -1153,10 +1206,9 @@ def zeros(self, name, **kwargs): def _zeros_nosync(self, name, **kwargs): path = self._item_path(name) - kwargs.setdefault('synchronizer', self._synchronizer) - kwargs.setdefault('cache_attrs', self.attrs.cache) - return zeros(store=self._store, path=path, chunk_store=self._chunk_store, - **kwargs) + kwargs.setdefault("synchronizer", self._synchronizer) + kwargs.setdefault("cache_attrs", self.attrs.cache) + return zeros(store=self._store, path=path, chunk_store=self._chunk_store, **kwargs) def ones(self, name, **kwargs): """Create an array.
Keyword arguments as per @@ -1165,8 +1217,8 @@ def ones(self, name, **kwargs): def _ones_nosync(self, name, **kwargs): path = self._item_path(name) - kwargs.setdefault('synchronizer', self._synchronizer) - kwargs.setdefault('cache_attrs', self.attrs.cache) + kwargs.setdefault("synchronizer", self._synchronizer) + kwargs.setdefault("cache_attrs", self.attrs.cache) return ones(store=self._store, path=path, chunk_store=self._chunk_store, **kwargs) def full(self, name, fill_value, **kwargs): @@ -1176,10 +1228,15 @@ def full(self, name, fill_value, **kwargs): def _full_nosync(self, name, fill_value, **kwargs): path = self._item_path(name) - kwargs.setdefault('synchronizer', self._synchronizer) - kwargs.setdefault('cache_attrs', self.attrs.cache) - return full(store=self._store, path=path, chunk_store=self._chunk_store, - fill_value=fill_value, **kwargs) + kwargs.setdefault("synchronizer", self._synchronizer) + kwargs.setdefault("cache_attrs", self.attrs.cache) + return full( + store=self._store, + path=path, + chunk_store=self._chunk_store, + fill_value=fill_value, + **kwargs + ) def array(self, name, data, **kwargs): """Create an array. Keyword arguments as per @@ -1188,10 +1245,9 @@ def array(self, name, data, **kwargs): def _array_nosync(self, name, data, **kwargs): path = self._item_path(name) - kwargs.setdefault('synchronizer', self._synchronizer) - kwargs.setdefault('cache_attrs', self.attrs.cache) - return array(data, store=self._store, path=path, chunk_store=self._chunk_store, - **kwargs) + kwargs.setdefault("synchronizer", self._synchronizer) + kwargs.setdefault("cache_attrs", self.attrs.cache) + return array(data, store=self._store, path=path, chunk_store=self._chunk_store, **kwargs) def empty_like(self, name, data, **kwargs): """Create an array. Keyword arguments as per @@ -1200,10 +1256,11 @@ def empty_like(self, name, data, **kwargs): def _empty_like_nosync(self, name, data, **kwargs): path = self._item_path(name) - kwargs.setdefault('synchronizer', self._synchronizer) - kwargs.setdefault('cache_attrs', self.attrs.cache) - return empty_like(data, store=self._store, path=path, - chunk_store=self._chunk_store, **kwargs) + kwargs.setdefault("synchronizer", self._synchronizer) + kwargs.setdefault("cache_attrs", self.attrs.cache) + return empty_like( + data, store=self._store, path=path, chunk_store=self._chunk_store, **kwargs + ) def zeros_like(self, name, data, **kwargs): """Create an array. Keyword arguments as per @@ -1212,10 +1269,11 @@ def zeros_like(self, name, data, **kwargs): def _zeros_like_nosync(self, name, data, **kwargs): path = self._item_path(name) - kwargs.setdefault('synchronizer', self._synchronizer) - kwargs.setdefault('cache_attrs', self.attrs.cache) - return zeros_like(data, store=self._store, path=path, - chunk_store=self._chunk_store, **kwargs) + kwargs.setdefault("synchronizer", self._synchronizer) + kwargs.setdefault("cache_attrs", self.attrs.cache) + return zeros_like( + data, store=self._store, path=path, chunk_store=self._chunk_store, **kwargs + ) def ones_like(self, name, data, **kwargs): """Create an array. 
Keyword arguments as per @@ -1224,10 +1282,11 @@ def ones_like(self, name, data, **kwargs): def _ones_like_nosync(self, name, data, **kwargs): path = self._item_path(name) - kwargs.setdefault('synchronizer', self._synchronizer) - kwargs.setdefault('cache_attrs', self.attrs.cache) - return ones_like(data, store=self._store, path=path, - chunk_store=self._chunk_store, **kwargs) + kwargs.setdefault("synchronizer", self._synchronizer) + kwargs.setdefault("cache_attrs", self.attrs.cache) + return ones_like( + data, store=self._store, path=path, chunk_store=self._chunk_store, **kwargs + ) def full_like(self, name, data, **kwargs): """Create an array. Keyword arguments as per @@ -1236,10 +1295,11 @@ def full_like(self, name, data, **kwargs): def _full_like_nosync(self, name, data, **kwargs): path = self._item_path(name) - kwargs.setdefault('synchronizer', self._synchronizer) - kwargs.setdefault('cache_attrs', self.attrs.cache) - return full_like(data, store=self._store, path=path, - chunk_store=self._chunk_store, **kwargs) + kwargs.setdefault("synchronizer", self._synchronizer) + kwargs.setdefault("cache_attrs", self.attrs.cache) + return full_like( + data, store=self._store, path=path, chunk_store=self._chunk_store, **kwargs + ) def _move_nosync(self, path, new_path): rename(self._store, path, new_path) @@ -1261,11 +1321,14 @@ def move(self, source, dest): dest = self._item_path(dest) # Check that source exists. - if not (contains_array(self._store, source) or - contains_group(self._store, source, explicit_only=False)): + if not ( + contains_array(self._store, source) + or contains_group(self._store, source, explicit_only=False) + ): raise ValueError('The source, "%s", does not exist.' % source) - if (contains_array(self._store, dest) or - contains_group(self._store, dest, explicit_only=False)): + if contains_array(self._store, dest) or contains_group( + self._store, dest, explicit_only=False + ): raise ValueError('The dest, "%s", already exists.' % dest) # Ensure groups needed for `dest` exist. @@ -1275,23 +1338,30 @@ def move(self, source, dest): self._write_op(self._move_nosync, source, dest) -def _normalize_store_arg(store, *, storage_options=None, mode="r", - zarr_version=None): +def _normalize_store_arg(store, *, storage_options=None, mode="r", zarr_version=None): if zarr_version is None: - zarr_version = getattr(store, '_store_version', DEFAULT_ZARR_VERSION) + zarr_version = getattr(store, "_store_version", DEFAULT_ZARR_VERSION) if zarr_version != 2: assert_zarr_v3_api_available() if store is None: return MemoryStore() if zarr_version == 2 else MemoryStoreV3() - return normalize_store_arg(store, - storage_options=storage_options, mode=mode, - zarr_version=zarr_version) - - -def group(store=None, overwrite=False, chunk_store=None, - cache_attrs=True, synchronizer=None, path=None, *, zarr_version=None): + return normalize_store_arg( + store, storage_options=storage_options, mode=mode, zarr_version=zarr_version + ) + + +def group( + store=None, + overwrite=False, + chunk_store=None, + cache_attrs=True, + synchronizer=None, + path=None, + *, + zarr_version=None +): """Create a group. 
Parameters @@ -1336,9 +1406,9 @@ def group(store=None, overwrite=False, chunk_store=None, """ # handle polymorphic store arg - store = _normalize_store_arg(store, zarr_version=zarr_version, mode='w') + store = _normalize_store_arg(store, zarr_version=zarr_version, mode="w") if zarr_version is None: - zarr_version = getattr(store, '_store_version', DEFAULT_ZARR_VERSION) + zarr_version = getattr(store, "_store_version", DEFAULT_ZARR_VERSION) if zarr_version != 2: assert_zarr_v3_api_available() @@ -1352,16 +1422,31 @@ def group(store=None, overwrite=False, chunk_store=None, requires_init = overwrite or not contains_group(store, path) if requires_init: - init_group(store, overwrite=overwrite, chunk_store=chunk_store, - path=path) - - return Group(store, read_only=False, chunk_store=chunk_store, - cache_attrs=cache_attrs, synchronizer=synchronizer, path=path, - zarr_version=zarr_version) - - -def open_group(store=None, mode='a', cache_attrs=True, synchronizer=None, path=None, - chunk_store=None, storage_options=None, *, zarr_version=None, meta_array=None): + init_group(store, overwrite=overwrite, chunk_store=chunk_store, path=path) + + return Group( + store, + read_only=False, + chunk_store=chunk_store, + cache_attrs=cache_attrs, + synchronizer=synchronizer, + path=path, + zarr_version=zarr_version, + ) + + +def open_group( + store=None, + mode="a", + cache_attrs=True, + synchronizer=None, + path=None, + chunk_store=None, + storage_options=None, + *, + zarr_version=None, + meta_array=None +): """Open a group using file-mode-like semantics. Parameters @@ -1414,44 +1499,41 @@ def open_group(store=None, mode='a', cache_attrs=True, synchronizer=None, path=N # handle polymorphic store arg store = _normalize_store_arg( - store, storage_options=storage_options, mode=mode, - zarr_version=zarr_version) + store, storage_options=storage_options, mode=mode, zarr_version=zarr_version + ) if zarr_version is None: - zarr_version = getattr(store, '_store_version', DEFAULT_ZARR_VERSION) + zarr_version = getattr(store, "_store_version", DEFAULT_ZARR_VERSION) if zarr_version != 2: assert_zarr_v3_api_available() if chunk_store is not None: - chunk_store = _normalize_store_arg(chunk_store, - storage_options=storage_options, - mode=mode, - zarr_version=zarr_version) - if getattr(chunk_store, '_store_version', DEFAULT_ZARR_VERSION) != zarr_version: - raise ValueError( # pragma: no cover - "zarr_version of store and chunk_store must match" - ) + chunk_store = _normalize_store_arg( + chunk_store, storage_options=storage_options, mode=mode, zarr_version=zarr_version + ) + if getattr(chunk_store, "_store_version", DEFAULT_ZARR_VERSION) != zarr_version: + raise ValueError("zarr_version of store and chunk_store must match") # pragma: no cover path = normalize_storage_path(path) # ensure store is initialized - if mode in ['r', 'r+']: + if mode in ["r", "r+"]: if not contains_group(store, path=path): if contains_array(store, path=path): raise ContainsArrayError(path) raise GroupNotFoundError(path) - elif mode == 'w': + elif mode == "w": init_group(store, overwrite=True, path=path, chunk_store=chunk_store) - elif mode == 'a': + elif mode == "a": if not contains_group(store, path=path): if contains_array(store, path=path): raise ContainsArrayError(path) init_group(store, path=path, chunk_store=chunk_store) - elif mode in ['w-', 'x']: + elif mode in ["w-", "x"]: if contains_array(store, path=path): raise ContainsArrayError(path) elif contains_group(store, path=path): @@ -1460,8 +1542,15 @@ def open_group(store=None, mode='a', 
cache_attrs=True, synchronizer=None, path=N init_group(store, path=path, chunk_store=chunk_store) # determine read only status - read_only = mode == 'r' - - return Group(store, read_only=read_only, cache_attrs=cache_attrs, - synchronizer=synchronizer, path=path, chunk_store=chunk_store, - zarr_version=zarr_version, meta_array=meta_array) + read_only = mode == "r" + + return Group( + store, + read_only=read_only, + cache_attrs=cache_attrs, + synchronizer=synchronizer, + path=path, + chunk_store=chunk_store, + zarr_version=zarr_version, + meta_array=meta_array, + ) diff --git a/zarr/indexing.py b/zarr/indexing.py index bc2afba992..487cc8b9d9 100644 --- a/zarr/indexing.py +++ b/zarr/indexing.py @@ -34,17 +34,14 @@ def is_integer_list(x): def is_integer_array(x, ndim=None): - t = not np.isscalar(x) and \ - hasattr(x, 'shape') and \ - hasattr(x, 'dtype') and \ - x.dtype.kind in 'ui' + t = not np.isscalar(x) and hasattr(x, "shape") and hasattr(x, "dtype") and x.dtype.kind in "ui" if ndim is not None: t = t and len(x.shape) == ndim return t def is_bool_array(x, ndim=None): - t = hasattr(x, 'shape') and hasattr(x, 'dtype') and x.dtype == bool + t = hasattr(x, "shape") and hasattr(x, "dtype") and x.dtype == bool if ndim is not None: t = t and len(x.shape) == ndim return t @@ -80,24 +77,15 @@ def is_pure_fancy_indexing(selection, ndim): no_slicing = ( isinstance(selection, tuple) and len(selection) == ndim - and not ( - any(isinstance(elem, slice) or elem is Ellipsis - for elem in selection) - ) + and not (any(isinstance(elem, slice) or elem is Ellipsis for elem in selection)) ) return ( - no_slicing and - all( - is_integer(elem) - or is_integer_list(elem) - or is_integer_array(elem) - for elem in selection - ) and - any( - is_integer_list(elem) - or is_integer_array(elem) + no_slicing + and all( + is_integer(elem) or is_integer_list(elem) or is_integer_array(elem) for elem in selection ) + and any(is_integer_list(elem) or is_integer_array(elem) for elem in selection) ) @@ -112,12 +100,13 @@ def is_pure_orthogonal_indexing(selection, ndim): # Case two: selection contains either zero or one integer iterables. # All other selection elements are slices or integers return ( - isinstance(selection, tuple) and len(selection) == ndim and - sum(is_integer_list(elem) or is_integer_array(elem) for elem in selection) <= 1 and - all( - is_integer_list(elem) or is_integer_array(elem) - or isinstance(elem, (int, slice)) for - elem in selection) + isinstance(selection, tuple) + and len(selection) == ndim + and sum(is_integer_list(elem) or is_integer_array(elem) for elem in selection) <= 1 + and all( + is_integer_list(elem) or is_integer_array(elem) or isinstance(elem, (int, slice)) + for elem in selection + ) ) @@ -138,8 +127,7 @@ def normalize_integer_selection(dim_sel, dim_len): ChunkDimProjection = collections.namedtuple( - 'ChunkDimProjection', - ('dim_chunk_ix', 'dim_chunk_sel', 'dim_out_sel') + "ChunkDimProjection", ("dim_chunk_ix", "dim_chunk_sel", "dim_out_sel") ) """A mapping from chunk to output array for a single dimension. 
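The predicates just defined, ``is_pure_fancy_indexing`` and ``is_pure_orthogonal_indexing``, are what route a selection to zarr's pointwise versus outer indexing paths. A minimal doctest-style sketch of the observable difference through the public ``vindex``/``oindex`` accessors (toy array for illustration; not part of this patch)::

    >>> import numpy as np
    >>> import zarr
    >>> z = zarr.array(np.arange(25).reshape(5, 5), chunks=(2, 2))
    >>> z.oindex[[0, 2], [0, 3]]  # outer selection: rows {0, 2} x cols {0, 3}
    array([[ 0,  3],
           [10, 13]])
    >>> z.vindex[[0, 2], [0, 3]]  # pointwise selection: points (0, 0) and (2, 3)
    array([ 0, 13])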
@@ -156,7 +144,6 @@ def normalize_integer_selection(dim_sel, dim_len): class IntDimIndexer: - def __init__(self, dim_sel, dim_len, dim_chunk_len): # normalize @@ -181,7 +168,6 @@ def ceildiv(a, b): class SliceDimIndexer: - def __init__(self, dim_sel, dim_len, dim_chunk_len): # normalize @@ -234,8 +220,7 @@ def __iter__(self): dim_chunk_sel_stop = self.stop - dim_offset dim_chunk_sel = slice(dim_chunk_sel_start, dim_chunk_sel_stop, self.step) - dim_chunk_nitems = ceildiv((dim_chunk_sel_stop - dim_chunk_sel_start), - self.step) + dim_chunk_nitems = ceildiv((dim_chunk_sel_stop - dim_chunk_sel_start), self.step) # If there are no elements on the selection within this chunk, then skip if dim_chunk_nitems == 0: @@ -291,8 +276,7 @@ def replace_ellipsis(selection, shape): def replace_lists(selection): return tuple( - np.asarray(dim_sel) if isinstance(dim_sel, list) else dim_sel - for dim_sel in selection + np.asarray(dim_sel) if isinstance(dim_sel, list) else dim_sel for dim_sel in selection ) @@ -303,8 +287,7 @@ def ensure_tuple(v): ChunkProjection = collections.namedtuple( - 'ChunkProjection', - ('chunk_coords', 'chunk_selection', 'out_selection') + "ChunkProjection", ("chunk_coords", "chunk_selection", "out_selection") ) """A mapping of items from chunk to output array. Can be used to extract items from the chunk array for loading into an output array. Can also be used to extract items from a @@ -336,10 +319,7 @@ def is_positive_slice(s): def is_contiguous_selection(selection): selection = ensure_tuple(selection) - return all( - (is_integer_array(s) or is_contiguous_slice(s) or s == Ellipsis) - for s in selection - ) + return all((is_integer_array(s) or is_contiguous_slice(s) or s == Ellipsis) for s in selection) def is_basic_selection(selection): @@ -349,7 +329,6 @@ def is_basic_selection(selection): # noinspection PyProtectedMember class BasicIndexer: - def __init__(self, selection, array): # handle ellipsis @@ -357,8 +336,7 @@ def __init__(self, selection, array): # setup per-dimension indexers dim_indexers = [] - for dim_sel, dim_len, dim_chunk_len in \ - zip(selection, array._shape, array._chunks): + for dim_sel, dim_len, dim_chunk_len in zip(selection, array._shape, array._chunks): if is_integer(dim_sel): dim_indexer = IntDimIndexer(dim_sel, dim_len, dim_chunk_len) @@ -367,15 +345,15 @@ def __init__(self, selection, array): dim_indexer = SliceDimIndexer(dim_sel, dim_len, dim_chunk_len) else: - raise IndexError('unsupported selection item for basic indexing; ' - 'expected integer or slice, got {!r}' - .format(type(dim_sel))) + raise IndexError( + "unsupported selection item for basic indexing; " + "expected integer or slice, got {!r}".format(type(dim_sel)) + ) dim_indexers.append(dim_indexer) self.dim_indexers = dim_indexers - self.shape = tuple(s.nitems for s in self.dim_indexers - if not isinstance(s, IntDimIndexer)) + self.shape = tuple(s.nitems for s in self.dim_indexers if not isinstance(s, IntDimIndexer)) self.drop_axes = None def __iter__(self): @@ -383,25 +361,28 @@ def __iter__(self): chunk_coords = tuple(p.dim_chunk_ix for p in dim_projections) chunk_selection = tuple(p.dim_chunk_sel for p in dim_projections) - out_selection = tuple(p.dim_out_sel for p in dim_projections - if p.dim_out_sel is not None) + out_selection = tuple( + p.dim_out_sel for p in dim_projections if p.dim_out_sel is not None + ) yield ChunkProjection(chunk_coords, chunk_selection, out_selection) class BoolArrayDimIndexer: - def __init__(self, dim_sel, dim_len, dim_chunk_len): # check number of dimensions if 
not is_bool_array(dim_sel, 1): - raise IndexError('Boolean arrays in an orthogonal selection must ' - 'be 1-dimensional only') + raise IndexError( + "Boolean arrays in an orthogonal selection must " "be 1-dimensional only" + ) # check shape if dim_sel.shape[0] != dim_len: - raise IndexError('Boolean array has the wrong length for dimension; ' - 'expected {}, got {}'.format(dim_len, dim_sel.shape[0])) + raise IndexError( + "Boolean array has the wrong length for dimension; " + "expected {}, got {}".format(dim_len, dim_sel.shape[0]) + ) # store attributes self.dim_sel = dim_sel @@ -410,11 +391,11 @@ def __init__(self, dim_sel, dim_len, dim_chunk_len): self.nchunks = ceildiv(self.dim_len, self.dim_chunk_len) # precompute number of selected items for each chunk - self.chunk_nitems = np.zeros(self.nchunks, dtype='i8') + self.chunk_nitems = np.zeros(self.nchunks, dtype="i8") for dim_chunk_ix in range(self.nchunks): dim_offset = dim_chunk_ix * self.dim_chunk_len self.chunk_nitems[dim_chunk_ix] = np.count_nonzero( - self.dim_sel[dim_offset:dim_offset + self.dim_chunk_len] + self.dim_sel[dim_offset : dim_offset + self.dim_chunk_len] ) self.chunk_nitems_cumsum = np.cumsum(self.chunk_nitems) self.nitems = self.chunk_nitems_cumsum[-1] @@ -427,12 +408,12 @@ def __iter__(self): # find region in chunk dim_offset = dim_chunk_ix * self.dim_chunk_len - dim_chunk_sel = self.dim_sel[dim_offset:dim_offset + self.dim_chunk_len] + dim_chunk_sel = self.dim_sel[dim_offset : dim_offset + self.dim_chunk_len] # pad out if final chunk if dim_chunk_sel.shape[0] < self.dim_chunk_len: tmp = np.zeros(self.dim_chunk_len, dtype=bool) - tmp[:dim_chunk_sel.shape[0]] = dim_chunk_sel + tmp[: dim_chunk_sel.shape[0]] = dim_chunk_sel dim_chunk_sel = tmp # find region in output @@ -482,14 +463,22 @@ def boundscheck_indices(x, dim_len): class IntArrayDimIndexer: """Integer array selection against a single dimension.""" - def __init__(self, dim_sel, dim_len, dim_chunk_len, wraparound=True, boundscheck=True, - order=Order.UNKNOWN): + def __init__( + self, + dim_sel, + dim_len, + dim_chunk_len, + wraparound=True, + boundscheck=True, + order=Order.UNKNOWN, + ): # ensure 1d array dim_sel = np.asanyarray(dim_sel) if not is_integer_array(dim_sel, 1): - raise IndexError('integer arrays in an orthogonal selection must be ' - '1-dimensional only') + raise IndexError( + "integer arrays in an orthogonal selection must be " "1-dimensional only" + ) # handle wraparound if wraparound: @@ -570,10 +559,14 @@ def ix_(selection, shape): selection = replace_ellipsis(selection, shape) # replace slice and int as these are not supported by numpy.ix_ - selection = [slice_to_range(dim_sel, dim_len) if isinstance(dim_sel, slice) - else [dim_sel] if is_integer(dim_sel) - else dim_sel - for dim_sel, dim_len in zip(selection, shape)] + selection = [ + slice_to_range(dim_sel, dim_len) + if isinstance(dim_sel, slice) + else [dim_sel] + if is_integer(dim_sel) + else dim_sel + for dim_sel, dim_len in zip(selection, shape) + ] # now get numpy to convert to a coordinate selection selection = np.ix_(*selection) @@ -608,7 +601,6 @@ def oindex_set(a, selection, value): # noinspection PyProtectedMember class OrthogonalIndexer: - def __init__(self, selection, array): # handle ellipsis @@ -619,8 +611,7 @@ def __init__(self, selection, array): # setup per-dimension indexers dim_indexers = [] - for dim_sel, dim_len, dim_chunk_len in \ - zip(selection, array._shape, array._chunks): + for dim_sel, dim_len, dim_chunk_len in zip(selection, array._shape, array._chunks): if 
is_integer(dim_sel): dim_indexer = IntDimIndexer(dim_sel, dim_len, dim_chunk_len) @@ -635,21 +626,24 @@ def __init__(self, selection, array): dim_indexer = BoolArrayDimIndexer(dim_sel, dim_len, dim_chunk_len) else: - raise IndexError('unsupported selection item for orthogonal indexing; ' - 'expected integer, slice, integer array or Boolean ' - 'array, got {!r}' - .format(type(dim_sel))) + raise IndexError( + "unsupported selection item for orthogonal indexing; " + "expected integer, slice, integer array or Boolean " + "array, got {!r}".format(type(dim_sel)) + ) dim_indexers.append(dim_indexer) self.array = array self.dim_indexers = dim_indexers - self.shape = tuple(s.nitems for s in self.dim_indexers - if not isinstance(s, IntDimIndexer)) + self.shape = tuple(s.nitems for s in self.dim_indexers if not isinstance(s, IntDimIndexer)) self.is_advanced = not is_basic_selection(selection) if self.is_advanced: - self.drop_axes = tuple(i for i, dim_indexer in enumerate(self.dim_indexers) - if isinstance(dim_indexer, IntDimIndexer)) + self.drop_axes = tuple( + i + for i, dim_indexer in enumerate(self.dim_indexers) + if isinstance(dim_indexer, IntDimIndexer) + ) else: self.drop_axes = None @@ -658,8 +652,9 @@ def __iter__(self): chunk_coords = tuple(p.dim_chunk_ix for p in dim_projections) chunk_selection = tuple(p.dim_chunk_sel for p in dim_projections) - out_selection = tuple(p.dim_out_sel for p in dim_projections - if p.dim_out_sel is not None) + out_selection = tuple( + p.dim_out_sel for p in dim_projections if p.dim_out_sel is not None + ) # handle advanced indexing arrays orthogonally if self.is_advanced: @@ -678,7 +673,6 @@ def __iter__(self): class OIndex: - def __init__(self, array): self.array = array @@ -697,7 +691,6 @@ def __setitem__(self, selection, value): # noinspection PyProtectedMember class BlockIndexer: - def __init__(self, selection, array): # handle ellipsis @@ -708,8 +701,7 @@ def __init__(self, selection, array): # setup per-dimension indexers dim_indexers = [] - for dim_sel, dim_len, dim_chunk_size in \ - zip(selection, array._shape, array._chunks): + for dim_sel, dim_len, dim_chunk_size in zip(selection, array._shape, array._chunks): dim_numchunks = int(np.ceil(dim_len / dim_chunk_size)) if is_integer(dim_sel): @@ -725,9 +717,10 @@ def __init__(self, selection, array): stop = dim_sel.stop if dim_sel.stop is not None else dim_numchunks if dim_sel.step not in {1, None}: - raise IndexError('unsupported selection item for block indexing; ' - 'expected integer or slice with step=1, got {!r}' - .format(type(dim_sel))) + raise IndexError( + "unsupported selection item for block indexing; " + "expected integer or slice with step=1, got {!r}".format(type(dim_sel)) + ) # Can't reuse wraparound_indices because it expects a numpy array # We have integers here. 
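``BlockIndexer`` above is the machinery behind zarr's block indexing, exposed on arrays as the ``.blocks`` accessor, which addresses whole chunks by grid coordinate rather than by element. A minimal doctest-style sketch, assuming the standard ``zarr.array``/``Array.blocks`` v2 API (toy array for illustration; not part of this patch)::

    >>> import zarr
    >>> z = zarr.array(range(20), chunks=5)
    >>> z.blocks[1]    # second chunk, i.e. elements 5..9
    array([5, 6, 7, 8, 9])
    >>> z.blocks[2:4]  # chunks 2 and 3, i.e. elements 10..19
    array([10, 11, 12, 13, 14, 15, 16, 17, 18, 19])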
@@ -741,9 +734,10 @@ def __init__(self, selection, array): slice_ = slice(start, stop) else: - raise IndexError('unsupported selection item for block indexing; ' - 'expected integer or slice, got {!r}' - .format(type(dim_sel))) + raise IndexError( + "unsupported selection item for block indexing; " + "expected integer or slice, got {!r}".format(type(dim_sel)) + ) dim_indexer = SliceDimIndexer(slice_, dim_len, dim_chunk_size) dim_indexers.append(dim_indexer) @@ -759,14 +753,14 @@ def __iter__(self): for dim_projections in itertools.product(*self.dim_indexers): chunk_coords = tuple(p.dim_chunk_ix for p in dim_projections) chunk_selection = tuple(p.dim_chunk_sel for p in dim_projections) - out_selection = tuple(p.dim_out_sel for p in dim_projections - if p.dim_out_sel is not None) + out_selection = tuple( + p.dim_out_sel for p in dim_projections if p.dim_out_sel is not None + ) yield ChunkProjection(chunk_coords, chunk_selection, out_selection) class BlockIndex: - def __init__(self, array): self.array = array @@ -785,25 +779,20 @@ def __setitem__(self, selection, value): # noinspection PyProtectedMember def is_coordinate_selection(selection, array): - return ( - (len(selection) == len(array._shape)) and - all(is_integer(dim_sel) or is_integer_array(dim_sel) - for dim_sel in selection) + return (len(selection) == len(array._shape)) and all( + is_integer(dim_sel) or is_integer_array(dim_sel) for dim_sel in selection ) # noinspection PyProtectedMember def is_mask_selection(selection, array): return ( - len(selection) == 1 and - is_bool_array(selection[0]) and - selection[0].shape == array._shape + len(selection) == 1 and is_bool_array(selection[0]) and selection[0].shape == array._shape ) # noinspection PyProtectedMember class CoordinateIndexer: - def __init__(self, selection, array): # some initial normalization @@ -813,9 +802,11 @@ def __init__(self, selection, array): # validation if not is_coordinate_selection(selection, array): - raise IndexError('invalid coordinate selection; expected one integer ' - '(coordinate) array per dimension of the target array, ' - 'got {!r}'.format(selection)) + raise IndexError( + "invalid coordinate selection; expected one integer " + "(coordinate) array per dimension of the target array, " + "got {!r}".format(selection) + ) # handle wraparound, boundscheck for dim_sel, dim_len in zip(selection, array.shape): @@ -828,8 +819,7 @@ def __init__(self, selection, array): # compute chunk index for each point in the selection chunks_multi_index = tuple( - dim_sel // dim_chunk_len - for (dim_sel, dim_chunk_len) in zip(selection, array._chunks) + dim_sel // dim_chunk_len for (dim_sel, dim_chunk_len) in zip(selection, array._chunks) ) # broadcast selection - this will raise error if array dimensions don't match @@ -844,8 +834,7 @@ def __init__(self, selection, array): chunks_multi_index = [dim_chunks.reshape(-1) for dim_chunks in chunks_multi_index] # ravel chunk indices - chunks_raveled_indices = np.ravel_multi_index(chunks_multi_index, - dims=array._cdata_shape) + chunks_raveled_indices = np.ravel_multi_index(chunks_multi_index, dims=array._cdata_shape) # group points by chunk if np.any(np.diff(chunks_raveled_indices) < 0): @@ -901,7 +890,6 @@ def __iter__(self): # noinspection PyProtectedMember class MaskIndexer(CoordinateIndexer): - def __init__(self, selection, array): # some initial normalization @@ -910,9 +898,10 @@ def __init__(self, selection, array): # validation if not is_mask_selection(selection, array): - raise IndexError('invalid mask selection; expected 
one Boolean (mask)' - 'array with the same shape as the target array, got {!r}' - .format(selection)) + raise IndexError( + "invalid mask selection; expected one Boolean (mask) " + "array with the same shape as the target array, got {!r}".format(selection) + ) # convert to indices selection = np.nonzero(selection[0]) @@ -922,7 +911,6 @@ class VIndex: - def __init__(self, array): self.array = array @@ -955,8 +943,10 @@ def check_fields(fields, dtype): return dtype # check type if not isinstance(fields, (str, list, tuple)): - raise IndexError("'fields' argument must be a string or list of strings; found " - "{!r}".format(type(fields))) + raise IndexError( + "'fields' argument must be a string or list of strings; found " + "{!r}".format(type(fields)) + ) if fields: if dtype.names is None: raise IndexError("invalid 'fields' argument, array does not have any fields") @@ -980,7 +970,7 @@ def check_no_multi_fields(fields): if len(fields) == 1: return fields[0] elif len(fields) > 1: - raise IndexError('multiple fields are not supported for this operation') + raise IndexError("multiple fields are not supported for this operation") return fields @@ -1009,11 +999,7 @@ def make_slice_selection(selection): ls.append(slice(int(dim_selection), int(dim_selection) + 1, 1)) elif isinstance(dim_selection, np.ndarray): if len(dim_selection) == 1: - ls.append( - slice( - int(dim_selection[0]), int(dim_selection[0]) + 1, 1 - ) - ) + ls.append(slice(int(dim_selection[0]), int(dim_selection[0]) + 1, 1)) else: raise ArrayIndexError() else: @@ -1108,10 +1094,10 @@ def __init__(self, selection, arr_shape): def __iter__(self): chunk1 = self.chunk_loc_slices[0] nitems = (chunk1[-1].stop - chunk1[-1].start) * np.prod( self.arr_shape[len(chunk1) :], dtype=int ) for partial_out_selection in self.chunk_loc_slices: start = 0 for i, sl in enumerate(partial_out_selection): - start += sl.start * np.prod(self.arr_shape[i + 1:], dtype=int) + start += sl.start * np.prod(self.arr_shape[i + 1 :], dtype=int) yield start, nitems, partial_out_selection diff --git a/zarr/meta.py b/zarr/meta.py index aacffd7f77..48791ddf17 100644 --- a/zarr/meta.py +++ b/zarr/meta.py @@ -27,15 +27,11 @@ "extensions": [], } -_v3_core_types = set( "".join(d) for d in itertools.product("<>", ("u", "i", "f"), ("2", "4", "8")) ) +_v3_core_types = set("".join(d) for d in itertools.product("<>", ("u", "i", "f"), ("2", "4", "8"))) _v3_core_types = {"bool", "i1", "u1"} | _v3_core_types # The set of complex types allowed ({"<c8", ">c8", "<c16", ">c16"}) -_v3_complex_types = set( f"{end}c{_bytes}" for end, _bytes in itertools.product("<>", ("8", "16")) ) +_v3_complex_types = set(f"{end}c{_bytes}" for end, _bytes in itertools.product("<>", ("8", "16"))) # All dtype.str values corresponding to datetime64 and timedelta64 # see: https://numpy.org/doc/stable/reference/arrays.datetime.html#datetime-units @@ -43,7 +39,7 @@ _time_units = ["h", "m", "s", "ms", "us", "μs", "ns", "ps", "fs", "as"] _v3_datetime_types = set( f"{end}{kind}8[{unit}]" - for end, unit, kind in itertools.product("<>", _date_units + _time_units, ('m', 'M')) + for end, unit, kind in itertools.product("<>", _date_units + _time_units, ("m", "M")) ) @@ -217,9 +213,7 @@ def encode_group_metadata(cls, meta=None) -> bytes: return json_dumps(meta) @classmethod - def decode_fill_value( - cls, v: Any, dtype: np.dtype, object_codec: Any = None - ) -> 
Any: # early out if v is None: return v @@ -267,9 +261,7 @@ def decode_fill_value( return np.array(v, dtype=dtype)[()] @classmethod - def encode_fill_value( - cls, v: Any, dtype: np.dtype, object_codec: Any = None - ) -> Any: + def encode_fill_value(cls, v: Any, dtype: np.dtype, object_codec: Any = None) -> Any: # early out if v is None: return v @@ -318,11 +310,9 @@ def decode_dtype(cls, d, validate=True): if isinstance(d, dict): # extract the type from the extension info try: - d = d['type'] + d = d["type"] except KeyError: - raise KeyError( - "Extended dtype info must provide a key named 'type'." - ) + raise KeyError("Extended dtype info must provide a key named 'type'.") d = cls._decode_dtype_descr(d) dtype = np.dtype(d) if validate: @@ -389,9 +379,7 @@ def encode_hierarchy_metadata(cls, meta=None) -> bytes: return json_dumps(meta) @classmethod - def decode_hierarchy_metadata( - cls, s: Union[MappingType, bytes, str] - ) -> MappingType[str, Any]: + def decode_hierarchy_metadata(cls, s: Union[MappingType, bytes, str]) -> MappingType[str, Any]: meta = cls.parse_metadata(s) # check metadata format # zarr_format = meta.get("zarr_format", None) @@ -414,7 +402,7 @@ def _encode_codec_metadata(cls, codec: Codec) -> Optional[Mapping]: # only support gzip for now config = codec.get_config() del config["id"] - uri = 'https://purl.org/zarr/spec/codec/' + uri = "https://purl.org/zarr/spec/codec/" if isinstance(codec, numcodecs.GZip): uri = uri + "gzip/1.0" elif isinstance(codec, numcodecs.Zlib): @@ -438,19 +426,19 @@ def _decode_codec_metadata(cls, meta: Optional[Mapping]) -> Optional[Codec]: if meta is None: return None - uri = 'https://purl.org/zarr/spec/codec/' - conf = meta['configuration'] - if meta['codec'].startswith(uri + 'gzip/'): + uri = "https://purl.org/zarr/spec/codec/" + conf = meta["configuration"] + if meta["codec"].startswith(uri + "gzip/"): conf["id"] = "gzip" - elif meta['codec'].startswith(uri + 'zlib/'): + elif meta["codec"].startswith(uri + "zlib/"): conf["id"] = "zlib" - elif meta['codec'].startswith(uri + 'blosc/'): + elif meta["codec"].startswith(uri + "blosc/"): conf["id"] = "blosc" - elif meta['codec'].startswith(uri + 'bz2/'): + elif meta["codec"].startswith(uri + "bz2/"): conf["id"] = "bz2" - elif meta['codec'].startswith(uri + 'lz4/'): + elif meta["codec"].startswith(uri + "lz4/"): conf["id"] = "lz4" - elif meta['codec'].startswith(uri + 'lzma/'): + elif meta["codec"].startswith(uri + "lzma/"): conf["id"] = "lzma" else: raise NotImplementedError @@ -461,8 +449,7 @@ def _decode_codec_metadata(cls, meta: Optional[Mapping]) -> Optional[Codec]: @classmethod def _encode_storage_transformer_metadata( - cls, - storage_transformer: "StorageTransformer" + cls, storage_transformer: "StorageTransformer" ) -> Optional[Mapping]: return { "extension": storage_transformer.extension_uri, @@ -478,9 +465,9 @@ def _decode_storage_transformer_metadata(cls, meta: Mapping) -> "StorageTransfor # This might be changed to a proper registry in the future KNOWN_STORAGE_TRANSFORMERS = [DummyStorageTransfomer, ShardingStorageTransformer] - conf = meta.get('configuration', {}) - extension_uri = meta['extension'] - transformer_type = meta['type'] + conf = meta.get("configuration", {}) + extension_uri = meta["extension"] + transformer_type = meta["type"] for StorageTransformerCls in KNOWN_STORAGE_TRANSFORMERS: if StorageTransformerCls.extension_uri == extension_uri: @@ -527,9 +514,9 @@ def decode_array_metadata(cls, s: Union[MappingType, bytes, str]) -> MappingType ) # compressor field should be 
absent when there is no compression if compressor: - meta['compressor'] = compressor + meta["compressor"] = compressor if storage_transformers: - meta['storage_transformers'] = storage_transformers + meta["storage_transformers"] = storage_transformers except Exception as e: raise MetadataError("error decoding metadata: %s" % e) diff --git a/zarr/meta_v1.py b/zarr/meta_v1.py index bc4ae12228..4ac381f2ca 100644 --- a/zarr/meta_v1.py +++ b/zarr/meta_v1.py @@ -6,24 +6,24 @@ def decode_metadata(b): - s = str(b, 'ascii') + s = str(b, "ascii") meta = json.loads(s) - zarr_format = meta.get('zarr_format', None) + zarr_format = meta.get("zarr_format", None) if zarr_format != 1: - raise MetadataError('unsupported zarr format: %s' % zarr_format) + raise MetadataError("unsupported zarr format: %s" % zarr_format) try: meta = dict( - zarr_format=meta['zarr_format'], - shape=tuple(meta['shape']), - chunks=tuple(meta['chunks']), - dtype=decode_dtype(meta['dtype']), - compression=meta['compression'], - compression_opts=meta['compression_opts'], - fill_value=meta['fill_value'], - order=meta['order'], + zarr_format=meta["zarr_format"], + shape=tuple(meta["shape"]), + chunks=tuple(meta["chunks"]), + dtype=decode_dtype(meta["dtype"]), + compression=meta["compression"], + compression_opts=meta["compression_opts"], + fill_value=meta["fill_value"], + order=meta["order"], ) except Exception as e: - raise MetadataError('error decoding metadata: %s' % e) + raise MetadataError("error decoding metadata: %s" % e) else: return meta @@ -31,16 +31,16 @@ def decode_metadata(b): def encode_metadata(meta): meta = dict( zarr_format=1, - shape=meta['shape'], - chunks=meta['chunks'], - dtype=encode_dtype(meta['dtype']), - compression=meta['compression'], - compression_opts=meta['compression_opts'], - fill_value=meta['fill_value'], - order=meta['order'], + shape=meta["shape"], + chunks=meta["chunks"], + dtype=encode_dtype(meta["dtype"]), + compression=meta["compression"], + compression_opts=meta["compression_opts"], + fill_value=meta["fill_value"], + order=meta["order"], ) s = json.dumps(meta, indent=4, sort_keys=True, ensure_ascii=True) - b = s.encode('ascii') + b = s.encode("ascii") return b diff --git a/zarr/n5.py b/zarr/n5.py index 1eb6ef2b33..7e73905527 100644 --- a/zarr/n5.py +++ b/zarr/n5.py @@ -18,16 +18,16 @@ from .storage import attrs_key as zarr_attrs_key from .storage import group_meta_key as zarr_group_meta_key -N5_FORMAT = '2.0.0' +N5_FORMAT = "2.0.0" zarr_to_n5_keys = [ - ('chunks', 'blockSize'), - ('dtype', 'dataType'), - ('compressor', 'compression'), - ('shape', 'dimensions') + ("chunks", "blockSize"), + ("dtype", "dataType"), + ("compressor", "compression"), + ("shape", "dimensions"), ] -n5_attrs_key = 'attributes.json' -n5_keywords = ['n5', 'dataType', 'dimensions', 'blockSize', 'compression'] +n5_attrs_key = "attributes.json" +n5_keywords = ["n5", "dataType", "dimensions", "blockSize", "compression"] class N5Store(NestedDirectoryStore): @@ -173,13 +173,13 @@ def __contains__(self, key): if key_new not in self: return False # group if not a dataset (attributes do not contain 'dimensions') - return 'dimensions' not in self._load_n5_attrs(key_new) + return "dimensions" not in self._load_n5_attrs(key_new) elif key.endswith(zarr_array_meta_key): key_new = key.replace(zarr_array_meta_key, n5_attrs_key) # array if attributes contain 'dimensions' - return 'dimensions' in self._load_n5_attrs(key_new) + return "dimensions" in self._load_n5_attrs(key_new) elif key.endswith(zarr_attrs_key): @@ -195,10 +195,7 @@ def 
__contains__(self, key): return super().__contains__(key_new) def __eq__(self, other): - return ( - isinstance(other, N5Store) and - self.path == other.path - ) + return isinstance(other, N5Store) and self.path == other.path def listdir(self, path: Optional[str] = None): @@ -229,7 +226,7 @@ def listdir(self, path: Optional[str] = None): for file_name in file_names: file_path = os.path.join(dir_path, file_name) rel_path = file_path.split(root_path + os.path.sep)[1] - new_child = rel_path.replace(os.path.sep, '.') + new_child = rel_path.replace(os.path.sep, ".") new_children.append(invert_chunk_coords(new_child)) else: new_children.append(entry) @@ -265,7 +262,7 @@ def _is_group(self, path: str): attrs_key = os.path.join(path, n5_attrs_key) n5_attrs = self._load_n5_attrs(attrs_key) - return len(n5_attrs) > 0 and 'dimensions' not in n5_attrs + return len(n5_attrs) > 0 and "dimensions" not in n5_attrs def _is_array(self, path: str): @@ -274,7 +271,7 @@ def _is_array(self, path: str): else: attrs_key = os.path.join(path, n5_attrs_key) - return 'dimensions' in self._load_n5_attrs(attrs_key) + return "dimensions" in self._load_n5_attrs(attrs_key) def _contains_attrs(self, path: str): @@ -340,27 +337,28 @@ class N5FSStore(FSStore): dimensions, hence the Zarr arrays targeting N5 have the deceptive "." dimension separator. """ - _array_meta_key = 'attributes.json' - _group_meta_key = 'attributes.json' - _attrs_key = 'attributes.json' + + _array_meta_key = "attributes.json" + _group_meta_key = "attributes.json" + _attrs_key = "attributes.json" def __init__(self, *args, **kwargs): - if 'dimension_separator' in kwargs: - kwargs.pop('dimension_separator') - warnings.warn('Keyword argument `dimension_separator` will be ignored') + if "dimension_separator" in kwargs: + kwargs.pop("dimension_separator") + warnings.warn("Keyword argument `dimension_separator` will be ignored") dimension_separator = "." 
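        # (Editorial aside — illustration only, not part of the patch: N5 always
        # stores chunks on disk as nested "/" directories with reversed axis
        # order; the hard-coded "." above only shapes the zarr-side chunk keys,
        # which _swap_separator below rewrites to the on-disk layout, e.g.
        #     "foo/2.1.0"  ->  "foo/0/1/2"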
super().__init__(*args, dimension_separator=dimension_separator, **kwargs) @staticmethod def _swap_separator(key: str): - segments = list(key.split('/')) + segments = list(key.split("/")) if segments: last_segment = segments[-1] if _prog_ckey.match(last_segment): - coords = list(last_segment.split('.')) - last_segment = '/'.join(coords[::-1]) + coords = list(last_segment.split(".")) + last_segment = "/".join(coords[::-1]) segments = segments[:-1] + [last_segment] - key = '/'.join(segments) + key = "/".join(segments) return key def _normalize_key(self, key: str): @@ -527,7 +525,7 @@ def listdir(self, path: Optional[str] = None): for file_name in self.fs.find(entry_path): file_path = os.path.join(root_path, file_name) rel_path = file_path.split(root_path)[1] - new_child = rel_path.lstrip('/').replace('/', ".") + new_child = rel_path.lstrip("/").replace("/", ".") new_children.append(invert_chunk_coords(new_child)) else: new_children.append(entry) @@ -586,7 +584,7 @@ def _contains_attrs(self, path: Optional[str]): def is_chunk_key(key: str): rv = False - segments = list(key.split('/')) + segments = list(key.split("/")) if segments: last_segment = segments[-1] rv = bool(_prog_ckey.match(last_segment)) @@ -594,118 +592,116 @@ def is_chunk_key(key: str): def invert_chunk_coords(key: str): - segments = list(key.split('/')) + segments = list(key.split("/")) if segments: last_segment = segments[-1] if _prog_ckey.match(last_segment): - coords = list(last_segment.split('.')) - last_segment = '/'.join(coords[::-1]) + coords = list(last_segment.split(".")) + last_segment = "/".join(coords[::-1]) segments = segments[:-1] + [last_segment] - key = '/'.join(segments) + key = "/".join(segments) return key def group_metadata_to_n5(group_metadata: Dict[str, Any]) -> Dict[str, Any]: - '''Convert group metadata from zarr to N5 format.''' - del group_metadata['zarr_format'] + """Convert group metadata from zarr to N5 format.""" + del group_metadata["zarr_format"] # TODO: This should only exist at the top-level - group_metadata['n5'] = N5_FORMAT + group_metadata["n5"] = N5_FORMAT return group_metadata def group_metadata_to_zarr(group_metadata: Dict[str, Any]) -> Dict[str, Any]: - '''Convert group metadata from N5 to zarr format.''' + """Convert group metadata from N5 to zarr format.""" # This only exists at the top level - group_metadata.pop('n5', None) - group_metadata['zarr_format'] = ZARR_FORMAT + group_metadata.pop("n5", None) + group_metadata["zarr_format"] = ZARR_FORMAT return group_metadata def array_metadata_to_n5(array_metadata: Dict[str, Any], top_level=False) -> Dict[str, Any]: - '''Convert array metadata from zarr to N5 format. If the `top_level` keyword argument is True, - then the `N5` : N5_FORMAT key : value pair will be inserted into the metadata.''' + """Convert array metadata from zarr to N5 format. 
If the `top_level` keyword argument is True, + then the `N5` : N5_FORMAT key : value pair will be inserted into the metadata.""" for f, t in zarr_to_n5_keys: array_metadata[t] = array_metadata.pop(f) - del array_metadata['zarr_format'] + del array_metadata["zarr_format"] if top_level: - array_metadata['n5'] = N5_FORMAT + array_metadata["n5"] = N5_FORMAT try: - dtype = np.dtype(array_metadata['dataType']) + dtype = np.dtype(array_metadata["dataType"]) except TypeError: - raise TypeError( - f"Data type {array_metadata['dataType']} is not supported by N5") + raise TypeError(f"Data type {array_metadata['dataType']} is not supported by N5") - array_metadata['dataType'] = dtype.name - array_metadata['dimensions'] = array_metadata['dimensions'][::-1] - array_metadata['blockSize'] = array_metadata['blockSize'][::-1] + array_metadata["dataType"] = dtype.name + array_metadata["dimensions"] = array_metadata["dimensions"][::-1] + array_metadata["blockSize"] = array_metadata["blockSize"][::-1] - if 'fill_value' in array_metadata: - if array_metadata['fill_value'] != 0 and array_metadata['fill_value'] is not None: + if "fill_value" in array_metadata: + if array_metadata["fill_value"] != 0 and array_metadata["fill_value"] is not None: raise ValueError( - f'''Received fill_value = {array_metadata['fill_value']}, - but N5 only supports fill_value = 0''' - ) - del array_metadata['fill_value'] + f"""Received fill_value = {array_metadata['fill_value']}, + but N5 only supports fill_value = 0""" + ) + del array_metadata["fill_value"] - if 'order' in array_metadata: - if array_metadata['order'] != 'C': + if "order" in array_metadata: + if array_metadata["order"] != "C": raise ValueError( f"Received order = {array_metadata['order']}, but N5 only supports order = C" - ) - del array_metadata['order'] + ) + del array_metadata["order"] - if 'filters' in array_metadata: - if array_metadata['filters'] != [] and array_metadata['filters'] is not None: - raise ValueError( - "Received filters, but N5 storage does not support zarr filters" - ) - del array_metadata['filters'] + if "filters" in array_metadata: + if array_metadata["filters"] != [] and array_metadata["filters"] is not None: + raise ValueError("Received filters, but N5 storage does not support zarr filters") + del array_metadata["filters"] - assert 'compression' in array_metadata - compressor_config = array_metadata['compression'] + assert "compression" in array_metadata + compressor_config = array_metadata["compression"] compressor_config = compressor_config_to_n5(compressor_config) - array_metadata['compression'] = compressor_config + array_metadata["compression"] = compressor_config - if 'dimension_separator' in array_metadata: - del array_metadata['dimension_separator'] + if "dimension_separator" in array_metadata: + del array_metadata["dimension_separator"] return array_metadata -def array_metadata_to_zarr(array_metadata: Dict[str, Any], - top_level: bool = False) -> Dict[str, Any]: - '''Convert array metadata from N5 to zarr format. - If the `top_level` keyword argument is True, then the `N5` key will be removed from metadata''' +def array_metadata_to_zarr( + array_metadata: Dict[str, Any], top_level: bool = False +) -> Dict[str, Any]: + """Convert array metadata from N5 to zarr format. 
+ If the `top_level` keyword argument is True, then the `N5` key will be removed from metadata""" for t, f in zarr_to_n5_keys: array_metadata[t] = array_metadata.pop(f) if top_level: - array_metadata.pop('n5') - array_metadata['zarr_format'] = ZARR_FORMAT - - array_metadata['shape'] = array_metadata['shape'][::-1] - array_metadata['chunks'] = array_metadata['chunks'][::-1] - array_metadata['fill_value'] = 0 # also if None was requested - array_metadata['order'] = 'C' - array_metadata['filters'] = [] - array_metadata['dimension_separator'] = '.' - array_metadata['dtype'] = np.dtype(array_metadata['dtype']).str - - compressor_config = array_metadata['compressor'] + array_metadata.pop("n5") + array_metadata["zarr_format"] = ZARR_FORMAT + + array_metadata["shape"] = array_metadata["shape"][::-1] + array_metadata["chunks"] = array_metadata["chunks"][::-1] + array_metadata["fill_value"] = 0 # also if None was requested + array_metadata["order"] = "C" + array_metadata["filters"] = [] + array_metadata["dimension_separator"] = "." + array_metadata["dtype"] = np.dtype(array_metadata["dtype"]).str + + compressor_config = array_metadata["compressor"] compressor_config = compressor_config_to_zarr(compressor_config) - array_metadata['compressor'] = { - 'id': N5ChunkWrapper.codec_id, - 'compressor_config': compressor_config, - 'dtype': array_metadata['dtype'], - 'chunk_shape': array_metadata['chunks'] + array_metadata["compressor"] = { + "id": N5ChunkWrapper.codec_id, + "compressor_config": compressor_config, + "dtype": array_metadata["dtype"], + "chunk_shape": array_metadata["chunks"], } return array_metadata def attrs_to_zarr(attrs: Dict[str, Any]) -> Dict[str, Any]: - '''Get all zarr attributes from an N5 attributes dictionary (i.e., - all non-keyword attributes).''' + """Get all zarr attributes from an N5 attributes dictionary (i.e., + all non-keyword attributes).""" # remove all N5 keywords for n5_key in n5_keywords: @@ -718,134 +714,133 @@ def attrs_to_zarr(attrs: Dict[str, Any]) -> Dict[str, Any]: def compressor_config_to_n5(compressor_config: Optional[Dict[str, Any]]) -> Dict[str, Any]: if compressor_config is None: - return {'type': 'raw'} + return {"type": "raw"} else: _compressor_config = compressor_config # peel wrapper, if present - if _compressor_config['id'] == N5ChunkWrapper.codec_id: - _compressor_config = _compressor_config['compressor_config'] + if _compressor_config["id"] == N5ChunkWrapper.codec_id: + _compressor_config = _compressor_config["compressor_config"] - codec_id = _compressor_config['id'] - n5_config = {'type': codec_id} + codec_id = _compressor_config["id"] + n5_config = {"type": codec_id} - if codec_id == 'bz2': + if codec_id == "bz2": - n5_config['type'] = 'bzip2' - n5_config['blockSize'] = _compressor_config['level'] + n5_config["type"] = "bzip2" + n5_config["blockSize"] = _compressor_config["level"] - elif codec_id == 'blosc': + elif codec_id == "blosc": - n5_config['cname'] = _compressor_config['cname'] - n5_config['clevel'] = _compressor_config['clevel'] - n5_config['shuffle'] = _compressor_config['shuffle'] - n5_config['blocksize'] = _compressor_config['blocksize'] + n5_config["cname"] = _compressor_config["cname"] + n5_config["clevel"] = _compressor_config["clevel"] + n5_config["shuffle"] = _compressor_config["shuffle"] + n5_config["blocksize"] = _compressor_config["blocksize"] - elif codec_id == 'lzma': + elif codec_id == "lzma": # Switch to XZ for N5 if we are using the default XZ format. # Note: 4 is the default, which is lzma.CHECK_CRC64. 
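            # (Editorial aside — assumed stdlib/numcodecs behavior, not part of
            # the patch: in Python's lzma module FORMAT_XZ == 1 and
            # CHECK_CRC64 == 4, and numcodecs' LZMA codec uses check=-1 to mean
            # "the format's default check", which for XZ is CRC64 — so both -1
            # and 4 count as the default in the test below.)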
- if _compressor_config['format'] == 1 and _compressor_config['check'] in [-1, 4]: - n5_config['type'] = 'xz' + if _compressor_config["format"] == 1 and _compressor_config["check"] in [-1, 4]: + n5_config["type"] = "xz" else: warnings.warn( "Not all N5 implementations support lzma compression (yet). You " "might not be able to open the dataset with another N5 library.", - RuntimeWarning + RuntimeWarning, ) - n5_config['format'] = _compressor_config['format'] - n5_config['check'] = _compressor_config['check'] - n5_config['filters'] = _compressor_config['filters'] + n5_config["format"] = _compressor_config["format"] + n5_config["check"] = _compressor_config["check"] + n5_config["filters"] = _compressor_config["filters"] # The default is lzma.PRESET_DEFAULT, which is 6. - if _compressor_config['preset']: - n5_config['preset'] = _compressor_config['preset'] + if _compressor_config["preset"]: + n5_config["preset"] = _compressor_config["preset"] else: - n5_config['preset'] = 6 + n5_config["preset"] = 6 - elif codec_id == 'zlib': + elif codec_id == "zlib": - n5_config['type'] = 'gzip' - n5_config['level'] = _compressor_config['level'] - n5_config['useZlib'] = True + n5_config["type"] = "gzip" + n5_config["level"] = _compressor_config["level"] + n5_config["useZlib"] = True - elif codec_id == 'gzip': + elif codec_id == "gzip": - n5_config['type'] = 'gzip' - n5_config['level'] = _compressor_config['level'] - n5_config['useZlib'] = False + n5_config["type"] = "gzip" + n5_config["level"] = _compressor_config["level"] + n5_config["useZlib"] = False else: - n5_config.update({k: v for k, v in _compressor_config.items() if k != 'type'}) + n5_config.update({k: v for k, v in _compressor_config.items() if k != "type"}) return n5_config def compressor_config_to_zarr(compressor_config: Dict[str, Any]) -> Optional[Dict[str, Any]]: - codec_id = compressor_config['type'] - zarr_config = {'id': codec_id} + codec_id = compressor_config["type"] + zarr_config = {"id": codec_id} - if codec_id == 'bzip2': + if codec_id == "bzip2": - zarr_config['id'] = 'bz2' - zarr_config['level'] = compressor_config['blockSize'] + zarr_config["id"] = "bz2" + zarr_config["level"] = compressor_config["blockSize"] - elif codec_id == 'blosc': + elif codec_id == "blosc": - zarr_config['cname'] = compressor_config['cname'] - zarr_config['clevel'] = compressor_config['clevel'] - zarr_config['shuffle'] = compressor_config['shuffle'] - zarr_config['blocksize'] = compressor_config['blocksize'] + zarr_config["cname"] = compressor_config["cname"] + zarr_config["clevel"] = compressor_config["clevel"] + zarr_config["shuffle"] = compressor_config["shuffle"] + zarr_config["blocksize"] = compressor_config["blocksize"] - elif codec_id == 'lzma': + elif codec_id == "lzma": - zarr_config['format'] = compressor_config['format'] - zarr_config['check'] = compressor_config['check'] - zarr_config['preset'] = compressor_config['preset'] - zarr_config['filters'] = compressor_config['filters'] + zarr_config["format"] = compressor_config["format"] + zarr_config["check"] = compressor_config["check"] + zarr_config["preset"] = compressor_config["preset"] + zarr_config["filters"] = compressor_config["filters"] - elif codec_id == 'xz': + elif codec_id == "xz": - zarr_config['id'] = 'lzma' - zarr_config['format'] = 1 # lzma.FORMAT_XZ - zarr_config['check'] = -1 - zarr_config['preset'] = compressor_config['preset'] - zarr_config['filters'] = None + zarr_config["id"] = "lzma" + zarr_config["format"] = 1 # lzma.FORMAT_XZ + zarr_config["check"] = -1 + zarr_config["preset"] 
= compressor_config["preset"] + zarr_config["filters"] = None - elif codec_id == 'gzip': + elif codec_id == "gzip": - if 'useZlib' in compressor_config and compressor_config['useZlib']: - zarr_config['id'] = 'zlib' - zarr_config['level'] = compressor_config['level'] + if "useZlib" in compressor_config and compressor_config["useZlib"]: + zarr_config["id"] = "zlib" + zarr_config["level"] = compressor_config["level"] else: - zarr_config['id'] = 'gzip' - zarr_config['level'] = compressor_config['level'] + zarr_config["id"] = "gzip" + zarr_config["level"] = compressor_config["level"] - elif codec_id == 'raw': + elif codec_id == "raw": return None else: - zarr_config.update({k: v for k, v in compressor_config.items() if k != 'type'}) + zarr_config.update({k: v for k, v in compressor_config.items() if k != "type"}) return zarr_config class N5ChunkWrapper(Codec): - codec_id = 'n5_wrapper' + codec_id = "n5_wrapper" def __init__(self, dtype, chunk_shape, compressor_config=None, compressor=None): self.dtype = np.dtype(dtype) self.chunk_shape = tuple(chunk_shape) # is the dtype a little endian format? - self._little_endian = ( - self.dtype.byteorder == '<' or - (self.dtype.byteorder == '=' and sys.byteorder == 'little') + self._little_endian = self.dtype.byteorder == "<" or ( + self.dtype.byteorder == "=" and sys.byteorder == "little" ) if compressor: @@ -853,9 +848,7 @@ def __init__(self, dtype, chunk_shape, compressor_config=None, compressor=None): raise ValueError("Only one of compressor_config or compressor should be given.") compressor_config = compressor.get_config() - if ( - compressor_config is None and compressor is None or - compressor_config['id'] == 'raw'): + if compressor_config is None and compressor is None or compressor_config["id"] == "raw": self.compressor_config = None self._compressor = None else: @@ -863,10 +856,7 @@ def __init__(self, dtype, chunk_shape, compressor_config=None, compressor=None): self.compressor_config = self._compressor.get_config() def get_config(self): - config = { - 'id': self.codec_id, - 'compressor_config': self.compressor_config - } + config = {"id": self.codec_id, "compressor_config": self.compressor_config} return config def encode(self, chunk): @@ -879,7 +869,7 @@ def encode(self, chunk): if self._compressor: return header + self._compressor.encode(chunk) else: - return header + chunk.tobytes(order='A') + return header + chunk.tobytes(order="A") def decode(self, chunk, out=None) -> bytes: @@ -889,10 +879,9 @@ def decode(self, chunk, out=None) -> bytes: if out is not None: # out should only be used if we read a complete chunk - assert chunk_shape == self.chunk_shape, ( - "Expected chunk of shape {}, found {}".format( - self.chunk_shape, - chunk_shape)) + assert chunk_shape == self.chunk_shape, "Expected chunk of shape {}, found {}".format( + self.chunk_shape, chunk_shape + ) if self._compressor: self._compressor.decode(chunk, out) @@ -927,25 +916,21 @@ def decode(self, chunk, out=None) -> bytes: @staticmethod def _create_header(chunk): - mode = struct.pack('>H', 0) - num_dims = struct.pack('>H', len(chunk.shape)) - shape = b''.join( - struct.pack('>I', d) - for d in chunk.shape[::-1] - ) + mode = struct.pack(">H", 0) + num_dims = struct.pack(">H", len(chunk.shape)) + shape = b"".join(struct.pack(">I", d) for d in chunk.shape[::-1]) return mode + num_dims + shape @staticmethod def _read_header(chunk): - num_dims = struct.unpack('>H', chunk[2:4])[0] + num_dims = struct.unpack(">H", chunk[2:4])[0] shape = tuple( - struct.unpack('>I', chunk[i:i+4])[0] - for i 
in range(4, num_dims*4 + 4, 4) + struct.unpack(">I", chunk[i : i + 4])[0] for i in range(4, num_dims * 4 + 4, 4) )[::-1] - len_header = 4 + num_dims*4 + len_header = 4 + num_dims * 4 return len_header, shape @@ -962,7 +947,7 @@ def _from_big_endian(self, data): if not self._little_endian: return data - a = np.frombuffer(data, self.dtype.newbyteorder('>')) + a = np.frombuffer(data, self.dtype.newbyteorder(">")) return a.astype(self.dtype) diff --git a/zarr/storage.py b/zarr/storage.py index ef1bd64955..37a821fc5a 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -36,11 +36,7 @@ import time from numcodecs.abc import Codec -from numcodecs.compat import ( - ensure_bytes, - ensure_text, - ensure_contiguous_ndarray_like -) +from numcodecs.compat import ensure_bytes, ensure_text, ensure_contiguous_ndarray_like from numcodecs.registry import codec_registry from zarr.context import Context @@ -53,45 +49,58 @@ ReadOnlyError, ) from zarr.meta import encode_array_metadata, encode_group_metadata -from zarr.util import (buffer_size, json_loads, nolock, normalize_chunks, - normalize_dimension_separator, - normalize_dtype, normalize_fill_value, normalize_order, - normalize_shape, normalize_storage_path, retry_call, - ensure_contiguous_ndarray_or_bytes) +from zarr.util import ( + buffer_size, + json_loads, + nolock, + normalize_chunks, + normalize_dimension_separator, + normalize_dtype, + normalize_fill_value, + normalize_order, + normalize_shape, + normalize_storage_path, + retry_call, + ensure_contiguous_ndarray_or_bytes, +) from zarr._storage.absstore import ABSStore # noqa: F401 -from zarr._storage.store import (_get_hierarchy_metadata, # noqa: F401 - _get_metadata_suffix, - _listdir_from_keys, - _rename_from_keys, - _rename_metadata_v3, - _rmdir_from_keys, - _rmdir_from_keys_v3, - _path_to_prefix, - _prefix_to_array_key, - _prefix_to_group_key, - array_meta_key, - attrs_key, - data_root, - group_meta_key, - meta_root, - DEFAULT_ZARR_VERSION, - BaseStore, - Store) +from zarr._storage.store import ( # noqa: F401 + _get_hierarchy_metadata, + _get_metadata_suffix, + _listdir_from_keys, + _rename_from_keys, + _rename_metadata_v3, + _rmdir_from_keys, + _rmdir_from_keys_v3, + _path_to_prefix, + _prefix_to_array_key, + _prefix_to_group_key, + array_meta_key, + attrs_key, + data_root, + group_meta_key, + meta_root, + DEFAULT_ZARR_VERSION, + BaseStore, + Store, +) __doctest_requires__ = { - ('RedisStore', 'RedisStore.*'): ['redis'], - ('MongoDBStore', 'MongoDBStore.*'): ['pymongo'], - ('LRUStoreCache', 'LRUStoreCache.*'): ['s3fs'], + ("RedisStore", "RedisStore.*"): ["redis"], + ("MongoDBStore", "MongoDBStore.*"): ["pymongo"], + ("LRUStoreCache", "LRUStoreCache.*"): ["s3fs"], } try: # noinspection PyUnresolvedReferences from zarr.codecs import Blosc + default_compressor = Blosc() except ImportError: # pragma: no cover from zarr.codecs import Zlib + default_compressor = Zlib() @@ -113,7 +122,7 @@ def contains_group(store: StoreLike, path: Path = None, explicit_only=True) -> b path = normalize_storage_path(path) prefix = _path_to_prefix(path) key = _prefix_to_group_key(store, prefix) - store_version = getattr(store, '_store_version', 2) + store_version = getattr(store, "_store_version", 2) if store_version == 2 or explicit_only: return key in store else: @@ -122,9 +131,9 @@ def contains_group(store: StoreLike, path: Path = None, explicit_only=True) -> b # for v3, need to also handle implicit groups sfx = _get_metadata_suffix(store) # type: ignore - implicit_prefix = key.replace('.group' + sfx, '') - if not 
implicit_prefix.endswith('/'): - implicit_prefix += '/' + implicit_prefix = key.replace(".group" + sfx, "") + if not implicit_prefix.endswith("/"): + implicit_prefix += "/" if store.list_prefix(implicit_prefix): # type: ignore return True return False @@ -132,7 +141,7 @@ def contains_group(store: StoreLike, path: Path = None, explicit_only=True) -> b def _normalize_store_arg_v2(store: Any, storage_options=None, mode="r") -> BaseStore: # default to v2 store for backward compatibility - zarr_version = getattr(store, '_store_version', 2) + zarr_version = getattr(store, "_store_version", 2) if zarr_version != 2: raise ValueError("store must be a version 2 store") if store is None: @@ -142,23 +151,27 @@ def _normalize_store_arg_v2(store: Any, storage_options=None, mode="r") -> BaseS store = os.fspath(store) if FSStore._fsspec_installed(): import fsspec + if isinstance(store, fsspec.FSMap): - return FSStore(store.root, - fs=store.fs, - mode=mode, - check=store.check, - create=store.create, - missing_exceptions=store.missing_exceptions, - **(storage_options or {})) + return FSStore( + store.root, + fs=store.fs, + mode=mode, + check=store.check, + create=store.create, + missing_exceptions=store.missing_exceptions, + **(storage_options or {}), + ) if isinstance(store, str): if "://" in store or "::" in store: return FSStore(store, mode=mode, **(storage_options or {})) elif storage_options: raise ValueError("storage_options passed with non-fsspec path") - if store.endswith('.zip'): + if store.endswith(".zip"): return ZipStore(store, mode=mode) - elif store.endswith('.n5'): + elif store.endswith(".n5"): from zarr.n5 import N5Store + return N5Store(store) else: return DirectoryStore(store) @@ -167,8 +180,9 @@ def _normalize_store_arg_v2(store: Any, storage_options=None, mode="r") -> BaseS return store -def normalize_store_arg(store: Any, storage_options=None, mode="r", *, - zarr_version=None) -> BaseStore: +def normalize_store_arg( + store: Any, storage_options=None, mode="r", *, zarr_version=None +) -> BaseStore: if zarr_version is None: # default to v2 store for backward compatibility zarr_version = getattr(store, "_store_version", DEFAULT_ZARR_VERSION) @@ -176,6 +190,7 @@ def normalize_store_arg(store: Any, storage_options=None, mode="r", *, normalize_store = _normalize_store_arg_v2 elif zarr_version == 3: from zarr._storage.v3 import _normalize_store_arg_v3 + normalize_store = _normalize_store_arg_v3 else: raise ValueError("zarr_version must be either 2 or 3") @@ -187,7 +202,7 @@ def rmdir(store: StoreLike, path: Path = None): this will be called, otherwise will fall back to implementation via the `Store` interface.""" path = normalize_storage_path(path) - store_version = getattr(store, '_store_version', 2) + store_version = getattr(store, "_store_version", 2) if hasattr(store, "rmdir") and store.is_erasable(): # type: ignore # pass through store.rmdir(path) # type: ignore @@ -205,7 +220,7 @@ def rename(store: Store, src_path: Path, dst_path: Path): `Store` interface.""" src_path = normalize_storage_path(src_path) dst_path = normalize_storage_path(dst_path) - if hasattr(store, 'rename'): + if hasattr(store, "rename"): # pass through store.rename(src_path, dst_path) else: @@ -218,7 +233,7 @@ def listdir(store: BaseStore, path: Path = None): method, this will be called, otherwise will fall back to implementation via the `MutableMapping` interface.""" path = normalize_storage_path(path) - if hasattr(store, 'listdir'): + if hasattr(store, "listdir"): # pass through return store.listdir(path) # type: 
ignore else: @@ -237,14 +252,14 @@ def _getsize(store: BaseStore, path: Path = None) -> int: v = store[path] size = buffer_size(v) else: - path = '' if path is None else normalize_storage_path(path) + path = "" if path is None else normalize_storage_path(path) size = 0 - store_version = getattr(store, '_store_version', 2) + store_version = getattr(store, "_store_version", 2) if store_version == 3: - if path == '': + if path == "": # have to list the root folders without trailing / in this case - members = store.list_prefix(data_root.rstrip('/')) # type: ignore - members += store.list_prefix(meta_root.rstrip('/')) # type: ignore + members = store.list_prefix(data_root.rstrip("/")) # type: ignore + members += store.list_prefix(meta_root.rstrip("/")) # type: ignore else: members = store.list_prefix(data_root + path) # type: ignore members += store.list_prefix(meta_root + path) # type: ignore @@ -270,7 +285,7 @@ def _getsize(store: BaseStore, path: Path = None) -> int: def getsize(store: BaseStore, path: Path = None) -> int: """Compute size of stored items for a given path. If `store` provides a `getsize` method, this will be called, otherwise will return -1.""" - if hasattr(store, 'getsize'): + if hasattr(store, "getsize"): # pass through path = normalize_storage_path(path) return store.getsize(path) # type: ignore @@ -288,12 +303,11 @@ def _require_parent_group( ): # assume path is normalized if path: - segments = path.split('/') + segments = path.split("/") for i in range(len(segments)): - p = '/'.join(segments[:i]) + p = "/".join(segments[:i]) if contains_array(store, p): - _init_group_metadata(store, path=p, chunk_store=chunk_store, - overwrite=overwrite) + _init_group_metadata(store, path=p, chunk_store=chunk_store, overwrite=overwrite) elif not contains_group(store, p): _init_group_metadata(store, path=p, chunk_store=chunk_store) @@ -425,23 +439,31 @@ def init_array( # ensure parent group initialized store_version = getattr(store, "_store_version", 2) if store_version < 3: - _require_parent_group(path, store=store, chunk_store=chunk_store, - overwrite=overwrite) + _require_parent_group(path, store=store, chunk_store=chunk_store, overwrite=overwrite) - if store_version == 3 and 'zarr.json' not in store: + if store_version == 3 and "zarr.json" not in store: # initialize with default zarr.json entry level metadata - store['zarr.json'] = store._metadata_class.encode_hierarchy_metadata(None) # type: ignore + store["zarr.json"] = store._metadata_class.encode_hierarchy_metadata(None) # type: ignore if not compressor: # compatibility with legacy tests using compressor=[] compressor = None - _init_array_metadata(store, shape=shape, chunks=chunks, dtype=dtype, - compressor=compressor, fill_value=fill_value, - order=order, overwrite=overwrite, path=path, - chunk_store=chunk_store, filters=filters, - object_codec=object_codec, - dimension_separator=dimension_separator, - storage_transformers=storage_transformers) + _init_array_metadata( + store, + shape=shape, + chunks=chunks, + dtype=dtype, + compressor=compressor, + fill_value=fill_value, + order=order, + overwrite=overwrite, + path=path, + chunk_store=chunk_store, + filters=filters, + object_codec=object_codec, + dimension_separator=dimension_separator, + storage_transformers=storage_transformers, + ) def _init_array_metadata( @@ -461,7 +483,7 @@ def _init_array_metadata( storage_transformers=(), ): - store_version = getattr(store, '_store_version', 2) + store_version = getattr(store, "_store_version", 2) path = normalize_storage_path(path) @@ 
-486,11 +508,11 @@ def _init_array_metadata( if chunk_store is not None: chunk_store.erase_prefix(data_prefix) # type: ignore - if '/' in path: + if "/" in path: # path is a subfolder of an existing array, remove that array - parent_path = '/'.join(path.split('/')[:-1]) + parent_path = "/".join(path.split("/")[:-1]) sfx = _get_metadata_suffix(store) # type: ignore - array_key = meta_root + parent_path + '.array' + sfx + array_key = meta_root + parent_path + ".array" + sfx if array_key in store: store.erase(array_key) # type: ignore @@ -500,9 +522,9 @@ def _init_array_metadata( elif contains_group(store, path, explicit_only=False): raise ContainsGroupError(path) elif store_version == 3: - if '/' in path: + if "/" in path: # cannot create an array within an existing array path - parent_path = '/'.join(path.split('/')[:-1]) + parent_path = "/".join(path.split("/")[:-1]) if contains_array(store, parent_path): raise ContainsArrayError(path) @@ -523,10 +545,10 @@ def _init_array_metadata( if shape == (): # no point in compressing a 0-dimensional array, only a single value compressor = None - elif compressor == 'none': + elif compressor == "none": # compatibility compressor = None - elif compressor == 'default': + elif compressor == "default": compressor = default_compressor # obtain compressor config @@ -556,16 +578,19 @@ def _init_array_metadata( if object_codec is None: if not filters: # there are no filters so we can be sure there is no object codec - raise ValueError('missing object_codec for object array') + raise ValueError("missing object_codec for object array") else: # one of the filters may be an object codec, issue a warning rather # than raise an error to maintain backwards-compatibility - warnings.warn('missing object_codec for object array; this will raise a ' - 'ValueError in version 3.0', FutureWarning) + warnings.warn( + "missing object_codec for object array; this will raise a " + "ValueError in version 3.0", + FutureWarning, + ) else: filters_config.insert(0, object_codec.get_config()) elif object_codec is not None: - warnings.warn('an object_codec is only needed for object arrays') + warnings.warn("an object_codec is only needed for object arrays") # use null to indicate no filters if not filters_config: @@ -574,32 +599,34 @@ def _init_array_metadata( # initialize metadata # TODO: don't store redundant dimension_separator for v3? 
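    # (Editorial aside — illustration only, not part of the patch: the metadata
    # dict assembled below stays flat for v2 (shape, chunks, dtype, order,
    # filters, compressor, fill_value, dimension_separator), while v3 nests
    # chunking under "chunk_grid", renames dtype/order to
    # "data_type"/"chunk_memory_layout", and moves filters into "attributes".)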
_compressor = compressor_config if store_version == 2 else compressor - meta = dict(shape=shape, compressor=_compressor, - fill_value=fill_value, - dimension_separator=dimension_separator) + meta = dict( + shape=shape, + compressor=_compressor, + fill_value=fill_value, + dimension_separator=dimension_separator, + ) if store_version < 3: - meta.update(dict(chunks=chunks, dtype=dtype, order=order, - filters=filters_config)) + meta.update(dict(chunks=chunks, dtype=dtype, order=order, filters=filters_config)) assert not storage_transformers else: if dimension_separator is None: dimension_separator = "/" if filters_config: - attributes = {'filters': filters_config} + attributes = {"filters": filters_config} else: attributes = {} meta.update( - dict(chunk_grid=dict(type="regular", - chunk_shape=chunks, - separator=dimension_separator), - chunk_memory_layout=order, - data_type=dtype, - attributes=attributes, - storage_transformers=storage_transformers) + dict( + chunk_grid=dict(type="regular", chunk_shape=chunks, separator=dimension_separator), + chunk_memory_layout=order, + data_type=dtype, + attributes=attributes, + storage_transformers=storage_transformers, + ) ) key = _prefix_to_array_key(store, _path_to_prefix(path)) - if hasattr(store, '_metadata_class'): + if hasattr(store, "_metadata_class"): store[key] = store._metadata_class.encode_array_metadata(meta) # type: ignore else: store[key] = encode_array_metadata(meta) @@ -635,19 +662,17 @@ def init_group( # normalize path path = normalize_storage_path(path) - store_version = getattr(store, '_store_version', 2) + store_version = getattr(store, "_store_version", 2) if store_version < 3: # ensure parent group initialized - _require_parent_group(path, store=store, chunk_store=chunk_store, - overwrite=overwrite) + _require_parent_group(path, store=store, chunk_store=chunk_store, overwrite=overwrite) - if store_version == 3 and 'zarr.json' not in store: + if store_version == 3 and "zarr.json" not in store: # initialize with default zarr.json entry level metadata - store['zarr.json'] = store._metadata_class.encode_hierarchy_metadata(None) # type: ignore + store["zarr.json"] = store._metadata_class.encode_hierarchy_metadata(None) # type: ignore # initialise metadata - _init_group_metadata(store=store, overwrite=overwrite, path=path, - chunk_store=chunk_store) + _init_group_metadata(store=store, overwrite=overwrite, path=path, chunk_store=chunk_store) if store_version == 3: # TODO: Should initializing a v3 group also create a corresponding @@ -663,7 +688,7 @@ def _init_group_metadata( chunk_store: Optional[StoreLike] = None, ): - store_version = getattr(store, '_store_version', 2) + store_version = getattr(store, "_store_version", 2) path = normalize_storage_path(path) # guard conditions @@ -694,9 +719,9 @@ def _init_group_metadata( raise ContainsArrayError(path) elif contains_group(store, path): raise ContainsGroupError(path) - elif store_version == 3 and '/' in path: + elif store_version == 3 and "/" in path: # cannot create a group overlapping with an existing array name - parent_path = '/'.join(path.split('/')[:-1]) + parent_path = "/".join(path.split("/")[:-1]) if contains_array(store, parent_path): raise ContainsArrayError(path) @@ -704,11 +729,11 @@ def _init_group_metadata( # N.B., currently no metadata properties are needed, however there may # be in future if store_version == 3: - meta = {'attributes': {}} # type: ignore + meta = {"attributes": {}} # type: ignore else: meta = {} # type: ignore key = _prefix_to_group_key(store, 
_path_to_prefix(path)) - if hasattr(store, '_metadata_class'): + if hasattr(store, "_metadata_class"): store[key] = store._metadata_class.encode_group_metadata(meta) # type: ignore else: store[key] = encode_group_metadata(meta) @@ -718,7 +743,7 @@ def _dict_store_keys(d: Dict, prefix="", cls=dict): for k in d.keys(): v = d[k] if isinstance(v, cls): - yield from _dict_store_keys(v, prefix + k + '/', cls) + yield from _dict_store_keys(v, prefix + k + "/", cls) else: yield prefix + k @@ -814,7 +839,7 @@ def __setstate__(self, state): def _get_parent(self, item: str): parent = self.root # split the item - segments = item.split('/') + segments = item.split("/") # find the parent container for k in segments[:-1]: parent = parent[k] @@ -825,7 +850,7 @@ def _get_parent(self, item: str): def _require_parent(self, item): parent = self.root # split the item - segments = item.split('/') + segments = item.split("/") # require the parent container for k in segments[:-1]: try: @@ -874,11 +899,7 @@ def __contains__(self, item: str): # type: ignore[override] return not isinstance(value, self.cls) def __eq__(self, other): - return ( - isinstance(other, MemoryStore) and - self.root == other.root and - self.cls == other.cls - ) + return isinstance(other, MemoryStore) and self.root == other.root and self.cls == other.cls def keys(self): yield from _dict_store_keys(self.root, cls=self.cls) @@ -963,12 +984,13 @@ def clear(self): class DictStore(MemoryStore): - def __init__(self, *args, **kwargs): - warnings.warn("DictStore has been renamed to MemoryStore in 2.4.0 and " - "will be removed in the future. Please use MemoryStore.", - DeprecationWarning, - stacklevel=2) + warnings.warn( + "DictStore has been renamed to MemoryStore in 2.4.0 and " + "will be removed in the future. Please use MemoryStore.", + DeprecationWarning, + stacklevel=2, + ) super().__init__(*args, **kwargs) @@ -1048,7 +1070,7 @@ def _normalize_key(self, key): @staticmethod def _fromfile(fn): - """ Read data from a file + """Read data from a file Parameters ---------- @@ -1060,12 +1082,12 @@ def _fromfile(fn): Subclasses should overload this method to specify any custom file reading logic. """ - with open(fn, 'rb') as f: + with open(fn, "rb") as f: return f.read() @staticmethod def _tofile(a, fn): - """ Write data to a file + """Write data to a file Parameters ---------- @@ -1079,7 +1101,7 @@ def _tofile(a, fn): Subclasses should overload this method to specify any custom file writing logic. """ - with open(fn, mode='wb') as f: + with open(fn, mode="wb") as f: f.write(a) def __getitem__(self, key): @@ -1116,7 +1138,7 @@ def __setitem__(self, key, value): # write to temporary file # note we're not using tempfile.NamedTemporaryFile to avoid restrictive file permissions - temp_name = file_name + '.' + uuid.uuid4().hex + '.partial' + temp_name = file_name + "." 
+ uuid.uuid4().hex + ".partial" temp_path = os.path.join(dir_path, temp_name) try: self._tofile(value, temp_path) @@ -1149,10 +1171,7 @@ def __contains__(self, key): return os.path.isfile(file_path) def __eq__(self, other): - return ( - isinstance(other, DirectoryStore) and - self.path == other.path - ) + return isinstance(other, DirectoryStore) and self.path == other.path def keys(self): if os.path.exists(self.path): @@ -1184,8 +1203,11 @@ def dir_path(self, path=None): return dir_path def listdir(self, path=None): - return self._nested_listdir(path) if self._dimension_separator == "/" else \ - self._flat_listdir(path) + return ( + self._nested_listdir(path) + if self._dimension_separator == "/" + else self._flat_listdir(path) + ) def _flat_listdir(self, path=None): dir_path = self.dir_path(path) @@ -1208,9 +1230,9 @@ def _nested_listdir(self, path=None): for file_name in file_names: file_path = os.path.join(dir_path, file_name) rel_path = file_path.split(root_path + os.path.sep)[1] - new_children.append(rel_path.replace( - os.path.sep, - self._dimension_separator or '.')) + new_children.append( + rel_path.replace(os.path.sep, self._dimension_separator or ".") + ) else: new_children.append(entry) return sorted(new_children) @@ -1256,21 +1278,21 @@ def clear(self): shutil.rmtree(self.path) -def atexit_rmtree(path, - isdir=os.path.isdir, - rmtree=shutil.rmtree): # pragma: no cover +def atexit_rmtree(path, isdir=os.path.isdir, rmtree=shutil.rmtree): # pragma: no cover """Ensure directory removal at interpreter exit.""" if isdir(path): rmtree(path) # noinspection PyShadowingNames -def atexit_rmglob(path, - glob=glob.glob, - isdir=os.path.isdir, - isfile=os.path.isfile, - remove=os.remove, - rmtree=shutil.rmtree): # pragma: no cover +def atexit_rmglob( + path, + glob=glob.glob, + isdir=os.path.isdir, + isfile=os.path.isfile, + remove=os.remove, + rmtree=shutil.rmtree, +): # pragma: no cover """Ensure removal of multiple files at interpreter exit.""" for p in glob(path): if isfile(p): @@ -1316,19 +1338,25 @@ class FSStore(Store): storage_options : passed to the fsspec implementation. Cannot be used together with fs. """ + _array_meta_key = array_meta_key _group_meta_key = group_meta_key _attrs_key = attrs_key - def __init__(self, url, normalize_keys=False, key_separator=None, - mode='w', - exceptions=(KeyError, PermissionError, IOError), - dimension_separator=None, - fs=None, - check=False, - create=False, - missing_exceptions=None, - **storage_options): + def __init__( + self, + url, + normalize_keys=False, + key_separator=None, + mode="w", + exceptions=(KeyError, PermissionError, IOError), + dimension_separator=None, + fs=None, + check=False, + create=False, + missing_exceptions=None, + **storage_options, + ): if not self._fsspec_installed(): # pragma: no cover raise ImportError("`fsspec` is required to use zarr's FSStore") import fsspec @@ -1374,13 +1402,13 @@ def _default_key_separator(self): self.key_separator = "." 
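    # (Editorial aside — hypothetical example, not part of the patch: with
    # key_separator == "/", _normalize_key("foo/bar/0.1.2") returns
    # "foo/bar/0/1/2", while reserved metadata names such as ".zarray",
    # ".zgroup" and ".zattrs" in the last path segment are left untouched.)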
def _normalize_key(self, key): - key = normalize_storage_path(key).lstrip('/') + key = normalize_storage_path(key).lstrip("/") if key: - *bits, end = key.split('/') + *bits, end = key.split("/") if end not in (self._array_meta_key, self._group_meta_key, self._attrs_key): - end = end.replace('.', self.key_separator) - key = '/'.join(bits + [end]) + end = end.replace(".", self.key_separator) + key = "/".join(bits + [end]) return key.lower() if self.normalize_keys else key @@ -1402,7 +1430,7 @@ def __getitem__(self, key): raise KeyError(key) from e def setitems(self, values): - if self.mode == 'r': + if self.mode == "r": raise ReadOnlyError() # Normalize keys and make sure the values are bytes @@ -1413,7 +1441,7 @@ def setitems(self, values): self.map.setitems(values) def __setitem__(self, key, value): - if self.mode == 'r': + if self.mode == "r": raise ReadOnlyError() key = self._normalize_key(key) value = ensure_contiguous_ndarray_or_bytes(value) @@ -1427,7 +1455,7 @@ def __setitem__(self, key, value): raise KeyError(key) from e def __delitem__(self, key): - if self.mode == 'r': + if self.mode == "r": raise ReadOnlyError() key = self._normalize_key(key) path = self.dir_path(key) @@ -1437,7 +1465,7 @@ def __delitem__(self, key): del self.map[key] def delitems(self, keys): - if self.mode == 'r': + if self.mode == "r": raise ReadOnlyError() # only remove the keys that exist in the store nkeys = [self._normalize_key(key) for key in keys if key in self] @@ -1450,8 +1478,7 @@ def __contains__(self, key): return key in self.map def __eq__(self, other): - return (type(self) is type(other) and self.map == other.map - and self.mode == other.mode) + return type(self) is type(other) and self.map == other.map and self.mode == other.mode def keys(self): return iter(self.map) @@ -1469,8 +1496,9 @@ def dir_path(self, path=None): def listdir(self, path=None): dir_path = self.dir_path(path) try: - children = sorted(p.rstrip('/').rsplit('/', 1)[-1] - for p in self.fs.ls(dir_path, detail=False)) + children = sorted( + p.rstrip("/").rsplit("/", 1)[-1] for p in self.fs.ls(dir_path, detail=False) + ) if self.key_separator != "/": return children else: @@ -1485,8 +1513,8 @@ def listdir(self, path=None): for file_name in self.fs.find(entry_path): file_path = os.path.join(dir_path, file_name) rel_path = file_path.split(root_path)[1] - rel_path = rel_path.lstrip('/') - new_children.append(rel_path.replace('/', '.')) + rel_path = rel_path.lstrip("/") + new_children.append(rel_path.replace("/", ".")) else: new_children.append(entry) return sorted(new_children) @@ -1496,7 +1524,7 @@ def listdir(self, path=None): return [] def rmdir(self, path=None): - if self.mode == 'r': + if self.mode == "r": raise ReadOnlyError() store_path = self.dir_path(path) if self.fs.isdir(store_path): @@ -1507,7 +1535,7 @@ def getsize(self, path=None): return self.fs.du(store_path, True, True) def clear(self): - if self.mode == 'r': + if self.mode == "r": raise ReadOnlyError() self.map.clear() @@ -1540,15 +1568,16 @@ class TempStore(DirectoryStore): """ # noinspection PyShadowingBuiltins - def __init__(self, suffix='', prefix='zarr', dir=None, normalize_keys=False, - dimension_separator=None): + def __init__( + self, suffix="", prefix="zarr", dir=None, normalize_keys=False, dimension_separator=None + ): path = tempfile.mkdtemp(suffix=suffix, prefix=prefix, dir=dir) atexit.register(atexit_rmtree, path) super().__init__(path, normalize_keys=normalize_keys) -_prog_ckey = re.compile(r'^(\d+)(\.\d+)+$') -_prog_number = re.compile(r'^\d+$') 
+_prog_ckey = re.compile(r"^(\d+)(\.\d+)+$") +_prog_number = re.compile(r"^\d+$") class NestedDirectoryStore(DirectoryStore): @@ -1629,15 +1658,11 @@ def __init__(self, path, normalize_keys=False, dimension_separator="/"): if dimension_separator is None: dimension_separator = "/" elif dimension_separator != "/": - raise ValueError( - "NestedDirectoryStore only supports '/' as dimension_separator") + raise ValueError("NestedDirectoryStore only supports '/' as dimension_separator") self._dimension_separator = dimension_separator def __eq__(self, other): - return ( - isinstance(other, NestedDirectoryStore) and - self.path == other.path - ) + return isinstance(other, NestedDirectoryStore) and self.path == other.path # noinspection PyPep8Naming @@ -1735,8 +1760,14 @@ class also supports the context manager protocol, which ensures the ``close()`` _erasable = False - def __init__(self, path, compression=zipfile.ZIP_STORED, allowZip64=True, mode='a', - dimension_separator=None): + def __init__( + self, + path, + compression=zipfile.ZIP_STORED, + allowZip64=True, + mode="a", + dimension_separator=None, + ): # store properties path = os.path.abspath(path) @@ -1752,8 +1783,7 @@ def __init__(self, path, compression=zipfile.ZIP_STORED, allowZip64=True, mode=' self.mutex = RLock() # open zip file - self.zf = zipfile.ZipFile(path, mode=mode, compression=compression, - allowZip64=allowZip64) + self.zf = zipfile.ZipFile(path, mode=mode, compression=compression, allowZip64=allowZip64) def __getstate__(self): self.flush() @@ -1763,10 +1793,9 @@ def __setstate__(self, state): path, compression, allowZip64, mode = state # if initially opened with mode 'w' or 'x', re-open in mode 'a' so file doesn't # get clobbered - if mode in 'wx': - mode = 'a' - self.__init__(path=path, compression=compression, allowZip64=allowZip64, - mode=mode) + if mode in "wx": + mode = "a" + self.__init__(path=path, compression=compression, allowZip64=allowZip64, mode=mode) def close(self): """Closes the underlying zip file, ensuring all records are written.""" @@ -1776,14 +1805,14 @@ def close(self): def flush(self): """Closes the underlying zip file, ensuring all records are written, then re-opens the file for further modifications.""" - if self.mode != 'r': + if self.mode != "r": with self.mutex: self.zf.close() # N.B., re-open with mode 'a' regardless of initial mode so we don't wipe # what's been written - self.zf = zipfile.ZipFile(self.path, mode='a', - compression=self.compression, - allowZip64=self.allowZip64) + self.zf = zipfile.ZipFile( + self.path, mode="a", compression=self.compression, allowZip64=self.allowZip64 + ) def __enter__(self): return self @@ -1797,21 +1826,20 @@ def __getitem__(self, key): return f.read() def __setitem__(self, key, value): - if self.mode == 'r': + if self.mode == "r": raise ReadOnlyError() value = ensure_contiguous_ndarray_like(value).view("u1") with self.mutex: # writestr(key, value) writes with default permissions from # zipfile (600) that are too restrictive, build ZipInfo for # the key to work around limitation - keyinfo = zipfile.ZipInfo(filename=key, - date_time=time.localtime(time.time())[:6]) + keyinfo = zipfile.ZipInfo(filename=key, date_time=time.localtime(time.time())[:6]) keyinfo.compress_type = self.compression if keyinfo.filename[-1] == os.sep: - keyinfo.external_attr = 0o40775 << 16 # drwxrwxr-x - keyinfo.external_attr |= 0x10 # MS-DOS directory flag + keyinfo.external_attr = 0o40775 << 16 # drwxrwxr-x + keyinfo.external_attr |= 0x10 # MS-DOS directory flag else: - 
keyinfo.external_attr = 0o644 << 16 # ?rw-r--r-- + keyinfo.external_attr = 0o644 << 16 # ?rw-r--r-- self.zf.writestr(keyinfo, value) @@ -1820,10 +1848,10 @@ def __delitem__(self, key): def __eq__(self, other): return ( - isinstance(other, ZipStore) and - self.path == other.path and - self.compression == other.compression and - self.allowZip64 == other.allowZip64 + isinstance(other, ZipStore) + and self.path == other.path + and self.compression == other.compression + and self.allowZip64 == other.allowZip64 ) def keylist(self): @@ -1860,7 +1888,7 @@ def getsize(self, path=None): size = 0 for child in children: if path: - name = path + '/' + child + name = path + "/" + child else: name = child try: @@ -1880,14 +1908,14 @@ def getsize(self, path=None): return 0 def clear(self): - if self.mode == 'r': + if self.mode == "r": raise ReadOnlyError() with self.mutex: self.close() os.remove(self.path) - self.zf = zipfile.ZipFile(self.path, mode=self.mode, - compression=self.compression, - allowZip64=self.allowZip64) + self.zf = zipfile.ZipFile( + self.path, mode=self.mode, compression=self.compression, allowZip64=self.allowZip64 + ) def migrate_1to2(store): @@ -1909,37 +1937,38 @@ def migrate_1to2(store): # migrate metadata from zarr import meta_v1 - meta = meta_v1.decode_metadata(store['meta']) - del store['meta'] + + meta = meta_v1.decode_metadata(store["meta"]) + del store["meta"] # add empty filters - meta['filters'] = None + meta["filters"] = None # migration compression metadata - compression = meta['compression'] - if compression is None or compression == 'none': + compression = meta["compression"] + if compression is None or compression == "none": compressor_config = None else: - compression_opts = meta['compression_opts'] + compression_opts = meta["compression_opts"] codec_cls = codec_registry[compression] if isinstance(compression_opts, dict): compressor = codec_cls(**compression_opts) else: compressor = codec_cls(compression_opts) compressor_config = compressor.get_config() - meta['compressor'] = compressor_config - del meta['compression'] - del meta['compression_opts'] + meta["compressor"] = compressor_config + del meta["compression"] + del meta["compression_opts"] # store migrated metadata - if hasattr(store, '_metadata_class'): + if hasattr(store, "_metadata_class"): store[array_meta_key] = store._metadata_class.encode_array_metadata(meta) else: store[array_meta_key] = encode_array_metadata(meta) # migrate user attributes - store[attrs_key] = store['attrs'] - del store['attrs'] + store[attrs_key] = store["attrs"] + del store["attrs"] # noinspection PyShadowingBuiltins @@ -2024,11 +2053,19 @@ class DBMStore(Store): """ - def __init__(self, path, flag='c', mode=0o666, open=None, write_lock=True, - dimension_separator=None, - **open_kwargs): + def __init__( + self, + path, + flag="c", + mode=0o666, + open=None, + write_lock=True, + dimension_separator=None, + **open_kwargs, + ): if open is None: import dbm + open = dbm.open path = os.path.abspath(path) # noinspection PyArgumentList @@ -2053,27 +2090,25 @@ def __getstate__(self): except Exception: # flush may fail if db has already been closed pass - return (self.path, self.flag, self.mode, self.open, self.write_lock, - self.open_kwargs) + return (self.path, self.flag, self.mode, self.open, self.write_lock, self.open_kwargs) def __setstate__(self, state): path, flag, mode, open, write_lock, open_kws = state - if flag[0] == 'n': - flag = 'c' + flag[1:] # don't clobber an existing database - self.__init__(path=path, flag=flag, mode=mode, 
open=open, - write_lock=write_lock, **open_kws) + if flag[0] == "n": + flag = "c" + flag[1:] # don't clobber an existing database + self.__init__(path=path, flag=flag, mode=mode, open=open, write_lock=write_lock, **open_kws) def close(self): """Closes the underlying database file.""" - if hasattr(self.db, 'close'): + if hasattr(self.db, "close"): with self.write_mutex: self.db.close() def flush(self): """Synchronizes data to the underlying database file.""" - if self.flag[0] != 'r': + if self.flag[0] != "r": with self.write_mutex: - if hasattr(self.db, 'sync'): + if hasattr(self.db, "sync"): self.db.sync() else: # pragma: no cover # we don't cover this branch anymore as ndbm (oracle) is not packaged @@ -2081,8 +2116,8 @@ def flush(self): # https://github.com/conda-forge/staged-recipes/issues/4476 # fall-back, close and re-open, needed for ndbm flag = self.flag - if flag[0] == 'n': - flag = 'c' + flag[1:] # don't clobber an existing database + if flag[0] == "n": + flag = "c" + flag[1:] # don't clobber an existing database self.db.close() # noinspection PyArgumentList self.db = self.open(self.path, flag, self.mode, **self.open_kwargs) @@ -2113,11 +2148,12 @@ def __delitem__(self, key): def __eq__(self, other): return ( - isinstance(other, DBMStore) and - self.path == other.path and + isinstance(other, DBMStore) + and self.path == other.path + and # allow flag and mode to differ - self.open == other.open and - self.open_kwargs == other.open_kwargs + self.open == other.open + and self.open_kwargs == other.open_kwargs ) def keys(self): @@ -2200,28 +2236,28 @@ def __init__(self, path, buffers=True, dimension_separator=None, **kwargs): # set default memory map size to something larger than the lmdb default, which is # very likely to be too small for any moderate array (logic copied from zict) - map_size = (2**40 if sys.maxsize >= 2**32 else 2**28) - kwargs.setdefault('map_size', map_size) + map_size = 2**40 if sys.maxsize >= 2**32 else 2**28 + kwargs.setdefault("map_size", map_size) # don't initialize buffers to zero by default, shouldn't be necessary - kwargs.setdefault('meminit', False) + kwargs.setdefault("meminit", False) # decide whether to use the writemap option based on the operating system's # support for sparse files - writemap requires sparse file support otherwise # the whole# `map_size` may be reserved up front on disk (logic copied from zict) - writemap = sys.platform.startswith('linux') - kwargs.setdefault('writemap', writemap) + writemap = sys.platform.startswith("linux") + kwargs.setdefault("writemap", writemap) # decide options for when data are flushed to disk - choose to delay syncing # data to filesystem, otherwise pay a large performance penalty (zict also does # this) - kwargs.setdefault('metasync', False) - kwargs.setdefault('sync', False) - kwargs.setdefault('map_async', False) + kwargs.setdefault("metasync", False) + kwargs.setdefault("sync", False) + kwargs.setdefault("map_async", False) # set default option for number of cached transactions max_spare_txns = multiprocessing.cpu_count() - kwargs.setdefault('max_spare_txns', max_spare_txns) + kwargs.setdefault("max_spare_txns", max_spare_txns) # normalize path path = os.path.abspath(path) @@ -2312,7 +2348,7 @@ def __iter__(self): return self.keys() def __len__(self): - return self.db.stat()['entries'] + return self.db.stat()["entries"] class LRUStoreCache(Store): @@ -2364,14 +2400,30 @@ def __init__(self, store: StoreLike, max_size: int): self.hits = self.misses = 0 def __getstate__(self): - return (self._store, 
self._max_size, self._current_size, self._keys_cache, - self._contains_cache, self._listdir_cache, self._values_cache, self.hits, - self.misses) + return ( + self._store, + self._max_size, + self._current_size, + self._keys_cache, + self._contains_cache, + self._listdir_cache, + self._values_cache, + self.hits, + self.misses, + ) def __setstate__(self, state): - (self._store, self._max_size, self._current_size, self._keys_cache, - self._contains_cache, self._listdir_cache, self._values_cache, self.hits, - self.misses) = state + ( + self._store, + self._max_size, + self._current_size, + self._keys_cache, + self._contains_cache, + self._listdir_cache, + self._values_cache, + self.hits, + self.misses, + ) = state self._mutex = Lock() def __len__(self): @@ -2536,7 +2588,7 @@ def __init__(self, path, dimension_separator=None, **kwargs): self._dimension_separator = dimension_separator # normalize path - if path != ':memory:': + if path != ":memory:": path = os.path.abspath(path) # store properties @@ -2560,7 +2612,7 @@ def __init__(self, path, dimension_separator=None, **kwargs): detect_types=0, isolation_level=None, check_same_thread=check_same_thread, - **self.kwargs + **self.kwargs, ) # handle keys as `str`s @@ -2571,13 +2623,11 @@ def __init__(self, path, dimension_separator=None, **kwargs): # initialize database with our table if missing with self.lock: - self.cursor.execute( - 'CREATE TABLE IF NOT EXISTS zarr(k TEXT PRIMARY KEY, v BLOB)' - ) + self.cursor.execute("CREATE TABLE IF NOT EXISTS zarr(k TEXT PRIMARY KEY, v BLOB)") def __getstate__(self): - if self.path == ':memory:': - raise PicklingError('Cannot pickle in-memory SQLite databases') + if self.path == ":memory:": + raise PicklingError("Cannot pickle in-memory SQLite databases") return self.path, self.kwargs def __setstate__(self, state): @@ -2592,8 +2642,8 @@ def close(self): self.db.close() def __getitem__(self, key): - value = self.cursor.execute('SELECT v FROM zarr WHERE (k = ?)', (key,)) - for v, in value: + value = self.cursor.execute("SELECT v FROM zarr WHERE (k = ?)", (key,)) + for (v,) in value: return v raise KeyError(key) @@ -2602,38 +2652,36 @@ def __setitem__(self, key, value): def __delitem__(self, key): with self.lock: - self.cursor.execute('DELETE FROM zarr WHERE (k = ?)', (key,)) + self.cursor.execute("DELETE FROM zarr WHERE (k = ?)", (key,)) if self.cursor.rowcount < 1: raise KeyError(key) def __contains__(self, key): - cs = self.cursor.execute( - 'SELECT COUNT(*) FROM zarr WHERE (k = ?)', (key,) - ) - for has, in cs: + cs = self.cursor.execute("SELECT COUNT(*) FROM zarr WHERE (k = ?)", (key,)) + for (has,) in cs: has = bool(has) return has def items(self): - kvs = self.cursor.execute('SELECT k, v FROM zarr') + kvs = self.cursor.execute("SELECT k, v FROM zarr") yield from kvs def keys(self): - ks = self.cursor.execute('SELECT k FROM zarr') - for k, in ks: + ks = self.cursor.execute("SELECT k FROM zarr") + for (k,) in ks: yield k def values(self): - vs = self.cursor.execute('SELECT v FROM zarr') - for v, in vs: + vs = self.cursor.execute("SELECT v FROM zarr") + for (v,) in vs: yield v def __iter__(self): return self.keys() def __len__(self): - cs = self.cursor.execute('SELECT COUNT(*) FROM zarr') - for c, in cs: + cs = self.cursor.execute("SELECT COUNT(*) FROM zarr") + for (c,) in cs: return c def update(self, *args, **kwargs): @@ -2648,19 +2696,21 @@ def update(self, *args, **kwargs): kv_list.append((k, v)) with self.lock: - self.cursor.executemany('REPLACE INTO zarr VALUES (?, ?)', kv_list) + 
self.cursor.executemany("REPLACE INTO zarr VALUES (?, ?)", kv_list) def listdir(self, path=None): path = normalize_storage_path(path) - sep = '_' if path == '' else '/' + sep = "_" if path == "" else "/" keys = self.cursor.execute( - ''' + """ SELECT DISTINCT SUBSTR(m, 0, INSTR(m, "/")) AS l FROM ( SELECT LTRIM(SUBSTR(k, LENGTH(?) + 1), "/") || "/" AS m FROM zarr WHERE k LIKE (? || "{sep}%") ) ORDER BY l ASC - '''.format(sep=sep), - (path, path) + """.format( + sep=sep + ), + (path, path), ) keys = list(map(operator.itemgetter(0), keys)) return keys @@ -2668,35 +2718,33 @@ def listdir(self, path=None): def getsize(self, path=None): path = normalize_storage_path(path) size = self.cursor.execute( - ''' + """ SELECT COALESCE(SUM(LENGTH(v)), 0) FROM zarr WHERE k LIKE (? || "%") AND 0 == INSTR(LTRIM(SUBSTR(k, LENGTH(?) + 1), "/"), "/") - ''', - (path, path) + """, + (path, path), ) - for s, in size: + for (s,) in size: return s def rmdir(self, path=None): path = normalize_storage_path(path) if path: with self.lock: - self.cursor.execute( - 'DELETE FROM zarr WHERE k LIKE (? || "/%")', (path,) - ) + self.cursor.execute('DELETE FROM zarr WHERE k LIKE (? || "/%")', (path,)) else: self.clear() def clear(self): with self.lock: self.cursor.executescript( - ''' + """ BEGIN TRANSACTION; DROP TABLE zarr; CREATE TABLE zarr(k TEXT PRIMARY KEY, v BLOB); COMMIT TRANSACTION; - ''' + """ ) @@ -2725,11 +2773,16 @@ class MongoDBStore(Store): """ - _key = 'key' - _value = 'value' + _key = "key" + _value = "value" - def __init__(self, database='mongodb_zarr', collection='zarr_collection', - dimension_separator=None, **kwargs): + def __init__( + self, + database="mongodb_zarr", + collection="zarr_collection", + dimension_separator=None, + **kwargs, + ): import pymongo self._database = database @@ -2751,9 +2804,9 @@ def __getitem__(self, key): def __setitem__(self, key, value): value = ensure_bytes(value) - self.collection.replace_one({self._key: key}, - {self._key: key, self._value: value}, - upsert=True) + self.collection.replace_one( + {self._key: key}, {self._key: key, self._value: value}, upsert=True + ) def __delitem__(self, key): result = self.collection.delete_many({self._key: key}) @@ -2801,8 +2854,10 @@ class RedisStore(Store): Keyword arguments passed through to the `redis.Redis` function. 
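    An illustrative sketch of the key scheme (editor's note; this assumes a
    reachable Redis server, which this class itself does not start)::

        store = RedisStore(prefix="zarr", host="localhost", port=6379)
        store["foo"] = b"xxx"     # stored under the Redis key "zarr:foo"
        sorted(store.keys())      # -> ["foo"]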
""" - def __init__(self, prefix='zarr', dimension_separator=None, **kwargs): + + def __init__(self, prefix="zarr", dimension_separator=None, **kwargs): import redis + self._prefix = prefix self._kwargs = kwargs self._dimension_separator = dimension_separator @@ -2810,7 +2865,7 @@ def __init__(self, prefix='zarr', dimension_separator=None, **kwargs): self.client = redis.Redis(**kwargs) def _key(self, key): - return '{prefix}:{key}'.format(prefix=self._prefix, key=key) + return "{prefix}:{key}".format(prefix=self._prefix, key=key) def __getitem__(self, key): return self.client[self._key(key)] @@ -2825,9 +2880,8 @@ def __delitem__(self, key): raise KeyError(key) def keylist(self): - offset = len(self._key('')) # length of prefix - return [key[offset:].decode('utf-8') - for key in self.client.keys(self._key('*'))] + offset = len(self._key("")) # length of prefix + return [key[offset:].decode("utf-8") for key in self.client.keys(self._key("*"))] def keys(self): yield from self.keylist() @@ -2893,10 +2947,11 @@ def __init__(self, store: StoreLike, metadata_key=".zmetadata"): meta = json_loads(self.store[metadata_key]) # check format of consolidated metadata - consolidated_format = meta.get('zarr_consolidated_format', None) + consolidated_format = meta.get("zarr_consolidated_format", None) if consolidated_format != 1: - raise MetadataError('unsupported zarr consolidated metadata format: %s' % - consolidated_format) + raise MetadataError( + "unsupported zarr consolidated metadata format: %s" % consolidated_format + ) # decode metadata self.meta_store: Store = KVStore(meta["metadata"]) diff --git a/zarr/tests/test_attrs.py b/zarr/tests/test_attrs.py index d6151b4f29..7dd5b340a2 100644 --- a/zarr/tests/test_attrs.py +++ b/zarr/tests/test_attrs.py @@ -24,31 +24,30 @@ def _init_store(version): return KVStoreV3(dict()) -class TestAttributes(): - +class TestAttributes: def init_attributes(self, store, read_only=False, cache=True, zarr_version=2): - root = '.z' if zarr_version == 2 else meta_root - return Attributes(store, key=root + 'attrs', read_only=read_only, cache=cache) + root = ".z" if zarr_version == 2 else meta_root + return Attributes(store, key=root + "attrs", read_only=read_only, cache=cache) def test_storage(self, zarr_version): store = _init_store(zarr_version) - root = '.z' if zarr_version == 2 else meta_root - attrs_key = root + 'attrs' + root = ".z" if zarr_version == 2 else meta_root + attrs_key = root + "attrs" a = Attributes(store=store, key=attrs_key) assert isinstance(a.store, KVStore) - assert 'foo' not in a - assert 'bar' not in a + assert "foo" not in a + assert "bar" not in a assert dict() == a.asdict() - a['foo'] = 'bar' - a['baz'] = 42 + a["foo"] = "bar" + a["baz"] = 42 assert attrs_key in store assert isinstance(store[attrs_key], bytes) - d = json.loads(str(store[attrs_key], 'utf-8')) + d = json.loads(str(store[attrs_key], "utf-8")) if zarr_version == 3: - d = d['attributes'] - assert dict(foo='bar', baz=42) == d + d = d["attributes"] + assert dict(foo="bar", baz=42) == d def test_utf8_encoding(self, zarr_version): @@ -65,42 +64,42 @@ def test_utf8_encoding(self, zarr_version): # fixture data fixture = group(store=DirectoryStore(str(fixdir))) - assert fixture['utf8attrs'].attrs.asdict() == dict(foo='た') + assert fixture["utf8attrs"].attrs.asdict() == dict(foo="た") def test_get_set_del_contains(self, zarr_version): store = _init_store(zarr_version) a = self.init_attributes(store, zarr_version=zarr_version) - assert 'foo' not in a - a['foo'] = 'bar' - a['baz'] = 42 - assert 
'foo' in a - assert 'baz' in a - assert 'bar' == a['foo'] - assert 42 == a['baz'] - del a['foo'] - assert 'foo' not in a + assert "foo" not in a + a["foo"] = "bar" + a["baz"] = 42 + assert "foo" in a + assert "baz" in a + assert "bar" == a["foo"] + assert 42 == a["baz"] + del a["foo"] + assert "foo" not in a with pytest.raises(KeyError): # noinspection PyStatementEffect - a['foo'] + a["foo"] def test_update_put(self, zarr_version): store = _init_store(zarr_version) a = self.init_attributes(store, zarr_version=zarr_version) - assert 'foo' not in a - assert 'bar' not in a - assert 'baz' not in a + assert "foo" not in a + assert "bar" not in a + assert "baz" not in a - a.update(foo='spam', bar=42, baz=4.2) - assert a['foo'] == 'spam' - assert a['bar'] == 42 - assert a['baz'] == 4.2 + a.update(foo="spam", bar=42, baz=4.2) + assert a["foo"] == "spam" + assert a["bar"] == 42 + assert a["baz"] == 4.2 - a.put(dict(foo='eggs', bar=84)) - assert a['foo'] == 'eggs' - assert a['bar'] == 84 - assert 'baz' not in a + a.put(dict(foo="eggs", bar=84)) + assert a["foo"] == "eggs" + assert a["bar"] == 84 + assert "baz" not in a def test_iterators(self, zarr_version): @@ -112,182 +111,182 @@ def test_iterators(self, zarr_version): assert set() == set(a.values()) assert set() == set(a.items()) - a['foo'] = 'bar' - a['baz'] = 42 + a["foo"] = "bar" + a["baz"] = 42 assert 2 == len(a) - assert {'foo', 'baz'} == set(a) - assert {'foo', 'baz'} == set(a.keys()) - assert {'bar', 42} == set(a.values()) - assert {('foo', 'bar'), ('baz', 42)} == set(a.items()) + assert {"foo", "baz"} == set(a) + assert {"foo", "baz"} == set(a.keys()) + assert {"bar", 42} == set(a.values()) + assert {("foo", "bar"), ("baz", 42)} == set(a.items()) def test_read_only(self, zarr_version): store = _init_store(zarr_version) a = self.init_attributes(store, read_only=True, zarr_version=zarr_version) if zarr_version == 2: - store['.zattrs'] = json.dumps(dict(foo='bar', baz=42)).encode('ascii') + store[".zattrs"] = json.dumps(dict(foo="bar", baz=42)).encode("ascii") else: - store['meta/root/attrs'] = json.dumps( - dict(attributes=dict(foo='bar', baz=42)) - ).encode('ascii') - assert a['foo'] == 'bar' - assert a['baz'] == 42 + store["meta/root/attrs"] = json.dumps(dict(attributes=dict(foo="bar", baz=42))).encode( + "ascii" + ) + assert a["foo"] == "bar" + assert a["baz"] == 42 with pytest.raises(PermissionError): - a['foo'] = 'quux' + a["foo"] = "quux" with pytest.raises(PermissionError): - del a['foo'] + del a["foo"] with pytest.raises(PermissionError): - a.update(foo='quux') + a.update(foo="quux") def test_key_completions(self, zarr_version): store = _init_store(zarr_version) a = self.init_attributes(store, zarr_version=zarr_version) d = a._ipython_key_completions_() - assert 'foo' not in d - assert '123' not in d - assert 'baz' not in d - assert 'asdf;' not in d - a['foo'] = 42 - a['123'] = 4.2 - a['asdf;'] = 'ghjkl;' + assert "foo" not in d + assert "123" not in d + assert "baz" not in d + assert "asdf;" not in d + a["foo"] = 42 + a["123"] = 4.2 + a["asdf;"] = "ghjkl;" d = a._ipython_key_completions_() - assert 'foo' in d - assert '123' in d - assert 'asdf;' in d - assert 'baz' not in d + assert "foo" in d + assert "123" in d + assert "asdf;" in d + assert "baz" not in d def test_caching_on(self, zarr_version): # caching is turned on by default # setup store store = CountingDict() if zarr_version == 2 else CountingDictV3() - attrs_key = '.zattrs' if zarr_version == 2 else 'meta/root/attrs' - assert 0 == store.counter['__getitem__', attrs_key] - 
assert 0 == store.counter['__setitem__', attrs_key] + attrs_key = ".zattrs" if zarr_version == 2 else "meta/root/attrs" + assert 0 == store.counter["__getitem__", attrs_key] + assert 0 == store.counter["__setitem__", attrs_key] if zarr_version == 2: - store[attrs_key] = json.dumps(dict(foo='xxx', bar=42)).encode('ascii') + store[attrs_key] = json.dumps(dict(foo="xxx", bar=42)).encode("ascii") else: - store[attrs_key] = json.dumps(dict(attributes=dict(foo='xxx', bar=42))).encode('ascii') - assert 0 == store.counter['__getitem__', attrs_key] - assert 1 == store.counter['__setitem__', attrs_key] + store[attrs_key] = json.dumps(dict(attributes=dict(foo="xxx", bar=42))).encode("ascii") + assert 0 == store.counter["__getitem__", attrs_key] + assert 1 == store.counter["__setitem__", attrs_key] # setup attributes a = self.init_attributes(store, zarr_version=zarr_version) # test __getitem__ causes all attributes to be cached - assert a['foo'] == 'xxx' - assert 1 == store.counter['__getitem__', attrs_key] - assert a['bar'] == 42 - assert 1 == store.counter['__getitem__', attrs_key] - assert a['foo'] == 'xxx' - assert 1 == store.counter['__getitem__', attrs_key] + assert a["foo"] == "xxx" + assert 1 == store.counter["__getitem__", attrs_key] + assert a["bar"] == 42 + assert 1 == store.counter["__getitem__", attrs_key] + assert a["foo"] == "xxx" + assert 1 == store.counter["__getitem__", attrs_key] # test __setitem__ updates the cache - a['foo'] = 'yyy' + a["foo"] = "yyy" get_cnt = 2 if zarr_version == 2 else 3 - assert get_cnt == store.counter['__getitem__', attrs_key] - assert 2 == store.counter['__setitem__', attrs_key] - assert a['foo'] == 'yyy' - assert get_cnt == store.counter['__getitem__', attrs_key] - assert 2 == store.counter['__setitem__', attrs_key] + assert get_cnt == store.counter["__getitem__", attrs_key] + assert 2 == store.counter["__setitem__", attrs_key] + assert a["foo"] == "yyy" + assert get_cnt == store.counter["__getitem__", attrs_key] + assert 2 == store.counter["__setitem__", attrs_key] # test update() updates the cache - a.update(foo='zzz', bar=84) + a.update(foo="zzz", bar=84) get_cnt = 3 if zarr_version == 2 else 5 - assert get_cnt == store.counter['__getitem__', attrs_key] - assert 3 == store.counter['__setitem__', attrs_key] - assert a['foo'] == 'zzz' - assert a['bar'] == 84 - assert get_cnt == store.counter['__getitem__', attrs_key] - assert 3 == store.counter['__setitem__', attrs_key] + assert get_cnt == store.counter["__getitem__", attrs_key] + assert 3 == store.counter["__setitem__", attrs_key] + assert a["foo"] == "zzz" + assert a["bar"] == 84 + assert get_cnt == store.counter["__getitem__", attrs_key] + assert 3 == store.counter["__setitem__", attrs_key] # test __contains__ uses the cache - assert 'foo' in a - assert get_cnt == store.counter['__getitem__', attrs_key] - assert 3 == store.counter['__setitem__', attrs_key] - assert 'spam' not in a - assert get_cnt == store.counter['__getitem__', attrs_key] - assert 3 == store.counter['__setitem__', attrs_key] + assert "foo" in a + assert get_cnt == store.counter["__getitem__", attrs_key] + assert 3 == store.counter["__setitem__", attrs_key] + assert "spam" not in a + assert get_cnt == store.counter["__getitem__", attrs_key] + assert 3 == store.counter["__setitem__", attrs_key] # test __delitem__ updates the cache - del a['bar'] + del a["bar"] get_cnt = 4 if zarr_version == 2 else 7 - assert get_cnt == store.counter['__getitem__', attrs_key] - assert 4 == store.counter['__setitem__', attrs_key] - assert 'bar' not in a - 
assert get_cnt == store.counter['__getitem__', attrs_key] - assert 4 == store.counter['__setitem__', attrs_key] + assert get_cnt == store.counter["__getitem__", attrs_key] + assert 4 == store.counter["__setitem__", attrs_key] + assert "bar" not in a + assert get_cnt == store.counter["__getitem__", attrs_key] + assert 4 == store.counter["__setitem__", attrs_key] # test refresh() if zarr_version == 2: - store[attrs_key] = json.dumps(dict(foo='xxx', bar=42)).encode('ascii') + store[attrs_key] = json.dumps(dict(foo="xxx", bar=42)).encode("ascii") else: - store[attrs_key] = json.dumps(dict(attributes=dict(foo='xxx', bar=42))).encode('ascii') - assert get_cnt == store.counter['__getitem__', attrs_key] + store[attrs_key] = json.dumps(dict(attributes=dict(foo="xxx", bar=42))).encode("ascii") + assert get_cnt == store.counter["__getitem__", attrs_key] a.refresh() get_cnt = 5 if zarr_version == 2 else 8 - assert get_cnt == store.counter['__getitem__', attrs_key] - assert a['foo'] == 'xxx' - assert get_cnt == store.counter['__getitem__', attrs_key] - assert a['bar'] == 42 - assert get_cnt == store.counter['__getitem__', attrs_key] + assert get_cnt == store.counter["__getitem__", attrs_key] + assert a["foo"] == "xxx" + assert get_cnt == store.counter["__getitem__", attrs_key] + assert a["bar"] == 42 + assert get_cnt == store.counter["__getitem__", attrs_key] def test_caching_off(self, zarr_version): # setup store store = CountingDict() if zarr_version == 2 else CountingDictV3() - attrs_key = '.zattrs' if zarr_version == 2 else 'meta/root/attrs' - assert 0 == store.counter['__getitem__', attrs_key] - assert 0 == store.counter['__setitem__', attrs_key] + attrs_key = ".zattrs" if zarr_version == 2 else "meta/root/attrs" + assert 0 == store.counter["__getitem__", attrs_key] + assert 0 == store.counter["__setitem__", attrs_key] if zarr_version == 2: - store[attrs_key] = json.dumps(dict(foo='xxx', bar=42)).encode('ascii') + store[attrs_key] = json.dumps(dict(foo="xxx", bar=42)).encode("ascii") else: - store[attrs_key] = json.dumps(dict(attributes=dict(foo='xxx', bar=42))).encode('ascii') - assert 0 == store.counter['__getitem__', attrs_key] - assert 1 == store.counter['__setitem__', attrs_key] + store[attrs_key] = json.dumps(dict(attributes=dict(foo="xxx", bar=42))).encode("ascii") + assert 0 == store.counter["__getitem__", attrs_key] + assert 1 == store.counter["__setitem__", attrs_key] # setup attributes a = self.init_attributes(store, cache=False, zarr_version=zarr_version) # test __getitem__ - assert a['foo'] == 'xxx' - assert 1 == store.counter['__getitem__', attrs_key] - assert a['bar'] == 42 - assert 2 == store.counter['__getitem__', attrs_key] - assert a['foo'] == 'xxx' - assert 3 == store.counter['__getitem__', attrs_key] + assert a["foo"] == "xxx" + assert 1 == store.counter["__getitem__", attrs_key] + assert a["bar"] == 42 + assert 2 == store.counter["__getitem__", attrs_key] + assert a["foo"] == "xxx" + assert 3 == store.counter["__getitem__", attrs_key] # test __setitem__ - a['foo'] = 'yyy' + a["foo"] = "yyy" get_cnt = 4 if zarr_version == 2 else 5 - assert get_cnt == store.counter['__getitem__', attrs_key] - assert 2 == store.counter['__setitem__', attrs_key] - assert a['foo'] == 'yyy' + assert get_cnt == store.counter["__getitem__", attrs_key] + assert 2 == store.counter["__setitem__", attrs_key] + assert a["foo"] == "yyy" get_cnt = 5 if zarr_version == 2 else 6 - assert get_cnt == store.counter['__getitem__', attrs_key] - assert 2 == store.counter['__setitem__', attrs_key] + assert get_cnt 
== store.counter["__getitem__", attrs_key] + assert 2 == store.counter["__setitem__", attrs_key] # test update() - a.update(foo='zzz', bar=84) + a.update(foo="zzz", bar=84) get_cnt = 6 if zarr_version == 2 else 8 - assert get_cnt == store.counter['__getitem__', attrs_key] - assert 3 == store.counter['__setitem__', attrs_key] - assert a['foo'] == 'zzz' - assert a['bar'] == 84 + assert get_cnt == store.counter["__getitem__", attrs_key] + assert 3 == store.counter["__setitem__", attrs_key] + assert a["foo"] == "zzz" + assert a["bar"] == 84 get_cnt = 8 if zarr_version == 2 else 10 - assert get_cnt == store.counter['__getitem__', attrs_key] - assert 3 == store.counter['__setitem__', attrs_key] + assert get_cnt == store.counter["__getitem__", attrs_key] + assert 3 == store.counter["__setitem__", attrs_key] # test __contains__ - assert 'foo' in a + assert "foo" in a get_cnt = 9 if zarr_version == 2 else 11 - assert get_cnt == store.counter['__getitem__', attrs_key] - assert 3 == store.counter['__setitem__', attrs_key] - assert 'spam' not in a + assert get_cnt == store.counter["__getitem__", attrs_key] + assert 3 == store.counter["__setitem__", attrs_key] + assert "spam" not in a get_cnt = 10 if zarr_version == 2 else 12 - assert get_cnt == store.counter['__getitem__', attrs_key] - assert 3 == store.counter['__setitem__', attrs_key] + assert get_cnt == store.counter["__getitem__", attrs_key] + assert 3 == store.counter["__setitem__", attrs_key] def test_wrong_keys(self, zarr_version): store = _init_store(zarr_version) diff --git a/zarr/tests/test_convenience.py b/zarr/tests/test_convenience.py index 45ed9c3e11..389ce90a9d 100644 --- a/zarr/tests/test_convenience.py +++ b/zarr/tests/test_convenience.py @@ -45,17 +45,17 @@ ) from zarr.tests.util import have_fsspec -_VERSIONS = ((2, 3) if v3_api_available else (2, )) +_VERSIONS = (2, 3) if v3_api_available else (2,) def _init_creation_kwargs(zarr_version): - kwargs = {'zarr_version': zarr_version} + kwargs = {"zarr_version": zarr_version} if zarr_version == 3: - kwargs['path'] = 'dataset' + kwargs["path"] = "dataset" return kwargs -@pytest.mark.parametrize('zarr_version', _VERSIONS) +@pytest.mark.parametrize("zarr_version", _VERSIONS) def test_open_array(path_type, zarr_version): store = tempfile.mkdtemp() @@ -64,24 +64,24 @@ def test_open_array(path_type, zarr_version): kwargs = _init_creation_kwargs(zarr_version) # open array, create if doesn't exist - z = open(store, mode='a', shape=100, **kwargs) + z = open(store, mode="a", shape=100, **kwargs) assert isinstance(z, Array) assert z.shape == (100,) # open array, overwrite - z = open(store, mode='w', shape=200, **kwargs) + z = open(store, mode="w", shape=200, **kwargs) assert isinstance(z, Array) assert z.shape == (200,) # open array, read-only - z = open(store, mode='r', **kwargs) + z = open(store, mode="r", **kwargs) assert isinstance(z, Array) assert z.shape == (200,) assert z.read_only # path not found with pytest.raises(ValueError): - open('doesnotexist', mode='r') + open("doesnotexist", mode="r") @pytest.mark.parametrize("zarr_version", _VERSIONS) @@ -93,18 +93,18 @@ def test_open_group(path_type, zarr_version): kwargs = _init_creation_kwargs(zarr_version) # open group, create if doesn't exist - g = open(store, mode='a', **kwargs) - g.create_group('foo') + g = open(store, mode="a", **kwargs) + g.create_group("foo") assert isinstance(g, Group) - assert 'foo' in g + assert "foo" in g # open group, overwrite - g = open(store, mode='w', **kwargs) + g = open(store, mode="w", **kwargs) assert 
isinstance(g, Group) - assert 'foo' not in g + assert "foo" not in g # open group, read-only - g = open(store, mode='r', **kwargs) + g = open(store, mode="r", **kwargs) assert isinstance(g, Group) assert g.read_only @@ -113,13 +113,13 @@ def test_open_group(path_type, zarr_version): def test_save_errors(zarr_version): with pytest.raises(ValueError): # no arrays provided - save_group('data/group.zarr', zarr_version=zarr_version) + save_group("data/group.zarr", zarr_version=zarr_version) with pytest.raises(TypeError): # no array provided - save_array('data/group.zarr', zarr_version=zarr_version) + save_array("data/group.zarr", zarr_version=zarr_version) with pytest.raises(ValueError): # no arrays provided - save('data/group.zarr', zarr_version=zarr_version) + save("data/group.zarr", zarr_version=zarr_version) @pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") @@ -128,12 +128,12 @@ def test_zarr_v3_save_multiple_unnamed(): y = np.zeros(8) store = KVStoreV3(dict()) # no path provided - save_group(store, x, y, path='dataset', zarr_version=3) + save_group(store, x, y, path="dataset", zarr_version=3) # names become arr_{i} for unnamed *args - assert data_root + 'dataset/arr_0/c0' in store - assert data_root + 'dataset/arr_1/c0' in store - assert meta_root + 'dataset/arr_0.array.json' in store - assert meta_root + 'dataset/arr_1.array.json' in store + assert data_root + "dataset/arr_0/c0" in store + assert data_root + "dataset/arr_1/c0" in store + assert meta_root + "dataset/arr_0.array.json" in store + assert meta_root + "dataset/arr_1.array.json" in store @pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") @@ -141,47 +141,47 @@ def test_zarr_v3_save_errors(): x = np.ones(8) with pytest.raises(ValueError): # no path provided - save_group('data/group.zr3', x, zarr_version=3) + save_group("data/group.zr3", x, zarr_version=3) with pytest.raises(ValueError): # no path provided - save_array('data/group.zr3', x, zarr_version=3) + save_array("data/group.zr3", x, zarr_version=3) with pytest.raises(ValueError): # no path provided - save('data/group.zr3', x, zarr_version=3) + save("data/group.zr3", x, zarr_version=3) @pytest.mark.parametrize("zarr_version", _VERSIONS) def test_lazy_loader(zarr_version): foo = np.arange(100) bar = np.arange(100, 0, -1) - store = 'data/group.zarr' if zarr_version == 2 else 'data/group.zr3' + store = "data/group.zarr" if zarr_version == 2 else "data/group.zr3" kwargs = _init_creation_kwargs(zarr_version) save(store, foo=foo, bar=bar, **kwargs) loader = load(store, **kwargs) - assert 'foo' in loader - assert 'bar' in loader - assert 'baz' not in loader + assert "foo" in loader + assert "bar" in loader + assert "baz" not in loader assert len(loader) == 2 - assert sorted(loader) == ['bar', 'foo'] - assert_array_equal(foo, loader['foo']) - assert_array_equal(bar, loader['bar']) - assert 'LazyLoader: ' in repr(loader) + assert sorted(loader) == ["bar", "foo"] + assert_array_equal(foo, loader["foo"]) + assert_array_equal(bar, loader["bar"]) + assert "LazyLoader: " in repr(loader) @pytest.mark.parametrize("zarr_version", _VERSIONS) def test_load_array(zarr_version): foo = np.arange(100) bar = np.arange(100, 0, -1) - store = 'data/group.zarr' if zarr_version == 2 else 'data/group.zr3' + store = "data/group.zarr" if zarr_version == 2 else "data/group.zr3" kwargs = _init_creation_kwargs(zarr_version) save(store, foo=foo, bar=bar, **kwargs) # can also load arrays directly into a numpy array - for array_name in ['foo', 'bar']: - array_path = 'dataset/' + 
array_name if zarr_version == 3 else array_name + for array_name in ["foo", "bar"]: + array_path = "dataset/" + array_name if zarr_version == 3 else array_name array = load(store, path=array_path, zarr_version=zarr_version) assert isinstance(array, np.ndarray) - if array_name == 'foo': + if array_name == "foo": assert_array_equal(foo, array) else: assert_array_equal(bar, array) @@ -191,27 +191,25 @@ def test_load_array(zarr_version): def test_tree(zarr_version): kwargs = _init_creation_kwargs(zarr_version) g1 = zarr.group(**kwargs) - g1.create_group('foo') - g3 = g1.create_group('bar') - g3.create_group('baz') - g5 = g3.create_group('qux') - g5.create_dataset('baz', shape=100, chunks=10) + g1.create_group("foo") + g3 = g1.create_group("bar") + g3.create_group("baz") + g5 = g3.create_group("qux") + g5.create_dataset("baz", shape=100, chunks=10) assert repr(zarr.tree(g1)) == repr(g1.tree()) assert str(zarr.tree(g1)) == str(g1.tree()) -@pytest.mark.parametrize('zarr_version', _VERSIONS) -@pytest.mark.parametrize('stores_from_path', [False, True]) +@pytest.mark.parametrize("zarr_version", _VERSIONS) +@pytest.mark.parametrize("stores_from_path", [False, True]) @pytest.mark.parametrize( - 'with_chunk_store,listable', + "with_chunk_store,listable", [(False, True), (True, True), (False, False)], - ids=['default-listable', 'with_chunk_store-listable', 'default-unlistable'] + ids=["default-listable", "with_chunk_store-listable", "default-unlistable"], ) -def test_consolidate_metadata(with_chunk_store, - zarr_version, - listable, - monkeypatch, - stores_from_path): +def test_consolidate_metadata( + with_chunk_store, zarr_version, listable, monkeypatch, stores_from_path +): # setup initial data if stores_from_path: @@ -222,7 +220,7 @@ def test_consolidate_metadata(with_chunk_store, atexit.register(atexit_rmtree, chunk_store) else: chunk_store = None - version_kwarg = {'zarr_version': zarr_version} + version_kwarg = {"zarr_version": zarr_version} else: if zarr_version == 2: store = MemoryStore() @@ -231,19 +229,19 @@ def test_consolidate_metadata(with_chunk_store, store = MemoryStoreV3() chunk_store = MemoryStoreV3() if with_chunk_store else None version_kwarg = {} - path = 'dataset' if zarr_version == 3 else None + path = "dataset" if zarr_version == 3 else None z = group(store, chunk_store=chunk_store, path=path, **version_kwarg) # Reload the actual store implementation in case str store_to_copy = z.store - z.create_group('g1') - g2 = z.create_group('g2') - g2.attrs['hello'] = 'world' - arr = g2.create_dataset('arr', shape=(20, 20), chunks=(5, 5), dtype='f8') + z.create_group("g1") + g2 = z.create_group("g2") + g2.attrs["hello"] = "world" + arr = g2.create_dataset("arr", shape=(20, 20), chunks=(5, 5), dtype="f8") assert 16 == arr.nchunks assert 0 == arr.nchunks_initialized - arr.attrs['data'] = 1 + arr.attrs["data"] = 1 arr[:] = 1.0 assert 16 == arr.nchunks_initialized @@ -259,31 +257,35 @@ def test_consolidate_metadata(with_chunk_store, consolidate_metadata(store_class, path=None) with pytest.raises(ValueError): - consolidate_metadata(store_class, path='') + consolidate_metadata(store_class, path="") # perform consolidation out = consolidate_metadata(store_class, path=path) assert isinstance(out, Group) - assert ['g1', 'g2'] == list(out) + assert ["g1", "g2"] == list(out) if not stores_from_path: if zarr_version == 2: assert isinstance(out._store, ConsolidatedMetadataStore) - assert '.zmetadata' in store - meta_keys = ['.zgroup', - 'g1/.zgroup', - 'g2/.zgroup', - 'g2/.zattrs', - 'g2/arr/.zarray', - 
'g2/arr/.zattrs'] + assert ".zmetadata" in store + meta_keys = [ + ".zgroup", + "g1/.zgroup", + "g2/.zgroup", + "g2/.zattrs", + "g2/arr/.zarray", + "g2/arr/.zattrs", + ] else: assert isinstance(out._store, ConsolidatedMetadataStoreV3) - assert 'meta/root/consolidated/.zmetadata' in store - meta_keys = ['zarr.json', - meta_root + 'dataset.group.json', - meta_root + 'dataset/g1.group.json', - meta_root + 'dataset/g2.group.json', - meta_root + 'dataset/g2/arr.array.json', - 'meta/root/consolidated.group.json'] + assert "meta/root/consolidated/.zmetadata" in store + meta_keys = [ + "zarr.json", + meta_root + "dataset.group.json", + meta_root + "dataset/g1.group.json", + meta_root + "dataset/g2.group.json", + meta_root + "dataset/g2/arr.array.json", + "meta/root/consolidated.group.json", + ] for key in meta_keys: del store[key] @@ -307,9 +309,9 @@ def test_consolidate_metadata(with_chunk_store, # open consolidated z2 = open_consolidated(store_to_open, chunk_store=chunk_store, path=path, **version_kwarg) - assert ['g1', 'g2'] == list(z2) - assert 'world' == z2.g2.attrs['hello'] - assert 1 == z2.g2.arr.attrs['data'] + assert ["g1", "g2"] == list(z2) + assert "world" == z2.g2.attrs["hello"] + assert 1 == z2.g2.arr.attrs["data"] assert (z2.g2.arr[:] == 1.0).all() assert 16 == z2.g2.arr.nchunks if listable: @@ -332,32 +334,32 @@ def test_consolidate_metadata(with_chunk_store, if zarr_version == 2: cmd = ConsolidatedMetadataStore(store) with pytest.raises(PermissionError): - del cmd['.zgroup'] + del cmd[".zgroup"] with pytest.raises(PermissionError): - cmd['.zgroup'] = None + cmd[".zgroup"] = None else: cmd = ConsolidatedMetadataStoreV3(store) with pytest.raises(PermissionError): - del cmd[meta_root + 'dataset.group.json'] + del cmd[meta_root + "dataset.group.json"] with pytest.raises(PermissionError): - cmd[meta_root + 'dataset.group.json'] = None + cmd[meta_root + "dataset.group.json"] = None # test getsize on the store assert isinstance(getsize(cmd), Integral) # test new metadata are not writeable with pytest.raises(PermissionError): - z2.create_group('g3') + z2.create_group("g3") with pytest.raises(PermissionError): - z2.create_dataset('spam', shape=42, chunks=7, dtype='i4') + z2.create_dataset("spam", shape=42, chunks=7, dtype="i4") with pytest.raises(PermissionError): - del z2['g2'] + del z2["g2"] # test consolidated metadata are not writeable with pytest.raises(PermissionError): - z2.g2.attrs['hello'] = 'universe' + z2.g2.attrs["hello"] = "universe" with pytest.raises(PermissionError): - z2.g2.arr.attrs['foo'] = 'bar' + z2.g2.arr.attrs["foo"] = "bar" # test the data are writeable z2.g2.arr[:] = 2 @@ -365,24 +367,31 @@ def test_consolidate_metadata(with_chunk_store, # test invalid modes with pytest.raises(ValueError): - open_consolidated(store, chunk_store=chunk_store, mode='a', path=path) + open_consolidated(store, chunk_store=chunk_store, mode="a", path=path) with pytest.raises(ValueError): - open_consolidated(store, chunk_store=chunk_store, mode='w', path=path) + open_consolidated(store, chunk_store=chunk_store, mode="w", path=path) with pytest.raises(ValueError): - open_consolidated(store, chunk_store=chunk_store, mode='w-', path=path) + open_consolidated(store, chunk_store=chunk_store, mode="w-", path=path) # make sure keyword arguments are passed through without error open_consolidated( - store, chunk_store=chunk_store, path=path, cache_attrs=True, synchronizer=None, + store, + chunk_store=chunk_store, + path=path, + cache_attrs=True, + synchronizer=None, **version_kwarg, ) 
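# (editor's note) the consolidate/open round trip exercised above, as a minimal
# stand-alone sketch using the public v2 API; the store and array names here
# are illustrative only:
#
#     >>> from zarr.storage import MemoryStore
#     >>> store = MemoryStore()
#     >>> root = zarr.group(store)
#     >>> arr = root.create_dataset("arr", shape=(20, 20), chunks=(5, 5), dtype="f8")
#     >>> out = zarr.consolidate_metadata(store)   # writes the '.zmetadata' key
#     >>> z2 = zarr.open_consolidated(store)       # read-only consolidated view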
-@pytest.mark.parametrize("options", ( - {"dimension_separator": "/"}, - {"dimension_separator": "."}, - {"dimension_separator": None}, -)) +@pytest.mark.parametrize( + "options", + ( + {"dimension_separator": "/"}, + {"dimension_separator": "."}, + {"dimension_separator": None}, + ), +) def test_save_array_separator(tmpdir, options): data = np.arange(6).reshape((3, 2)) url = tmpdir.join("test.zarr") @@ -395,9 +404,9 @@ class TestCopyStore(unittest.TestCase): def setUp(self): source = dict() - source['foo'] = b'xxx' - source['bar/baz'] = b'yyy' - source['bar/qux'] = b'zzz' + source["foo"] = b"xxx" + source["bar/baz"] = b"yyy" + source["bar/qux"] = b"zzz" self.source = source def _get_dest_store(self): @@ -414,13 +423,13 @@ def test_no_paths(self): def test_source_path(self): source = self.source # paths should be normalized - for source_path in 'bar', 'bar/', '/bar', '/bar/': + for source_path in "bar", "bar/", "/bar", "/bar/": dest = self._get_dest_store() copy_store(source, dest, source_path=source_path) assert 2 == len(dest) for key in source: - if key.startswith('bar/'): - dest_key = key.split('bar/')[1] + if key.startswith("bar/"): + dest_key = key.split("bar/")[1] assert source[key] == dest[dest_key] else: assert key not in dest @@ -428,64 +437,63 @@ def test_source_path(self): def test_dest_path(self): source = self.source # paths should be normalized - for dest_path in 'new', 'new/', '/new', '/new/': + for dest_path in "new", "new/", "/new", "/new/": dest = self._get_dest_store() copy_store(source, dest, dest_path=dest_path) assert len(source) == len(dest) for key in source: if self._version == 3: - dest_key = key[:10] + 'new/' + key[10:] + dest_key = key[:10] + "new/" + key[10:] else: - dest_key = 'new/' + key + dest_key = "new/" + key assert source[key] == dest[dest_key] def test_source_dest_path(self): source = self.source # paths should be normalized - for source_path in 'bar', 'bar/', '/bar', '/bar/': - for dest_path in 'new', 'new/', '/new', '/new/': + for source_path in "bar", "bar/", "/bar", "/bar/": + for dest_path in "new", "new/", "/new", "/new/": dest = self._get_dest_store() - copy_store(source, dest, source_path=source_path, - dest_path=dest_path) + copy_store(source, dest, source_path=source_path, dest_path=dest_path) assert 2 == len(dest) for key in source: - if key.startswith('bar/'): - dest_key = 'new/' + key.split('bar/')[1] + if key.startswith("bar/"): + dest_key = "new/" + key.split("bar/")[1] assert source[key] == dest[dest_key] else: assert key not in dest - assert ('new/' + key) not in dest + assert ("new/" + key) not in dest def test_excludes_includes(self): source = self.source # single excludes dest = self._get_dest_store() - excludes = 'f.*' + excludes = "f.*" copy_store(source, dest, excludes=excludes) assert len(dest) == 2 - root = '' if self._version == 2 else meta_root - assert root + 'foo' not in dest + root = "" if self._version == 2 else meta_root + assert root + "foo" not in dest # multiple excludes dest = self._get_dest_store() - excludes = 'b.z', '.*x' + excludes = "b.z", ".*x" copy_store(source, dest, excludes=excludes) assert len(dest) == 1 - assert root + 'foo' in dest - assert root + 'bar/baz' not in dest - assert root + 'bar/qux' not in dest + assert root + "foo" in dest + assert root + "bar/baz" not in dest + assert root + "bar/qux" not in dest # excludes and includes dest = self._get_dest_store() - excludes = 'b.*' - includes = '.*x' + excludes = "b.*" + includes = ".*x" copy_store(source, dest, excludes=excludes, includes=includes) 
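        # (editor's note) includes take precedence over excludes: "bar/qux"
        # matches the exclude pattern "b.*" but is still copied because it
        # also matches the include pattern ".*x".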
assert len(dest) == 2 - assert root + 'foo' in dest - assert root + 'bar/baz' not in dest - assert root + 'bar/qux' in dest + assert root + "foo" in dest + assert root + "bar/baz" not in dest + assert root + "bar/qux" in dest def test_dry_run(self): source = self.source @@ -496,8 +504,8 @@ def test_dry_run(self): def test_if_exists(self): source = self.source dest = self._get_dest_store() - root = '' if self._version == 2 else meta_root - dest[root + 'bar/baz'] = b'mmm' + root = "" if self._version == 2 else meta_root + dest[root + "bar/baz"] = b"mmm" # default ('raise') with pytest.raises(CopyError): @@ -505,25 +513,25 @@ def test_if_exists(self): # explicit 'raise' with pytest.raises(CopyError): - copy_store(source, dest, if_exists='raise') + copy_store(source, dest, if_exists="raise") # skip - copy_store(source, dest, if_exists='skip') + copy_store(source, dest, if_exists="skip") assert 3 == len(dest) - assert dest[root + 'foo'] == b'xxx' - assert dest[root + 'bar/baz'] == b'mmm' - assert dest[root + 'bar/qux'] == b'zzz' + assert dest[root + "foo"] == b"xxx" + assert dest[root + "bar/baz"] == b"mmm" + assert dest[root + "bar/qux"] == b"zzz" # replace - copy_store(source, dest, if_exists='replace') + copy_store(source, dest, if_exists="replace") assert 3 == len(dest) - assert dest[root + 'foo'] == b'xxx' - assert dest[root + 'bar/baz'] == b'yyy' - assert dest[root + 'bar/qux'] == b'zzz' + assert dest[root + "foo"] == b"xxx" + assert dest[root + "bar/baz"] == b"yyy" + assert dest[root + "bar/qux"] == b"zzz" # invalid option with pytest.raises(ValueError): - copy_store(source, dest, if_exists='foobar') + copy_store(source, dest, if_exists="foobar") @pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") @@ -533,9 +541,9 @@ class TestCopyStoreV3(TestCopyStore): def setUp(self): source = KVStoreV3(dict()) - source['meta/root/foo'] = b'xxx' - source['meta/root/bar/baz'] = b'yyy' - source['meta/root/bar/qux'] = b'zzz' + source["meta/root/foo"] = b"xxx" + source["meta/root/bar/baz"] = b"yyy" + source["meta/root/bar/qux"] = b"zzz" self.source = source def _get_dest_store(self): @@ -548,12 +556,11 @@ def test_mismatched_store_versions(self): copy_store(self.source, dest) -def check_copied_array(original, copied, without_attrs=False, - expect_props=None): +def check_copied_array(original, copied, without_attrs=False, expect_props=None): # setup - source_h5py = original.__module__.startswith('h5py.') - dest_h5py = copied.__module__.startswith('h5py.') + source_h5py = original.__module__.startswith("h5py.") + dest_h5py = copied.__module__.startswith("h5py.") zarr_to_zarr = not (source_h5py or dest_h5py) h5py_to_h5py = source_h5py and dest_h5py zarr_to_h5py = not source_h5py and dest_h5py @@ -564,25 +571,32 @@ def check_copied_array(original, copied, without_attrs=False, expect_props = expect_props.copy() # common properties in zarr and h5py - for p in 'dtype', 'shape', 'chunks': + for p in "dtype", "shape", "chunks": expect_props.setdefault(p, getattr(original, p)) # zarr-specific properties if zarr_to_zarr: - for p in 'compressor', 'filters', 'order', 'fill_value': + for p in "compressor", "filters", "order", "fill_value": expect_props.setdefault(p, getattr(original, p)) # h5py-specific properties if h5py_to_h5py: - for p in ('maxshape', 'compression', 'compression_opts', 'shuffle', - 'scaleoffset', 'fletcher32', 'fillvalue'): + for p in ( + "maxshape", + "compression", + "compression_opts", + "shuffle", + "scaleoffset", + "fletcher32", + "fillvalue", + ): expect_props.setdefault(p, 
getattr(original, p)) # common properties with some name differences if h5py_to_zarr: - expect_props.setdefault('fill_value', original.fillvalue) + expect_props.setdefault("fill_value", original.fillvalue) if zarr_to_h5py: - expect_props.setdefault('fillvalue', original.fill_value) + expect_props.setdefault("fillvalue", original.fill_value) # compare properties for k, v in expect_props.items(): @@ -596,18 +610,17 @@ def check_copied_array(original, copied, without_attrs=False, for k in original.attrs.keys(): assert k not in copied.attrs else: - if dest_h5py and 'filters' in original.attrs: + if dest_h5py and "filters" in original.attrs: # special case in v3 (storing filters metadata under attributes) # we explicitly do not copy this info over to HDF5 original_attrs = original.attrs.asdict().copy() - original_attrs.pop('filters') + original_attrs.pop("filters") else: original_attrs = original.attrs assert sorted(original_attrs.items()) == sorted(copied.attrs.items()) -def check_copied_group(original, copied, without_attrs=False, expect_props=None, - shallow=False): +def check_copied_group(original, copied, without_attrs=False, expect_props=None, shallow=False): # setup if expect_props is None: @@ -617,16 +630,20 @@ def check_copied_group(original, copied, without_attrs=False, expect_props=None, # compare children for k, v in original.items(): - if hasattr(v, 'shape'): + if hasattr(v, "shape"): assert k in copied - check_copied_array(v, copied[k], without_attrs=without_attrs, - expect_props=expect_props) + check_copied_array(v, copied[k], without_attrs=without_attrs, expect_props=expect_props) elif shallow: assert k not in copied else: assert k in copied - check_copied_group(v, copied[k], without_attrs=without_attrs, - shallow=shallow, expect_props=expect_props) + check_copied_group( + v, + copied[k], + without_attrs=without_attrs, + shallow=shallow, + expect_props=expect_props, + ) # compare attrs if without_attrs: @@ -657,7 +674,7 @@ def test_copy_all(): dry_run=False, ) - assert 'subgroup' in destination_group + assert "subgroup" in destination_group assert destination_group.attrs["info"] == "group attrs" assert destination_group.subgroup.attrs["info"] == "sub attrs" @@ -670,10 +687,10 @@ def test_copy_all_v3(): copy_all used to not copy attributes as `.keys()` """ - original_group = zarr.group(store=MemoryStoreV3(), path='group1', overwrite=True) + original_group = zarr.group(store=MemoryStoreV3(), path="group1", overwrite=True) original_group.create_group("subgroup") - destination_group = zarr.group(store=MemoryStoreV3(), path='group2', overwrite=True) + destination_group = zarr.group(store=MemoryStoreV3(), path="group2", overwrite=True) # copy from memory to directory store copy_all( @@ -681,200 +698,212 @@ def test_copy_all_v3(): destination_group, dry_run=False, ) - assert 'subgroup' in destination_group + assert "subgroup" in destination_group class TestCopy: - @pytest.fixture(params=[False, True], ids=['zarr', 'hdf5']) + @pytest.fixture(params=[False, True], ids=["zarr", "hdf5"]) def source(self, request, tmpdir): def prep_source(source): - foo = source.create_group('foo') - foo.attrs['experiment'] = 'weird science' - baz = foo.create_dataset('bar/baz', data=np.arange(100), chunks=(50,)) - baz.attrs['units'] = 'metres' + foo = source.create_group("foo") + foo.attrs["experiment"] = "weird science" + baz = foo.create_dataset("bar/baz", data=np.arange(100), chunks=(50,)) + baz.attrs["units"] = "metres" if request.param: - extra_kws = dict(compression='gzip', compression_opts=3, 
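                # (editor's note) these are h5py dataset-creation keywords; the
                # zarr branch below passes compressor/filters/fill_value instead.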
fillvalue=84, - shuffle=True, fletcher32=True) + extra_kws = dict( + compression="gzip", + compression_opts=3, + fillvalue=84, + shuffle=True, + fletcher32=True, + ) else: - extra_kws = dict(compressor=Zlib(3), order='F', fill_value=42, filters=[Adler32()]) - source.create_dataset('spam', data=np.arange(100, 200).reshape(20, 5), - chunks=(10, 2), dtype='i2', **extra_kws) + extra_kws = dict(compressor=Zlib(3), order="F", fill_value=42, filters=[Adler32()]) + source.create_dataset( + "spam", + data=np.arange(100, 200).reshape(20, 5), + chunks=(10, 2), + dtype="i2", + **extra_kws, + ) return source if request.param: - h5py = pytest.importorskip('h5py') - fn = tmpdir.join('source.h5') - with h5py.File(str(fn), mode='w') as h5f: + h5py = pytest.importorskip("h5py") + fn = tmpdir.join("source.h5") + with h5py.File(str(fn), mode="w") as h5f: yield prep_source(h5f) else: yield prep_source(group()) - @pytest.fixture(params=[False, True], ids=['zarr', 'hdf5']) + @pytest.fixture(params=[False, True], ids=["zarr", "hdf5"]) def dest(self, request, tmpdir): if request.param: - h5py = pytest.importorskip('h5py') - fn = tmpdir.join('dest.h5') - with h5py.File(str(fn), mode='w') as h5f: + h5py = pytest.importorskip("h5py") + fn = tmpdir.join("dest.h5") + with h5py.File(str(fn), mode="w") as h5f: yield h5f else: yield group() def test_copy_array(self, source, dest): # copy array with default options - copy(source['foo/bar/baz'], dest) - check_copied_array(source['foo/bar/baz'], dest['baz']) - copy(source['spam'], dest) - check_copied_array(source['spam'], dest['spam']) + copy(source["foo/bar/baz"], dest) + check_copied_array(source["foo/bar/baz"], dest["baz"]) + copy(source["spam"], dest) + check_copied_array(source["spam"], dest["spam"]) def test_copy_bad_dest(self, source, dest): # try to copy to an array, dest must be a group - dest = dest.create_dataset('eggs', shape=(100,)) + dest = dest.create_dataset("eggs", shape=(100,)) with pytest.raises(ValueError): - copy(source['foo/bar/baz'], dest) + copy(source["foo/bar/baz"], dest) def test_copy_array_name(self, source, dest): # copy array with name - copy(source['foo/bar/baz'], dest, name='qux') - assert 'baz' not in dest - check_copied_array(source['foo/bar/baz'], dest['qux']) + copy(source["foo/bar/baz"], dest, name="qux") + assert "baz" not in dest + check_copied_array(source["foo/bar/baz"], dest["qux"]) def test_copy_array_create_options(self, source, dest): - dest_h5py = dest.__module__.startswith('h5py.') + dest_h5py = dest.__module__.startswith("h5py.") # copy array, provide creation options compressor = Zlib(9) create_kws = dict(chunks=(10,)) if dest_h5py: - create_kws.update(compression='gzip', compression_opts=9, - shuffle=True, fletcher32=True, fillvalue=42) + create_kws.update( + compression="gzip", compression_opts=9, shuffle=True, fletcher32=True, fillvalue=42 + ) else: - create_kws.update(compressor=compressor, fill_value=42, order='F', - filters=[Adler32()]) - copy(source['foo/bar/baz'], dest, without_attrs=True, **create_kws) - check_copied_array(source['foo/bar/baz'], dest['baz'], - without_attrs=True, expect_props=create_kws) + create_kws.update(compressor=compressor, fill_value=42, order="F", filters=[Adler32()]) + copy(source["foo/bar/baz"], dest, without_attrs=True, **create_kws) + check_copied_array( + source["foo/bar/baz"], dest["baz"], without_attrs=True, expect_props=create_kws + ) def test_copy_array_exists_array(self, source, dest): # copy array, dest array in the way - dest.create_dataset('baz', shape=(10,)) + 
dest.create_dataset("baz", shape=(10,)) # raise with pytest.raises(CopyError): # should raise by default - copy(source['foo/bar/baz'], dest) - assert (10,) == dest['baz'].shape + copy(source["foo/bar/baz"], dest) + assert (10,) == dest["baz"].shape with pytest.raises(CopyError): - copy(source['foo/bar/baz'], dest, if_exists='raise') - assert (10,) == dest['baz'].shape + copy(source["foo/bar/baz"], dest, if_exists="raise") + assert (10,) == dest["baz"].shape # skip - copy(source['foo/bar/baz'], dest, if_exists='skip') - assert (10,) == dest['baz'].shape + copy(source["foo/bar/baz"], dest, if_exists="skip") + assert (10,) == dest["baz"].shape # replace - copy(source['foo/bar/baz'], dest, if_exists='replace') - check_copied_array(source['foo/bar/baz'], dest['baz']) + copy(source["foo/bar/baz"], dest, if_exists="replace") + check_copied_array(source["foo/bar/baz"], dest["baz"]) # invalid option with pytest.raises(ValueError): - copy(source['foo/bar/baz'], dest, if_exists='foobar') + copy(source["foo/bar/baz"], dest, if_exists="foobar") def test_copy_array_exists_group(self, source, dest): # copy array, dest group in the way - dest.create_group('baz') + dest.create_group("baz") # raise with pytest.raises(CopyError): - copy(source['foo/bar/baz'], dest) - assert not hasattr(dest['baz'], 'shape') + copy(source["foo/bar/baz"], dest) + assert not hasattr(dest["baz"], "shape") with pytest.raises(CopyError): - copy(source['foo/bar/baz'], dest, if_exists='raise') - assert not hasattr(dest['baz'], 'shape') + copy(source["foo/bar/baz"], dest, if_exists="raise") + assert not hasattr(dest["baz"], "shape") # skip - copy(source['foo/bar/baz'], dest, if_exists='skip') - assert not hasattr(dest['baz'], 'shape') + copy(source["foo/bar/baz"], dest, if_exists="skip") + assert not hasattr(dest["baz"], "shape") # replace - copy(source['foo/bar/baz'], dest, if_exists='replace') - check_copied_array(source['foo/bar/baz'], dest['baz']) + copy(source["foo/bar/baz"], dest, if_exists="replace") + check_copied_array(source["foo/bar/baz"], dest["baz"]) def test_copy_array_skip_initialized(self, source, dest): - dest_h5py = dest.__module__.startswith('h5py.') + dest_h5py = dest.__module__.startswith("h5py.") - dest.create_dataset('baz', shape=(100,), chunks=(10,), dtype='i8') - assert not np.all(source['foo/bar/baz'][:] == dest['baz'][:]) + dest.create_dataset("baz", shape=(100,), chunks=(10,), dtype="i8") + assert not np.all(source["foo/bar/baz"][:] == dest["baz"][:]) if dest_h5py: with pytest.raises(ValueError): # not available with copy to h5py - copy(source['foo/bar/baz'], dest, if_exists='skip_initialized') + copy(source["foo/bar/baz"], dest, if_exists="skip_initialized") else: # copy array, dest array exists but not yet initialized - copy(source['foo/bar/baz'], dest, if_exists='skip_initialized') - check_copied_array(source['foo/bar/baz'], dest['baz']) + copy(source["foo/bar/baz"], dest, if_exists="skip_initialized") + check_copied_array(source["foo/bar/baz"], dest["baz"]) # copy array, dest array exists and initialized, will be skipped - dest['baz'][:] = np.arange(100, 200) - copy(source['foo/bar/baz'], dest, if_exists='skip_initialized') - assert_array_equal(np.arange(100, 200), dest['baz'][:]) - assert not np.all(source['foo/bar/baz'][:] == dest['baz'][:]) + dest["baz"][:] = np.arange(100, 200) + copy(source["foo/bar/baz"], dest, if_exists="skip_initialized") + assert_array_equal(np.arange(100, 200), dest["baz"][:]) + assert not np.all(source["foo/bar/baz"][:] == dest["baz"][:]) def test_copy_group(self, source, 
dest): # copy group, default options - copy(source['foo'], dest) - check_copied_group(source['foo'], dest['foo']) + copy(source["foo"], dest) + check_copied_group(source["foo"], dest["foo"]) def test_copy_group_no_name(self, source, dest): with pytest.raises(TypeError): # need a name if copy root copy(source, dest) - copy(source, dest, name='root') - check_copied_group(source, dest['root']) + copy(source, dest, name="root") + check_copied_group(source, dest["root"]) def test_copy_group_options(self, source, dest): # copy group, non-default options - copy(source['foo'], dest, name='qux', without_attrs=True) - assert 'foo' not in dest - check_copied_group(source['foo'], dest['qux'], without_attrs=True) + copy(source["foo"], dest, name="qux", without_attrs=True) + assert "foo" not in dest + check_copied_group(source["foo"], dest["qux"], without_attrs=True) def test_copy_group_shallow(self, source, dest): # copy group, shallow - copy(source, dest, name='eggs', shallow=True) - check_copied_group(source, dest['eggs'], shallow=True) + copy(source, dest, name="eggs", shallow=True) + check_copied_group(source, dest["eggs"], shallow=True) def test_copy_group_exists_group(self, source, dest): # copy group, dest groups exist - dest.create_group('foo/bar') - copy(source['foo'], dest) - check_copied_group(source['foo'], dest['foo']) + dest.create_group("foo/bar") + copy(source["foo"], dest) + check_copied_group(source["foo"], dest["foo"]) def test_copy_group_exists_array(self, source, dest): # copy group, dest array in the way - dest.create_dataset('foo/bar', shape=(10,)) + dest.create_dataset("foo/bar", shape=(10,)) # raise with pytest.raises(CopyError): - copy(source['foo'], dest) - assert dest['foo/bar'].shape == (10,) + copy(source["foo"], dest) + assert dest["foo/bar"].shape == (10,) with pytest.raises(CopyError): - copy(source['foo'], dest, if_exists='raise') - assert dest['foo/bar'].shape == (10,) + copy(source["foo"], dest, if_exists="raise") + assert dest["foo/bar"].shape == (10,) # skip - copy(source['foo'], dest, if_exists='skip') - assert dest['foo/bar'].shape == (10,) + copy(source["foo"], dest, if_exists="skip") + assert dest["foo/bar"].shape == (10,) # replace - copy(source['foo'], dest, if_exists='replace') - check_copied_group(source['foo'], dest['foo']) + copy(source["foo"], dest, if_exists="replace") + check_copied_group(source["foo"], dest["foo"]) def test_copy_group_dry_run(self, source, dest): # dry run, empty destination - n_copied, n_skipped, n_bytes_copied = \ - copy(source['foo'], dest, dry_run=True, return_stats=True) + n_copied, n_skipped, n_bytes_copied = copy( + source["foo"], dest, dry_run=True, return_stats=True + ) assert 0 == len(dest) assert 3 == n_copied assert 0 == n_skipped @@ -882,133 +911,144 @@ def test_copy_group_dry_run(self, source, dest): # dry run, array exists in destination baz = np.arange(100, 200) - dest.create_dataset('foo/bar/baz', data=baz) - assert not np.all(source['foo/bar/baz'][:] == dest['foo/bar/baz'][:]) + dest.create_dataset("foo/bar/baz", data=baz) + assert not np.all(source["foo/bar/baz"][:] == dest["foo/bar/baz"][:]) assert 1 == len(dest) # raise with pytest.raises(CopyError): - copy(source['foo'], dest, dry_run=True) + copy(source["foo"], dest, dry_run=True) assert 1 == len(dest) # skip - n_copied, n_skipped, n_bytes_copied = \ - copy(source['foo'], dest, dry_run=True, if_exists='skip', - return_stats=True) + n_copied, n_skipped, n_bytes_copied = copy( + source["foo"], dest, dry_run=True, if_exists="skip", return_stats=True + ) assert 1 == 
len(dest) assert 2 == n_copied assert 1 == n_skipped assert 0 == n_bytes_copied - assert_array_equal(baz, dest['foo/bar/baz']) + assert_array_equal(baz, dest["foo/bar/baz"]) # replace - n_copied, n_skipped, n_bytes_copied = \ - copy(source['foo'], dest, dry_run=True, if_exists='replace', - return_stats=True) + n_copied, n_skipped, n_bytes_copied = copy( + source["foo"], dest, dry_run=True, if_exists="replace", return_stats=True + ) assert 1 == len(dest) assert 3 == n_copied assert 0 == n_skipped assert 0 == n_bytes_copied - assert_array_equal(baz, dest['foo/bar/baz']) + assert_array_equal(baz, dest["foo/bar/baz"]) def test_logging(self, source, dest, tmpdir): # callable log - copy(source['foo'], dest, dry_run=True, log=print) + copy(source["foo"], dest, dry_run=True, log=print) # file name - fn = str(tmpdir.join('log_name')) - copy(source['foo'], dest, dry_run=True, log=fn) + fn = str(tmpdir.join("log_name")) + copy(source["foo"], dest, dry_run=True, log=fn) # file - with tmpdir.join('log_file').open(mode='w') as f: - copy(source['foo'], dest, dry_run=True, log=f) + with tmpdir.join("log_file").open(mode="w") as f: + copy(source["foo"], dest, dry_run=True, log=f) # bad option with pytest.raises(TypeError): - copy(source['foo'], dest, dry_run=True, log=True) + copy(source["foo"], dest, dry_run=True, log=True) @pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") class TestCopyV3(TestCopy): - - @pytest.fixture(params=['zarr', 'hdf5']) + @pytest.fixture(params=["zarr", "hdf5"]) def source(self, request, tmpdir): def prep_source(source): - foo = source.create_group('foo') - foo.attrs['experiment'] = 'weird science' - baz = foo.create_dataset('bar/baz', data=np.arange(100), chunks=(50,)) - baz.attrs['units'] = 'metres' - if request.param == 'hdf5': - extra_kws = dict(compression='gzip', compression_opts=3, fillvalue=84, - shuffle=True, fletcher32=True) + foo = source.create_group("foo") + foo.attrs["experiment"] = "weird science" + baz = foo.create_dataset("bar/baz", data=np.arange(100), chunks=(50,)) + baz.attrs["units"] = "metres" + if request.param == "hdf5": + extra_kws = dict( + compression="gzip", + compression_opts=3, + fillvalue=84, + shuffle=True, + fletcher32=True, + ) else: - extra_kws = dict(compressor=Zlib(3), order='F', fill_value=42, filters=[Adler32()]) - source.create_dataset('spam', data=np.arange(100, 200).reshape(20, 5), - chunks=(10, 2), dtype='i2', **extra_kws) + extra_kws = dict(compressor=Zlib(3), order="F", fill_value=42, filters=[Adler32()]) + source.create_dataset( + "spam", + data=np.arange(100, 200).reshape(20, 5), + chunks=(10, 2), + dtype="i2", + **extra_kws, + ) return source - if request.param == 'hdf5': - h5py = pytest.importorskip('h5py') - fn = tmpdir.join('source.h5') - with h5py.File(str(fn), mode='w') as h5f: + if request.param == "hdf5": + h5py = pytest.importorskip("h5py") + fn = tmpdir.join("source.h5") + with h5py.File(str(fn), mode="w") as h5f: yield prep_source(h5f) - elif request.param == 'zarr': - yield prep_source(group(path='group1', zarr_version=3)) + elif request.param == "zarr": + yield prep_source(group(path="group1", zarr_version=3)) # Test with various destination StoreV3 types as TestCopyV3 covers rmdir - destinations = ['hdf5', 'zarr', 'zarr_kvstore', 'zarr_directorystore', 'zarr_sqlitestore'] + destinations = ["hdf5", "zarr", "zarr_kvstore", "zarr_directorystore", "zarr_sqlitestore"] if have_fsspec: - destinations += ['zarr_fsstore'] + destinations += ["zarr_fsstore"] @pytest.fixture(params=destinations) def dest(self, 
request, tmpdir): - if request.param == 'hdf5': - h5py = pytest.importorskip('h5py') - fn = tmpdir.join('dest.h5') - with h5py.File(str(fn), mode='w') as h5f: + if request.param == "hdf5": + h5py = pytest.importorskip("h5py") + fn = tmpdir.join("dest.h5") + with h5py.File(str(fn), mode="w") as h5f: yield h5f - elif request.param == 'zarr': - yield group(path='group2', zarr_version=3) - elif request.param == 'zarr_kvstore': + elif request.param == "zarr": + yield group(path="group2", zarr_version=3) + elif request.param == "zarr_kvstore": store = KVStoreV3(dict()) - yield group(store, path='group2', zarr_version=3) - elif request.param == 'zarr_fsstore': - fn = tmpdir.join('dest.zr3') + yield group(store, path="group2", zarr_version=3) + elif request.param == "zarr_fsstore": + fn = tmpdir.join("dest.zr3") store = FSStoreV3(str(fn), auto_mkdir=True) - yield group(store, path='group2', zarr_version=3) - elif request.param == 'zarr_directorystore': - fn = tmpdir.join('dest.zr3') + yield group(store, path="group2", zarr_version=3) + elif request.param == "zarr_directorystore": + fn = tmpdir.join("dest.zr3") store = DirectoryStoreV3(str(fn)) - yield group(store, path='group2', zarr_version=3) - elif request.param == 'zarr_sqlitestore': - fn = tmpdir.join('dest.db') + yield group(store, path="group2", zarr_version=3) + elif request.param == "zarr_sqlitestore": + fn = tmpdir.join("dest.db") store = SQLiteStoreV3(str(fn)) - yield group(store, path='group2', zarr_version=3) + yield group(store, path="group2", zarr_version=3) def test_copy_array_create_options(self, source, dest): - dest_h5py = dest.__module__.startswith('h5py.') + dest_h5py = dest.__module__.startswith("h5py.") # copy array, provide creation options compressor = Zlib(9) create_kws = dict(chunks=(10,)) if dest_h5py: - create_kws.update(compression='gzip', compression_opts=9, - shuffle=True, fletcher32=True, fillvalue=42) + create_kws.update( + compression="gzip", compression_opts=9, shuffle=True, fletcher32=True, fillvalue=42 + ) else: # v3 case has no filters argument in zarr create_kws - create_kws.update(compressor=compressor, fill_value=42, order='F') - copy(source['foo/bar/baz'], dest, without_attrs=True, **create_kws) - check_copied_array(source['foo/bar/baz'], dest['baz'], - without_attrs=True, expect_props=create_kws) + create_kws.update(compressor=compressor, fill_value=42, order="F") + copy(source["foo/bar/baz"], dest, without_attrs=True, **create_kws) + check_copied_array( + source["foo/bar/baz"], dest["baz"], without_attrs=True, expect_props=create_kws + ) def test_copy_group_no_name(self, source, dest): - if source.__module__.startswith('h5py'): + if source.__module__.startswith("h5py"): with pytest.raises(TypeError): copy(source, dest) else: # For v3, dest.name will be inferred from source.name copy(source, dest) - check_copied_group(source, dest[source.name.lstrip('/')]) + check_copied_group(source, dest[source.name.lstrip("/")]) - copy(source, dest, name='root') - check_copied_group(source, dest['root']) + copy(source, dest, name="root") + check_copied_group(source, dest["root"]) diff --git a/zarr/tests/test_core.py b/zarr/tests/test_core.py index ab1a6e8aa7..d86c3bf39b 100644 --- a/zarr/tests/test_core.py +++ b/zarr/tests/test_core.py @@ -10,9 +10,22 @@ import numpy as np import packaging.version import pytest -from numcodecs import (BZ2, JSON, LZ4, Blosc, Categorize, Delta, - FixedScaleOffset, GZip, MsgPack, Pickle, VLenArray, - VLenBytes, VLenUTF8, Zlib) +from numcodecs import ( + BZ2, + JSON, + LZ4, + Blosc, + 
Categorize, + Delta, + FixedScaleOffset, + GZip, + MsgPack, + Pickle, + VLenArray, + VLenBytes, + VLenUTF8, + Zlib, +) from numcodecs.compat import ensure_bytes, ensure_ndarray from numcodecs.tests.common import greetings from numpy.testing import assert_array_almost_equal, assert_array_equal @@ -65,7 +78,7 @@ class TestArray(unittest.TestCase): version = 2 - root = '' + root = "" KVStoreClass = KVStore def test_array_init(self): @@ -77,7 +90,7 @@ def test_array_init(self): assert isinstance(a, Array) assert (100,) == a.shape assert (10,) == a.chunks - assert '' == a.path + assert "" == a.path assert a.name is None assert a.basename is None assert store is a.store @@ -89,14 +102,14 @@ def test_array_init(self): # initialize at path store = self.KVStoreClass(dict()) - init_array(store, shape=100, chunks=10, path='foo/bar', dtype='')) + a2 = self.create_array(shape=1000, chunks=100, dtype=dtype.newbyteorder(">")) a2[:] = 1 x2 = a2[:] assert_array_equal(x1, x2) @@ -1543,46 +1558,52 @@ def test_endian(self): a2.store.close() def test_attributes(self): - a = self.create_array(shape=10, chunks=10, dtype='i8') - a.attrs['foo'] = 'bar' + a = self.create_array(shape=10, chunks=10, dtype="i8") + a.attrs["foo"] = "bar" assert a.attrs.key in a.store attrs = json_loads(a.store[a.attrs.key]) if self.version > 2: # in v3, attributes are in a sub-dictionary of the metadata - attrs = attrs['attributes'] - assert 'foo' in attrs and attrs['foo'] == 'bar' + attrs = attrs["attributes"] + assert "foo" in attrs and attrs["foo"] == "bar" - a.attrs['bar'] = 'foo' + a.attrs["bar"] = "foo" assert a.attrs.key in a.store attrs = json_loads(a.store[a.attrs.key]) if self.version > 2: # in v3, attributes are in a sub-dictionary of the metadata - attrs = attrs['attributes'] - assert 'foo' in attrs and attrs['foo'] == 'bar' - assert 'bar' in attrs and attrs['bar'] == 'foo' + attrs = attrs["attributes"] + assert "foo" in attrs and attrs["foo"] == "bar" + assert "bar" in attrs and attrs["bar"] == "foo" a.store.close() def test_structured_with_object(self): - a = self.create_array(fill_value=(0.0, None), - shape=10, - chunks=10, - dtype=[('x', float), ('y', object)], - object_codec=Pickle()) + a = self.create_array( + fill_value=(0.0, None), + shape=10, + chunks=10, + dtype=[("x", float), ("y", object)], + object_codec=Pickle(), + ) assert tuple(a[0]) == (0.0, None) class TestArrayWithPath(TestArray): - @staticmethod def create_array(read_only=False, **kwargs): store = KVStore(dict()) - cache_metadata = kwargs.pop('cache_metadata', True) - cache_attrs = kwargs.pop('cache_attrs', True) - write_empty_chunks = kwargs.pop('write_empty_chunks', True) - init_array(store, path='foo/bar', **kwargs) - return Array(store, path='foo/bar', read_only=read_only, - cache_metadata=cache_metadata, cache_attrs=cache_attrs, - write_empty_chunks=write_empty_chunks) + cache_metadata = kwargs.pop("cache_metadata", True) + cache_attrs = kwargs.pop("cache_attrs", True) + write_empty_chunks = kwargs.pop("write_empty_chunks", True) + init_array(store, path="foo/bar", **kwargs) + return Array( + store, + path="foo/bar", + read_only=read_only, + cache_metadata=cache_metadata, + cache_attrs=cache_attrs, + write_empty_chunks=write_empty_chunks, + ) def test_nchunks_initialized(self): pass @@ -1593,42 +1614,46 @@ def expected(self): "1437428e69754b1e1a38bd7fc9e43669577620db", "6c530b6b9d73e108cc5ee7b6be3d552cc994bdbe", "4c0a76fb1222498e09dcd92f7f9221d6cea8b40e", - "05b0663ffe1785f38d3a459dec17e57a18f254af" + "05b0663ffe1785f38d3a459dec17e57a18f254af", ] 
def test_nbytes_stored(self): # MemoryStore as store z = self.create_array(shape=1000, chunks=100) - expect_nbytes_stored = sum(buffer_size(v) - for k, v in z.store.items() - if k.startswith('foo/bar/')) + expect_nbytes_stored = sum( + buffer_size(v) for k, v in z.store.items() if k.startswith("foo/bar/") + ) assert expect_nbytes_stored == z.nbytes_stored z[:] = 42 - expect_nbytes_stored = sum(buffer_size(v) - for k, v in z.store.items() - if k.startswith('foo/bar/')) + expect_nbytes_stored = sum( + buffer_size(v) for k, v in z.store.items() if k.startswith("foo/bar/") + ) assert expect_nbytes_stored == z.nbytes_stored # mess with store - z.store[z._key_prefix + 'foo'] = list(range(10)) + z.store[z._key_prefix + "foo"] = list(range(10)) assert -1 == z.nbytes_stored class TestArrayWithChunkStore(TestArray): - @staticmethod def create_array(read_only=False, **kwargs): store = KVStore(dict()) # separate chunk store chunk_store = KVStore(dict()) - cache_metadata = kwargs.pop('cache_metadata', True) - cache_attrs = kwargs.pop('cache_attrs', True) - write_empty_chunks = kwargs.pop('write_empty_chunks', True) + cache_metadata = kwargs.pop("cache_metadata", True) + cache_attrs = kwargs.pop("cache_attrs", True) + write_empty_chunks = kwargs.pop("write_empty_chunks", True) init_array(store, chunk_store=chunk_store, **kwargs) - return Array(store, read_only=read_only, chunk_store=chunk_store, - cache_metadata=cache_metadata, cache_attrs=cache_attrs, - write_empty_chunks=write_empty_chunks) + return Array( + store, + read_only=read_only, + chunk_store=chunk_store, + cache_metadata=cache_metadata, + cache_attrs=cache_attrs, + write_empty_chunks=write_empty_chunks, + ) def expected(self): return [ @@ -1636,41 +1661,43 @@ def expected(self): "1437428e69754b1e1a38bd7fc9e43669577620db", "6c530b6b9d73e108cc5ee7b6be3d552cc994bdbe", "4c0a76fb1222498e09dcd92f7f9221d6cea8b40e", - "05b0663ffe1785f38d3a459dec17e57a18f254af" + "05b0663ffe1785f38d3a459dec17e57a18f254af", ] def test_nbytes_stored(self): z = self.create_array(shape=1000, chunks=100) expect_nbytes_stored = sum(buffer_size(v) for v in z.store.values()) - expect_nbytes_stored += sum(buffer_size(v) - for v in z.chunk_store.values()) + expect_nbytes_stored += sum(buffer_size(v) for v in z.chunk_store.values()) assert expect_nbytes_stored == z.nbytes_stored z[:] = 42 expect_nbytes_stored = sum(buffer_size(v) for v in z.store.values()) - expect_nbytes_stored += sum(buffer_size(v) - for v in z.chunk_store.values()) + expect_nbytes_stored += sum(buffer_size(v) for v in z.chunk_store.values()) assert expect_nbytes_stored == z.nbytes_stored # mess with store - z.chunk_store[z._key_prefix + 'foo'] = list(range(10)) + z.chunk_store[z._key_prefix + "foo"] = list(range(10)) assert -1 == z.nbytes_stored class TestArrayWithDirectoryStore(TestArray): - @staticmethod def create_array(read_only=False, **kwargs): path = mkdtemp() atexit.register(shutil.rmtree, path) store = DirectoryStore(path) - cache_metadata = kwargs.pop('cache_metadata', True) - cache_attrs = kwargs.pop('cache_attrs', True) - write_empty_chunks = kwargs.pop('write_empty_chunks', True) - kwargs.setdefault('compressor', Zlib(1)) + cache_metadata = kwargs.pop("cache_metadata", True) + cache_attrs = kwargs.pop("cache_attrs", True) + write_empty_chunks = kwargs.pop("write_empty_chunks", True) + kwargs.setdefault("compressor", Zlib(1)) init_array(store, **kwargs) - return Array(store, read_only=read_only, cache_metadata=cache_metadata, - cache_attrs=cache_attrs, write_empty_chunks=write_empty_chunks) + 
return Array( + store, + read_only=read_only, + cache_metadata=cache_metadata, + cache_attrs=cache_attrs, + write_empty_chunks=write_empty_chunks, + ) def test_nbytes_stored(self): @@ -1695,7 +1722,6 @@ def test_array_init_from_dict(): @skip_test_env_var("ZARR_TEST_ABS") class TestArrayWithABSStore(TestArray): - @staticmethod def absstore(): client = abs_container() @@ -1705,13 +1731,18 @@ def absstore(): def create_array(self, read_only=False, **kwargs): store = self.absstore() - kwargs.setdefault('compressor', Zlib(1)) - cache_metadata = kwargs.pop('cache_metadata', True) - cache_attrs = kwargs.pop('cache_attrs', True) - write_empty_chunks = kwargs.pop('write_empty_chunks', True) + kwargs.setdefault("compressor", Zlib(1)) + cache_metadata = kwargs.pop("cache_metadata", True) + cache_attrs = kwargs.pop("cache_attrs", True) + write_empty_chunks = kwargs.pop("write_empty_chunks", True) init_array(store, **kwargs) - return Array(store, read_only=read_only, cache_metadata=cache_metadata, - cache_attrs=cache_attrs, write_empty_chunks=write_empty_chunks) + return Array( + store, + read_only=read_only, + cache_metadata=cache_metadata, + cache_attrs=cache_attrs, + write_empty_chunks=write_empty_chunks, + ) @pytest.mark.xfail def test_nbytes_stored(self): @@ -1724,19 +1755,23 @@ def test_pickle(self): class TestArrayWithNestedDirectoryStore(TestArrayWithDirectoryStore): - @staticmethod def create_array(read_only=False, **kwargs): path = mkdtemp() atexit.register(shutil.rmtree, path) store = NestedDirectoryStore(path) - cache_metadata = kwargs.pop('cache_metadata', True) - cache_attrs = kwargs.pop('cache_attrs', True) - write_empty_chunks = kwargs.pop('write_empty_chunks', True) - kwargs.setdefault('compressor', Zlib(1)) + cache_metadata = kwargs.pop("cache_metadata", True) + cache_attrs = kwargs.pop("cache_attrs", True) + write_empty_chunks = kwargs.pop("write_empty_chunks", True) + kwargs.setdefault("compressor", Zlib(1)) init_array(store, **kwargs) - return Array(store, read_only=read_only, cache_metadata=cache_metadata, - cache_attrs=cache_attrs, write_empty_chunks=write_empty_chunks) + return Array( + store, + read_only=read_only, + cache_metadata=cache_metadata, + cache_attrs=cache_attrs, + write_empty_chunks=write_empty_chunks, + ) def expected(self): return [ @@ -1749,19 +1784,23 @@ def expected(self): class TestArrayWithN5Store(TestArrayWithDirectoryStore): - @staticmethod def create_array(read_only=False, **kwargs): path = mkdtemp() atexit.register(shutil.rmtree, path) store = N5Store(path) - cache_metadata = kwargs.pop('cache_metadata', True) - cache_attrs = kwargs.pop('cache_attrs', True) - write_empty_chunks = kwargs.pop('write_empty_chunks', True) - kwargs.setdefault('compressor', Zlib(1)) + cache_metadata = kwargs.pop("cache_metadata", True) + cache_attrs = kwargs.pop("cache_attrs", True) + write_empty_chunks = kwargs.pop("write_empty_chunks", True) + kwargs.setdefault("compressor", Zlib(1)) init_array(store, **kwargs) - return Array(store, read_only=read_only, cache_metadata=cache_metadata, - cache_attrs=cache_attrs, write_empty_chunks=write_empty_chunks) + return Array( + store, + read_only=read_only, + cache_metadata=cache_metadata, + cache_attrs=cache_attrs, + write_empty_chunks=write_empty_chunks, + ) def test_array_0d(self): # test behaviour for array with 0 dimensions @@ -1816,8 +1855,7 @@ def test_array_1d_fill_value(self): a = np.arange(nvalues, dtype=dtype) f = np.empty_like(a) f.fill(fill_value or 0) - z = self.create_array(shape=a.shape, chunks=100, dtype=a.dtype, - 
fill_value=fill_value) + z = self.create_array(shape=a.shape, chunks=100, dtype=a.dtype, fill_value=fill_value) z[190:310] = a[190:310] assert_array_equal(f[:190], z[:190]) @@ -1825,21 +1863,18 @@ def test_array_1d_fill_value(self): assert_array_equal(f[310:], z[310:]) with pytest.raises(ValueError): - z = self.create_array(shape=(nvalues,), chunks=100, dtype=dtype, - fill_value=1) + z = self.create_array(shape=(nvalues,), chunks=100, dtype=dtype, fill_value=1) def test_nchunks_initialized(self): fill_value = 0 - dtype = 'int' - z = self.create_array(shape=100, - chunks=10, - fill_value=fill_value, - dtype=dtype, - write_empty_chunks=True) + dtype = "int" + z = self.create_array( + shape=100, chunks=10, fill_value=fill_value, dtype=dtype, write_empty_chunks=True + ) assert 0 == z.nchunks_initialized # manually put something into the store to confuse matters - z.store['foo'] = b'bar' + z.store["foo"] = b"bar" assert 0 == z.nchunks_initialized z[:] = 42 assert 10 == z.nchunks_initialized @@ -1849,11 +1884,9 @@ def test_nchunks_initialized(self): # second round of similar tests with write_empty_chunks set to # False - z = self.create_array(shape=100, - chunks=10, - fill_value=fill_value, - dtype=dtype, - write_empty_chunks=False) + z = self.create_array( + shape=100, chunks=10, fill_value=fill_value, dtype=dtype, write_empty_chunks=False + ) z[:] = 42 assert 10 == z.nchunks_initialized # manually remove a chunk from the store @@ -1866,61 +1899,69 @@ def test_array_order(self): # N5 only supports 'C' at the moment with pytest.raises(ValueError): - self.create_array(shape=(10, 11), chunks=(10, 11), dtype='i8', - order='F') + self.create_array(shape=(10, 11), chunks=(10, 11), dtype="i8", order="F") # 1D a = np.arange(1050) - z = self.create_array(shape=a.shape, chunks=100, dtype=a.dtype, - order='C') - assert z.order == 'C' + z = self.create_array(shape=a.shape, chunks=100, dtype=a.dtype, order="C") + assert z.order == "C" assert z[:].flags.c_contiguous z[:] = a assert_array_equal(a, z[:]) # 2D a = np.arange(10000).reshape((100, 100)) - z = self.create_array(shape=a.shape, chunks=(10, 10), - dtype=a.dtype, order='C') + z = self.create_array(shape=a.shape, chunks=(10, 10), dtype=a.dtype, order="C") - assert z.order == 'C' + assert z.order == "C" assert z[:].flags.c_contiguous z[:] = a actual = z[:] assert_array_equal(a, actual) def test_structured_array(self): - d = np.array([(b'aaa', 1, 4.2), - (b'bbb', 2, 8.4), - (b'ccc', 3, 12.6)], - dtype=[('foo', 'S3'), ('bar', 'i4'), ('baz', 'f8')]) - fill_values = None, b'', (b'zzz', 42, 16.8) + d = np.array( + [(b"aaa", 1, 4.2), (b"bbb", 2, 8.4), (b"ccc", 3, 12.6)], + dtype=[("foo", "S3"), ("bar", "i4"), ("baz", "f8")], + ) + fill_values = None, b"", (b"zzz", 42, 16.8) with pytest.raises(TypeError): self.check_structured_array(d, fill_values) def test_structured_array_subshapes(self): - d = np.array([(0, ((0, 1, 2), (1, 2, 3)), b'aaa'), - (1, ((1, 2, 3), (2, 3, 4)), b'bbb'), - (2, ((2, 3, 4), (3, 4, 5)), b'ccc')], - dtype=[('foo', 'i8'), ('bar', '(2, 3)f4'), ('baz', 'S3')]) - fill_values = None, b'', (0, ((0, 0, 0), (1, 1, 1)), b'zzz') + d = np.array( + [ + (0, ((0, 1, 2), (1, 2, 3)), b"aaa"), + (1, ((1, 2, 3), (2, 3, 4)), b"bbb"), + (2, ((2, 3, 4), (3, 4, 5)), b"ccc"), + ], + dtype=[("foo", "i8"), ("bar", "(2, 3)f4"), ("baz", "S3")], + ) + fill_values = None, b"", (0, ((0, 0, 0), (1, 1, 1)), b"zzz") with pytest.raises(TypeError): self.check_structured_array(d, fill_values) def test_structured_array_nested(self): - d = np.array([(0, (0, ((0, 1), (1, 2), 
(2, 3)), 0), b'aaa'), - (1, (1, ((1, 2), (2, 3), (3, 4)), 1), b'bbb'), - (2, (2, ((2, 3), (3, 4), (4, 5)), 2), b'ccc')], - dtype=[('foo', 'i8'), ('bar', [('foo', 'i4'), ('bar', '(3, 2)f4'), - ('baz', 'u1')]), ('baz', 'S3')]) - fill_values = None, b'', (0, (0, ((0, 0), (1, 1), (2, 2)), 0), b'zzz') + d = np.array( + [ + (0, (0, ((0, 1), (1, 2), (2, 3)), 0), b"aaa"), + (1, (1, ((1, 2), (2, 3), (3, 4)), 1), b"bbb"), + (2, (2, ((2, 3), (3, 4), (4, 5)), 2), b"ccc"), + ], + dtype=[ + ("foo", "i8"), + ("bar", [("foo", "i4"), ("bar", "(3, 2)f4"), ("baz", "u1")]), + ("baz", "S3"), + ], + ) + fill_values = None, b"", (0, (0, ((0, 0), (1, 1), (2, 2)), 0), b"zzz") with pytest.raises(TypeError): self.check_structured_array(d, fill_values) def test_dtypes(self): # integers - for dtype in 'u1', 'u2', 'u4', 'u8', 'i1', 'i2', 'i4', 'i8': + for dtype in "u1", "u2", "u4", "u8", "i1", "i2", "i4", "i8": z = self.create_array(shape=10, chunks=3, dtype=dtype) assert z.dtype == np.dtype(dtype) a = np.arange(z.shape[0], dtype=dtype) @@ -1928,7 +1969,7 @@ def test_dtypes(self): assert_array_equal(a, z[:]) # floats - for dtype in 'f2', 'f4', 'f8': + for dtype in "f2", "f4", "f8": z = self.create_array(shape=10, chunks=3, dtype=dtype) assert z.dtype == np.dtype(dtype) a = np.linspace(0, 1, z.shape[0], dtype=dtype) @@ -1937,9 +1978,9 @@ def test_dtypes(self): # check that datetime generic units are not allowed with pytest.raises(ValueError): - self.create_array(shape=100, dtype='M8') + self.create_array(shape=100, dtype="M8") with pytest.raises(ValueError): - self.create_array(shape=100, dtype='m8') + self.create_array(shape=100, dtype="m8") def test_object_arrays(self): @@ -1970,7 +2011,7 @@ def test_object_arrays_vlen_text(self): def test_object_arrays_vlen_bytes(self): - greetings_bytes = [g.encode('utf8') for g in greetings] + greetings_bytes = [g.encode("utf8") for g in greetings] data = np.array(greetings_bytes * 1000, dtype=object) with pytest.raises(ValueError): @@ -1982,19 +2023,19 @@ def test_object_arrays_vlen_bytes(self): def test_object_arrays_vlen_array(self): - data = np.array([np.array([1, 3, 7]), - np.array([5]), - np.array([2, 8, 12])] * 1000, dtype=object) + data = np.array( + [np.array([1, 3, 7]), np.array([5]), np.array([2, 8, 12])] * 1000, dtype=object + ) - codecs = VLenArray(int), VLenArray(' 2 and g1.store.is_erasable(): - arr_path = g1.path + '/arr1' + arr_path = g1.path + "/arr1" sfx = _get_metadata_suffix(g1.store) - array_meta_file = meta_root + arr_path + '.array' + sfx + array_meta_file = meta_root + arr_path + ".array" + sfx assert array_meta_file in g1.store - group_meta_file = meta_root + g2.path + '.group' + sfx + group_meta_file = meta_root + g2.path + ".group" + sfx assert group_meta_file in g1.store # rmdir on the array path should also remove the metadata file @@ -280,21 +308,21 @@ def test_rmdir_group_and_array_metadata_files(self): assert group_meta_file not in g1.store def _dataset_path(self, group, path): - path = path.rstrip('/') - absolute = path.startswith('/') + path = path.rstrip("/") + absolute = path.startswith("/") if absolute: dataset_path = path else: - dataset_path = '/'.join([group.path, path]) - dataset_path = dataset_path.lstrip('/') - dataset_name = '/' + dataset_path + dataset_path = "/".join([group.path, path]) + dataset_path = dataset_path.lstrip("/") + dataset_name = "/" + dataset_path return dataset_path, dataset_name def test_create_dataset(self): g = self.create_group() # create as immediate child - dpath = 'foo' + dpath = "foo" d1 = 
g.create_dataset(dpath, shape=1000, chunks=100) path, name = self._dataset_path(g, dpath) assert isinstance(d1, Array) @@ -305,32 +333,39 @@ def test_create_dataset(self): assert g.store is d1.store # create as descendant - dpath = '/a/b/c/' - d2 = g.create_dataset(dpath, shape=2000, chunks=200, dtype='i1', - compression='zlib', compression_opts=9, - fill_value=42, order='F') + dpath = "/a/b/c/" + d2 = g.create_dataset( + dpath, + shape=2000, + chunks=200, + dtype="i1", + compression="zlib", + compression_opts=9, + fill_value=42, + order="F", + ) path, name = self._dataset_path(g, dpath) assert isinstance(d2, Array) assert (2000,) == d2.shape assert (200,) == d2.chunks - assert np.dtype('i1') == d2.dtype - assert 'zlib' == d2.compressor.codec_id + assert np.dtype("i1") == d2.dtype + assert "zlib" == d2.compressor.codec_id assert 9 == d2.compressor.level assert 42 == d2.fill_value - assert 'F' == d2.order + assert "F" == d2.order assert path == d2.path assert name == d2.name assert g.store is d2.store # create with data - data = np.arange(3000, dtype='u2') - dpath = 'bar' + data = np.arange(3000, dtype="u2") + dpath = "bar" d3 = g.create_dataset(dpath, data=data, chunks=300) path, name = self._dataset_path(g, dpath) assert isinstance(d3, Array) assert (3000,) == d3.shape assert (300,) == d3.chunks - assert np.dtype('u2') == d3.dtype + assert np.dtype("u2") == d3.dtype assert_array_equal(data, d3[:]) assert path == d3.path assert name == d3.name @@ -339,35 +374,39 @@ def test_create_dataset(self): # compression arguments handling follows... # compression_opts as dict - d = g.create_dataset('aaa', shape=1000, dtype='u1', - compression='blosc', - compression_opts=dict(cname='zstd', clevel=1, shuffle=2)) - assert d.compressor.codec_id == 'blosc' - assert 'zstd' == d.compressor.cname + d = g.create_dataset( + "aaa", + shape=1000, + dtype="u1", + compression="blosc", + compression_opts=dict(cname="zstd", clevel=1, shuffle=2), + ) + assert d.compressor.codec_id == "blosc" + assert "zstd" == d.compressor.cname assert 1 == d.compressor.clevel assert 2 == d.compressor.shuffle # compression_opts as sequence - d = g.create_dataset('bbb', shape=1000, dtype='u1', - compression='blosc', - compression_opts=('zstd', 1, 2)) - assert d.compressor.codec_id == 'blosc' - assert 'zstd' == d.compressor.cname + d = g.create_dataset( + "bbb", shape=1000, dtype="u1", compression="blosc", compression_opts=("zstd", 1, 2) + ) + assert d.compressor.codec_id == "blosc" + assert "zstd" == d.compressor.cname assert 1 == d.compressor.clevel assert 2 == d.compressor.shuffle # None compression_opts - d = g.create_dataset('ccc', shape=1000, dtype='u1', compression='zlib') - assert d.compressor.codec_id == 'zlib' + d = g.create_dataset("ccc", shape=1000, dtype="u1", compression="zlib") + assert d.compressor.codec_id == "zlib" assert 1 == d.compressor.level # None compression - d = g.create_dataset('ddd', shape=1000, dtype='u1', compression=None) + d = g.create_dataset("ddd", shape=1000, dtype="u1", compression=None) assert d.compressor is None # compressor as compression - d = g.create_dataset('eee', shape=1000, dtype='u1', compression=Zlib(1)) - assert d.compressor.codec_id == 'zlib' + d = g.create_dataset("eee", shape=1000, dtype="u1", compression=Zlib(1)) + assert d.compressor.codec_id == "zlib" assert 1 == d.compressor.level g.store.close() @@ -376,25 +415,25 @@ def test_require_dataset(self): g = self.create_group() # create - dpath = 'foo' - d1 = g.require_dataset(dpath, shape=1000, chunks=100, dtype='f4') + dpath = "foo" 
+ d1 = g.require_dataset(dpath, shape=1000, chunks=100, dtype="f4") d1[:] = np.arange(1000) path, name = self._dataset_path(g, dpath) assert isinstance(d1, Array) assert (1000,) == d1.shape assert (100,) == d1.chunks - assert np.dtype('f4') == d1.dtype + assert np.dtype("f4") == d1.dtype assert path == d1.path assert name == d1.name assert g.store is d1.store assert_array_equal(np.arange(1000), d1[:]) # require - d2 = g.require_dataset(dpath, shape=1000, chunks=100, dtype='f4') + d2 = g.require_dataset(dpath, shape=1000, chunks=100, dtype="f4") assert isinstance(d2, Array) assert (1000,) == d2.shape assert (100,) == d2.chunks - assert np.dtype('f4') == d2.dtype + assert np.dtype("f4") == d2.dtype assert path == d2.path assert name == d2.name assert g.store is d2.store @@ -403,20 +442,19 @@ def test_require_dataset(self): # bad shape - use TypeError for h5py compatibility with pytest.raises(TypeError): - g.require_dataset('foo', shape=2000, chunks=100, dtype='f4') + g.require_dataset("foo", shape=2000, chunks=100, dtype="f4") # dtype matching # can cast - d3 = g.require_dataset('foo', shape=1000, chunks=100, dtype='i2') - assert np.dtype('f4') == d3.dtype + d3 = g.require_dataset("foo", shape=1000, chunks=100, dtype="i2") + assert np.dtype("f4") == d3.dtype assert d1 == d3 with pytest.raises(TypeError): # cannot cast - g.require_dataset('foo', shape=1000, chunks=100, dtype='i4') + g.require_dataset("foo", shape=1000, chunks=100, dtype="i4") with pytest.raises(TypeError): # can cast but not exact match - g.require_dataset('foo', shape=1000, chunks=100, dtype='i2', - exact=True) + g.require_dataset("foo", shape=1000, chunks=100, dtype="i2", exact=True) g.store.close() @@ -424,80 +462,76 @@ def test_create_errors(self): g = self.create_group() # array obstructs group, array - g.create_dataset('foo', shape=100, chunks=10) + g.create_dataset("foo", shape=100, chunks=10) with pytest.raises(ValueError): - g.create_group('foo/bar') + g.create_group("foo/bar") with pytest.raises(ValueError): - g.require_group('foo/bar') + g.require_group("foo/bar") with pytest.raises(ValueError): - g.create_dataset('foo/bar', shape=100, chunks=10) + g.create_dataset("foo/bar", shape=100, chunks=10) with pytest.raises(ValueError): - g.require_dataset('foo/bar', shape=100, chunks=10) + g.require_dataset("foo/bar", shape=100, chunks=10) # array obstructs group, array - g.create_dataset('a/b', shape=100, chunks=10) + g.create_dataset("a/b", shape=100, chunks=10) with pytest.raises(ValueError): - g.create_group('a/b') + g.create_group("a/b") with pytest.raises(ValueError): - g.require_group('a/b') + g.require_group("a/b") with pytest.raises(ValueError): - g.create_dataset('a/b', shape=100, chunks=10) + g.create_dataset("a/b", shape=100, chunks=10) # group obstructs array - g.create_group('c/d') + g.create_group("c/d") with pytest.raises(ValueError): - g.create_dataset('c', shape=100, chunks=10) + g.create_dataset("c", shape=100, chunks=10) with pytest.raises(ValueError): - g.require_dataset('c', shape=100, chunks=10) + g.require_dataset("c", shape=100, chunks=10) with pytest.raises(ValueError): - g.create_dataset('c/d', shape=100, chunks=10) + g.create_dataset("c/d", shape=100, chunks=10) with pytest.raises(ValueError): - g.require_dataset('c/d', shape=100, chunks=10) + g.require_dataset("c/d", shape=100, chunks=10) # h5py compatibility, accept 'fillvalue' - d = g.create_dataset('x', shape=100, chunks=10, fillvalue=42) + d = g.create_dataset("x", shape=100, chunks=10, fillvalue=42) assert 42 == d.fill_value # h5py 
compatibility, ignore 'shuffle' with pytest.warns(UserWarning, match="ignoring keyword argument 'shuffle'"): - g.create_dataset('y', shape=100, chunks=10, shuffle=True) + g.create_dataset("y", shape=100, chunks=10, shuffle=True) # read-only g = self.create_group(read_only=True) with pytest.raises(PermissionError): - g.create_group('zzz') + g.create_group("zzz") with pytest.raises(PermissionError): - g.require_group('zzz') + g.require_group("zzz") with pytest.raises(PermissionError): - g.create_dataset('zzz', shape=100, chunks=10) + g.create_dataset("zzz", shape=100, chunks=10) with pytest.raises(PermissionError): - g.require_dataset('zzz', shape=100, chunks=10) + g.require_dataset("zzz", shape=100, chunks=10) g.store.close() def test_create_overwrite(self): try: - for method_name in 'create_dataset', 'create', 'empty', 'zeros', \ - 'ones': + for method_name in "create_dataset", "create", "empty", "zeros", "ones": g = self.create_group() - getattr(g, method_name)('foo', shape=100, chunks=10) + getattr(g, method_name)("foo", shape=100, chunks=10) # overwrite array with array - d = getattr(g, method_name)('foo', shape=200, chunks=20, - overwrite=True) + d = getattr(g, method_name)("foo", shape=200, chunks=20, overwrite=True) assert (200,) == d.shape # overwrite array with group - g2 = g.create_group('foo', overwrite=True) + g2 = g.create_group("foo", overwrite=True) assert 0 == len(g2) # overwrite group with array - d = getattr(g, method_name)('foo', shape=300, chunks=30, - overwrite=True) + d = getattr(g, method_name)("foo", shape=300, chunks=30, overwrite=True) assert (300,) == d.shape # overwrite array with group - d = getattr(g, method_name)('foo/bar', shape=400, chunks=40, - overwrite=True) + d = getattr(g, method_name)("foo/bar", shape=400, chunks=40, overwrite=True) assert (400,) == d.shape - assert isinstance(g['foo'], Group) + assert isinstance(g["foo"], Group) g.store.close() except NotImplementedError: @@ -506,84 +540,84 @@ def test_create_overwrite(self): def test_getitem_contains_iterators(self): # setup g1 = self.create_group() - g2 = g1.create_group('foo/bar') + g2 = g1.create_group("foo/bar") if g1._version == 2: - d1 = g2.create_dataset('/a/b/c', shape=1000, chunks=100) + d1 = g2.create_dataset("/a/b/c", shape=1000, chunks=100) else: # v3: cannot create a dataset at the root by starting with / # instead, need to create the dataset on g1 directly - d1 = g1.create_dataset('a/b/c', shape=1000, chunks=100) + d1 = g1.create_dataset("a/b/c", shape=1000, chunks=100) d1[:] = np.arange(1000) - d2 = g1.create_dataset('foo/baz', shape=3000, chunks=300) + d2 = g1.create_dataset("foo/baz", shape=3000, chunks=300) d2[:] = np.arange(3000) # test __getitem__ - assert isinstance(g1['foo'], Group) - assert isinstance(g1['foo']['bar'], Group) - assert isinstance(g1['foo/bar'], Group) + assert isinstance(g1["foo"], Group) + assert isinstance(g1["foo"]["bar"], Group) + assert isinstance(g1["foo/bar"], Group) if g1._version == 2: - assert isinstance(g1['/foo/bar/'], Group) + assert isinstance(g1["/foo/bar/"], Group) else: # start or end with / raises KeyError # TODO: should we allow stripping of these on v3? 
with pytest.raises(KeyError): - assert isinstance(g1['/foo/bar/'], Group) - assert isinstance(g1['foo/baz'], Array) - assert g2 == g1['foo/bar'] - assert g1['foo']['bar'] == g1['foo/bar'] - assert d2 == g1['foo/baz'] - assert_array_equal(d2[:], g1['foo/baz']) - assert isinstance(g1['a'], Group) - assert isinstance(g1['a']['b'], Group) - assert isinstance(g1['a/b'], Group) - assert isinstance(g1['a']['b']['c'], Array) - assert isinstance(g1['a/b/c'], Array) - assert d1 == g1['a/b/c'] - assert g1['a']['b']['c'] == g1['a/b/c'] - assert_array_equal(d1[:], g1['a/b/c'][:]) + assert isinstance(g1["/foo/bar/"], Group) + assert isinstance(g1["foo/baz"], Array) + assert g2 == g1["foo/bar"] + assert g1["foo"]["bar"] == g1["foo/bar"] + assert d2 == g1["foo/baz"] + assert_array_equal(d2[:], g1["foo/baz"]) + assert isinstance(g1["a"], Group) + assert isinstance(g1["a"]["b"], Group) + assert isinstance(g1["a/b"], Group) + assert isinstance(g1["a"]["b"]["c"], Array) + assert isinstance(g1["a/b/c"], Array) + assert d1 == g1["a/b/c"] + assert g1["a"]["b"]["c"] == g1["a/b/c"] + assert_array_equal(d1[:], g1["a/b/c"][:]) # test __contains__ - assert 'foo' in g1 - assert 'foo/bar' in g1 - assert 'foo/baz' in g1 - assert 'bar' in g1['foo'] - assert 'a' in g1 - assert 'a/b' in g1 - assert 'a/b/c' in g1 - assert 'baz' not in g1 - assert 'a/b/c/d' not in g1 - assert 'a/z' not in g1 - assert 'quux' not in g1['foo'] + assert "foo" in g1 + assert "foo/bar" in g1 + assert "foo/baz" in g1 + assert "bar" in g1["foo"] + assert "a" in g1 + assert "a/b" in g1 + assert "a/b/c" in g1 + assert "baz" not in g1 + assert "a/b/c/d" not in g1 + assert "a/z" not in g1 + assert "quux" not in g1["foo"] # test key errors with pytest.raises(KeyError): - g1['baz'] + g1["baz"] with pytest.raises(KeyError): - g1['x/y/z'] + g1["x/y/z"] # test __len__ assert 2 == len(g1) - assert 2 == len(g1['foo']) - assert 0 == len(g1['foo/bar']) - assert 1 == len(g1['a']) - assert 1 == len(g1['a/b']) + assert 2 == len(g1["foo"]) + assert 0 == len(g1["foo/bar"]) + assert 1 == len(g1["a"]) + assert 1 == len(g1["a/b"]) # test __iter__, keys() if g1._version == 2: # currently assumes sorted by key - assert ['a', 'foo'] == list(g1) - assert ['a', 'foo'] == list(g1.keys()) - assert ['bar', 'baz'] == list(g1['foo']) - assert ['bar', 'baz'] == list(g1['foo'].keys()) + assert ["a", "foo"] == list(g1) + assert ["a", "foo"] == list(g1.keys()) + assert ["bar", "baz"] == list(g1["foo"]) + assert ["bar", "baz"] == list(g1["foo"].keys()) else: # v3 is not necessarily sorted by key - assert ['a', 'foo'] == sorted(list(g1)) - assert ['a', 'foo'] == sorted(list(g1.keys())) - assert ['bar', 'baz'] == sorted(list(g1['foo'])) - assert ['bar', 'baz'] == sorted(list(g1['foo'].keys())) - assert [] == sorted(g1['foo/bar']) - assert [] == sorted(g1['foo/bar'].keys()) + assert ["a", "foo"] == sorted(list(g1)) + assert ["a", "foo"] == sorted(list(g1.keys())) + assert ["bar", "baz"] == sorted(list(g1["foo"])) + assert ["bar", "baz"] == sorted(list(g1["foo"].keys())) + assert [] == sorted(g1["foo/bar"]) + assert [] == sorted(g1["foo/bar"].keys()) # test items(), values() # currently assumes sorted by key @@ -593,24 +627,24 @@ def test_getitem_contains_iterators(self): if g1._version == 3: # v3 are not automatically sorted by key items, values = zip(*sorted(zip(items, values), key=lambda x: x[0])) - assert 'a' == items[0][0] - assert g1['a'] == items[0][1] - assert g1['a'] == values[0] - assert 'foo' == items[1][0] - assert g1['foo'] == items[1][1] - assert g1['foo'] == values[1] - - 
items = list(g1['foo'].items()) - values = list(g1['foo'].values()) + assert "a" == items[0][0] + assert g1["a"] == items[0][1] + assert g1["a"] == values[0] + assert "foo" == items[1][0] + assert g1["foo"] == items[1][1] + assert g1["foo"] == values[1] + + items = list(g1["foo"].items()) + values = list(g1["foo"].values()) if g1._version == 3: # v3 are not automatically sorted by key items, values = zip(*sorted(zip(items, values), key=lambda x: x[0])) - assert 'bar' == items[0][0] - assert g1['foo']['bar'] == items[0][1] - assert g1['foo']['bar'] == values[0] - assert 'baz' == items[1][0] - assert g1['foo']['baz'] == items[1][1] - assert g1['foo']['baz'] == values[1] + assert "bar" == items[0][0] + assert g1["foo"]["bar"] == items[0][1] + assert g1["foo"]["bar"] == values[0] + assert "baz" == items[1][0] + assert g1["foo"]["baz"] == items[1][1] + assert g1["foo"]["baz"] == values[1] # test array_keys(), arrays(), group_keys(), groups() @@ -618,29 +652,29 @@ def test_getitem_contains_iterators(self): arrays = list(g1.arrays()) if g1._version == 2: # currently assumes sorted by key - assert ['a', 'foo'] == list(g1.group_keys()) + assert ["a", "foo"] == list(g1.group_keys()) else: - assert ['a', 'foo'] == sorted(list(g1.group_keys())) + assert ["a", "foo"] == sorted(list(g1.group_keys())) groups = sorted(groups) arrays = sorted(arrays) - assert 'a' == groups[0][0] - assert g1['a'] == groups[0][1] - assert 'foo' == groups[1][0] - assert g1['foo'] == groups[1][1] + assert "a" == groups[0][0] + assert g1["a"] == groups[0][1] + assert "foo" == groups[1][0] + assert g1["foo"] == groups[1][1] assert [] == list(g1.array_keys()) assert [] == arrays - assert ['bar'] == list(g1['foo'].group_keys()) - assert ['baz'] == list(g1['foo'].array_keys()) - groups = list(g1['foo'].groups()) - arrays = list(g1['foo'].arrays()) + assert ["bar"] == list(g1["foo"].group_keys()) + assert ["baz"] == list(g1["foo"].array_keys()) + groups = list(g1["foo"].groups()) + arrays = list(g1["foo"].arrays()) if g1._version == 3: groups = sorted(groups) arrays = sorted(arrays) - assert 'bar' == groups[0][0] - assert g1['foo']['bar'] == groups[0][1] - assert 'baz' == arrays[0][0] - assert g1['foo']['baz'] == arrays[0][1] + assert "bar" == groups[0][0] + assert g1["foo"]["bar"] == groups[0][1] + assert "baz" == arrays[0][0] + assert g1["foo"]["baz"] == arrays[0][1] # visitor collection tests items = [] @@ -666,7 +700,7 @@ def visitor4(name, obj): "foo/baz", ] if g1._version == 3: - expected_items = [g1.path + '/' + i for i in expected_items] + expected_items = [g1.path + "/" + i for i in expected_items] assert expected_items == items del items[:] @@ -676,7 +710,7 @@ def visitor4(name, obj): "foo/baz", ] if g1._version == 3: - expected_items = [g1.path + '/' + i for i in expected_items] + expected_items = [g1.path + "/" + i for i in expected_items] assert expected_items == items del items[:] @@ -753,7 +787,7 @@ def visitor0(val, *args): # noinspection PyUnusedLocal def visitor1(val, *args): name = getattr(val, "path", val) - if name.startswith('group/'): + if name.startswith("group/"): # strip the group path for v3 name = name[6:] if name == "a/b/c": @@ -779,8 +813,7 @@ def test_double_counting_group_v3(self): sub_group.create("bar", shape=10, dtype="i4") assert list(root_group.group_keys()) == sorted(group_names) assert list(root_group.groups()) == [ - (name, root_group[name]) - for name in sorted(group_names) + (name, root_group[name]) for name in sorted(group_names) ] def test_empty_getitem_contains_iterators(self): @@ -791,47 
+824,47 @@ def test_empty_getitem_contains_iterators(self): assert [] == list(g) assert [] == list(g.keys()) assert 0 == len(g) - assert 'foo' not in g + assert "foo" not in g g.store.close() def test_iterators_recurse(self): # setup g1 = self.create_group() - g2 = g1.create_group('foo/bar') - d1 = g2.create_dataset('/a/b/c', shape=1000, chunks=100) + g2 = g1.create_group("foo/bar") + d1 = g2.create_dataset("/a/b/c", shape=1000, chunks=100) d1[:] = np.arange(1000) - d2 = g1.create_dataset('foo/baz', shape=3000, chunks=300) + d2 = g1.create_dataset("foo/baz", shape=3000, chunks=300) d2[:] = np.arange(3000) - d3 = g2.create_dataset('zab', shape=2000, chunks=200) + d3 = g2.create_dataset("zab", shape=2000, chunks=200) d3[:] = np.arange(2000) # test recursive array_keys - array_keys = list(g1['foo'].array_keys(recurse=False)) - array_keys_recurse = list(g1['foo'].array_keys(recurse=True)) + array_keys = list(g1["foo"].array_keys(recurse=False)) + array_keys_recurse = list(g1["foo"].array_keys(recurse=True)) assert len(array_keys_recurse) > len(array_keys) - assert sorted(array_keys_recurse) == ['baz', 'zab'] + assert sorted(array_keys_recurse) == ["baz", "zab"] # test recursive arrays - arrays = list(g1['foo'].arrays(recurse=False)) - arrays_recurse = list(g1['foo'].arrays(recurse=True)) + arrays = list(g1["foo"].arrays(recurse=False)) + arrays_recurse = list(g1["foo"].arrays(recurse=True)) assert len(arrays_recurse) > len(arrays) - assert 'zab' == arrays_recurse[0][0] - assert g1['foo']['bar']['zab'] == arrays_recurse[0][1] + assert "zab" == arrays_recurse[0][0] + assert g1["foo"]["bar"]["zab"] == arrays_recurse[0][1] g1.store.close() def test_getattr(self): # setup g1 = self.create_group() - g2 = g1.create_group('foo') - g2.create_dataset('bar', shape=100) + g2 = g1.create_group("foo") + g2.create_dataset("bar", shape=100) # test - assert g1['foo'] == g1.foo - assert g2['bar'] == g2.bar + assert g1["foo"] == g1.foo + assert g2["bar"] == g2.bar # test that hasattr returns False instead of an exception (issue #88) - assert not hasattr(g1, 'unexistingattribute') + assert not hasattr(g1, "unexistingattribute") g1.store.close() @@ -839,46 +872,46 @@ def test_setitem(self): g = self.create_group() try: data = np.arange(100) - g['foo'] = data - assert_array_equal(data, g['foo']) + g["foo"] = data + assert_array_equal(data, g["foo"]) data = np.arange(200) - g['foo'] = data - assert_array_equal(data, g['foo']) + g["foo"] = data + assert_array_equal(data, g["foo"]) # 0d array - g['foo'] = 42 - assert () == g['foo'].shape - assert 42 == g['foo'][()] + g["foo"] = 42 + assert () == g["foo"].shape + assert 42 == g["foo"][()] except NotImplementedError: pass g.store.close() def test_delitem(self): g = self.create_group() - g.create_group('foo') - g.create_dataset('bar/baz', shape=100, chunks=10) - assert 'foo' in g - assert 'bar' in g - assert 'bar/baz' in g + g.create_group("foo") + g.create_dataset("bar/baz", shape=100, chunks=10) + assert "foo" in g + assert "bar" in g + assert "bar/baz" in g try: - del g['bar'] + del g["bar"] with pytest.raises(KeyError): - del g['xxx'] + del g["xxx"] except NotImplementedError: pass else: - assert 'foo' in g - assert 'bar' not in g - assert 'bar/baz' not in g + assert "foo" in g + assert "bar" not in g + assert "bar/baz" not in g g.store.close() def test_move(self): g = self.create_group() data = np.arange(100) - g['boo'] = data + g["boo"] = data data = np.arange(100) - g['foo'] = data + g["foo"] = data g.move("foo", "bar") assert "foo" not in g @@ -911,11 +944,11 @@ 
def test_move(self): # meta/data/bar. This is outside the `g` group located at # /meta/root/group, so bar is no longer within `g`. assert "bar" not in g - assert 'meta/root/bar.array.json' in g._store + assert "meta/root/bar.array.json" in g._store if g._chunk_store: - assert 'data/root/bar/c0' in g._chunk_store + assert "data/root/bar/c0" in g._chunk_store else: - assert 'data/root/bar/c0' in g._store + assert "data/root/bar/c0" in g._store assert isinstance(g["foo2"], Group) if g2._version == 2: assert_array_equal(data, g["bar"]) @@ -938,35 +971,35 @@ def test_move(self): def test_array_creation(self): grp = self.create_group() - a = grp.create('a', shape=100, chunks=10) + a = grp.create("a", shape=100, chunks=10) assert isinstance(a, Array) - b = grp.empty('b', shape=100, chunks=10) + b = grp.empty("b", shape=100, chunks=10) assert isinstance(b, Array) assert b.fill_value is None - c = grp.zeros('c', shape=100, chunks=10) + c = grp.zeros("c", shape=100, chunks=10) assert isinstance(c, Array) assert 0 == c.fill_value - d = grp.ones('d', shape=100, chunks=10) + d = grp.ones("d", shape=100, chunks=10) assert isinstance(d, Array) assert 1 == d.fill_value - e = grp.full('e', shape=100, chunks=10, fill_value=42) + e = grp.full("e", shape=100, chunks=10, fill_value=42) assert isinstance(e, Array) assert 42 == e.fill_value - f = grp.empty_like('f', a) + f = grp.empty_like("f", a) assert isinstance(f, Array) assert f.fill_value is None - g = grp.zeros_like('g', a) + g = grp.zeros_like("g", a) assert isinstance(g, Array) assert 0 == g.fill_value - h = grp.ones_like('h', a) + h = grp.ones_like("h", a) assert isinstance(h, Array) assert 1 == h.fill_value - i = grp.full_like('i', e) + i = grp.full_like("i", e) assert isinstance(i, Array) assert 42 == i.fill_value - j = grp.array('j', data=np.arange(100), chunks=10) + j = grp.array("j", data=np.arange(100), chunks=10) assert isinstance(j, Array) assert_array_equal(np.arange(100), j[:]) @@ -974,81 +1007,80 @@ def test_array_creation(self): grp = self.create_group(read_only=True) with pytest.raises(PermissionError): - grp.create('aa', shape=100, chunks=10) + grp.create("aa", shape=100, chunks=10) with pytest.raises(PermissionError): - grp.empty('aa', shape=100, chunks=10) + grp.empty("aa", shape=100, chunks=10) with pytest.raises(PermissionError): - grp.zeros('aa', shape=100, chunks=10) + grp.zeros("aa", shape=100, chunks=10) with pytest.raises(PermissionError): - grp.ones('aa', shape=100, chunks=10) + grp.ones("aa", shape=100, chunks=10) with pytest.raises(PermissionError): - grp.full('aa', shape=100, chunks=10, fill_value=42) + grp.full("aa", shape=100, chunks=10, fill_value=42) with pytest.raises(PermissionError): - grp.array('aa', data=np.arange(100), chunks=10) + grp.array("aa", data=np.arange(100), chunks=10) with pytest.raises(PermissionError): - grp.create('aa', shape=100, chunks=10) + grp.create("aa", shape=100, chunks=10) with pytest.raises(PermissionError): - grp.empty_like('aa', a) + grp.empty_like("aa", a) with pytest.raises(PermissionError): - grp.zeros_like('aa', a) + grp.zeros_like("aa", a) with pytest.raises(PermissionError): - grp.ones_like('aa', a) + grp.ones_like("aa", a) with pytest.raises(PermissionError): - grp.full_like('aa', a) + grp.full_like("aa", a) grp.store.close() def test_paths(self): g1 = self.create_group() - g2 = g1.create_group('foo/bar') + g2 = g1.create_group("foo/bar") if g1._version == 2: - assert g1 == g1['/'] - assert g1 == g1['//'] - assert g1 == g1['///'] - assert g1 == g2['/'] - assert g1 == g2['//'] - 
assert g1 == g2['///'] - assert g2 == g1['foo/bar'] - assert g2 == g1['/foo/bar'] - assert g2 == g1['foo/bar/'] - assert g2 == g1['//foo/bar'] - assert g2 == g1['//foo//bar//'] - assert g2 == g1['///foo///bar///'] - assert g2 == g2['/foo/bar'] + assert g1 == g1["/"] + assert g1 == g1["//"] + assert g1 == g1["///"] + assert g1 == g2["/"] + assert g1 == g2["//"] + assert g1 == g2["///"] + assert g2 == g1["foo/bar"] + assert g2 == g1["/foo/bar"] + assert g2 == g1["foo/bar/"] + assert g2 == g1["//foo/bar"] + assert g2 == g1["//foo//bar//"] + assert g2 == g1["///foo///bar///"] + assert g2 == g2["/foo/bar"] else: # the expected key format gives a match - assert g2 == g1['foo/bar'] + assert g2 == g1["foo/bar"] # TODO: Should presence of a trailing slash raise KeyError? # The spec says "the final character is not a / character" # but we currently strip trailing '/' as done for v2. - assert g2 == g1['foo/bar/'] + assert g2 == g1["foo/bar/"] # double slash also currently works (spec doesn't mention this # case, but have kept it for v2 behavior compatibility) - assert g2 == g1['foo//bar'] + assert g2 == g1["foo//bar"] # TODO, root: fix these cases # v3: leading / implies we are at the root, not within a group, # so these all raise KeyError - for path in ['/foo/bar', '//foo/bar', '//foo//bar//', - '///fooo///bar///']: + for path in ["/foo/bar", "//foo/bar", "//foo//bar//", "///fooo///bar///"]: with pytest.raises(KeyError): g1[path] with pytest.raises(ValueError): - g1['.'] + g1["."] with pytest.raises(ValueError): - g1['..'] + g1[".."] with pytest.raises(ValueError): - g1['foo/.'] + g1["foo/."] with pytest.raises(ValueError): - g1['foo/..'] + g1["foo/.."] with pytest.raises(ValueError): - g1['foo/./bar'] + g1["foo/./bar"] with pytest.raises(ValueError): - g1['foo/../bar'] + g1["foo/../bar"] g1.store.close() @@ -1056,7 +1088,7 @@ def test_pickle(self): # setup group g = self.create_group() - d = g.create_dataset('foo/bar', shape=100, chunks=10) + d = g.create_dataset("foo/bar", shape=100, chunks=10) d[:] = np.arange(100) path = g.path name = g.name @@ -1075,19 +1107,19 @@ def test_pickle(self): assert name == g2.name assert n == len(g2) assert keys == list(g2) - assert isinstance(g2['foo'], Group) - assert isinstance(g2['foo/bar'], Array) + assert isinstance(g2["foo"], Group) + assert isinstance(g2["foo/bar"], Array) g2.store.close() def test_context_manager(self): with self.create_group() as g: - d = g.create_dataset('foo/bar', shape=100, chunks=10) + d = g.create_dataset("foo/bar", shape=100, chunks=10) d[:] = np.arange(100) -@pytest.mark.parametrize('chunk_dict', [False, True]) +@pytest.mark.parametrize("chunk_dict", [False, True]) def test_group_init_from_dict(chunk_dict): if chunk_dict: store, chunk_store = dict(), dict() @@ -1106,20 +1138,25 @@ def test_group_init_from_dict(chunk_dict): # noinspection PyStatementEffect @pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") class TestGroupV3(TestGroup, unittest.TestCase): - @staticmethod def create_store(): # can be overridden in sub-classes return KVStoreV3(dict()), None - def create_group(self, store=None, path='group', read_only=False, - chunk_store=None, synchronizer=None): + def create_group( + self, store=None, path="group", read_only=False, chunk_store=None, synchronizer=None + ): # can be overridden in sub-classes if store is None: store, chunk_store = self.create_store() init_group(store, path=path, chunk_store=chunk_store) - g = Group(store, path=path, read_only=read_only, - chunk_store=chunk_store, synchronizer=synchronizer) 
+ g = Group( + store, + path=path, + read_only=read_only, + chunk_store=chunk_store, + synchronizer=synchronizer, + ) return g def test_group_init_1(self): @@ -1132,13 +1169,13 @@ def test_group_init_1(self): assert chunk_store is g.chunk_store assert not g.read_only # different path/name in v3 case - assert 'group' == g.path - assert '/group' == g.name - assert 'group' == g.basename + assert "group" == g.path + assert "/group" == g.name + assert "group" == g.basename assert isinstance(g.attrs, Attributes) - g.attrs['foo'] = 'bar' - assert g.attrs['foo'] == 'bar' + g.attrs["foo"] = "bar" + assert g.attrs["foo"] == "bar" assert isinstance(g.info, InfoReporter) assert isinstance(repr(g.info), str) @@ -1147,7 +1184,7 @@ def test_group_init_1(self): def test_group_init_errors_2(self): store, chunk_store = self.create_store() - path = 'tmp' + path = "tmp" init_array(store, path=path, shape=1000, chunks=100, chunk_store=chunk_store) # array blocks group with pytest.raises(ValueError): @@ -1156,7 +1193,6 @@ def test_group_init_errors_2(self): class TestGroupWithMemoryStore(TestGroup): - @staticmethod def create_store(): return MemoryStore(), None @@ -1165,14 +1201,12 @@ def create_store(): # noinspection PyStatementEffect @pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") class TestGroupV3WithMemoryStore(TestGroupWithMemoryStore, TestGroupV3): - @staticmethod def create_store(): return MemoryStoreV3(), None class TestGroupWithDirectoryStore(TestGroup): - @staticmethod def create_store(): path = tempfile.mkdtemp() @@ -1183,7 +1217,6 @@ def create_store(): @pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") class TestGroupV3WithDirectoryStore(TestGroupWithDirectoryStore, TestGroupV3): - @staticmethod def create_store(): path = tempfile.mkdtemp() @@ -1194,7 +1227,6 @@ def create_store(): @skip_test_env_var("ZARR_TEST_ABS") class TestGroupWithABSStore(TestGroup): - @staticmethod def create_store(): container_client = abs_container() @@ -1211,7 +1243,6 @@ def test_pickle(self): @skip_test_env_var("ZARR_TEST_ABS") @pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") class TestGroupV3WithABSStore(TestGroupV3): - @staticmethod def create_store(): container_client = abs_container() @@ -1226,7 +1257,6 @@ def test_pickle(self): class TestGroupWithNestedDirectoryStore(TestGroup): - @staticmethod def create_store(): path = tempfile.mkdtemp() @@ -1237,7 +1267,6 @@ def create_store(): @pytest.mark.skipif(have_fsspec is False, reason="needs fsspec") class TestGroupWithFSStore(TestGroup): - @staticmethod def create_store(): path = tempfile.mkdtemp() @@ -1247,21 +1276,19 @@ def create_store(): def test_round_trip_nd(self): data = np.arange(1000).reshape(10, 10, 10) - name = 'raw' + name = "raw" store, _ = self.create_store() - f = open_group(store, mode='w') - f.create_dataset(name, data=data, chunks=(5, 5, 5), - compressor=None) + f = open_group(store, mode="w") + f.create_dataset(name, data=data, chunks=(5, 5, 5), compressor=None) assert name in f - h = open_group(store, mode='r') + h = open_group(store, mode="r") np.testing.assert_array_equal(h[name][:], data) @pytest.mark.skipif(have_fsspec is False, reason="needs fsspec") @pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") class TestGroupV3WithFSStore(TestGroupWithFSStore, TestGroupV3): - @staticmethod def create_store(): path = tempfile.mkdtemp() @@ -1271,80 +1298,78 @@ def create_store(): def test_round_trip_nd(self): data = np.arange(1000).reshape(10, 10, 10) - name = 'raw' + name = "raw" store, _ 
= self.create_store() - f = open_group(store, path='group', mode='w') - f.create_dataset(name, data=data, chunks=(5, 5, 5), - compressor=None) - h = open_group(store, path='group', mode='r') + f = open_group(store, path="group", mode="w") + f.create_dataset(name, data=data, chunks=(5, 5, 5), compressor=None) + h = open_group(store, path="group", mode="r") np.testing.assert_array_equal(h[name][:], data) - f = open_group(store, path='group2', mode='w') + f = open_group(store, path="group2", mode="w") data_size = data.nbytes - group_meta_size = buffer_size(store[meta_root + 'group.group.json']) - group2_meta_size = buffer_size(store[meta_root + 'group2.group.json']) - array_meta_size = buffer_size(store[meta_root + 'group/raw.array.json']) + group_meta_size = buffer_size(store[meta_root + "group.group.json"]) + group2_meta_size = buffer_size(store[meta_root + "group2.group.json"]) + array_meta_size = buffer_size(store[meta_root + "group/raw.array.json"]) assert store.getsize() == data_size + group_meta_size + group2_meta_size + array_meta_size # added case with path to complete coverage - assert store.getsize('group') == data_size + group_meta_size + array_meta_size - assert store.getsize('group2') == group2_meta_size - assert store.getsize('group/raw') == data_size + array_meta_size + assert store.getsize("group") == data_size + group_meta_size + array_meta_size + assert store.getsize("group2") == group2_meta_size + assert store.getsize("group/raw") == data_size + array_meta_size @pytest.mark.skipif(have_fsspec is False, reason="needs fsspec") class TestGroupWithNestedFSStore(TestGroupWithFSStore): - @staticmethod def create_store(): path = tempfile.mkdtemp() atexit.register(atexit_rmtree, path) - store = FSStore(path, key_separator='/', auto_mkdir=True) + store = FSStore(path, key_separator="/", auto_mkdir=True) return store, None def test_inconsistent_dimension_separator(self): data = np.arange(1000).reshape(10, 10, 10) - name = 'raw' + name = "raw" store, _ = self.create_store() - f = open_group(store, mode='w') + f = open_group(store, mode="w") # cannot specify dimension_separator that conflicts with the store with pytest.raises(ValueError): - f.create_dataset(name, data=data, chunks=(5, 5, 5), - compressor=None, dimension_separator='.') + f.create_dataset( + name, data=data, chunks=(5, 5, 5), compressor=None, dimension_separator="." + ) @pytest.mark.skipif(have_fsspec is False, reason="needs fsspec") @pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") class TestGroupV3WithNestedFSStore(TestGroupV3WithFSStore): - @staticmethod def create_store(): path = tempfile.mkdtemp() atexit.register(atexit_rmtree, path) - store = FSStoreV3(path, key_separator='/', auto_mkdir=True) + store = FSStoreV3(path, key_separator="/", auto_mkdir=True) return store, None def test_inconsistent_dimension_separator(self): data = np.arange(1000).reshape(10, 10, 10) - name = 'raw' + name = "raw" store, _ = self.create_store() - f = open_group(store, path='group', mode='w') + f = open_group(store, path="group", mode="w") # cannot specify dimension_separator that conflicts with the store with pytest.raises(ValueError): - f.create_dataset(name, data=data, chunks=(5, 5, 5), - compressor=None, dimension_separator='.') + f.create_dataset( + name, data=data, chunks=(5, 5, 5), compressor=None, dimension_separator="." 
+ ) class TestGroupWithZipStore(TestGroup): - @staticmethod def create_store(): - path = mktemp(suffix='.zip') + path = mktemp(suffix=".zip") atexit.register(os.remove, path) store = ZipStore(path) return store, None @@ -1353,7 +1378,7 @@ def test_context_manager(self): with self.create_group() as g: store = g.store - d = g.create_dataset('foo/bar', shape=100, chunks=10) + d = g.create_dataset("foo/bar", shape=100, chunks=10) d[:] = np.arange(100) # Check that exiting the context manager closes the store, @@ -1369,65 +1394,59 @@ def test_move(self): @pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") class TestGroupV3WithZipStore(TestGroupWithZipStore, TestGroupV3): - @staticmethod def create_store(): - path = mktemp(suffix='.zip') + path = mktemp(suffix=".zip") atexit.register(os.remove, path) store = ZipStoreV3(path) return store, None class TestGroupWithDBMStore(TestGroup): - @staticmethod def create_store(): - path = mktemp(suffix='.anydbm') - atexit.register(atexit_rmglob, path + '*') - store = DBMStore(path, flag='n') + path = mktemp(suffix=".anydbm") + atexit.register(atexit_rmglob, path + "*") + store = DBMStore(path, flag="n") return store, None @pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") class TestGroupV3WithDBMStore(TestGroupWithDBMStore, TestGroupV3): - @staticmethod def create_store(): - path = mktemp(suffix='.anydbm') - atexit.register(atexit_rmglob, path + '*') - store = DBMStoreV3(path, flag='n') + path = mktemp(suffix=".anydbm") + atexit.register(atexit_rmglob, path + "*") + store = DBMStoreV3(path, flag="n") return store, None class TestGroupWithDBMStoreBerkeleyDB(TestGroup): - @staticmethod def create_store(): bsddb3 = pytest.importorskip("bsddb3") - path = mktemp(suffix='.dbm') + path = mktemp(suffix=".dbm") atexit.register(os.remove, path) - store = DBMStore(path, flag='n', open=bsddb3.btopen) + store = DBMStore(path, flag="n", open=bsddb3.btopen) return store, None @pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") class TestGroupV3WithDBMStoreBerkeleyDB(TestGroupWithDBMStoreBerkeleyDB, TestGroupV3): - @staticmethod def create_store(): bsddb3 = pytest.importorskip("bsddb3") - path = mktemp(suffix='.dbm') + path = mktemp(suffix=".dbm") atexit.register(os.remove, path) - store = DBMStoreV3(path, flag='n', open=bsddb3.btopen) + store = DBMStoreV3(path, flag="n", open=bsddb3.btopen) return store, None class TestGroupWithLMDBStore(TestGroup): - @staticmethod def create_store(): pytest.importorskip("lmdb") - path = mktemp(suffix='.lmdb') + path = mktemp(suffix=".lmdb") atexit.register(atexit_rmtree, path) store = LMDBStore(path) return store, None @@ -1435,21 +1454,19 @@ def create_store(): @pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") class TestGroupV3WithLMDBStore(TestGroupWithLMDBStore, TestGroupV3): - @staticmethod def create_store(): pytest.importorskip("lmdb") - path = mktemp(suffix='.lmdb') + path = mktemp(suffix=".lmdb") atexit.register(atexit_rmtree, path) store = LMDBStoreV3(path) return store, None class TestGroupWithSQLiteStore(TestGroup): - def create_store(self): pytest.importorskip("sqlite3") - path = mktemp(suffix='.db') + path = mktemp(suffix=".db") atexit.register(atexit_rmtree, path) store = SQLiteStore(path) return store, None @@ -1457,17 +1474,15 @@ def create_store(self): @pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") class TestGroupV3WithSQLiteStore(TestGroupWithSQLiteStore, TestGroupV3): - def create_store(self): pytest.importorskip("sqlite3") - path = 
mktemp(suffix='.db') + path = mktemp(suffix=".db") atexit.register(atexit_rmtree, path) store = SQLiteStoreV3(path) return store, None class TestGroupWithChunkStore(TestGroup): - @staticmethod def create_store(): return KVStore(dict()), KVStore(dict()) @@ -1482,24 +1497,23 @@ def test_chunk_store(self): assert chunk_store is g.chunk_store # create array - a = g.zeros('foo', shape=100, chunks=10) + a = g.zeros("foo", shape=100, chunks=10) assert store is a.store assert chunk_store is a.chunk_store a[:] = np.arange(100) assert_array_equal(np.arange(100), a[:]) # check store keys - expect = sorted([group_meta_key, 'foo/' + array_meta_key]) + expect = sorted([group_meta_key, "foo/" + array_meta_key]) actual = sorted(store.keys()) assert expect == actual - expect = ['foo/' + str(i) for i in range(10)] + expect = ["foo/" + str(i) for i in range(10)] actual = sorted(chunk_store.keys()) assert expect == actual @pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") class TestGroupV3WithChunkStore(TestGroupWithChunkStore, TestGroupV3): - @staticmethod def create_store(): return KVStoreV3(dict()), KVStoreV3(dict()) @@ -1507,7 +1521,7 @@ def create_store(): def test_chunk_store(self): # setup store, chunk_store = self.create_store() - path = 'group1' + path = "group1" g = self.create_group(store, path=path, chunk_store=chunk_store) # check attributes @@ -1515,26 +1529,25 @@ def test_chunk_store(self): assert chunk_store is g.chunk_store # create array - a = g.zeros('foo', shape=100, chunks=10) + a = g.zeros("foo", shape=100, chunks=10) assert store is a.store assert chunk_store is a.chunk_store a[:] = np.arange(100) assert_array_equal(np.arange(100), a[:]) # check store keys - group_key = meta_root + path + '.group.json' - array_key = meta_root + path + '/foo' + '.array.json' - expect = sorted([group_key, array_key, 'zarr.json']) + group_key = meta_root + path + ".group.json" + array_key = meta_root + path + "/foo" + ".array.json" + expect = sorted([group_key, array_key, "zarr.json"]) actual = sorted(store.keys()) assert expect == actual - expect = [data_root + path + '/foo/c' + str(i) for i in range(10)] - expect += ['zarr.json'] + expect = [data_root + path + "/foo/c" + str(i) for i in range(10)] + expect += ["zarr.json"] actual = sorted(chunk_store.keys()) assert expect == actual class TestGroupWithStoreCache(TestGroup): - @staticmethod def create_store(): store = LRUStoreCache(dict(), max_size=None) @@ -1543,26 +1556,25 @@ def create_store(): @pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") class TestGroupV3WithStoreCache(TestGroupWithStoreCache, TestGroupV3): - @staticmethod def create_store(): store = LRUStoreCacheV3(dict(), max_size=None) return store, None -@pytest.mark.parametrize('zarr_version', _VERSIONS) +@pytest.mark.parametrize("zarr_version", _VERSIONS) def test_group(zarr_version): # test the group() convenience function # basic usage if zarr_version == 2: g = group() - assert '' == g.path - assert '/' == g.name + assert "" == g.path + assert "/" == g.name else: - g = group(path='group1', zarr_version=zarr_version) - assert 'group1' == g.path - assert '/group1' == g.name + g = group(path="group1", zarr_version=zarr_version) + assert "group1" == g.path + assert "/group1" == g.name assert isinstance(g, Group) # usage with custom store @@ -1571,7 +1583,7 @@ def test_group(zarr_version): path = None else: store = KVStoreV3(dict()) - path = 'foo' + path = "foo" g = group(store=store, path=path) assert isinstance(g, Group) assert store is g.store @@ -1582,7 
+1594,7 @@ def test_group(zarr_version): path = None else: store = KVStoreV3(dict()) - path = 'foo' + path = "foo" init_array(store, path=path, shape=100, chunks=10) with pytest.raises(ValueError): group(store, path=path) @@ -1591,8 +1603,8 @@ def test_group(zarr_version): assert store is g.store -@pytest.mark.skipif(have_fsspec is False, reason='needs fsspec') -@pytest.mark.parametrize('zarr_version', _VERSIONS) +@pytest.mark.skipif(have_fsspec is False, reason="needs fsspec") +@pytest.mark.parametrize("zarr_version", _VERSIONS) def test_group_writeable_mode(zarr_version, tmp_path): # Regression test for https://github.com/zarr-developers/zarr-python/issues/1353 import fsspec @@ -1602,179 +1614,179 @@ def test_group_writeable_mode(zarr_version, tmp_path): assert zg.store.map == store -@pytest.mark.parametrize('zarr_version', _VERSIONS) +@pytest.mark.parametrize("zarr_version", _VERSIONS) def test_open_group(zarr_version): # test the open_group() convenience function - store = 'data/group.zarr' + store = "data/group.zarr" expected_store_type = DirectoryStore if zarr_version == 2 else DirectoryStoreV3 # mode == 'w' - path = None if zarr_version == 2 else 'group1' - g = open_group(store, path=path, mode='w', zarr_version=zarr_version) + path = None if zarr_version == 2 else "group1" + g = open_group(store, path=path, mode="w", zarr_version=zarr_version) assert isinstance(g, Group) assert isinstance(g.store, expected_store_type) assert 0 == len(g) - g.create_groups('foo', 'bar') + g.create_groups("foo", "bar") assert 2 == len(g) # mode in 'r', 'r+' - open_array('data/array.zarr', shape=100, chunks=10, mode='w') - for mode in 'r', 'r+': + open_array("data/array.zarr", shape=100, chunks=10, mode="w") + for mode in "r", "r+": with pytest.raises(ValueError): - open_group('doesnotexist', mode=mode) + open_group("doesnotexist", mode=mode) with pytest.raises(ValueError): - open_group('data/array.zarr', mode=mode) - g = open_group(store, mode='r') + open_group("data/array.zarr", mode=mode) + g = open_group(store, mode="r") assert isinstance(g, Group) assert 2 == len(g) with pytest.raises(PermissionError): - g.create_group('baz') - g = open_group(store, mode='r+') + g.create_group("baz") + g = open_group(store, mode="r+") assert isinstance(g, Group) assert 2 == len(g) - g.create_groups('baz', 'quux') + g.create_groups("baz", "quux") assert 4 == len(g) # mode == 'a' shutil.rmtree(store) - g = open_group(store, path=path, mode='a', zarr_version=zarr_version) + g = open_group(store, path=path, mode="a", zarr_version=zarr_version) assert isinstance(g, Group) assert isinstance(g.store, expected_store_type) assert 0 == len(g) - g.create_groups('foo', 'bar') + g.create_groups("foo", "bar") assert 2 == len(g) if zarr_version == 2: with pytest.raises(ValueError): - open_group('data/array.zarr', mode='a', zarr_version=zarr_version) + open_group("data/array.zarr", mode="a", zarr_version=zarr_version) else: # TODO, root: should this raise an error? 
- open_group('data/array.zarr', mode='a', zarr_version=zarr_version) + open_group("data/array.zarr", mode="a", zarr_version=zarr_version) # mode in 'w-', 'x' - for mode in 'w-', 'x': + for mode in "w-", "x": shutil.rmtree(store) g = open_group(store, path=path, mode=mode, zarr_version=zarr_version) assert isinstance(g, Group) assert isinstance(g.store, expected_store_type) assert 0 == len(g) - g.create_groups('foo', 'bar') + g.create_groups("foo", "bar") assert 2 == len(g) with pytest.raises(ValueError): open_group(store, path=path, mode=mode, zarr_version=zarr_version) if zarr_version == 2: with pytest.raises(ValueError): - open_group('data/array.zarr', mode=mode) + open_group("data/array.zarr", mode=mode) # open with path - g = open_group(store, path='foo/bar', zarr_version=zarr_version) + g = open_group(store, path="foo/bar", zarr_version=zarr_version) assert isinstance(g, Group) - assert 'foo/bar' == g.path + assert "foo/bar" == g.path -@pytest.mark.parametrize('zarr_version', _VERSIONS) +@pytest.mark.parametrize("zarr_version", _VERSIONS) def test_group_completions(zarr_version): - path = None if zarr_version == 2 else 'group1' + path = None if zarr_version == 2 else "group1" g = group(path=path, zarr_version=zarr_version) d = dir(g) - assert 'foo' not in d - assert 'bar' not in d - assert 'baz' not in d - assert 'qux' not in d - assert 'xxx' not in d - assert 'yyy' not in d - assert 'zzz' not in d - assert '123' not in d - assert '456' not in d - g.create_groups('foo', 'bar', 'baz/qux', '123') - g.zeros('xxx', shape=100) - g.zeros('yyy', shape=100) - g.zeros('zzz', shape=100) - g.zeros('456', shape=100) + assert "foo" not in d + assert "bar" not in d + assert "baz" not in d + assert "qux" not in d + assert "xxx" not in d + assert "yyy" not in d + assert "zzz" not in d + assert "123" not in d + assert "456" not in d + g.create_groups("foo", "bar", "baz/qux", "123") + g.zeros("xxx", shape=100) + g.zeros("yyy", shape=100) + g.zeros("zzz", shape=100) + g.zeros("456", shape=100) d = dir(g) - assert 'foo' in d - assert 'bar' in d - assert 'baz' in d - assert 'qux' not in d - assert 'xxx' in d - assert 'yyy' in d - assert 'zzz' in d - assert '123' not in d # not valid identifier - assert '456' not in d # not valid identifier - - -@pytest.mark.parametrize('zarr_version', _VERSIONS) + assert "foo" in d + assert "bar" in d + assert "baz" in d + assert "qux" not in d + assert "xxx" in d + assert "yyy" in d + assert "zzz" in d + assert "123" not in d # not valid identifier + assert "456" not in d # not valid identifier + + +@pytest.mark.parametrize("zarr_version", _VERSIONS) def test_group_key_completions(zarr_version): - path = None if zarr_version == 2 else 'group1' + path = None if zarr_version == 2 else "group1" g = group(path=path, zarr_version=zarr_version) d = dir(g) # noinspection PyProtectedMember k = g._ipython_key_completions_() # none of these names should be an attribute - assert 'foo' not in d - assert 'bar' not in d - assert 'baz' not in d - assert 'qux' not in d - assert 'xxx' not in d - assert 'yyy' not in d - assert 'zzz' not in d - assert '123' not in d - assert '456' not in d - assert 'asdf;' not in d + assert "foo" not in d + assert "bar" not in d + assert "baz" not in d + assert "qux" not in d + assert "xxx" not in d + assert "yyy" not in d + assert "zzz" not in d + assert "123" not in d + assert "456" not in d + assert "asdf;" not in d # none of these names should be an item - assert 'foo' not in k - assert 'bar' not in k - assert 'baz' not in k - assert 'qux' not in k - 
assert 'xxx' not in k - assert 'yyy' not in k - assert 'zzz' not in k - assert '123' not in k - assert '456' not in k - assert 'asdf;' not in k - - g.create_groups('foo', 'bar', 'baz/qux', '123') - g.zeros('xxx', shape=100) - g.zeros('yyy', shape=100) - g.zeros('zzz', shape=100) - g.zeros('456', shape=100) + assert "foo" not in k + assert "bar" not in k + assert "baz" not in k + assert "qux" not in k + assert "xxx" not in k + assert "yyy" not in k + assert "zzz" not in k + assert "123" not in k + assert "456" not in k + assert "asdf;" not in k + + g.create_groups("foo", "bar", "baz/qux", "123") + g.zeros("xxx", shape=100) + g.zeros("yyy", shape=100) + g.zeros("zzz", shape=100) + g.zeros("456", shape=100) if zarr_version == 2: - g.zeros('asdf;', shape=100) + g.zeros("asdf;", shape=100) else: # cannot have ; in key name for v3 with pytest.raises(ValueError): - g.zeros('asdf;', shape=100) + g.zeros("asdf;", shape=100) d = dir(g) # noinspection PyProtectedMember k = g._ipython_key_completions_() - assert 'foo' in d - assert 'bar' in d - assert 'baz' in d - assert 'qux' not in d - assert 'xxx' in d - assert 'yyy' in d - assert 'zzz' in d - assert '123' not in d # not valid identifier - assert '456' not in d # not valid identifier + assert "foo" in d + assert "bar" in d + assert "baz" in d + assert "qux" not in d + assert "xxx" in d + assert "yyy" in d + assert "zzz" in d + assert "123" not in d # not valid identifier + assert "456" not in d # not valid identifier if zarr_version == 2: - assert 'asdf;' not in d # not valid identifier - - assert 'foo' in k - assert 'bar' in k - assert 'baz' in k - assert 'qux' not in k - assert 'xxx' in k - assert 'yyy' in k - assert 'zzz' in k - assert '123' in k - assert '456' in k + assert "asdf;" not in d # not valid identifier + + assert "foo" in k + assert "bar" in k + assert "baz" in k + assert "qux" not in k + assert "xxx" in k + assert "yyy" in k + assert "zzz" in k + assert "123" in k + assert "456" in k if zarr_version == 2: - assert 'asdf;' in k + assert "asdf;" in k def _check_tree(g, expect_bytes, expect_text): @@ -1788,72 +1800,88 @@ def _check_tree(g, expect_bytes, expect_text): isinstance(widget, ipytree.Tree) -@pytest.mark.parametrize('zarr_version', _VERSIONS) -@pytest.mark.parametrize('at_root', [False, True]) +@pytest.mark.parametrize("zarr_version", _VERSIONS) +@pytest.mark.parametrize("at_root", [False, True]) def test_tree(zarr_version, at_root): # setup - path = None if at_root else 'group1' + path = None if at_root else "group1" g1 = group(path=path, zarr_version=zarr_version) - g2 = g1.create_group('foo') - g3 = g1.create_group('bar') - g3.create_group('baz') - g5 = g3.create_group('quux') - g5.create_dataset('baz', shape=100, chunks=10) + g2 = g1.create_group("foo") + g3 = g1.create_group("bar") + g3.create_group("baz") + g5 = g3.create_group("quux") + g5.create_dataset("baz", shape=100, chunks=10) - tree_path = '/' if at_root else path + tree_path = "/" if at_root else path # test root group if zarr_version == 2: - expect_bytes = textwrap.dedent(f"""\ + expect_bytes = textwrap.dedent( + f"""\ {tree_path} +-- bar | +-- baz | +-- quux | +-- baz (100,) float64 - +-- foo""").encode() - expect_text = textwrap.dedent(f"""\ + +-- foo""" + ).encode() + expect_text = textwrap.dedent( + f"""\ {tree_path} ├── bar │ ├── baz │ └── quux │ └── baz (100,) float64 - └── foo""") + └── foo""" + ) else: # Almost the same as for v2, but has a path name and the # subgroups are not necessarily sorted alphabetically. 
- expect_bytes = textwrap.dedent(f"""\ + expect_bytes = textwrap.dedent( + f"""\ {tree_path} +-- foo +-- bar +-- baz +-- quux - +-- baz (100,) float64""").encode() - expect_text = textwrap.dedent(f"""\ + +-- baz (100,) float64""" + ).encode() + expect_text = textwrap.dedent( + f"""\ {tree_path} ├── foo └── bar ├── baz └── quux - └── baz (100,) float64""") + └── baz (100,) float64""" + ) _check_tree(g1, expect_bytes, expect_text) # test different group - expect_bytes = textwrap.dedent("""\ - foo""").encode() - expect_text = textwrap.dedent("""\ - foo""") + expect_bytes = textwrap.dedent( + """\ + foo""" + ).encode() + expect_text = textwrap.dedent( + """\ + foo""" + ) _check_tree(g2, expect_bytes, expect_text) # test different group - expect_bytes = textwrap.dedent("""\ + expect_bytes = textwrap.dedent( + """\ bar +-- baz +-- quux - +-- baz (100,) float64""").encode() - expect_text = textwrap.dedent("""\ + +-- baz (100,) float64""" + ).encode() + expect_text = textwrap.dedent( + """\ bar ├── baz └── quux - └── baz (100,) float64""") + └── baz (100,) float64""" + ) _check_tree(g3, expect_bytes, expect_text) @@ -1866,38 +1894,38 @@ def test_group_mismatched_store_versions(): chunk_store_v2 = KVStore(dict()) chunk_store_v3 = KVStoreV3(dict()) - init_group(store_v2, path='group1', chunk_store=chunk_store_v2) - init_group(store_v3, path='group1', chunk_store=chunk_store_v3) + init_group(store_v2, path="group1", chunk_store=chunk_store_v2) + init_group(store_v3, path="group1", chunk_store=chunk_store_v3) - g1_v3 = Group(store_v3, path='group1', read_only=True, chunk_store=chunk_store_v3) + g1_v3 = Group(store_v3, path="group1", read_only=True, chunk_store=chunk_store_v3) assert isinstance(g1_v3._store, KVStoreV3) - g1_v2 = Group(store_v2, path='group1', read_only=True, chunk_store=chunk_store_v2) + g1_v2 = Group(store_v2, path="group1", read_only=True, chunk_store=chunk_store_v2) assert isinstance(g1_v2._store, KVStore) # store and chunk_store must have the same zarr protocol version with pytest.raises(ValueError): - Group(store_v3, path='group1', read_only=False, chunk_store=chunk_store_v2) + Group(store_v3, path="group1", read_only=False, chunk_store=chunk_store_v2) with pytest.raises(ValueError): - Group(store_v2, path='group1', read_only=False, chunk_store=chunk_store_v3) + Group(store_v2, path="group1", read_only=False, chunk_store=chunk_store_v3) with pytest.raises(ValueError): - open_group(store_v2, path='group1', chunk_store=chunk_store_v3) + open_group(store_v2, path="group1", chunk_store=chunk_store_v3) with pytest.raises(ValueError): - open_group(store_v3, path='group1', chunk_store=chunk_store_v2) + open_group(store_v3, path="group1", chunk_store=chunk_store_v2) # raises Value if read_only and path is not a pre-existing group with pytest.raises(ValueError): - Group(store_v3, path='group2', read_only=True, chunk_store=chunk_store_v3) + Group(store_v3, path="group2", read_only=True, chunk_store=chunk_store_v3) with pytest.raises(ValueError): - Group(store_v3, path='group2', read_only=True, chunk_store=chunk_store_v3) + Group(store_v3, path="group2", read_only=True, chunk_store=chunk_store_v3) -@pytest.mark.parametrize('zarr_version', _VERSIONS) +@pytest.mark.parametrize("zarr_version", _VERSIONS) def test_open_group_from_paths(zarr_version): """Verify zarr_version is applied to both the store and chunk_store.""" store = tempfile.mkdtemp() chunk_store = tempfile.mkdtemp() atexit.register(atexit_rmtree, store) atexit.register(atexit_rmtree, chunk_store) - path = 'g1' + path = "g1" g = 
open_group(store, path=path, chunk_store=chunk_store, zarr_version=zarr_version) assert g._store._store_version == g._chunk_store._store_version == zarr_version diff --git a/zarr/tests/test_indexing.py b/zarr/tests/test_indexing.py index 61e76c63da..8a34c1e715 100644 --- a/zarr/tests/test_indexing.py +++ b/zarr/tests/test_indexing.py @@ -51,22 +51,20 @@ def test_replace_ellipsis(): assert (slice(None), 0) == replace_ellipsis((slice(None), 0), (100, 100)) # 2D slice - assert ((slice(None), slice(None)) == - replace_ellipsis(Ellipsis, (100, 100))) - assert ((slice(None), slice(None)) == - replace_ellipsis(slice(None), (100, 100))) - assert ((slice(None), slice(None)) == - replace_ellipsis((slice(None), slice(None)), (100, 100))) - assert ((slice(None), slice(None)) == - replace_ellipsis((Ellipsis, slice(None)), (100, 100))) - assert ((slice(None), slice(None)) == - replace_ellipsis((slice(None), Ellipsis), (100, 100))) - assert ((slice(None), slice(None)) == - replace_ellipsis((slice(None), Ellipsis, slice(None)), (100, 100))) - assert ((slice(None), slice(None)) == - replace_ellipsis((Ellipsis, slice(None), slice(None)), (100, 100))) - assert ((slice(None), slice(None)) == - replace_ellipsis((slice(None), slice(None), Ellipsis), (100, 100))) + assert (slice(None), slice(None)) == replace_ellipsis(Ellipsis, (100, 100)) + assert (slice(None), slice(None)) == replace_ellipsis(slice(None), (100, 100)) + assert (slice(None), slice(None)) == replace_ellipsis((slice(None), slice(None)), (100, 100)) + assert (slice(None), slice(None)) == replace_ellipsis((Ellipsis, slice(None)), (100, 100)) + assert (slice(None), slice(None)) == replace_ellipsis((slice(None), Ellipsis), (100, 100)) + assert (slice(None), slice(None)) == replace_ellipsis( + (slice(None), Ellipsis, slice(None)), (100, 100) + ) + assert (slice(None), slice(None)) == replace_ellipsis( + (Ellipsis, slice(None), slice(None)), (100, 100) + ) + assert (slice(None), slice(None)) == replace_ellipsis( + (slice(None), slice(None), Ellipsis), (100, 100) + ) def test_get_basic_selection_0d(): @@ -87,25 +85,25 @@ def test_get_basic_selection_0d(): assert_array_equal(a, b) # test structured array - value = (b'aaa', 1, 4.2) - a = np.array(value, dtype=[('foo', 'S3'), ('bar', 'i4'), ('baz', 'f8')]) + value = (b"aaa", 1, 4.2) + a = np.array(value, dtype=[("foo", "S3"), ("bar", "i4"), ("baz", "f8")]) z = zarr.create(shape=a.shape, dtype=a.dtype, fill_value=None) z[()] = value assert_array_equal(a, z.get_basic_selection(Ellipsis)) assert_array_equal(a, z[...]) assert a[()] == z.get_basic_selection(()) assert a[()] == z[()] - assert b'aaa' == z.get_basic_selection((), fields='foo') - assert b'aaa' == z['foo'] - assert a[['foo', 'bar']] == z.get_basic_selection((), fields=['foo', 'bar']) - assert a[['foo', 'bar']] == z['foo', 'bar'] + assert b"aaa" == z.get_basic_selection((), fields="foo") + assert b"aaa" == z["foo"] + assert a[["foo", "bar"]] == z.get_basic_selection((), fields=["foo", "bar"]) + assert a[["foo", "bar"]] == z["foo", "bar"] # test out param b = np.zeros_like(a) z.get_basic_selection(Ellipsis, out=b) assert_array_equal(a, b) - c = np.zeros_like(a[['foo', 'bar']]) - z.get_basic_selection(Ellipsis, out=c, fields=['foo', 'bar']) - assert_array_equal(a[['foo', 'bar']], c) + c = np.zeros_like(a[["foo", "bar"]]) + z.get_basic_selection(Ellipsis, out=c, fields=["foo", "bar"]) + assert_array_equal(a[["foo", "bar"]], c) basic_selections_1d = [ @@ -175,8 +173,8 @@ def test_get_basic_selection_0d(): slice(-1, 0, -1), # bad stuff 2.3, - 'foo', - 
b'xxx', + "foo", + b"xxx", None, (0, 0), (slice(None), slice(None)), @@ -252,8 +250,8 @@ def test_get_basic_selection_1d(): basic_selections_2d_bad = [ # bad stuff 2.3, - 'foo', - b'xxx', + "foo", + b"xxx", None, (2.3, slice(None)), # only positive step supported @@ -300,71 +298,34 @@ def test_fancy_indexing_fallback_on_get_setitem(): [0, 0, 0, 1], ], ) - np.testing.assert_array_equal( - z[[1, 2, 3], [1, 2, 3]], 1 - ) + np.testing.assert_array_equal(z[[1, 2, 3], [1, 2, 3]], 1) # test broadcasting - np.testing.assert_array_equal( - z[1, [1, 2, 3]], [1, 0, 0] - ) + np.testing.assert_array_equal(z[1, [1, 2, 3]], [1, 0, 0]) # test 1D fancy indexing z2 = zarr.zeros(5) z2[[1, 2, 3]] = 1 - np.testing.assert_array_equal( - z2, [0, 1, 1, 1, 0] - ) + np.testing.assert_array_equal(z2, [0, 1, 1, 1, 0]) -@pytest.mark.parametrize("index,expected_result", - [ - # Single iterable of integers - ( - [0, 1], - [[0, 1, 2], - [3, 4, 5]] - ), - # List first, then slice - ( - ([0, 1], slice(None)), - [[0, 1, 2], - [3, 4, 5]] - ), - # List first, then slice - ( - ([0, 1], slice(1, None)), - [[1, 2], - [4, 5]] - ), - # Slice first, then list - ( - (slice(0, 2), [0, 2]), - [[0, 2], - [3, 5]] - ), - # Slices only - ( - (slice(0, 2), slice(0, 2)), - [[0, 1], - [3, 4]] - ), - # List with repeated index - ( - ([1, 0, 1], slice(1, None)), - [[4, 5], - [1, 2], - [4, 5]] - ), - # 1D indexing - ( - ([1, 0, 1]), - [ - [3, 4, 5], - [0, 1, 2], - [3, 4, 5] - ] - ) - - ]) +@pytest.mark.parametrize( + "index,expected_result", + [ + # Single iterable of integers + ([0, 1], [[0, 1, 2], [3, 4, 5]]), + # List first, then slice + (([0, 1], slice(None)), [[0, 1, 2], [3, 4, 5]]), + # List first, then slice + (([0, 1], slice(1, None)), [[1, 2], [4, 5]]), + # Slice first, then list + ((slice(0, 2), [0, 2]), [[0, 2], [3, 5]]), + # Slices only + ((slice(0, 2), slice(0, 2)), [[0, 1], [3, 4]]), + # List with repeated index + (([1, 0, 1], slice(1, None)), [[4, 5], [1, 2], [4, 5]]), + # 1D indexing + (([1, 0, 1]), [[3, 4, 5], [0, 1, 2], [3, 4, 5]]), + ], +) def test_orthogonal_indexing_fallback_on_getitem_2d(index, expected_result): """ Tests the orthogonal indexing fallback on __getitem__ for a 2D matrix. @@ -382,34 +343,19 @@ def test_orthogonal_indexing_fallback_on_getitem_2d(index, expected_result): np.testing.assert_array_equal(z[index], expected_result) -@pytest.mark.parametrize("index,expected_result", - [ - # Single iterable of integers - ( - [0, 1], - [[[0, 1, 2], - [3, 4, 5], - [6, 7, 8]], - [[9, 10, 11], - [12, 13, 14], - [15, 16, 17]]] - ), - # One slice, two integers - ( - (slice(0, 2), 1, 1), - [4, 13] - ), - # One integer, two slices - ( - (slice(0, 2), 1, slice(0, 2)), - [[3, 4], [12, 13]] - ), - # Two slices and a list - ( - (slice(0, 2), [1, 2], slice(0, 2)), - [[[3, 4], [6, 7]], [[12, 13], [15, 16]]] - ), - ]) +@pytest.mark.parametrize( + "index,expected_result", + [ + # Single iterable of integers + ([0, 1], [[[0, 1, 2], [3, 4, 5], [6, 7, 8]], [[9, 10, 11], [12, 13, 14], [15, 16, 17]]]), + # One slice, two integers + ((slice(0, 2), 1, 1), [4, 13]), + # One integer, two slices + ((slice(0, 2), 1, slice(0, 2)), [[3, 4], [12, 13]]), + # Two slices and a list + ((slice(0, 2), [1, 2], slice(0, 2)), [[[3, 4], [6, 7]], [[12, 13], [15, 16]]]), + ], +) def test_orthogonal_indexing_fallback_on_getitem_3d(index, expected_result): """ Tests the orthogonal indexing fallback on __getitem__ for a 3D matrix. 
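(Note on the cases being reflowed above: the reformatting does not change the indexing semantics under test. As a minimal standalone sketch of what these parametrized cases exercise — assuming only numpy and zarr are installed, with illustrative array contents:

    import numpy as np
    import zarr

    a = np.arange(9).reshape(3, 3)
    z = zarr.array(a)

    # Explicit orthogonal (outer-product) selection of rows {0, 2} x cols {0, 2}:
    z.get_orthogonal_selection(([0, 2], [0, 2]))   # [[0, 2], [6, 8]]

    # Mixing an integer list with a slice in __getitem__ falls back to the
    # same orthogonal semantics, selecting whole rows 0 and 2:
    z[[0, 2], :]                                   # [[0, 1, 2], [6, 7, 8]]

Unlike numpy's pointwise fancy indexing, the list/slice combination selects the outer product of the per-axis selections, which is what the expected-result tables in these tests encode.)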
@@ -439,36 +385,14 @@ def test_orthogonal_indexing_fallback_on_getitem_3d(index, expected_result): "index,expected_result", [ # Single iterable of integers - ( - [0, 1], - [ - [1, 1, 1], - [1, 1, 1], - [0, 0, 0] - ] - ), + ([0, 1], [[1, 1, 1], [1, 1, 1], [0, 0, 0]]), # List and slice combined - ( - ([0, 1], slice(1, 3)), - [[0, 1, 1], - [0, 1, 1], - [0, 0, 0]] - ), + (([0, 1], slice(1, 3)), [[0, 1, 1], [0, 1, 1], [0, 0, 0]]), # Index repetition is ignored on setitem - ( - ([0, 1, 1, 1, 1, 1, 1], slice(1, 3)), - [[0, 1, 1], - [0, 1, 1], - [0, 0, 0]] - ), + (([0, 1, 1, 1, 1, 1, 1], slice(1, 3)), [[0, 1, 1], [0, 1, 1], [0, 0, 0]]), # Slice with step - ( - ([0, 2], slice(None, None, 2)), - [[1, 0, 1], - [0, 0, 0], - [1, 0, 1]] - ) - ] + (([0, 2], slice(None, None, 2)), [[1, 0, 1], [0, 0, 0], [1, 0, 1]]), + ], ) def test_orthogonal_indexing_fallback_on_setitem_2d(index, expected_result): """ @@ -482,12 +406,8 @@ def test_orthogonal_indexing_fallback_on_setitem_2d(index, expected_result): z = zarr.array(a) z[index] = 1 a[index] = 1 - np.testing.assert_array_equal( - z, expected_result - ) - np.testing.assert_array_equal( - z, a, err_msg="Indexing disagrees with numpy" - ) + np.testing.assert_array_equal(z, expected_result) + np.testing.assert_array_equal(z, a, err_msg="Indexing disagrees with numpy") def test_fancy_indexing_doesnt_mix_with_implicit_slicing(): @@ -495,15 +415,11 @@ def test_fancy_indexing_doesnt_mix_with_implicit_slicing(): with pytest.raises(IndexError): z2[[1, 2, 3], [1, 2, 3]] = 2 with pytest.raises(IndexError): - np.testing.assert_array_equal( - z2[[1, 2, 3], [1, 2, 3]], 0 - ) + np.testing.assert_array_equal(z2[[1, 2, 3], [1, 2, 3]], 0) with pytest.raises(IndexError): z2[..., [1, 2, 3]] = 2 with pytest.raises(IndexError): - np.testing.assert_array_equal( - z2[..., [1, 2, 3]], 0 - ) + np.testing.assert_array_equal(z2[..., [1, 2, 3]], 0) def test_set_basic_selection_0d(): @@ -523,8 +439,8 @@ def test_set_basic_selection_0d(): assert_array_equal(v, z) # test structured array - value = (b'aaa', 1, 4.2) - v = np.array(value, dtype=[('foo', 'S3'), ('bar', 'i4'), ('baz', 'f8')]) + value = (b"aaa", 1, 4.2) + v = np.array(value, dtype=[("foo", "S3"), ("bar", "i4"), ("baz", "f8")]) a = np.zeros_like(v) z = zarr.create(shape=a.shape, dtype=a.dtype, fill_value=None) @@ -538,19 +454,19 @@ def test_set_basic_selection_0d(): z[...] 
= a assert_array_equal(a, z) # with fields - z.set_basic_selection(Ellipsis, v['foo'], fields='foo') - assert v['foo'] == z['foo'] - assert a['bar'] == z['bar'] - assert a['baz'] == z['baz'] - z['bar'] = v['bar'] - assert v['foo'] == z['foo'] - assert v['bar'] == z['bar'] - assert a['baz'] == z['baz'] + z.set_basic_selection(Ellipsis, v["foo"], fields="foo") + assert v["foo"] == z["foo"] + assert a["bar"] == z["bar"] + assert a["baz"] == z["baz"] + z["bar"] = v["bar"] + assert v["foo"] == z["foo"] + assert v["bar"] == z["bar"] + assert a["baz"] == z["baz"] # multiple field assignment not supported with pytest.raises(IndexError): - z.set_basic_selection(Ellipsis, v[['foo', 'bar']], fields=['foo', 'bar']) + z.set_basic_selection(Ellipsis, v[["foo", "bar"]], fields=["foo", "bar"]) with pytest.raises(IndexError): - z[..., 'foo', 'bar'] = v[['foo', 'bar']] + z[..., "foo", "bar"] = v[["foo", "bar"]] def _test_get_orthogonal_selection(a, z, selection): @@ -610,7 +526,6 @@ def test_get_orthogonal_selection_1d_int(): [0, 3, 10, -23, -12, -1], # explicit test not sorted [3, 105, 23, 127], - ] for selection in selections: _test_get_orthogonal_selection(a, z, selection) @@ -671,7 +586,7 @@ def test_get_orthogonal_selection_2d(): # integer arrays ix0 = np.random.choice(a.shape[0], size=int(a.shape[0] * p), replace=True) - ix1 = np.random.choice(a.shape[1], size=int(a.shape[1] * .5), replace=True) + ix1 = np.random.choice(a.shape[1], size=int(a.shape[1] * 0.5), replace=True) _test_get_orthogonal_selection_2d(a, z, ix0, ix1) ix0.sort() ix1.sort() @@ -738,14 +653,14 @@ def test_get_orthogonal_selection_3d(): # boolean arrays ix0 = np.random.binomial(1, p, size=a.shape[0]).astype(bool) - ix1 = np.random.binomial(1, .5, size=a.shape[1]).astype(bool) - ix2 = np.random.binomial(1, .5, size=a.shape[2]).astype(bool) + ix1 = np.random.binomial(1, 0.5, size=a.shape[1]).astype(bool) + ix2 = np.random.binomial(1, 0.5, size=a.shape[2]).astype(bool) _test_get_orthogonal_selection_3d(a, z, ix0, ix1, ix2) # integer arrays ix0 = np.random.choice(a.shape[0], size=int(a.shape[0] * p), replace=True) - ix1 = np.random.choice(a.shape[1], size=int(a.shape[1] * .5), replace=True) - ix2 = np.random.choice(a.shape[2], size=int(a.shape[2] * .5), replace=True) + ix1 = np.random.choice(a.shape[1], size=int(a.shape[1] * 0.5), replace=True) + ix2 = np.random.choice(a.shape[2], size=int(a.shape[2] * 0.5), replace=True) _test_get_orthogonal_selection_3d(a, z, ix0, ix1, ix2) ix0.sort() ix1.sort() @@ -846,12 +761,12 @@ def test_set_orthogonal_selection_2d(): # boolean arrays ix0 = np.random.binomial(1, p, size=a.shape[0]).astype(bool) - ix1 = np.random.binomial(1, .5, size=a.shape[1]).astype(bool) + ix1 = np.random.binomial(1, 0.5, size=a.shape[1]).astype(bool) _test_set_orthogonal_selection_2d(v, a, z, ix0, ix1) # integer arrays ix0 = np.random.choice(a.shape[0], size=int(a.shape[0] * p), replace=True) - ix1 = np.random.choice(a.shape[1], size=int(a.shape[1] * .5), replace=True) + ix1 = np.random.choice(a.shape[1], size=int(a.shape[1] * 0.5), replace=True) _test_set_orthogonal_selection_2d(v, a, z, ix0, ix1) ix0.sort() ix1.sort() @@ -904,14 +819,14 @@ def test_set_orthogonal_selection_3d(): # boolean arrays ix0 = np.random.binomial(1, p, size=a.shape[0]).astype(bool) - ix1 = np.random.binomial(1, .5, size=a.shape[1]).astype(bool) - ix2 = np.random.binomial(1, .5, size=a.shape[2]).astype(bool) + ix1 = np.random.binomial(1, 0.5, size=a.shape[1]).astype(bool) + ix2 = np.random.binomial(1, 0.5, size=a.shape[2]).astype(bool) 
_test_set_orthogonal_selection_3d(v, a, z, ix0, ix1, ix2) # integer arrays ix0 = np.random.choice(a.shape[0], size=int(a.shape[0] * p), replace=True) - ix1 = np.random.choice(a.shape[1], size=int(a.shape[1] * .5), replace=True) - ix2 = np.random.choice(a.shape[2], size=int(a.shape[2] * .5), replace=True) + ix1 = np.random.choice(a.shape[1], size=int(a.shape[1] * 0.5), replace=True) + ix2 = np.random.choice(a.shape[2], size=int(a.shape[2] * 0.5), replace=True) _test_set_orthogonal_selection_3d(v, a, z, ix0, ix1, ix2) # sorted increasing @@ -939,19 +854,13 @@ def test_orthogonal_indexing_fallback_on_get_setitem(): [0, 0, 0, 1], ], ) - np.testing.assert_array_equal( - z[[1, 2, 3], [1, 2, 3]], 1 - ) + np.testing.assert_array_equal(z[[1, 2, 3], [1, 2, 3]], 1) # test broadcasting - np.testing.assert_array_equal( - z[1, [1, 2, 3]], [1, 0, 0] - ) + np.testing.assert_array_equal(z[1, [1, 2, 3]], [1, 0, 0]) # test 1D fancy indexing z2 = zarr.zeros(5) z2[[1, 2, 3]] = 1 - np.testing.assert_array_equal( - z2, [0, 1, 1, 1, 0] - ) + np.testing.assert_array_equal(z2, [0, 1, 1, 1, 0]) def _test_get_coordinate_selection(a, z, selection): @@ -969,8 +878,8 @@ def _test_get_coordinate_selection(a, z, selection): Ellipsis, # bad stuff 2.3, - 'foo', - b'xxx', + "foo", + b"xxx", None, (0, 0), (slice(None), slice(None)), @@ -1060,10 +969,8 @@ def test_get_coordinate_selection_2d(): _test_get_coordinate_selection(a, z, (ix0, ix1)) # multi-dimensional selection - ix0 = np.array([[1, 1, 2], - [2, 2, 5]]) - ix1 = np.array([[1, 3, 2], - [1, 0, 0]]) + ix0 = np.array([[1, 1, 2], [2, 2, 5]]) + ix1 = np.array([[1, 3, 2], [1, 0, 0]]) _test_get_coordinate_selection(a, z, (ix0, ix1)) with pytest.raises(IndexError): @@ -1146,10 +1053,8 @@ def test_set_coordinate_selection_2d(): _test_set_coordinate_selection(v, a, z, selection) # multi-dimensional selection - ix0 = np.array([[1, 2, 3], - [4, 5, 6]]) - ix1 = np.array([[1, 3, 2], - [2, 0, 5]]) + ix0 = np.array([[1, 2, 3], [4, 5, 6]]) + ix1 = np.array([[1, 3, 2], [2, 0, 5]]) _test_set_coordinate_selection(v, a, z, (ix0, ix1)) @@ -1196,12 +1101,12 @@ def _test_get_block_selection(a, z, selection, expected_idx): slice(3, 8, 2), # bad stuff 2.3, - 'foo', - b'xxx', + "foo", + b"xxx", None, (0, 0), (slice(None), slice(None)), - [0, 5, 3] + [0, 5, 3], ] @@ -1211,8 +1116,7 @@ def test_get_block_selection_1d(): z = zarr.create(shape=a.shape, chunks=100, dtype=a.dtype) z[:] = a - for selection, expected_idx in \ - zip(block_selections_1d, block_selections_1d_array_projection): + for selection, expected_idx in zip(block_selections_1d, block_selections_1d_array_projection): _test_get_block_selection(a, z, selection, expected_idx) bad_selections = block_selections_1d_bad + [ @@ -1264,8 +1168,7 @@ def test_get_block_selection_2d(): z = zarr.create(shape=a.shape, chunks=(300, 3), dtype=a.dtype) z[:] = a - for selection, expected_idx in \ - zip(block_selections_2d, block_selections_2d_array_projection): + for selection, expected_idx in zip(block_selections_2d, block_selections_2d_array_projection): _test_get_block_selection(a, z, selection, expected_idx) with pytest.raises(IndexError): @@ -1300,8 +1203,7 @@ def test_set_block_selection_1d(): a = np.empty(v.shape, dtype=v.dtype) z = zarr.create(shape=a.shape, chunks=100, dtype=a.dtype) - for selection, expected_idx in \ - zip(block_selections_1d, block_selections_1d_array_projection): + for selection, expected_idx in zip(block_selections_1d, block_selections_1d_array_projection): _test_set_block_selection(v, a, z, selection, expected_idx) for 
selection in block_selections_1d_bad: @@ -1317,8 +1219,7 @@ def test_set_block_selection_2d(): a = np.empty(v.shape, dtype=v.dtype) z = zarr.create(shape=a.shape, chunks=(300, 3), dtype=a.dtype) - for selection, expected_idx in \ - zip(block_selections_2d, block_selections_2d_array_projection): + for selection, expected_idx in zip(block_selections_2d, block_selections_2d_array_projection): _test_set_block_selection(v, a, z, selection, expected_idx) with pytest.raises(IndexError): @@ -1347,8 +1248,8 @@ def _test_get_mask_selection(a, z, selection): Ellipsis, # bad stuff 2.3, - 'foo', - b'xxx', + "foo", + b"xxx", None, (0, 0), (slice(None), slice(None)), @@ -1478,7 +1379,7 @@ def test_get_selection_out(): # test with different degrees of sparseness for p in 0.5, 0.1, 0.01: ix0 = np.random.binomial(1, p, size=a.shape[0]).astype(bool) - ix1 = np.random.binomial(1, .5, size=a.shape[1]).astype(bool) + ix1 = np.random.binomial(1, 0.5, size=a.shape[1]).astype(bool) selections = [ # index both axes with array (ix0, ix1), @@ -1526,22 +1427,20 @@ def test_get_selection_out(): def test_get_selections_with_fields(): - a = [('aaa', 1, 4.2), - ('bbb', 2, 8.4), - ('ccc', 3, 12.6)] - a = np.array(a, dtype=[('foo', 'S3'), ('bar', 'i4'), ('baz', 'f8')]) + a = [("aaa", 1, 4.2), ("bbb", 2, 8.4), ("ccc", 3, 12.6)] + a = np.array(a, dtype=[("foo", "S3"), ("bar", "i4"), ("baz", "f8")]) z = zarr.create(shape=a.shape, chunks=2, dtype=a.dtype, fill_value=None) z[:] = a fields_fixture = [ - 'foo', - ['foo'], - ['foo', 'bar'], - ['foo', 'baz'], - ['bar', 'baz'], - ['foo', 'bar', 'baz'], - ['bar', 'foo'], - ['baz', 'bar', 'foo'], + "foo", + ["foo"], + ["foo", "bar"], + ["foo", "baz"], + ["bar", "baz"], + ["foo", "bar", "baz"], + ["bar", "foo"], + ["baz", "bar", "foo"], ] for fields in fields_fixture: @@ -1629,30 +1528,28 @@ def test_get_selections_with_fields(): # missing/bad fields with pytest.raises(IndexError): - z.get_basic_selection(Ellipsis, fields=['notafield']) + z.get_basic_selection(Ellipsis, fields=["notafield"]) with pytest.raises(IndexError): z.get_basic_selection(Ellipsis, fields=slice(None)) def test_set_selections_with_fields(): - v = [('aaa', 1, 4.2), - ('bbb', 2, 8.4), - ('ccc', 3, 12.6)] - v = np.array(v, dtype=[('foo', 'S3'), ('bar', 'i4'), ('baz', 'f8')]) + v = [("aaa", 1, 4.2), ("bbb", 2, 8.4), ("ccc", 3, 12.6)] + v = np.array(v, dtype=[("foo", "S3"), ("bar", "i4"), ("baz", "f8")]) a = np.empty_like(v) z = zarr.empty_like(v, chunks=2) fields_fixture = [ - 'foo', + "foo", [], - ['foo'], - ['foo', 'bar'], - ['foo', 'baz'], - ['bar', 'baz'], - ['foo', 'bar', 'baz'], - ['bar', 'foo'], - ['baz', 'bar', 'foo'], + ["foo"], + ["foo", "bar"], + ["foo", "baz"], + ["bar", "baz"], + ["foo", "bar", "baz"], + ["bar", "foo"], + ["baz", "bar", "foo"], ] for fields in fields_fixture: @@ -1682,8 +1579,8 @@ def test_set_selections_with_fields(): key = fields # setup expectation - a[:] = ('', 0, 0) - z[:] = ('', 0, 0) + a[:] = ("", 0, 0) + z[:] = ("", 0, 0) assert_array_equal(a, z[:]) a[key] = v[key] # total selection @@ -1691,31 +1588,31 @@ def test_set_selections_with_fields(): assert_array_equal(a, z[:]) # basic selection with slice - a[:] = ('', 0, 0) - z[:] = ('', 0, 0) + a[:] = ("", 0, 0) + z[:] = ("", 0, 0) a[key][0:2] = v[key][0:2] z.set_basic_selection(slice(0, 2), v[key][0:2], fields=fields) assert_array_equal(a, z[:]) # orthogonal selection - a[:] = ('', 0, 0) - z[:] = ('', 0, 0) + a[:] = ("", 0, 0) + z[:] = ("", 0, 0) ix = [0, 2] a[key][ix] = v[key][ix] z.set_orthogonal_selection(ix, v[key][ix], 
fields=fields) assert_array_equal(a, z[:]) # coordinate selection - a[:] = ('', 0, 0) - z[:] = ('', 0, 0) + a[:] = ("", 0, 0) + z[:] = ("", 0, 0) ix = [0, 2] a[key][ix] = v[key][ix] z.set_coordinate_selection(ix, v[key][ix], fields=fields) assert_array_equal(a, z[:]) # mask selection - a[:] = ('', 0, 0) - z[:] = ('', 0, 0) + a[:] = ("", 0, 0) + z[:] = ("", 0, 0) ix = [True, False, True] a[key][ix] = v[key][ix] z.set_mask_selection(ix, v[key][ix], fields=fields) @@ -1823,17 +1720,24 @@ def test_numpy_int_indexing(): # 1D test cases ((1070,), (50,), [("__getitem__", (slice(200, 400),))]), ((1070,), (50,), [("__getitem__", (slice(200, 400, 100),))]), - ((1070,), (50,), [ - ("__getitem__", (slice(200, 400),)), - ("__setitem__", (slice(200, 400, 100),)), - ]), - + ( + (1070,), + (50,), + [ + ("__getitem__", (slice(200, 400),)), + ("__setitem__", (slice(200, 400, 100),)), + ], + ), # 2D test cases - ((40, 50), (5, 8), [ - ("__getitem__", (slice(6, 37, 13), (slice(4, 10)))), - ("__setitem__", (slice(None), (slice(None)))), - ]), - ] + ( + (40, 50), + (5, 8), + [ + ("__getitem__", (slice(6, 37, 13), (slice(4, 10)))), + ("__setitem__", (slice(None), (slice(None)))), + ], + ), + ], ) def test_accessed_chunks(shape, chunks, ops): # Test that only the required chunks are accessed during basic selection operations @@ -1881,9 +1785,8 @@ def test_accessed_chunks(shape, chunks, ops): # don't determine if the chunk was actually partial here, just that the # counts are consistent that this might have happened if optype == "__setitem__": - assert ( - ("__getitem__", ci) not in delta_counts or - delta_counts.pop(("__getitem__", ci)) == 1 - ) + assert ("__getitem__", ci) not in delta_counts or delta_counts.pop( + ("__getitem__", ci) + ) == 1 # Check that no other chunks were accessed assert len(delta_counts) == 0 diff --git a/zarr/tests/test_info.py b/zarr/tests/test_info.py index 434d19d1f7..7fb6feb11b 100644 --- a/zarr/tests/test_info.py +++ b/zarr/tests/test_info.py @@ -5,22 +5,32 @@ from zarr.util import InfoReporter -@pytest.mark.parametrize('array_size', [10, 15000]) +@pytest.mark.parametrize("array_size", [10, 15000]) def test_info(array_size): # setup - g = zarr.group(store=dict(), chunk_store=dict(), - synchronizer=zarr.ThreadSynchronizer()) - g.create_group('foo') - z = g.zeros('bar', shape=array_size, filters=[numcodecs.Adler32()]) + g = zarr.group(store=dict(), chunk_store=dict(), synchronizer=zarr.ThreadSynchronizer()) + g.create_group("foo") + z = g.zeros("bar", shape=array_size, filters=[numcodecs.Adler32()]) # test group info items = g.info_items() keys = sorted([k for k, _ in items]) - expected_keys = sorted([ - 'Type', 'Read-only', 'Synchronizer type', 'Store type', 'Chunk store type', - 'No. members', 'No. arrays', 'No. groups', 'Arrays', 'Groups', 'Name' - ]) + expected_keys = sorted( + [ + "Type", + "Read-only", + "Synchronizer type", + "Store type", + "Chunk store type", + "No. members", + "No. arrays", + "No. groups", + "Arrays", + "Groups", + "Name", + ] + ) assert expected_keys == keys # can also get a string representation of info via the info attribute @@ -30,11 +40,26 @@ def test_info(array_size): # test array info items = z.info_items() keys = sorted([k for k, _ in items]) - expected_keys = sorted([ - 'Type', 'Data type', 'Shape', 'Chunk shape', 'Order', 'Read-only', 'Filter [0]', - 'Compressor', 'Synchronizer type', 'Store type', 'Chunk store type', 'No. bytes', - 'No. 
bytes stored', 'Storage ratio', 'Chunks initialized', 'Name' - ]) + expected_keys = sorted( + [ + "Type", + "Data type", + "Shape", + "Chunk shape", + "Order", + "Read-only", + "Filter [0]", + "Compressor", + "Synchronizer type", + "Store type", + "Chunk store type", + "No. bytes", + "No. bytes stored", + "Storage ratio", + "Chunks initialized", + "Name", + ] + ) assert expected_keys == keys # can also get a string representation of info via the info attribute diff --git a/zarr/tests/test_meta.py b/zarr/tests/test_meta.py index a78375986e..db50560c8e 100644 --- a/zarr/tests/test_meta.py +++ b/zarr/tests/test_meta.py @@ -7,18 +7,27 @@ from zarr.codecs import Blosc, Delta, Pickle, Zlib from zarr.errors import MetadataError -from zarr.meta import (ZARR_FORMAT, decode_array_metadata, decode_dtype, - decode_group_metadata, encode_array_metadata, - encode_dtype, encode_fill_value, decode_fill_value, - get_extended_dtype_info, _v3_complex_types, - _v3_datetime_types, _default_entry_point_metadata_v3, - Metadata3) +from zarr.meta import ( + ZARR_FORMAT, + decode_array_metadata, + decode_dtype, + decode_group_metadata, + encode_array_metadata, + encode_dtype, + encode_fill_value, + decode_fill_value, + get_extended_dtype_info, + _v3_complex_types, + _v3_datetime_types, + _default_entry_point_metadata_v3, + Metadata3, +) from zarr.util import normalize_dtype, normalize_fill_value def assert_json_equal(expect, actual): if isinstance(actual, bytes): - actual = str(actual, 'ascii') + actual = str(actual, "ascii") ej = json.loads(expect) aj = json.loads(actual) assert ej == aj @@ -29,14 +38,15 @@ def test_encode_decode_array_1(): meta = dict( shape=(100,), chunks=(10,), - dtype=np.dtype('U4', 'U4", " 0: @@ -1399,8 +1416,7 @@ def s3(request): pass timeout -= 0.1 # pragma: no cover time.sleep(0.1) # pragma: no cover - s3so = dict(client_kwargs={'endpoint_url': endpoint_uri}, - use_listings_cache=False) + s3so = dict(client_kwargs={"endpoint_url": endpoint_uri}, use_listings_cache=False) s3 = s3fs.S3FileSystem(anon=False, **s3so) s3.mkdir("test") request.cls.s3so = s3so @@ -1410,7 +1426,6 @@ def s3(request): class TestNestedDirectoryStore(TestDirectoryStore): - def create_store(self, normalize_keys=False, **kwargs): path = tempfile.mkdtemp() atexit.register(atexit_rmtree, path) @@ -1425,23 +1440,23 @@ def test_init_array(self): # check metadata assert array_meta_key in store meta = store._metadata_class.decode_array_metadata(store[array_meta_key]) - assert ZARR_FORMAT == meta['zarr_format'] - assert (1000,) == meta['shape'] - assert (100,) == meta['chunks'] - assert np.dtype(None) == meta['dtype'] - assert meta['dimension_separator'] == "/" + assert ZARR_FORMAT == meta["zarr_format"] + assert (1000,) == meta["shape"] + assert (100,) == meta["chunks"] + assert np.dtype(None) == meta["dtype"] + assert meta["dimension_separator"] == "/" def test_chunk_nesting(self): store = self.create_store() # any path where last segment looks like a chunk key gets special handling - store[self.root + '0.0'] = b'xxx' - assert b'xxx' == store[self.root + '0.0'] + store[self.root + "0.0"] = b"xxx" + assert b"xxx" == store[self.root + "0.0"] # assert b'xxx' == store['0/0'] - store[self.root + 'foo/10.20.30'] = b'yyy' - assert b'yyy' == store[self.root + 'foo/10.20.30'] + store[self.root + "foo/10.20.30"] = b"yyy" + assert b"yyy" == store[self.root + "foo/10.20.30"] # assert b'yyy' == store['foo/10/20/30'] - store[self.root + '42'] = b'zzz' - assert b'zzz' == store[self.root + '42'] + store[self.root + "42"] = b"zzz" + assert 
b"zzz" == store[self.root + "42"] def test_listdir(self): store = self.create_store() @@ -1452,29 +1467,22 @@ def test_listdir(self): class TestNestedDirectoryStoreNone: - def test_value_error(self): path = tempfile.mkdtemp() atexit.register(atexit_rmtree, path) - store = NestedDirectoryStore( - path, normalize_keys=True, - dimension_separator=None) + store = NestedDirectoryStore(path, normalize_keys=True, dimension_separator=None) assert store._dimension_separator == "/" class TestNestedDirectoryStoreWithWrongValue: - def test_value_error(self): path = tempfile.mkdtemp() atexit.register(atexit_rmtree, path) with pytest.raises(ValueError): - NestedDirectoryStore( - path, normalize_keys=True, - dimension_separator=".") + NestedDirectoryStore(path, normalize_keys=True, dimension_separator=".") class TestN5Store(TestNestedDirectoryStore): - def create_store(self, normalize_keys=False): path = tempfile.mkdtemp() atexit.register(atexit_rmtree, path) @@ -1486,29 +1494,29 @@ def test_equal(self): store_b = N5Store(store_a.path) assert store_a == store_b - @pytest.mark.parametrize('zarr_meta_key', ['.zarray', '.zattrs', '.zgroup']) + @pytest.mark.parametrize("zarr_meta_key", [".zarray", ".zattrs", ".zgroup"]) def test_del_zarr_meta_key(self, zarr_meta_key): store = self.create_store() - store[n5_attrs_key] = json_dumps({'foo': 'bar'}) + store[n5_attrs_key] = json_dumps({"foo": "bar"}) del store[zarr_meta_key] assert n5_attrs_key not in store def test_chunk_nesting(self): store = self.create_store() - store['0.0'] = b'xxx' - assert '0.0' in store - assert b'xxx' == store['0.0'] + store["0.0"] = b"xxx" + assert "0.0" in store + assert b"xxx" == store["0.0"] # assert b'xxx' == store['0/0'] - store['foo/10.20.30'] = b'yyy' - assert 'foo/10.20.30' in store - assert b'yyy' == store['foo/10.20.30'] + store["foo/10.20.30"] = b"yyy" + assert "foo/10.20.30" in store + assert b"yyy" == store["foo/10.20.30"] # N5 reverses axis order - assert b'yyy' == store['foo/30/20/10'] - del store['foo/10.20.30'] - assert 'foo/30/20/10' not in store - store['42'] = b'zzz' - assert '42' in store - assert b'zzz' == store['42'] + assert b"yyy" == store["foo/30/20/10"] + del store["foo/10.20.30"] + assert "foo/30/20/10" not in store + store["42"] = b"zzz" + assert "42" in store + assert b"zzz" == store["42"] def test_init_array(self): store = self.create_store() @@ -1517,83 +1525,85 @@ def test_init_array(self): # check metadata assert array_meta_key in store meta = store._metadata_class.decode_array_metadata(store[array_meta_key]) - assert ZARR_FORMAT == meta['zarr_format'] - assert (1000,) == meta['shape'] - assert (100,) == meta['chunks'] - assert np.dtype(None) == meta['dtype'] + assert ZARR_FORMAT == meta["zarr_format"] + assert (1000,) == meta["shape"] + assert (100,) == meta["chunks"] + assert np.dtype(None) == meta["dtype"] # N5Store wraps the actual compressor - compressor_config = meta['compressor']['compressor_config'] + compressor_config = meta["compressor"]["compressor_config"] assert default_compressor.get_config() == compressor_config # N5Store always has a fill value of 0 - assert meta['fill_value'] == 0 - assert meta['dimension_separator'] == '.' + assert meta["fill_value"] == 0 + assert meta["dimension_separator"] == "." 
# Top-level groups AND arrays should have # the n5 keyword in metadata raw_n5_meta = json.loads(store[n5_attrs_key]) - assert raw_n5_meta.get('n5', None) == N5_FORMAT + assert raw_n5_meta.get("n5", None) == N5_FORMAT def test_init_array_path(self): - path = 'foo/bar' + path = "foo/bar" store = self.create_store() init_array(store, shape=1000, chunks=100, path=path) # check metadata - key = path + '/' + array_meta_key + key = path + "/" + array_meta_key assert key in store meta = store._metadata_class.decode_array_metadata(store[key]) - assert ZARR_FORMAT == meta['zarr_format'] - assert (1000,) == meta['shape'] - assert (100,) == meta['chunks'] - assert np.dtype(None) == meta['dtype'] + assert ZARR_FORMAT == meta["zarr_format"] + assert (1000,) == meta["shape"] + assert (100,) == meta["chunks"] + assert np.dtype(None) == meta["dtype"] # N5Store wraps the actual compressor - compressor_config = meta['compressor']['compressor_config'] + compressor_config = meta["compressor"]["compressor_config"] assert default_compressor.get_config() == compressor_config # N5Store always has a fill value of 0 - assert meta['fill_value'] == 0 + assert meta["fill_value"] == 0 def test_init_array_compat(self): store = self.create_store() - init_array(store, shape=1000, chunks=100, compressor='none') + init_array(store, shape=1000, chunks=100, compressor="none") meta = store._metadata_class.decode_array_metadata(store[array_meta_key]) # N5Store wraps the actual compressor - compressor_config = meta['compressor']['compressor_config'] + compressor_config = meta["compressor"]["compressor_config"] assert compressor_config is None def test_init_array_overwrite(self): - self._test_init_array_overwrite('C') + self._test_init_array_overwrite("C") def test_init_array_overwrite_path(self): - self._test_init_array_overwrite_path('C') + self._test_init_array_overwrite_path("C") def test_init_array_overwrite_chunk_store(self): - self._test_init_array_overwrite_chunk_store('C') + self._test_init_array_overwrite_chunk_store("C") def test_init_group_overwrite(self): - self._test_init_group_overwrite('C') + self._test_init_group_overwrite("C") def test_init_group_overwrite_path(self): - self._test_init_group_overwrite_path('C') + self._test_init_group_overwrite_path("C") def test_init_group_overwrite_chunk_store(self): - self._test_init_group_overwrite_chunk_store('C') + self._test_init_group_overwrite_chunk_store("C") def test_init_group(self): store = self.create_store() init_group(store) - store['.zattrs'] = json_dumps({'foo': 'bar'}) + store[".zattrs"] = json_dumps({"foo": "bar"}) # check metadata assert group_meta_key in store assert group_meta_key in store.listdir() - assert group_meta_key in store.listdir('') + assert group_meta_key in store.listdir("") meta = store._metadata_class.decode_group_metadata(store[group_meta_key]) - assert ZARR_FORMAT == meta['zarr_format'] + assert ZARR_FORMAT == meta["zarr_format"] def test_filters(self): - all_filters, all_errors = zip(*[ - (None, does_not_raise()), - ([], does_not_raise()), - ([AsType('f4', 'f8')], pytest.raises(ValueError)), - ]) + all_filters, all_errors = zip( + *[ + (None, does_not_raise()), + ([], does_not_raise()), + ([AsType("f4", "f8")], pytest.raises(ValueError)), + ] + ) for filters, error in zip(all_filters, all_errors): store = self.create_store() with error: @@ -1620,29 +1630,29 @@ def test_equal(self): # be run by making TestN5FSStore inherit from both TestFSStore and # TestN5Store, but a direct copy is arguably more explicit. 
- @pytest.mark.parametrize('zarr_meta_key', ['.zarray', '.zattrs', '.zgroup']) + @pytest.mark.parametrize("zarr_meta_key", [".zarray", ".zattrs", ".zgroup"]) def test_del_zarr_meta_key(self, zarr_meta_key): store = self.create_store() - store[n5_attrs_key] = json_dumps({'foo': 'bar'}) + store[n5_attrs_key] = json_dumps({"foo": "bar"}) del store[zarr_meta_key] assert n5_attrs_key not in store def test_chunk_nesting(self): store = self.create_store() - store['0.0'] = b'xxx' - assert '0.0' in store - assert b'xxx' == store['0.0'] + store["0.0"] = b"xxx" + assert "0.0" in store + assert b"xxx" == store["0.0"] # assert b'xxx' == store['0/0'] - store['foo/10.20.30'] = b'yyy' - assert 'foo/10.20.30' in store - assert b'yyy' == store['foo/10.20.30'] + store["foo/10.20.30"] = b"yyy" + assert "foo/10.20.30" in store + assert b"yyy" == store["foo/10.20.30"] # N5 reverses axis order - assert b'yyy' == store['foo/30/20/10'] - del store['foo/10.20.30'] - assert 'foo/30/20/10' not in store - store['42'] = b'zzz' - assert '42' in store - assert b'zzz' == store['42'] + assert b"yyy" == store["foo/30/20/10"] + del store["foo/10.20.30"] + assert "foo/30/20/10" not in store + store["42"] = b"zzz" + assert "42" in store + assert b"zzz" == store["42"] def test_init_array(self): store = self.create_store() @@ -1651,88 +1661,90 @@ def test_init_array(self): # check metadata assert array_meta_key in store meta = store._metadata_class.decode_array_metadata(store[array_meta_key]) - assert ZARR_FORMAT == meta['zarr_format'] - assert (1000,) == meta['shape'] - assert (100,) == meta['chunks'] - assert np.dtype(None) == meta['dtype'] + assert ZARR_FORMAT == meta["zarr_format"] + assert (1000,) == meta["shape"] + assert (100,) == meta["chunks"] + assert np.dtype(None) == meta["dtype"] # N5Store wraps the actual compressor - compressor_config = meta['compressor']['compressor_config'] + compressor_config = meta["compressor"]["compressor_config"] assert default_compressor.get_config() == compressor_config # N5Store always has a fill value of 0 - assert meta['fill_value'] == 0 - assert meta['dimension_separator'] == '.' + assert meta["fill_value"] == 0 + assert meta["dimension_separator"] == "." 
# Top-level groups AND arrays should have # the n5 keyword in metadata raw_n5_meta = json.loads(store[n5_attrs_key]) - assert raw_n5_meta.get('n5', None) == N5_FORMAT + assert raw_n5_meta.get("n5", None) == N5_FORMAT def test_init_array_path(self): - path = 'foo/bar' + path = "foo/bar" store = self.create_store() init_array(store, shape=1000, chunks=100, path=path) # check metadata - key = path + '/' + array_meta_key + key = path + "/" + array_meta_key assert key in store meta = store._metadata_class.decode_array_metadata(store[key]) - assert ZARR_FORMAT == meta['zarr_format'] - assert (1000,) == meta['shape'] - assert (100,) == meta['chunks'] - assert np.dtype(None) == meta['dtype'] + assert ZARR_FORMAT == meta["zarr_format"] + assert (1000,) == meta["shape"] + assert (100,) == meta["chunks"] + assert np.dtype(None) == meta["dtype"] # N5Store wraps the actual compressor - compressor_config = meta['compressor']['compressor_config'] + compressor_config = meta["compressor"]["compressor_config"] assert default_compressor.get_config() == compressor_config # N5Store always has a fill value of 0 - assert meta['fill_value'] == 0 + assert meta["fill_value"] == 0 def test_init_array_compat(self): store = self.create_store() - init_array(store, shape=1000, chunks=100, compressor='none') + init_array(store, shape=1000, chunks=100, compressor="none") meta = store._metadata_class.decode_array_metadata(store[array_meta_key]) # N5Store wraps the actual compressor - compressor_config = meta['compressor']['compressor_config'] + compressor_config = meta["compressor"]["compressor_config"] assert compressor_config is None def test_init_array_overwrite(self): - self._test_init_array_overwrite('C') + self._test_init_array_overwrite("C") def test_init_array_overwrite_path(self): - self._test_init_array_overwrite_path('C') + self._test_init_array_overwrite_path("C") def test_init_array_overwrite_chunk_store(self): - self._test_init_array_overwrite_chunk_store('C') + self._test_init_array_overwrite_chunk_store("C") def test_init_group_overwrite(self): - self._test_init_group_overwrite('C') + self._test_init_group_overwrite("C") def test_init_group_overwrite_path(self): - self._test_init_group_overwrite_path('C') + self._test_init_group_overwrite_path("C") def test_init_group_overwrite_chunk_store(self): - self._test_init_group_overwrite_chunk_store('C') + self._test_init_group_overwrite_chunk_store("C") def test_dimension_separator(self): - with pytest.warns(UserWarning, match='dimension_separator'): - self.create_store(dimension_separator='/') + with pytest.warns(UserWarning, match="dimension_separator"): + self.create_store(dimension_separator="/") def test_init_group(self): store = self.create_store() init_group(store) - store['.zattrs'] = json_dumps({'foo': 'bar'}) + store[".zattrs"] = json_dumps({"foo": "bar"}) # check metadata assert group_meta_key in store assert group_meta_key in store.listdir() - assert group_meta_key in store.listdir('') + assert group_meta_key in store.listdir("") meta = store._metadata_class.decode_group_metadata(store[group_meta_key]) - assert ZARR_FORMAT == meta['zarr_format'] + assert ZARR_FORMAT == meta["zarr_format"] def test_filters(self): - all_filters, all_errors = zip(*[ - (None, does_not_raise()), - ([], does_not_raise()), - ([AsType('f4', 'f8')], pytest.raises(ValueError)), - ]) + all_filters, all_errors = zip( + *[ + (None, does_not_raise()), + ([], does_not_raise()), + ([AsType("f4", "f8")], pytest.raises(ValueError)), + ] + ) for filters, error in zip(all_filters, 
all_errors): store = self.create_store() with error: @@ -1741,13 +1753,13 @@ def test_filters(self): @pytest.mark.skipif(have_fsspec is False, reason="needs fsspec") class TestNestedFSStore(TestNestedDirectoryStore): - def create_store(self, normalize_keys=False, path=None, **kwargs): if path is None: path = tempfile.mkdtemp() atexit.register(atexit_rmtree, path) - store = FSStore(path, normalize_keys=normalize_keys, - dimension_separator='/', auto_mkdir=True, **kwargs) + store = FSStore( + path, normalize_keys=normalize_keys, dimension_separator="/", auto_mkdir=True, **kwargs + ) return store def test_numbered_groups(self): @@ -1756,7 +1768,7 @@ def test_numbered_groups(self): # Create an array store = self.create_store() group = zarr.group(store=store) - arr = group.create_dataset('0', shape=(10, 10)) + arr = group.create_dataset("0", shape=(10, 10)) arr[1] = 1 # Read it back @@ -1765,7 +1777,6 @@ def test_numbered_groups(self): class TestTempStore(StoreTests): - def create_store(self, **kwargs): skip_if_nested_chunks(**kwargs) return TempStore(**kwargs) @@ -1780,113 +1791,111 @@ class TestZipStore(StoreTests): ZipStoreClass = ZipStore def create_store(self, **kwargs): - path = mktemp(suffix='.zip') + path = mktemp(suffix=".zip") atexit.register(os.remove, path) - store = ZipStore(path, mode='w', **kwargs) + store = ZipStore(path, mode="w", **kwargs) return store def test_mode(self): - with self.ZipStoreClass('data/store.zip', mode='w') as store: - store[self.root + 'foo'] = b'bar' - store = self.ZipStoreClass('data/store.zip', mode='r') + with self.ZipStoreClass("data/store.zip", mode="w") as store: + store[self.root + "foo"] = b"bar" + store = self.ZipStoreClass("data/store.zip", mode="r") with pytest.raises(PermissionError): - store[self.root + 'foo'] = b'bar' + store[self.root + "foo"] = b"bar" with pytest.raises(PermissionError): store.clear() def test_flush(self): - store = self.ZipStoreClass('data/store.zip', mode='w') - store[self.root + 'foo'] = b'bar' + store = self.ZipStoreClass("data/store.zip", mode="w") + store[self.root + "foo"] = b"bar" store.flush() - assert store[self.root + 'foo'] == b'bar' + assert store[self.root + "foo"] == b"bar" store.close() - store = self.ZipStoreClass('data/store.zip', mode='r') + store = self.ZipStoreClass("data/store.zip", mode="r") store.flush() # no-op def test_context_manager(self): with self.create_store() as store: - store[self.root + 'foo'] = b'bar' - store[self.root + 'baz'] = b'qux' + store[self.root + "foo"] = b"bar" + store[self.root + "baz"] = b"qux" assert 2 == len(store) def test_pop(self): # override because not implemented store = self.create_store() - store[self.root + 'foo'] = b'bar' + store[self.root + "foo"] = b"bar" with pytest.raises(NotImplementedError): - store.pop(self.root + 'foo') + store.pop(self.root + "foo") def test_popitem(self): # override because not implemented store = self.create_store() - store[self.root + 'foo'] = b'bar' + store[self.root + "foo"] = b"bar" with pytest.raises(NotImplementedError): store.popitem() def test_permissions(self): - store = self.ZipStoreClass('data/store.zip', mode='w') - foo_key = 'foo' if self.version == 2 else self.root + 'foo' + store = self.ZipStoreClass("data/store.zip", mode="w") + foo_key = "foo" if self.version == 2 else self.root + "foo" # TODO: cannot provide key ending in / for v3 # how to create an empty folder in that case? 
- baz_key = 'baz/' if self.version == 2 else self.root + 'baz' - store[foo_key] = b'bar' - store[baz_key] = b'' + baz_key = "baz/" if self.version == 2 else self.root + "baz" + store[foo_key] = b"bar" + store[baz_key] = b"" store.flush() store.close() - z = ZipFile('data/store.zip', 'r') + z = ZipFile("data/store.zip", "r") info = z.getinfo(foo_key) perm = oct(info.external_attr >> 16) - assert perm == '0o644' + assert perm == "0o644" info = z.getinfo(baz_key) perm = oct(info.external_attr >> 16) # only for posix platforms - if os.name == 'posix': + if os.name == "posix": if self.version == 2: - assert perm == '0o40775' + assert perm == "0o40775" else: # baz/ on v2, but baz on v3, so not a directory - assert perm == '0o644' + assert perm == "0o644" z.close() def test_store_and_retrieve_ndarray(self): - store = ZipStore('data/store.zip') + store = ZipStore("data/store.zip") x = np.array([[1, 2], [3, 4]]) - store['foo'] = x - y = np.frombuffer(store['foo'], dtype=x.dtype).reshape(x.shape) + store["foo"] = x + y = np.frombuffer(store["foo"], dtype=x.dtype).reshape(x.shape) assert np.array_equiv(y, x) class TestDBMStore(StoreTests): - def create_store(self, dimension_separator=None): - path = mktemp(suffix='.anydbm') - atexit.register(atexit_rmglob, path + '*') + path = mktemp(suffix=".anydbm") + atexit.register(atexit_rmglob, path + "*") # create store using default dbm implementation - store = DBMStore(path, flag='n', dimension_separator=dimension_separator) + store = DBMStore(path, flag="n", dimension_separator=dimension_separator) return store def test_context_manager(self): with self.create_store() as store: - store[self.root + 'foo'] = b'bar' - store[self.root + 'baz'] = b'qux' + store[self.root + "foo"] = b"bar" + store[self.root + "baz"] = b"qux" assert 2 == len(store) class TestDBMStoreDumb(TestDBMStore): - def create_store(self, **kwargs): - path = mktemp(suffix='.dumbdbm') - atexit.register(atexit_rmglob, path + '*') + path = mktemp(suffix=".dumbdbm") + atexit.register(atexit_rmglob, path + "*") import dbm.dumb as dumbdbm - store = DBMStore(path, flag='n', open=dumbdbm.open, **kwargs) + + store = DBMStore(path, flag="n", open=dumbdbm.open, **kwargs) return store class TestDBMStoreGnu(TestDBMStore): - def create_store(self, **kwargs): gdbm = pytest.importorskip("dbm.gnu") path = mktemp(suffix=".gdbm") # pragma: no cover @@ -1898,7 +1907,6 @@ def create_store(self, **kwargs): class TestDBMStoreNDBM(TestDBMStore): - def create_store(self, **kwargs): ndbm = pytest.importorskip("dbm.ndbm") path = mktemp(suffix=".ndbm") # pragma: no cover @@ -1908,20 +1916,18 @@ def create_store(self, **kwargs): class TestDBMStoreBerkeleyDB(TestDBMStore): - def create_store(self, **kwargs): bsddb3 = pytest.importorskip("bsddb3") - path = mktemp(suffix='.dbm') + path = mktemp(suffix=".dbm") atexit.register(os.remove, path) - store = DBMStore(path, flag='n', open=bsddb3.btopen, write_lock=False, **kwargs) + store = DBMStore(path, flag="n", open=bsddb3.btopen, write_lock=False, **kwargs) return store class TestLMDBStore(StoreTests): - def create_store(self, **kwargs): pytest.importorskip("lmdb") - path = mktemp(suffix='.lmdb') + path = mktemp(suffix=".lmdb") atexit.register(atexit_rmtree, path) buffers = True store = LMDBStore(path, buffers=buffers, **kwargs) @@ -1929,43 +1935,41 @@ def create_store(self, **kwargs): def test_context_manager(self): with self.create_store() as store: - store[self.root + 'foo'] = b'bar' - store[self.root + 'baz'] = b'qux' + store[self.root + "foo"] = b"bar" + store[self.root + 
"baz"] = b"qux" assert 2 == len(store) class TestSQLiteStore(StoreTests): - def create_store(self, **kwargs): pytest.importorskip("sqlite3") - path = mktemp(suffix='.db') + path = mktemp(suffix=".db") atexit.register(atexit_rmtree, path) store = SQLiteStore(path, **kwargs) return store def test_underscore_in_name(self): - path = mktemp(suffix='.db') + path = mktemp(suffix=".db") atexit.register(atexit_rmtree, path) store = SQLiteStore(path) - store['a'] = b'aaa' - store['a_b'] = b'aa_bb' - store.rmdir('a') - assert 'a_b' in store + store["a"] = b"aaa" + store["a_b"] = b"aa_bb" + store.rmdir("a") + assert "a_b" in store class TestSQLiteStoreInMemory(TestSQLiteStore): - def create_store(self, **kwargs): pytest.importorskip("sqlite3") - store = SQLiteStore(':memory:', **kwargs) + store = SQLiteStore(":memory:", **kwargs) return store def test_pickle(self): # setup store store = self.create_store() - store[self.root + 'foo'] = b'bar' - store[self.root + 'baz'] = b'quux' + store[self.root + "foo"] = b"bar" + store[self.root + "baz"] = b"quux" # round-trip through pickle with pytest.raises(PicklingError): @@ -1974,11 +1978,11 @@ def test_pickle(self): @skip_test_env_var("ZARR_TEST_MONGO") class TestMongoDBStore(StoreTests): - def create_store(self, **kwargs): pytest.importorskip("pymongo") - store = MongoDBStore(host='127.0.0.1', database='zarr_tests', - collection='zarr_tests', **kwargs) + store = MongoDBStore( + host="127.0.0.1", database="zarr_tests", collection="zarr_tests", **kwargs + ) # start with an empty store store.clear() return store @@ -1986,12 +1990,11 @@ def create_store(self, **kwargs): @skip_test_env_var("ZARR_TEST_REDIS") class TestRedisStore(StoreTests): - def create_store(self, **kwargs): # TODO: this is the default host for Redis on Travis, # we probably want to generalize this though pytest.importorskip("redis") - store = RedisStore(host='localhost', port=6379, **kwargs) + store = RedisStore(host="localhost", port=6379, **kwargs) # start with an empty store store.clear() return store @@ -2011,14 +2014,14 @@ def test_cache_values_no_max_size(self): # setup store store = self.CountingClass() - foo_key = self.root + 'foo' - bar_key = self.root + 'bar' - store[foo_key] = b'xxx' - store[bar_key] = b'yyy' - assert 0 == store.counter['__getitem__', foo_key] - assert 1 == store.counter['__setitem__', foo_key] - assert 0 == store.counter['__getitem__', bar_key] - assert 1 == store.counter['__setitem__', bar_key] + foo_key = self.root + "foo" + bar_key = self.root + "bar" + store[foo_key] = b"xxx" + store[bar_key] = b"yyy" + assert 0 == store.counter["__getitem__", foo_key] + assert 1 == store.counter["__setitem__", foo_key] + assert 0 == store.counter["__getitem__", bar_key] + assert 1 == store.counter["__setitem__", bar_key] # setup cache cache = self.LRUStoreClass(store, max_size=None) @@ -2026,39 +2029,39 @@ def test_cache_values_no_max_size(self): assert 0 == cache.misses # test first __getitem__, cache miss - assert b'xxx' == cache[foo_key] - assert 1 == store.counter['__getitem__', foo_key] - assert 1 == store.counter['__setitem__', foo_key] + assert b"xxx" == cache[foo_key] + assert 1 == store.counter["__getitem__", foo_key] + assert 1 == store.counter["__setitem__", foo_key] assert 0 == cache.hits assert 1 == cache.misses # test second __getitem__, cache hit - assert b'xxx' == cache[foo_key] - assert 1 == store.counter['__getitem__', foo_key] - assert 1 == store.counter['__setitem__', foo_key] + assert b"xxx" == cache[foo_key] + assert 1 == store.counter["__getitem__", 
foo_key] + assert 1 == store.counter["__setitem__", foo_key] assert 1 == cache.hits assert 1 == cache.misses # test __setitem__, __getitem__ - cache[foo_key] = b'zzz' - assert 1 == store.counter['__getitem__', foo_key] - assert 2 == store.counter['__setitem__', foo_key] + cache[foo_key] = b"zzz" + assert 1 == store.counter["__getitem__", foo_key] + assert 2 == store.counter["__setitem__", foo_key] # should be a cache hit - assert b'zzz' == cache[foo_key] - assert 1 == store.counter['__getitem__', foo_key] - assert 2 == store.counter['__setitem__', foo_key] + assert b"zzz" == cache[foo_key] + assert 1 == store.counter["__getitem__", foo_key] + assert 2 == store.counter["__setitem__", foo_key] assert 2 == cache.hits assert 1 == cache.misses # manually invalidate all cached values cache.invalidate_values() - assert b'zzz' == cache[foo_key] - assert 2 == store.counter['__getitem__', foo_key] - assert 2 == store.counter['__setitem__', foo_key] + assert b"zzz" == cache[foo_key] + assert 2 == store.counter["__getitem__", foo_key] + assert 2 == store.counter["__setitem__", foo_key] cache.invalidate() - assert b'zzz' == cache[foo_key] - assert 3 == store.counter['__getitem__', foo_key] - assert 2 == store.counter['__setitem__', foo_key] + assert b"zzz" == cache[foo_key] + assert 3 == store.counter["__getitem__", foo_key] + assert 2 == store.counter["__setitem__", foo_key] # test __delitem__ del cache[foo_key] @@ -2070,104 +2073,104 @@ def test_cache_values_no_max_size(self): store[foo_key] # verify other keys untouched - assert 0 == store.counter['__getitem__', bar_key] - assert 1 == store.counter['__setitem__', bar_key] + assert 0 == store.counter["__getitem__", bar_key] + assert 1 == store.counter["__setitem__", bar_key] def test_cache_values_with_max_size(self): # setup store store = self.CountingClass() - foo_key = self.root + 'foo' - bar_key = self.root + 'bar' - store[foo_key] = b'xxx' - store[bar_key] = b'yyy' - assert 0 == store.counter['__getitem__', foo_key] - assert 0 == store.counter['__getitem__', bar_key] + foo_key = self.root + "foo" + bar_key = self.root + "bar" + store[foo_key] = b"xxx" + store[bar_key] = b"yyy" + assert 0 == store.counter["__getitem__", foo_key] + assert 0 == store.counter["__getitem__", bar_key] # setup cache - can only hold one item cache = self.LRUStoreClass(store, max_size=5) assert 0 == cache.hits assert 0 == cache.misses # test first 'foo' __getitem__, cache miss - assert b'xxx' == cache[foo_key] - assert 1 == store.counter['__getitem__', foo_key] + assert b"xxx" == cache[foo_key] + assert 1 == store.counter["__getitem__", foo_key] assert 0 == cache.hits assert 1 == cache.misses # test second 'foo' __getitem__, cache hit - assert b'xxx' == cache[foo_key] - assert 1 == store.counter['__getitem__', foo_key] + assert b"xxx" == cache[foo_key] + assert 1 == store.counter["__getitem__", foo_key] assert 1 == cache.hits assert 1 == cache.misses # test first 'bar' __getitem__, cache miss - assert b'yyy' == cache[bar_key] - assert 1 == store.counter['__getitem__', bar_key] + assert b"yyy" == cache[bar_key] + assert 1 == store.counter["__getitem__", bar_key] assert 1 == cache.hits assert 2 == cache.misses # test second 'bar' __getitem__, cache hit - assert b'yyy' == cache[bar_key] - assert 1 == store.counter['__getitem__', bar_key] + assert b"yyy" == cache[bar_key] + assert 1 == store.counter["__getitem__", bar_key] assert 2 == cache.hits assert 2 == cache.misses # test 'foo' __getitem__, should have been evicted, cache miss - assert b'xxx' == cache[foo_key] - assert 2 
== store.counter['__getitem__', foo_key] + assert b"xxx" == cache[foo_key] + assert 2 == store.counter["__getitem__", foo_key] assert 2 == cache.hits assert 3 == cache.misses # test 'bar' __getitem__, should have been evicted, cache miss - assert b'yyy' == cache[bar_key] - assert 2 == store.counter['__getitem__', bar_key] + assert b"yyy" == cache[bar_key] + assert 2 == store.counter["__getitem__", bar_key] assert 2 == cache.hits assert 4 == cache.misses # setup store store = self.CountingClass() - store[foo_key] = b'xxx' - store[bar_key] = b'yyy' - assert 0 == store.counter['__getitem__', foo_key] - assert 0 == store.counter['__getitem__', bar_key] + store[foo_key] = b"xxx" + store[bar_key] = b"yyy" + assert 0 == store.counter["__getitem__", foo_key] + assert 0 == store.counter["__getitem__", bar_key] # setup cache - can hold two items cache = self.LRUStoreClass(store, max_size=6) assert 0 == cache.hits assert 0 == cache.misses # test first 'foo' __getitem__, cache miss - assert b'xxx' == cache[foo_key] - assert 1 == store.counter['__getitem__', foo_key] + assert b"xxx" == cache[foo_key] + assert 1 == store.counter["__getitem__", foo_key] assert 0 == cache.hits assert 1 == cache.misses # test second 'foo' __getitem__, cache hit - assert b'xxx' == cache[foo_key] - assert 1 == store.counter['__getitem__', foo_key] + assert b"xxx" == cache[foo_key] + assert 1 == store.counter["__getitem__", foo_key] assert 1 == cache.hits assert 1 == cache.misses # test first 'bar' __getitem__, cache miss - assert b'yyy' == cache[bar_key] - assert 1 == store.counter['__getitem__', bar_key] + assert b"yyy" == cache[bar_key] + assert 1 == store.counter["__getitem__", bar_key] assert 1 == cache.hits assert 2 == cache.misses # test second 'bar' __getitem__, cache hit - assert b'yyy' == cache[bar_key] - assert 1 == store.counter['__getitem__', bar_key] + assert b"yyy" == cache[bar_key] + assert 1 == store.counter["__getitem__", bar_key] assert 2 == cache.hits assert 2 == cache.misses # test 'foo' __getitem__, should still be cached - assert b'xxx' == cache[foo_key] - assert 1 == store.counter['__getitem__', foo_key] + assert b"xxx" == cache[foo_key] + assert 1 == store.counter["__getitem__", foo_key] assert 3 == cache.hits assert 2 == cache.misses # test 'bar' __getitem__, should still be cached - assert b'yyy' == cache[bar_key] - assert 1 == store.counter['__getitem__', bar_key] + assert b"yyy" == cache[bar_key] + assert 1 == store.counter["__getitem__", bar_key] assert 4 == cache.hits assert 2 == cache.misses @@ -2175,78 +2178,78 @@ def test_cache_keys(self): # setup store = self.CountingClass() - foo_key = self.root + 'foo' - bar_key = self.root + 'bar' - baz_key = self.root + 'baz' - store[foo_key] = b'xxx' - store[bar_key] = b'yyy' - assert 0 == store.counter['__contains__', foo_key] - assert 0 == store.counter['__iter__'] - assert 0 == store.counter['keys'] + foo_key = self.root + "foo" + bar_key = self.root + "bar" + baz_key = self.root + "baz" + store[foo_key] = b"xxx" + store[bar_key] = b"yyy" + assert 0 == store.counter["__contains__", foo_key] + assert 0 == store.counter["__iter__"] + assert 0 == store.counter["keys"] cache = self.LRUStoreClass(store, max_size=None) # keys should be cached on first call keys = sorted(cache.keys()) assert keys == [bar_key, foo_key] - assert 1 == store.counter['keys'] + assert 1 == store.counter["keys"] # keys should now be cached assert keys == sorted(cache.keys()) - assert 1 == store.counter['keys'] + assert 1 == store.counter["keys"] assert foo_key in cache - assert 0 
== store.counter['__contains__', foo_key] + assert 0 == store.counter["__contains__", foo_key] assert keys == sorted(cache) - assert 0 == store.counter['__iter__'] - assert 1 == store.counter['keys'] + assert 0 == store.counter["__iter__"] + assert 1 == store.counter["keys"] # cache should be cleared if store is modified - crude but simple for now - cache[baz_key] = b'zzz' + cache[baz_key] = b"zzz" keys = sorted(cache.keys()) assert keys == [bar_key, baz_key, foo_key] - assert 2 == store.counter['keys'] + assert 2 == store.counter["keys"] # keys should now be cached assert keys == sorted(cache.keys()) - assert 2 == store.counter['keys'] + assert 2 == store.counter["keys"] # manually invalidate keys cache.invalidate_keys() keys = sorted(cache.keys()) assert keys == [bar_key, baz_key, foo_key] - assert 3 == store.counter['keys'] - assert 0 == store.counter['__contains__', foo_key] - assert 0 == store.counter['__iter__'] + assert 3 == store.counter["keys"] + assert 0 == store.counter["__contains__", foo_key] + assert 0 == store.counter["__iter__"] cache.invalidate_keys() keys = sorted(cache) assert keys == [bar_key, baz_key, foo_key] - assert 4 == store.counter['keys'] - assert 0 == store.counter['__contains__', foo_key] - assert 0 == store.counter['__iter__'] + assert 4 == store.counter["keys"] + assert 0 == store.counter["__contains__", foo_key] + assert 0 == store.counter["__iter__"] cache.invalidate_keys() assert foo_key in cache - assert 5 == store.counter['keys'] - assert 0 == store.counter['__contains__', foo_key] - assert 0 == store.counter['__iter__'] + assert 5 == store.counter["keys"] + assert 0 == store.counter["__contains__", foo_key] + assert 0 == store.counter["__iter__"] # check these would get counted if called directly assert foo_key in store - assert 1 == store.counter['__contains__', foo_key] + assert 1 == store.counter["__contains__", foo_key] assert keys == sorted(store) - assert 1 == store.counter['__iter__'] + assert 1 == store.counter["__iter__"] def test_getsize(): store = KVStore(dict()) - store['foo'] = b'aaa' - store['bar'] = b'bbbb' - store['baz/quux'] = b'ccccc' + store["foo"] = b"aaa" + store["bar"] = b"bbbb" + store["baz/quux"] = b"ccccc" assert 7 == getsize(store) - assert 5 == getsize(store, 'baz') + assert 5 == getsize(store, "baz") store = KVStore(dict()) - store['boo'] = None + store["boo"] = None assert -1 == getsize(store) -@pytest.mark.parametrize('dict_store', [False, True]) +@pytest.mark.parametrize("dict_store", [False, True]) def test_migrate_1to2(dict_store): from zarr import meta_v1 @@ -2258,64 +2261,63 @@ def test_migrate_1to2(dict_store): meta = dict( shape=(100,), chunks=(10,), - dtype=np.dtype('f4'), - compression='zlib', + dtype=np.dtype("f4"), + compression="zlib", compression_opts=1, fill_value=None, - order='C' + order="C", ) meta_json = meta_v1.encode_metadata(meta) - store['meta'] = meta_json - store['attrs'] = json.dumps(dict()).encode('ascii') + store["meta"] = meta_json + store["attrs"] = json.dumps(dict()).encode("ascii") # run migration migrate_1to2(store) # check results - assert 'meta' not in store + assert "meta" not in store assert array_meta_key in store - assert 'attrs' not in store + assert "attrs" not in store assert attrs_key in store meta_migrated = decode_array_metadata(store[array_meta_key]) - assert 2 == meta_migrated['zarr_format'] + assert 2 == meta_migrated["zarr_format"] # preserved fields - for f in 'shape', 'chunks', 'dtype', 'fill_value', 'order': + for f in "shape", "chunks", "dtype", "fill_value", "order": 
assert meta[f] == meta_migrated[f] # migrate should have added empty filters field - assert meta_migrated['filters'] is None + assert meta_migrated["filters"] is None # check compression and compression_opts migrated to compressor - assert 'compression' not in meta_migrated - assert 'compression_opts' not in meta_migrated - assert meta_migrated['compressor'] == Zlib(1).get_config() + assert "compression" not in meta_migrated + assert "compression_opts" not in meta_migrated + assert meta_migrated["compressor"] == Zlib(1).get_config() # check dict compression_opts store = dict() if dict_store else KVStore(dict()) - meta['compression'] = 'blosc' - meta['compression_opts'] = dict(cname='lz4', clevel=5, shuffle=1) + meta["compression"] = "blosc" + meta["compression_opts"] = dict(cname="lz4", clevel=5, shuffle=1) meta_json = meta_v1.encode_metadata(meta) - store['meta'] = meta_json - store['attrs'] = json.dumps(dict()).encode('ascii') + store["meta"] = meta_json + store["attrs"] = json.dumps(dict()).encode("ascii") migrate_1to2(store) meta_migrated = decode_array_metadata(store[array_meta_key]) - assert 'compression' not in meta_migrated - assert 'compression_opts' not in meta_migrated - assert (meta_migrated['compressor'] == - Blosc(cname='lz4', clevel=5, shuffle=1).get_config()) + assert "compression" not in meta_migrated + assert "compression_opts" not in meta_migrated + assert meta_migrated["compressor"] == Blosc(cname="lz4", clevel=5, shuffle=1).get_config() # check 'none' compression is migrated to None (null in JSON) store = dict() if dict_store else KVStore(dict()) - meta['compression'] = 'none' + meta["compression"] = "none" meta_json = meta_v1.encode_metadata(meta) - store['meta'] = meta_json - store['attrs'] = json.dumps(dict()).encode('ascii') + store["meta"] = meta_json + store["attrs"] = json.dumps(dict()).encode("ascii") migrate_1to2(store) meta_migrated = decode_array_metadata(store[array_meta_key]) - assert 'compression' not in meta_migrated - assert 'compression_opts' not in meta_migrated - assert meta_migrated['compressor'] is None + assert "compression" not in meta_migrated + assert "compression_opts" not in meta_migrated + assert meta_migrated["compressor"] is None def test_format_compatibility(): @@ -2324,71 +2326,75 @@ def test_format_compatibility(): # read data stored with a previous minor version (which should be format-compatible). 
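(Editorial aside, stepping out of the diff: the ``test_migrate_1to2`` hunks above exercise the v1-to-v2 metadata migration, which folds the separate ``compression``/``compression_opts`` fields into a single ``compressor`` config and adds an empty ``filters`` field. A minimal sketch of that call, mirroring the test setup; the plain ``dict`` store and ``zlib`` settings are illustrative, not prescribed by the patch::

    import json

    import numpy as np

    from zarr import meta_v1
    from zarr.storage import array_meta_key, migrate_1to2

    store = dict()
    meta = dict(
        shape=(100,),
        chunks=(10,),
        dtype=np.dtype("f4"),
        compression="zlib",
        compression_opts=1,
        fill_value=None,
        order="C",
    )
    store["meta"] = meta_v1.encode_metadata(meta)  # v1 layout
    store["attrs"] = json.dumps(dict()).encode("ascii")

    migrate_1to2(store)  # rewrites the metadata keys in place

    # the v1 'meta' key is gone; '.zarray' now carries a v2 'compressor' entry
    assert "meta" not in store and array_meta_key in store

After the call, ``store[array_meta_key]`` decodes to v2 metadata whose ``compressor`` equals ``Zlib(1).get_config()``, exactly as the assertions above check.)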
# fixture data - fixture = group(store=DirectoryStore('fixture')) + fixture = group(store=DirectoryStore("fixture")) # set seed to get consistent random data np.random.seed(42) arrays_chunks = [ - (np.arange(1111, dtype=' 2 else '' + prefix = meta_root if self.version > 2 else "" # setup some values - store[prefix + 'a'] = b'aaa' - store[prefix + 'b'] = b'bbb' - store[prefix + 'c/d'] = b'ddd' - store[prefix + 'c/e/f'] = b'fff' + store[prefix + "a"] = b"aaa" + store[prefix + "b"] = b"bbb" + store[prefix + "c/d"] = b"ddd" + store[prefix + "c/e/f"] = b"fff" # test iterators on store with data assert 4 == len(store) - keys = [prefix + 'a', prefix + 'b', prefix + 'c/d', prefix + 'c/e/f'] - values = [b'aaa', b'bbb', b'ddd', b'fff'] + keys = [prefix + "a", prefix + "b", prefix + "c/d", prefix + "c/e/f"] + values = [b"aaa", b"bbb", b"ddd", b"fff"] items = list(zip(keys, values)) assert set(keys) == set(store) assert set(keys) == set(store.keys()) @@ -2483,7 +2489,7 @@ class TestConsolidatedMetadataStore: @property def metadata_key(self): - return '.zmetadata' + return ".zmetadata" def test_bad_format(self): @@ -2491,7 +2497,7 @@ def test_bad_format(self): store = dict() consolidated = { # bad format version - 'zarr_consolidated_format': 0, + "zarr_consolidated_format": 0, } store[self.metadata_key] = json.dumps(consolidated).encode() @@ -2508,11 +2514,11 @@ def test_read_write(self): # setup store with consolidated metadata store = dict() consolidated = { - 'zarr_consolidated_format': 1, - 'metadata': { - 'foo': 'bar', - 'baz': 42, - } + "zarr_consolidated_format": 1, + "metadata": { + "foo": "bar", + "baz": 42, + }, } store[self.metadata_key] = json.dumps(consolidated).encode() @@ -2520,15 +2526,15 @@ def test_read_write(self): cs = self.ConsolidatedMetadataClass(store) # test __contains__, __getitem__ - for key, value in consolidated['metadata'].items(): + for key, value in consolidated["metadata"].items(): assert key in cs assert value == cs[key] # test __delitem__, __setitem__ with pytest.raises(PermissionError): - del cs['foo'] + del cs["foo"] with pytest.raises(PermissionError): - cs['bar'] = 0 + cs["bar"] = 0 with pytest.raises(PermissionError): cs["spam"] = "eggs" @@ -2558,16 +2564,16 @@ def test_normalize_store_arg(tmpdir): with pytest.raises(ValueError): normalize_store_arg(dict(), zarr_version=4) - for ext, Class in [('.zip', ZipStore), ('.n5', N5Store)]: - fn = tmpdir.join('store' + ext) - store = normalize_store_arg(str(fn), zarr_version=2, mode='w') + for ext, Class in [(".zip", ZipStore), (".n5", N5Store)]: + fn = tmpdir.join("store" + ext) + store = normalize_store_arg(str(fn), zarr_version=2, mode="w") assert isinstance(store, Class) if have_fsspec: import fsspec path = tempfile.mkdtemp() - store = normalize_store_arg("file://" + path, zarr_version=2, mode='w') + store = normalize_store_arg("file://" + path, zarr_version=2, mode="w") assert isinstance(store, FSStore) store = normalize_store_arg(fsspec.get_mapper("file://" + path)) @@ -2578,7 +2584,7 @@ def test_meta_prefix_6853(): fixture = pathlib.Path(zarr.__file__).resolve().parent.parent / "fixture" meta = fixture / "meta" - if not meta.exists(): # pragma: no cover + if not meta.exists(): # pragma: no cover s = DirectoryStore(str(meta), dimension_separator=".") a = zarr.open(store=s, mode="w", shape=(2, 2), dtype="' == actual[-8:] + assert "" == actual[-8:] def test_tree_get_icon(): @@ -184,15 +198,13 @@ def test_tree_widget_missing_ipytree(): "to get the required ipytree dependency for displaying the tree " "widget. 
If using jupyterlab<3, you also need to run " "`jupyter labextension install ipytree`" - ) + ) with pytest.raises(ImportError, match=re.escape(pattern)): tree_widget(None, None, None) def test_retry_call(): - class Fixture: - def __init__(self, pass_on=1): self.c = 0 self.pass_on = pass_on @@ -217,9 +229,27 @@ def fail(x): def test_flatten(): - assert list(flatten(['0', ['1', ['2', ['3', [4, ]]]]])) == ['0', '1', '2', '3', 4] - assert list(flatten('foo')) == ['f', 'o', 'o'] - assert list(flatten(['foo'])) == ['foo'] + assert list( + flatten( + [ + "0", + [ + "1", + [ + "2", + [ + "3", + [ + 4, + ], + ], + ], + ], + ] + ) + ) == ["0", "1", "2", "3", 4] + assert list(flatten("foo")) == ["f", "o", "o"] + assert list(flatten(["foo"])) == ["foo"] def test_all_equal(): @@ -232,11 +262,11 @@ def test_all_equal(): assert all_equal(np.nan, np.array([np.nan, np.nan])) assert not all_equal(np.nan, np.array([np.nan, 1.0])) - assert all_equal({'a': -1}, np.array([{'a': -1}, {'a': -1}], dtype='object')) - assert not all_equal({'a': -1}, np.array([{'a': -1}, {'a': 2}], dtype='object')) + assert all_equal({"a": -1}, np.array([{"a": -1}, {"a": -1}], dtype="object")) + assert not all_equal({"a": -1}, np.array([{"a": -1}, {"a": 2}], dtype="object")) - assert all_equal(np.timedelta64(999, 'D'), np.array([999, 999], dtype='timedelta64[D]')) - assert not all_equal(np.timedelta64(999, 'D'), np.array([999, 998], dtype='timedelta64[D]')) + assert all_equal(np.timedelta64(999, "D"), np.array([999, 999], dtype="timedelta64[D]")) + assert not all_equal(np.timedelta64(999, "D"), np.array([999, 998], dtype="timedelta64[D]")) # all_equal(None, *) always returns False assert not all_equal(None, np.array([None, None])) diff --git a/zarr/tests/util.py b/zarr/tests/util.py index 19ac8c0bfa..b4f00f703d 100644 --- a/zarr/tests/util.py +++ b/zarr/tests/util.py @@ -11,44 +11,43 @@ class CountingDict(Store): - def __init__(self): self.wrapped = dict() self.counter = collections.Counter() def __len__(self): - self.counter['__len__'] += 1 + self.counter["__len__"] += 1 return len(self.wrapped) def keys(self): - self.counter['keys'] += 1 + self.counter["keys"] += 1 return self.wrapped.keys() def __iter__(self): - self.counter['__iter__'] += 1 + self.counter["__iter__"] += 1 return iter(self.wrapped) def __contains__(self, item): - self.counter['__contains__', item] += 1 + self.counter["__contains__", item] += 1 return item in self.wrapped def __getitem__(self, item): - self.counter['__getitem__', item] += 1 + self.counter["__getitem__", item] += 1 return self.wrapped[item] def __setitem__(self, key, value): - self.counter['__setitem__', key] += 1 + self.counter["__setitem__", key] += 1 self.wrapped[key] = value def __delitem__(self, key): - self.counter['__delitem__', key] += 1 + self.counter["__delitem__", key] += 1 del self.wrapped[key] def getitems( self, keys: Sequence[str], *, contexts: Mapping[str, Context] ) -> Mapping[str, Any]: for key in keys: - self.counter['__getitem__', key] += 1 + self.counter["__getitem__", key] += 1 return {k: self.wrapped[k] for k in keys if k in self.wrapped} @@ -57,10 +56,9 @@ class CountingDictV3(CountingDict, StoreV3): def skip_test_env_var(name): - """ Checks for environment variables indicating whether tests requiring services should be run - """ - value = os.environ.get(name, '0') - return pytest.mark.skipif(value == '0', reason='Tests not enabled via environment variable') + """Checks for environment variables indicating whether tests requiring services should be run""" + value = 
os.environ.get(name, "0") + return pytest.mark.skipif(value == "0", reason="Tests not enabled via environment variable") try: diff --git a/zarr/util.py b/zarr/util.py index 6ba20b96c2..b8b090ea70 100644 --- a/zarr/util.py +++ b/zarr/util.py @@ -5,18 +5,7 @@ from textwrap import TextWrapper import mmap import time -from typing import ( - Any, - Callable, - Dict, - Iterator, - Mapping, - Optional, - Tuple, - TypeVar, - Union, - Iterable -) +from typing import Any, Callable, Dict, Iterator, Mapping, Optional, Tuple, TypeVar, Union, Iterable import numpy as np from asciitree import BoxStyle, LeftAligned @@ -25,14 +14,14 @@ ensure_text, ensure_ndarray_like, ensure_bytes, - ensure_contiguous_ndarray_like + ensure_contiguous_ndarray_like, ) from numcodecs.ndarray_like import NDArrayLike from numcodecs.registry import codec_registry from numcodecs.blosc import cbuffer_sizes, cbuffer_metainfo -KeyType = TypeVar('KeyType') -ValueType = TypeVar('ValueType') +KeyType = TypeVar("KeyType") +ValueType = TypeVar("ValueType") def flatten(arg: Iterable) -> Iterable: @@ -45,14 +34,13 @@ def flatten(arg: Iterable) -> Iterable: # codecs to use for object dtype convenience API object_codecs = { - str.__name__: 'vlen-utf8', - bytes.__name__: 'vlen-bytes', - 'array': 'vlen-array', + str.__name__: "vlen-utf8", + bytes.__name__: "vlen-bytes", + "array": "vlen-array", } class NumberEncoder(json.JSONEncoder): - def default(self, o): # See json.JSONEncoder.default docstring for explanation # This is necessary to encode numpy dtype @@ -65,20 +53,21 @@ def default(self, o): def json_dumps(o: Any) -> bytes: """Write JSON in a consistent, human-readable way.""" - return json.dumps(o, indent=4, sort_keys=True, ensure_ascii=True, - separators=(',', ': '), cls=NumberEncoder).encode('ascii') + return json.dumps( + o, indent=4, sort_keys=True, ensure_ascii=True, separators=(",", ": "), cls=NumberEncoder + ).encode("ascii") def json_loads(s: Union[bytes, str]) -> Dict[str, Any]: """Read JSON in a consistent way.""" - return json.loads(ensure_text(s, 'utf-8')) + return json.loads(ensure_text(s, "utf-8")) def normalize_shape(shape) -> Tuple[int]: """Convenience function to normalize the `shape` argument.""" if shape is None: - raise TypeError('shape is None') + raise TypeError("shape is None") # handle 1D convenience form if isinstance(shape, numbers.Integral): @@ -91,9 +80,9 @@ def normalize_shape(shape) -> Tuple[int]: # code to guess chunk shape, adapted from h5py -CHUNK_BASE = 256*1024 # Multiplier by which chunks are adjusted -CHUNK_MIN = 128*1024 # Soft lower limit (128k) -CHUNK_MAX = 64*1024*1024 # Hard upper limit +CHUNK_BASE = 256 * 1024 # Multiplier by which chunks are adjusted +CHUNK_MIN = 128 * 1024 # Soft lower limit (128k) +CHUNK_MAX = 64 * 1024 * 1024 # Hard upper limit def guess_chunks(shape: Tuple[int, ...], typesize: int) -> Tuple[int, ...]: @@ -107,12 +96,12 @@ def guess_chunks(shape: Tuple[int, ...], typesize: int) -> Tuple[int, ...]: ndims = len(shape) # require chunks to have non-zero length for all dimensions - chunks = np.maximum(np.array(shape, dtype='=f8'), 1) + chunks = np.maximum(np.array(shape, dtype="=f8"), 1) # Determine the optimal chunk size in bytes using a PyTables expression. # This is kept as a float. 
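# Editorial aside, not part of the patch: a worked example of this heuristic.
# For shape (10000, 10000) and an 8-byte dtype, dset_size = 8e8 bytes, so
# target_size = CHUNK_BASE * 2**log10(8e8 / 2**20), about 1.9 MB, which sits
# comfortably inside the [CHUNK_MIN, CHUNK_MAX] clamp. The loop below then
# halves the dimensions round-robin until a chunk fits under (or within 50%
# of) that target, e.g.:
#
#     >>> from zarr.util import guess_chunks
#     >>> guess_chunks((10000, 10000), 8)  # roughly (313, 625)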
- dset_size = np.prod(chunks)*typesize - target_size = CHUNK_BASE * (2**np.log10(dset_size/(1024.*1024))) + dset_size = np.prod(chunks) * typesize + target_size = CHUNK_BASE * (2 ** np.log10(dset_size / (1024.0 * 1024))) if target_size > CHUNK_MAX: target_size = CHUNK_MAX @@ -126,11 +115,11 @@ def guess_chunks(shape: Tuple[int, ...], typesize: int) -> Tuple[int, ...]: # 1b. We're within 50% of the target chunk size, AND # 2. The chunk is smaller than the maximum chunk size - chunk_bytes = np.prod(chunks)*typesize + chunk_bytes = np.prod(chunks) * typesize - if (chunk_bytes < target_size or - abs(chunk_bytes-target_size)/target_size < 0.5) and \ - chunk_bytes < CHUNK_MAX: + if ( + chunk_bytes < target_size or abs(chunk_bytes - target_size) / target_size < 0.5 + ) and chunk_bytes < CHUNK_MAX: break if np.prod(chunks) == 1: @@ -142,9 +131,7 @@ def guess_chunks(shape: Tuple[int, ...], typesize: int) -> Tuple[int, ...]: return tuple(int(x) for x in chunks) -def normalize_chunks( - chunks: Any, shape: Tuple[int, ...], typesize: int -) -> Tuple[int, ...]: +def normalize_chunks(chunks: Any, shape: Tuple[int, ...], typesize: int) -> Tuple[int, ...]: """Convenience function to normalize the `chunks` argument for an array with the given `shape`.""" @@ -164,17 +151,16 @@ def normalize_chunks( # handle bad dimensionality if len(chunks) > len(shape): - raise ValueError('too many dimensions in chunks') + raise ValueError("too many dimensions in chunks") # handle underspecified chunks if len(chunks) < len(shape): # assume chunks across remaining dimensions - chunks += shape[len(chunks):] + chunks += shape[len(chunks) :] # handle None or -1 in chunks if -1 in chunks or None in chunks: - chunks = tuple(s if c == -1 or c is None else int(c) - for s, c in zip(shape, chunks)) + chunks = tuple(s if c == -1 or c is None else int(c) for s, c in zip(shape, chunks)) return tuple(chunks) @@ -186,30 +172,34 @@ def normalize_dtype(dtype: Union[str, np.dtype], object_codec) -> Tuple[np.dtype dtype = dtype.__name__ # type: ignore if isinstance(dtype, str): # allow ':' to delimit class from codec arguments - tokens = dtype.split(':') + tokens = dtype.split(":") key = tokens[0] if key in object_codecs: dtype = np.dtype(object) if object_codec is None: codec_id = object_codecs[key] if len(tokens) > 1: - args = tokens[1].split(',') + args = tokens[1].split(",") else: args = [] try: object_codec = codec_registry[codec_id](*args) except KeyError: # pragma: no cover - raise ValueError('codec %r for object type %r is not ' - 'available; please provide an ' - 'object_codec manually' % (codec_id, key)) + raise ValueError( + "codec %r for object type %r is not " + "available; please provide an " + "object_codec manually" % (codec_id, key) + ) return dtype, object_codec dtype = np.dtype(dtype) # don't allow generic datetime64 or timedelta64, require units to be specified - if dtype == np.dtype('M8') or dtype == np.dtype('m8'): - raise ValueError('datetime64 and timedelta64 dtypes with generic units ' - 'are not supported, please specify units (e.g., "M8[ns]")') + if dtype == np.dtype("M8") or dtype == np.dtype("m8"): + raise ValueError( + "datetime64 and timedelta64 dtypes with generic units " + 'are not supported, please specify units (e.g., "M8[ns]")' + ) return dtype, object_codec @@ -227,16 +217,17 @@ def is_total_slice(item, shape: Tuple[int]) -> bool: if item == slice(None): return True if isinstance(item, slice): - item = item, + item = (item,) if isinstance(item, tuple): return all( - (isinstance(s, slice) and - ((s == 
slice(None)) or - ((s.stop - s.start == l) and (s.step in [1, None])))) - for s, l in zip(item, shape) + ( + isinstance(it, slice) + and ((it == slice(None)) or ((it.stop - it.start == sh) and (it.step in [1, None]))) + ) + for it, sh in zip(item, shape) ) else: - raise TypeError('expected slice or tuple of slices, found %r' % item) + raise TypeError("expected slice or tuple of slices, found %r" % item) def normalize_resize_args(old_shape, *args): @@ -251,33 +242,32 @@ def normalize_resize_args(old_shape, *args): else: new_shape = tuple(new_shape) if len(new_shape) != len(old_shape): - raise ValueError('new shape must have same number of dimensions') + raise ValueError("new shape must have same number of dimensions") # handle None in new_shape - new_shape = tuple(s if n is None else int(n) - for s, n in zip(old_shape, new_shape)) + new_shape = tuple(s if n is None else int(n) for s, n in zip(old_shape, new_shape)) return new_shape def human_readable_size(size) -> str: if size < 2**10: - return '%s' % size + return "%s" % size elif size < 2**20: - return '%.1fK' % (size / float(2**10)) + return "%.1fK" % (size / float(2**10)) elif size < 2**30: - return '%.1fM' % (size / float(2**20)) + return "%.1fM" % (size / float(2**20)) elif size < 2**40: - return '%.1fG' % (size / float(2**30)) + return "%.1fG" % (size / float(2**30)) elif size < 2**50: - return '%.1fT' % (size / float(2**40)) + return "%.1fT" % (size / float(2**40)) else: - return '%.1fP' % (size / float(2**50)) + return "%.1fP" % (size / float(2**50)) def normalize_order(order: str) -> str: order = str(order).upper() - if order not in ['C', 'F']: + if order not in ["C", "F"]: raise ValueError("order must be either 'C' or 'F', found: %r" % order) return order @@ -286,8 +276,7 @@ def normalize_dimension_separator(sep: Optional[str]) -> Optional[str]: if sep in (".", "/", None): return sep else: - raise ValueError( - "dimension_separator must be either '.' or '/', found: %r" % sep) + raise ValueError("dimension_separator must be either '.' 
or '/', found: %r" % sep) def normalize_fill_value(fill_value, dtype: np.dtype): @@ -300,17 +289,19 @@ def normalize_fill_value(fill_value, dtype: np.dtype): # structured arrays fill_value = np.zeros((), dtype=dtype)[()] - elif dtype.kind == 'U': + elif dtype.kind == "U": # special case unicode because of encoding issues on Windows if passed through numpy # https://github.com/alimanfoo/zarr/pull/172#issuecomment-343782713 if not isinstance(fill_value, str): - raise ValueError('fill_value {!r} is not valid for dtype {}; must be a ' - 'unicode string'.format(fill_value, dtype)) + raise ValueError( + "fill_value {!r} is not valid for dtype {}; must be a " + "unicode string".format(fill_value, dtype) + ) else: try: - if isinstance(fill_value, bytes) and dtype.kind == 'V': + if isinstance(fill_value, bytes) and dtype.kind == "V": # special case for numpy 1.14 compatibility fill_value = np.array(fill_value, dtype=dtype.str).view(dtype)[()] else: @@ -318,8 +309,10 @@ def normalize_fill_value(fill_value, dtype: np.dtype): except Exception as e: # re-raise with our own error message to be helpful - raise ValueError('fill_value {!r} is not valid for dtype {}; nested ' - 'exception: {}'.format(fill_value, dtype, e)) + raise ValueError( + "fill_value {!r} is not valid for dtype {}; nested " + "exception: {}".format(fill_value, dtype, e) + ) return fill_value @@ -328,7 +321,7 @@ def normalize_storage_path(path: Union[str, bytes, None]) -> str: # handle bytes if isinstance(path, bytes): - path = str(path, 'ascii') + path = str(path, "ascii") # ensure str if path is not None and not isinstance(path, str): @@ -337,21 +330,21 @@ def normalize_storage_path(path: Union[str, bytes, None]) -> str: if path: # convert backslash to forward slash - path = path.replace('\\', '/') + path = path.replace("\\", "/") # ensure no leading slash - while len(path) > 0 and path[0] == '/': + while len(path) > 0 and path[0] == "/": path = path[1:] # ensure no trailing slash - while len(path) > 0 and path[-1] == '/': + while len(path) > 0 and path[-1] == "/": path = path[:-1] # collapse any repeated slashes previous_char = None - collapsed = '' + collapsed = "" for char in path: - if char == '/' and previous_char == '/': + if char == "/" and previous_char == "/": pass else: collapsed += char @@ -359,12 +352,12 @@ def normalize_storage_path(path: Union[str, bytes, None]) -> str: path = collapsed # don't allow path segments with just '.' or '..' - segments = path.split('/') - if any(s in {'.', '..'} for s in segments): + segments = path.split("/") + if any(s in {".", ".."} for s in segments): raise ValueError("path containing '.' or '..' segment not allowed") else: - path = '' + path = "" return path @@ -376,32 +369,34 @@ def buffer_size(v) -> int: def info_text_report(items: Dict[Any, Any]) -> str: keys = [k for k, v in items] max_key_len = max(len(k) for k in keys) - report = '' + report = "" for k, v in items: - wrapper = TextWrapper(width=80, - initial_indent=k.ljust(max_key_len) + ' : ', - subsequent_indent=' '*max_key_len + ' : ') + wrapper = TextWrapper( + width=80, + initial_indent=k.ljust(max_key_len) + " : ", + subsequent_indent=" " * max_key_len + " : ", + ) text = wrapper.fill(str(v)) - report += text + '\n' + report += text + "\n" return report def info_html_report(items) -> str: report = '' - report += '' + report += "" for k, v in items: - report += '' \ - '' \ - '' \ - '' \ - % (k, v) - report += '' - report += '
</table>
' + report += ( + "" + '%s' + '%s' + "" % (k, v) + ) + report += "" + report += "" return report class InfoReporter: - def __init__(self, obj): self.obj = obj @@ -415,24 +410,22 @@ def _repr_html_(self): class TreeNode: - def __init__(self, obj, depth=0, level=None): self.obj = obj self.depth = depth self.level = level def get_children(self): - if hasattr(self.obj, 'values'): + if hasattr(self.obj, "values"): if self.level is None or self.depth < self.level: depth = self.depth + 1 - return [TreeNode(o, depth=depth, level=self.level) - for o in self.obj.values()] + return [TreeNode(o, depth=depth, level=self.level) for o in self.obj.values()] return [] def get_text(self): name = self.obj.name.split("/")[-1] or "/" - if hasattr(self.obj, 'shape'): - name += ' {} {}'.format(self.obj.shape, self.obj.dtype) + if hasattr(self.obj, "shape"): + name += " {} {}".format(self.obj.shape, self.obj.dtype) return name def get_type(self): @@ -440,7 +433,6 @@ def get_type(self): class TreeTraversal(Traversal): - def get_children(self, node): return node.get_children() @@ -451,8 +443,8 @@ def get_text(self, node): return node.get_text() -tree_group_icon = 'folder' -tree_array_icon = 'table' +tree_group_icon = "folder" +tree_array_icon = "table" def tree_get_icon(stype: str) -> str: @@ -499,37 +491,28 @@ def tree_widget(group, expand, level): class TreeViewer: - def __init__(self, group, expand=False, level=None): self.group = group self.expand = expand self.level = level - self.text_kwargs = dict( - horiz_len=2, - label_space=1, - indent=1 - ) + self.text_kwargs = dict(horiz_len=2, label_space=1, indent=1) self.bytes_kwargs = dict( - UP_AND_RIGHT="+", - HORIZONTAL="-", - VERTICAL="|", - VERTICAL_AND_RIGHT="+" + UP_AND_RIGHT="+", HORIZONTAL="-", VERTICAL="|", VERTICAL_AND_RIGHT="+" ) self.unicode_kwargs = dict( UP_AND_RIGHT="\u2514", HORIZONTAL="\u2500", VERTICAL="\u2502", - VERTICAL_AND_RIGHT="\u251C" + VERTICAL_AND_RIGHT="\u251C", ) def __bytes__(self): drawer = LeftAligned( - traverse=TreeTraversal(), - draw=BoxStyle(gfx=self.bytes_kwargs, **self.text_kwargs) + traverse=TreeTraversal(), draw=BoxStyle(gfx=self.bytes_kwargs, **self.text_kwargs) ) root = TreeNode(self.group, level=self.level) result = drawer(root) @@ -542,8 +525,7 @@ def __bytes__(self): def __unicode__(self): drawer = LeftAligned( - traverse=TreeTraversal(), - draw=BoxStyle(gfx=self.unicode_kwargs, **self.text_kwargs) + traverse=TreeTraversal(), draw=BoxStyle(gfx=self.unicode_kwargs, **self.text_kwargs) ) root = TreeNode(self.group, level=self.level) return drawer(root) @@ -557,16 +539,21 @@ def _repr_mimebundle_(self, **kwargs): def check_array_shape(param, array, shape): - if not hasattr(array, 'shape'): - raise TypeError('parameter {!r}: expected an array-like object, got {!r}' - .format(param, type(array))) + if not hasattr(array, "shape"): + raise TypeError( + "parameter {!r}: expected an array-like object, got {!r}".format(param, type(array)) + ) if array.shape != shape: - raise ValueError('parameter {!r}: expected array with shape {!r}, got {!r}' - .format(param, shape, array.shape)) + raise ValueError( + "parameter {!r}: expected array with shape {!r}, got {!r}".format( + param, shape, array.shape + ) + ) def is_valid_python_name(name): from keyword import iskeyword + return name.isidentifier() and not iskeyword(name) @@ -599,9 +586,9 @@ def __init__(self, store_key, chunk_store): self.read_blocks = set() _key_path = self.map._key_to_str(store_key) - _key_path = _key_path.split('/') + _key_path = _key_path.split("/") _chunk_path = 
[self.chunk_store._normalize_key(_key_path[-1])] - _key_path = '/'.join(_key_path[:-1] + _chunk_path) + _key_path = "/".join(_key_path[:-1] + _chunk_path) self.key_path = _key_path def prepare_chunk(self): @@ -613,21 +600,15 @@ def prepare_chunk(self): self.buff[0:16] = header self.nblocks = nbytes / blocksize self.nblocks = ( - int(self.nblocks) - if self.nblocks == int(self.nblocks) - else int(self.nblocks + 1) + int(self.nblocks) if self.nblocks == int(self.nblocks) else int(self.nblocks + 1) ) if self.nblocks == 1: self.buff = self.read_full() return - start_points_buffer = self.fs.read_block( - self.key_path, 16, int(self.nblocks * 4) - ) - self.start_points = np.frombuffer( - start_points_buffer, count=self.nblocks, dtype=np.int32 - ) + start_points_buffer = self.fs.read_block(self.key_path, 16, int(self.nblocks * 4)) + self.start_points = np.frombuffer(start_points_buffer, count=self.nblocks, dtype=np.int32) self.start_points_max = self.start_points.max() - self.buff[16: (16 + (self.nblocks * 4))] = start_points_buffer + self.buff[16 : (16 + (self.nblocks * 4))] = start_points_buffer self.n_per_block = blocksize / typesize def read_part(self, start, nitems): @@ -676,12 +657,14 @@ def read_full(self): return self.chunk_store[self.store_key] -def retry_call(callabl: Callable, - args=None, - kwargs=None, - exceptions: Tuple[Any, ...] = (), - retries: int = 10, - wait: float = 0.1) -> Any: +def retry_call( + callabl: Callable, + args=None, + kwargs=None, + exceptions: Tuple[Any, ...] = (), + retries: int = 10, + wait: float = 0.1, +) -> Any: """ Make several attempts to invoke the callable. If one of the given exceptions is raised, wait the given period of time and retry up to the given number of @@ -693,7 +676,7 @@ def retry_call(callabl: Callable, if kwargs is None: kwargs = {} - for attempt in range(1, retries+1): + for attempt in range(1, retries + 1): try: return callabl(*args, **kwargs) except exceptions: From b79f1e2959a7cb2a2d5a0d3de77aa06ef29d026e Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Wed, 12 Jul 2023 22:26:47 -0400 Subject: [PATCH 111/213] chore: add linting commit to list of revs ignored for git blame --- .git-blame-ignore-revs | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 .git-blame-ignore-revs diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs new file mode 100644 index 0000000000..05fe3dbd8c --- /dev/null +++ b/.git-blame-ignore-revs @@ -0,0 +1,2 @@ +# lint codebase with black and ruff +4e348d6b80c96da461fd866576c971b8a659ba15 \ No newline at end of file From b0c50e1c284c6ce693bd306b77d0a5f00df2d0e8 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Wed, 12 Jul 2023 22:49:44 -0400 Subject: [PATCH 112/213] docs: update release notes --- docs/release.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/docs/release.rst b/docs/release.rst index 46bd1f025d..5bdae7465d 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -30,6 +30,12 @@ Enhancements * **Block Indexing**: Implemented blockwise (chunk blocks) indexing to ``zarr.Array``. By :user:`Altay Sansal ` :issue:`1428` +Maintenance +~~~~~~~~~~~ + +* Style the codebase with ``ruff`` and ``black``. + By :user:`Davis Bennett` :issue:`1459` + .. 
_release_2.15.0: 2.15.0 From 1558041db6c829654f3af738f16e41f0f0bfeac1 Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Fri, 14 Jul 2023 16:15:23 -0400 Subject: [PATCH 113/213] Refactor core tests, round 2 (#1462) * chore: refactor tests to inherit create_array from a base class * chore: widen type of shape to variable length tuple * chore: add importskip barrier for lmdb * docs: release notes * chore: remove fsstore kwargs variable * chore: fix type error in creation of fsstore * chore: add `create_filters` method to TestArray. Pop out `compressor` kwarg in create_array. --- docs/release.rst | 6 + zarr/storage.py | 2 +- zarr/tests/test_core.py | 859 +++++++++++----------------------------- zarr/util.py | 6 +- 4 files changed, 249 insertions(+), 624 deletions(-) diff --git a/docs/release.rst b/docs/release.rst index 46bd1f025d..269305c2a4 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -30,6 +30,12 @@ Enhancements * **Block Indexing**: Implemented blockwise (chunk blocks) indexing to ``zarr.Array``. By :user:`Altay Sansal ` :issue:`1428` +Maintenance +~~~~~~~~~~~ + +* Refactor the core array tests to reduce code duplication. + By :user:`Davis Bennett ` :issue:`1462`. + .. _release_2.15.0: 2.15.0 diff --git a/zarr/storage.py b/zarr/storage.py index ef1bd64955..c91f2f1cf0 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -300,7 +300,7 @@ def _require_parent_group( def init_array( store: StoreLike, - shape: Tuple[int, ...], + shape: Union[int, Tuple[int, ...]], chunks: Union[bool, int, Tuple[int, ...]] = True, dtype=None, compressor="default", diff --git a/zarr/tests/test_core.py b/zarr/tests/test_core.py index ab1a6e8aa7..8bf8789f56 100644 --- a/zarr/tests/test_core.py +++ b/zarr/tests/test_core.py @@ -3,10 +3,10 @@ import sys import pickle import shutil +from typing import Any, Literal, Optional, Tuple, Union import unittest from itertools import zip_longest from tempfile import mkdtemp - import numpy as np import packaging.version import pytest @@ -19,6 +19,7 @@ import zarr from zarr._storage.store import ( + BaseStore, v3_api_available, ) from .._storage.v3_storage_transformers import ShardingStorageTransformer, v3_sharding_available @@ -42,6 +43,7 @@ init_array, init_group, meta_root, + normalize_store_arg ) from zarr._storage.v3 import ( ABSStoreV3, @@ -62,16 +64,64 @@ # noinspection PyMethodMayBeStatic -class TestArray(unittest.TestCase): - +class TestArray(): version = 2 root = '' - KVStoreClass = KVStore + path = '' + compressor = Zlib(level=1) + filters = None + dimension_separator: Literal["/", ".", None] = None + cache_metadata = True + cache_attrs = True + partial_decompress: bool = False + write_empty_chunks = True + read_only = False + storage_transformers: Tuple[Any, ...] 
= () + + def create_store(self) -> BaseStore: + return KVStore(dict()) + + # used by child classes + def create_chunk_store(self) -> Optional[BaseStore]: + return None + + def create_storage_transformers(self, shape: Union[int, Tuple[int, ...]]) -> Tuple[Any, ...]: + return () + + def create_filters(self, dtype: Optional[str]) -> Tuple[Any, ...]: + return () + + def create_array(self, shape: Union[int, Tuple[int, ...]], **kwargs): + store = self.create_store() + chunk_store = self.create_chunk_store() + # keyword arguments for array initialization + init_array_kwargs = { + "path": kwargs.pop("path", self.path), + "compressor": kwargs.pop("compressor", self.compressor), + "chunk_store": chunk_store, + "storage_transformers": self.create_storage_transformers(shape), + "filters": kwargs.pop("filters", self.create_filters(kwargs.get("dtype", None))) + } + + # keyword arguments for array instantiation + access_array_kwargs = { + "path": init_array_kwargs["path"], + "read_only": kwargs.pop("read_only", self.read_only), + "chunk_store": chunk_store, + "cache_metadata": kwargs.pop("cache_metadata", self.cache_metadata), + "cache_attrs": kwargs.pop("cache_attrs", self.cache_attrs), + "partial_decompress": kwargs.pop("partial_decompress", self.partial_decompress), + "write_empty_chunks": kwargs.pop("write_empty_chunks", self.write_empty_chunks), + } + + init_array(store, shape, **{**init_array_kwargs, **kwargs}) + + return Array(store, **access_array_kwargs) def test_array_init(self): # normal initialization - store = self.KVStoreClass(dict()) + store = self.create_store() init_array(store, shape=100, chunks=10, dtype=" Tuple[Any, ...]: + return ( Delta(dtype=dtype), FixedScaleOffset(dtype=dtype, scale=1, offset=0), - ] - kwargs.setdefault('filters', filters) - compressor = Zlib(1) - kwargs.setdefault('compressor', compressor) - cache_metadata = kwargs.pop('cache_metadata', True) - cache_attrs = kwargs.pop('cache_attrs', True) - write_empty_chunks = kwargs.pop('write_empty_chunks', True) - init_array(store, **kwargs) - return Array(store, read_only=read_only, cache_attrs=cache_attrs, - cache_metadata=cache_metadata, write_empty_chunks=write_empty_chunks) + ) def expected(self): return [ @@ -2407,19 +2296,14 @@ def __delitem__(self, key): def __contains__(self, item): return item in self.inner + def close(self): + return self.inner.close() + class TestArrayWithCustomMapping(TestArray): - @staticmethod - def create_array(read_only=False, **kwargs): - store = CustomMapping() - kwargs.setdefault('compressor', Zlib(1)) - cache_metadata = kwargs.pop('cache_metadata', True) - cache_attrs = kwargs.pop('cache_attrs', True) - write_empty_chunks = kwargs.pop('write_empty_chunks', True) - init_array(store, **kwargs) - return Array(store, read_only=read_only, cache_metadata=cache_metadata, - cache_attrs=cache_attrs, write_empty_chunks=write_empty_chunks) + def create_store(self): + return CustomMapping() def test_nbytes_stored(self): z = self.create_array(shape=1000, chunks=100) @@ -2429,18 +2313,6 @@ def test_nbytes_stored(self): class TestArrayNoCache(TestArray): - - @staticmethod - def create_array(read_only=False, **kwargs): - store = KVStore(dict()) - kwargs.setdefault('compressor', Zlib(level=1)) - cache_metadata = kwargs.pop('cache_metadata', True) - cache_attrs = kwargs.pop('cache_attrs', True) - write_empty_chunks = kwargs.pop('write_empty_chunks', True) - init_array(store, **kwargs) - return Array(store, read_only=read_only, cache_metadata=cache_metadata, - cache_attrs=cache_attrs, 
write_empty_chunks=write_empty_chunks) - def test_cache_metadata(self): a1 = self.create_array(shape=100, chunks=10, dtype='i1', cache_metadata=False) path = None if self.version == 2 else a1.path @@ -2504,45 +2376,30 @@ def test_object_arrays_danger(self): class TestArrayWithStoreCache(TestArray): - - @staticmethod - def create_array(read_only=False, **kwargs): - store = LRUStoreCache(dict(), max_size=None) - kwargs.setdefault('compressor', Zlib(level=1)) - cache_metadata = kwargs.pop('cache_metadata', True) - cache_attrs = kwargs.pop('cache_attrs', True) - write_empty_chunks = kwargs.pop('write_empty_chunks', True) - init_array(store, **kwargs) - return Array(store, read_only=read_only, cache_metadata=cache_metadata, - cache_attrs=cache_attrs, write_empty_chunks=write_empty_chunks) + def create_store(self): + return LRUStoreCache(dict(), max_size=None) def test_store_has_bytes_values(self): # skip as the cache has no control over how the store provides values pass -fsspec_mapper_kwargs = { - "check": True, - "create": True, - "missing_exceptions": None -} - - @pytest.mark.skipif(have_fsspec is False, reason="needs fsspec") class TestArrayWithFSStore(TestArray): - @staticmethod - def create_array(read_only=False, **kwargs): + compressor = Blosc() + dimension_separator: Literal[".", "/"] = "." + + def create_store(self): path = mkdtemp() atexit.register(shutil.rmtree, path) - key_separator = kwargs.pop('key_separator', ".") - store = FSStore(path, key_separator=key_separator, auto_mkdir=True, **fsspec_mapper_kwargs) - cache_metadata = kwargs.pop('cache_metadata', True) - cache_attrs = kwargs.pop('cache_attrs', True) - write_empty_chunks = kwargs.pop('write_empty_chunks', True) - kwargs.setdefault('compressor', Blosc()) - init_array(store, **kwargs) - return Array(store, read_only=read_only, cache_metadata=cache_metadata, - cache_attrs=cache_attrs, write_empty_chunks=write_empty_chunks) + key_separator = self.dimension_separator + store = FSStore(path, + key_separator=key_separator, + auto_mkdir=True, + check=True, + create=True, + missing_exceptions=None) + return store def expected(self): return [ @@ -2556,21 +2413,23 @@ def expected(self): @pytest.mark.skipif(have_fsspec is False, reason="needs fsspec") class TestArrayWithFSStoreFromFilesystem(TestArray): - @staticmethod - def create_array(read_only=False, **kwargs): + compressor = Blosc() + dimension_separator = "." 
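(Editorial aside on the shape of this refactor: each store-specific test class previously carried its own ``create_array`` staticmethod; the base class now owns a single ``create_array`` template, and subclasses only override small hooks such as ``create_store`` and class-level attributes like ``compressor``. A condensed sketch of the pattern, with names following the diff and the store choices purely illustrative::

    from tempfile import mkdtemp

    from numcodecs import Blosc, Zlib

    from zarr.core import Array
    from zarr.storage import FSStore, KVStore, init_array

    class TestArray:
        compressor = Zlib(level=1)  # hook: subclasses override attributes...

        def create_store(self):  # ...or this factory method
            return KVStore(dict())

        def create_array(self, shape, **kwargs):
            # one shared template builds arrays the same way for every store flavour
            store = self.create_store()
            init_array(
                store, shape, compressor=kwargs.pop("compressor", self.compressor), **kwargs
            )
            return Array(store)

    class TestArrayWithFSStore(TestArray):
        compressor = Blosc()

        def create_store(self):
            return FSStore(mkdtemp(), auto_mkdir=True)

This is why the surrounding hunks mostly delete per-class boilerplate: each subclass shrinks to a ``create_store`` override plus a few attributes.)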
+ + def create_store(self): from fsspec.implementations.local import LocalFileSystem + fs = LocalFileSystem(auto_mkdir=True) path = mkdtemp() atexit.register(shutil.rmtree, path) - key_separator = kwargs.pop('key_separator', ".") - store = FSStore(path, fs=fs, key_separator=key_separator, **fsspec_mapper_kwargs) - cache_metadata = kwargs.pop('cache_metadata', True) - cache_attrs = kwargs.pop('cache_attrs', True) - write_empty_chunks = kwargs.pop('write_empty_chunks', True) - kwargs.setdefault('compressor', Blosc()) - init_array(store, **kwargs) - return Array(store, read_only=read_only, cache_metadata=cache_metadata, - cache_attrs=cache_attrs, write_empty_chunks=write_empty_chunks) + key_separator = self.dimension_separator + store = FSStore(path, + fs=fs, + key_separator=key_separator, + check=True, + create=True, + missing_exceptions=None) + return store def expected(self): return [ @@ -2584,24 +2443,14 @@ def expected(self): @pytest.mark.skipif(have_fsspec is False, reason="needs fsspec") class TestArrayWithFSStorePartialRead(TestArray): - @staticmethod - def create_array(read_only=False, **kwargs): + compressor = Blosc(blocksize=256) + partial_decompress = True + + def create_store(self): path = mkdtemp() atexit.register(shutil.rmtree, path) store = FSStore(path) - cache_metadata = kwargs.pop("cache_metadata", True) - cache_attrs = kwargs.pop("cache_attrs", True) - write_empty_chunks = kwargs.pop('write_empty_chunks', True) - kwargs.setdefault("compressor", Blosc(blocksize=256)) - init_array(store, **kwargs) - return Array( - store, - read_only=read_only, - cache_metadata=cache_metadata, - cache_attrs=cache_attrs, - partial_decompress=True, - write_empty_chunks=write_empty_chunks - ) + return store def expected(self): return [ @@ -2640,21 +2489,9 @@ def test_read_from_all_blocks(self): @pytest.mark.skipif(have_fsspec is False, reason="needs fsspec") -class TestArrayWithFSStoreNested(TestArray): - - @staticmethod - def create_array(read_only=False, **kwargs): - path = mkdtemp() - atexit.register(shutil.rmtree, path) - key_separator = kwargs.pop('key_separator', "/") - store = FSStore(path, key_separator=key_separator, auto_mkdir=True) - cache_metadata = kwargs.pop('cache_metadata', True) - cache_attrs = kwargs.pop('cache_attrs', True) - write_empty_chunks = kwargs.pop('write_empty_chunks', True) - kwargs.setdefault('compressor', Blosc()) - init_array(store, **kwargs) - return Array(store, read_only=read_only, cache_metadata=cache_metadata, - cache_attrs=cache_attrs, write_empty_chunks=write_empty_chunks) +class TestArrayWithFSStoreNested(TestArrayWithFSStore): + compressor = Blosc() + dimension_separator = "/" def expected(self): return [ @@ -2667,26 +2504,10 @@ def expected(self): @pytest.mark.skipif(have_fsspec is False, reason="needs fsspec") -class TestArrayWithFSStoreNestedPartialRead(TestArray): - @staticmethod - def create_array(read_only=False, **kwargs): - path = mkdtemp() - atexit.register(shutil.rmtree, path) - key_separator = kwargs.pop('key_separator', "/") - store = FSStore(path, key_separator=key_separator, auto_mkdir=True) - cache_metadata = kwargs.pop("cache_metadata", True) - cache_attrs = kwargs.pop("cache_attrs", True) - write_empty_chunks = kwargs.pop('write_empty_chunks', True) - kwargs.setdefault("compressor", Blosc()) - init_array(store, **kwargs) - return Array( - store, - read_only=read_only, - cache_metadata=cache_metadata, - cache_attrs=cache_attrs, - partial_decompress=True, - write_empty_chunks=write_empty_chunks - ) +class 
TestArrayWithFSStoreNestedPartialRead(TestArrayWithFSStore): + compressor = Blosc() + dimension_separator = "/" + partial_decompress = True def expected(self): return [ @@ -2730,10 +2551,12 @@ def test_read_from_all_blocks(self): @pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") class TestArrayV3(TestArray): - version = 3 root = meta_root - KVStoreClass = KVStoreV3 + path = "arr1" + + def create_store(self): + return KVStoreV3(dict()) def expected(self): # tests for array without path will not be run for v3 stores @@ -2750,25 +2573,11 @@ def expected(self): @pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") -class TestArrayWithPathV3(TestArrayWithPath): - - version = 3 - - @staticmethod - def create_array(array_path='arr1', read_only=False, **kwargs): - store = KVStoreV3(dict()) - kwargs.setdefault('compressor', Zlib(level=1)) - cache_metadata = kwargs.pop('cache_metadata', True) - cache_attrs = kwargs.pop('cache_attrs', True) - write_empty_chunks = kwargs.pop('write_empty_chunks', True) - init_array(store, path=array_path, **kwargs) - return Array(store, path=array_path, read_only=read_only, - cache_metadata=cache_metadata, cache_attrs=cache_attrs, - write_empty_chunks=write_empty_chunks) +class TestArrayWithPathV3(TestArrayV3): def test_array_init(self): - store = KVStoreV3(dict()) + store = self.create_store() # can initialize an array without a path init_array(store, shape=100, chunks=10, dtype=" BaseStore: path = mkdtemp() atexit.register(shutil.rmtree, path) - store = DirectoryStoreV3(path) - cache_metadata = kwargs.pop('cache_metadata', True) - cache_attrs = kwargs.pop('cache_attrs', True) - write_empty_chunks = kwargs.pop('write_empty_chunks', True) - kwargs.setdefault('compressor', Zlib(1)) - init_array(store, path=array_path, **kwargs) - return Array(store, path=array_path, read_only=read_only, - cache_metadata=cache_metadata, cache_attrs=cache_attrs, - write_empty_chunks=write_empty_chunks) + return DirectoryStoreV3(path) def test_nbytes_stored(self): # dict as store @@ -2960,87 +2751,52 @@ def test_nbytes_stored(self): @skip_test_env_var("ZARR_TEST_ABS") @pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") -class TestArrayWithABSStoreV3(TestArrayWithABSStore, TestArrayWithPathV3): - - @staticmethod - def absstore(): +class TestArrayWithABSStoreV3(TestArrayV3): + def create_store(self) -> ABSStoreV3: client = abs_container() store = ABSStoreV3(client=client) store.rmdir() return store - def create_array(self, array_path='arr1', read_only=False, **kwargs): - store = self.absstore() - kwargs.setdefault('compressor', Zlib(1)) - cache_metadata = kwargs.pop('cache_metadata', True) - cache_attrs = kwargs.pop('cache_attrs', True) - write_empty_chunks = kwargs.pop('write_empty_chunks', True) - init_array(store, path=array_path, **kwargs) - return Array(store, path=array_path, read_only=read_only, cache_metadata=cache_metadata, - cache_attrs=cache_attrs, write_empty_chunks=write_empty_chunks) - - # TODO: TestArrayWithN5StoreV3 # class TestArrayWithN5StoreV3(TestArrayWithDirectoryStoreV3): @pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") -class TestArrayWithDBMStoreV3(TestArrayWithDBMStore, TestArrayWithPathV3): - - @staticmethod - def create_array(array_path='arr1', read_only=False, **kwargs): - path = mktemp(suffix='.anydbm') - atexit.register(atexit_rmglob, path + '*') - store = DBMStoreV3(path, flag='n') - cache_metadata = kwargs.pop('cache_metadata', True) - cache_attrs = kwargs.pop('cache_attrs', True) - 
write_empty_chunks = kwargs.pop('write_empty_chunks', True) - kwargs.setdefault('compressor', Zlib(1)) - init_array(store, path=array_path, **kwargs) - return Array(store, path=array_path, read_only=read_only, cache_attrs=cache_attrs, - cache_metadata=cache_metadata, write_empty_chunks=write_empty_chunks) +class TestArrayWithDBMStoreV3(TestArrayV3): + def create_store(self) -> DBMStoreV3: + path = mktemp(suffix=".anydbm") + atexit.register(atexit_rmglob, path + "*") + store = DBMStoreV3(path, flag="n") + return store def test_nbytes_stored(self): pass # not implemented @pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") -class TestArrayWithDBMStoreV3BerkeleyDB(TestArrayWithDBMStoreBerkeleyDB, TestArrayWithPathV3): - - @staticmethod - def create_array(array_path='arr1', read_only=False, **kwargs): +class TestArrayWithDBMStoreV3BerkeleyDB(TestArrayV3): + def create_store(self) -> DBMStoreV3: bsddb3 = pytest.importorskip("bsddb3") - path = mktemp(suffix='.dbm') + path = mktemp(suffix=".dbm") atexit.register(os.remove, path) - store = DBMStoreV3(path, flag='n', open=bsddb3.btopen) - cache_metadata = kwargs.pop('cache_metadata', True) - cache_attrs = kwargs.pop('cache_attrs', True) - write_empty_chunks = kwargs.pop('write_empty_chunks', True) - kwargs.setdefault('compressor', Zlib(1)) - init_array(store, path=array_path, **kwargs) - return Array(store, path=array_path, read_only=read_only, cache_metadata=cache_metadata, - cache_attrs=cache_attrs, write_empty_chunks=write_empty_chunks) + store = DBMStoreV3(path, flag="n", open=bsddb3.btopen) + return store def test_nbytes_stored(self): pass # not implemented @pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") -class TestArrayWithLMDBStoreV3(TestArrayWithLMDBStore, TestArrayWithPathV3): +class TestArrayWithLMDBStoreV3(TestArrayV3): + lmdb_buffers = True - @staticmethod - def create_array(array_path='arr1', read_only=False, **kwargs): + def create_store(self) -> LMDBStoreV3: pytest.importorskip("lmdb") - path = mktemp(suffix='.lmdb') + path = mktemp(suffix=".lmdb") atexit.register(atexit_rmtree, path) - store = LMDBStoreV3(path, buffers=True) - cache_metadata = kwargs.pop('cache_metadata', True) - cache_attrs = kwargs.pop('cache_attrs', True) - write_empty_chunks = kwargs.pop('write_empty_chunks', True) - kwargs.setdefault('compressor', Zlib(1)) - init_array(store, path=array_path, **kwargs) - return Array(store, path=array_path, read_only=read_only, cache_metadata=cache_metadata, - cache_attrs=cache_attrs, write_empty_chunks=write_empty_chunks) + store = LMDBStoreV3(path, buffers=self.lmdb_buffers) + return store def test_store_has_bytes_values(self): pass # returns values as memoryviews/buffers instead of bytes @@ -3050,42 +2806,21 @@ def test_nbytes_stored(self): @pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") -class TestArrayWithLMDBStoreV3NoBuffers(TestArrayWithLMDBStoreNoBuffers, TestArrayWithPathV3): - - @staticmethod - def create_array(array_path='arr1', read_only=False, **kwargs): - pytest.importorskip("lmdb") - path = mktemp(suffix='.lmdb') - atexit.register(atexit_rmtree, path) - store = LMDBStoreV3(path, buffers=False) - cache_metadata = kwargs.pop('cache_metadata', True) - cache_attrs = kwargs.pop('cache_attrs', True) - write_empty_chunks = kwargs.pop('write_empty_chunks', True) - kwargs.setdefault('compressor', Zlib(1)) - init_array(store, path=array_path, **kwargs) - return Array(store, path=array_path, read_only=read_only, cache_metadata=cache_metadata, - cache_attrs=cache_attrs, 
write_empty_chunks=write_empty_chunks) +class TestArrayWithLMDBStoreV3NoBuffers(TestArrayWithLMDBStoreV3): + lmdb_buffers = False def test_nbytes_stored(self): pass # not implemented @pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") -class TestArrayWithSQLiteStoreV3(TestArrayWithPathV3, TestArrayWithSQLiteStore): - - @staticmethod - def create_array(array_path='arr1', read_only=False, **kwargs): +class TestArrayWithSQLiteStoreV3(TestArrayV3): + def create_store(self): pytest.importorskip("sqlite3") - path = mktemp(suffix='.db') + path = mktemp(suffix=".db") atexit.register(atexit_rmtree, path) store = SQLiteStoreV3(path) - cache_metadata = kwargs.pop('cache_metadata', True) - cache_attrs = kwargs.pop('cache_attrs', True) - write_empty_chunks = kwargs.pop('write_empty_chunks', True) - kwargs.setdefault('compressor', Zlib(1)) - init_array(store, path=array_path, **kwargs) - return Array(store, path=array_path, read_only=read_only, cache_metadata=cache_metadata, - cache_attrs=cache_attrs, write_empty_chunks=write_empty_chunks) + return store def test_nbytes_stored(self): pass # not implemented @@ -3142,18 +2877,10 @@ def __contains__(self, item): @pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") -class TestArrayWithCustomMappingV3(TestArrayWithPathV3, TestArrayWithCustomMapping): - - @staticmethod - def create_array(array_path='arr1', read_only=False, **kwargs): +class TestArrayWithCustomMappingV3(TestArrayV3): + def create_store(self): store = CustomMappingV3() - kwargs.setdefault('compressor', Zlib(1)) - cache_metadata = kwargs.pop('cache_metadata', True) - cache_attrs = kwargs.pop('cache_attrs', True) - write_empty_chunks = kwargs.pop('write_empty_chunks', True) - init_array(store, path=array_path, **kwargs) - return Array(store, path=array_path, read_only=read_only, cache_metadata=cache_metadata, - cache_attrs=cache_attrs, write_empty_chunks=write_empty_chunks) + return store def test_nbytes_stored(self): z = self.create_array(shape=1000, chunks=100) @@ -3171,18 +2898,10 @@ def test_len(self): @pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") -class TestArrayNoCacheV3(TestArrayWithPathV3, TestArrayNoCache): - - @staticmethod - def create_array(array_path='arr1', read_only=False, **kwargs): +class TestArrayNoCacheV3(TestArrayWithPathV3): + def create_store(self): store = KVStoreV3(dict()) - kwargs.setdefault('compressor', Zlib(level=1)) - cache_metadata = kwargs.pop('cache_metadata', True) - cache_attrs = kwargs.pop('cache_attrs', True) - write_empty_chunks = kwargs.pop('write_empty_chunks', True) - init_array(store, path=array_path, **kwargs) - return Array(store, path=array_path, read_only=read_only, cache_metadata=cache_metadata, - cache_attrs=cache_attrs, write_empty_chunks=write_empty_chunks) + return store def test_object_arrays_danger(self): # skip this one as it only works if metadata are cached @@ -3190,18 +2909,10 @@ def test_object_arrays_danger(self): @pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") -class TestArrayWithStoreCacheV3(TestArrayWithPathV3, TestArrayWithStoreCache): - - @staticmethod - def create_array(array_path='arr1', read_only=False, **kwargs): +class TestArrayWithStoreCacheV3(TestArrayV3): + def create_store(self): store = LRUStoreCacheV3(dict(), max_size=None) - kwargs.setdefault('compressor', Zlib(level=1)) - cache_metadata = kwargs.pop('cache_metadata', True) - cache_attrs = kwargs.pop('cache_attrs', True) - write_empty_chunks = kwargs.pop('write_empty_chunks', True) - init_array(store, 
path=array_path, **kwargs) - return Array(store, path=array_path, read_only=read_only, cache_metadata=cache_metadata, - cache_attrs=cache_attrs, write_empty_chunks=write_empty_chunks) + return store def test_store_has_bytes_values(self): # skip as the cache has no control over how the store provides values @@ -3210,25 +2921,22 @@ def test_store_has_bytes_values(self): @pytest.mark.skipif(have_fsspec is False, reason="needs fsspec") @pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") -class TestArrayWithFSStoreV3(TestArrayWithPathV3, TestArrayWithFSStore): - @staticmethod - def create_array(array_path='arr1', read_only=False, **kwargs): +class TestArrayWithFSStoreV3(TestArrayV3): + compressor = Blosc() + + def create_store(self): path = mkdtemp() atexit.register(shutil.rmtree, path) - key_separator = kwargs.pop('key_separator', ".") + key_separator = self.dimension_separator store = FSStoreV3( path, key_separator=key_separator, auto_mkdir=True, - **fsspec_mapper_kwargs + create=True, + check=True, + missing_exceptions=None ) - cache_metadata = kwargs.pop('cache_metadata', True) - cache_attrs = kwargs.pop('cache_attrs', True) - write_empty_chunks = kwargs.pop('write_empty_chunks', True) - kwargs.setdefault('compressor', Blosc()) - init_array(store, path=array_path, **kwargs) - return Array(store, path=array_path, read_only=read_only, cache_metadata=cache_metadata, - cache_attrs=cache_attrs, write_empty_chunks=write_empty_chunks) + return store def expected(self): return [ @@ -3242,22 +2950,21 @@ def expected(self): @pytest.mark.skipif(have_fsspec is False, reason="needs fsspec") @pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") -class TestArrayWithFSStoreV3FromFilesystem(TestArrayWithPathV3, TestArrayWithFSStore): - @staticmethod - def create_array(array_path='arr1', read_only=False, **kwargs): +class TestArrayWithFSStoreV3FromFilesystem(TestArrayWithFSStoreV3): + def create_store(self): from fsspec.implementations.local import LocalFileSystem + fs = LocalFileSystem(auto_mkdir=True) path = mkdtemp() atexit.register(shutil.rmtree, path) - key_separator = kwargs.pop('key_separator', ".") - store = FSStoreV3(path, fs=fs, key_separator=key_separator, **fsspec_mapper_kwargs) - cache_metadata = kwargs.pop('cache_metadata', True) - cache_attrs = kwargs.pop('cache_attrs', True) - write_empty_chunks = kwargs.pop('write_empty_chunks', True) - kwargs.setdefault('compressor', Blosc()) - init_array(store, path=array_path, **kwargs) - return Array(store, path=array_path, read_only=read_only, cache_metadata=cache_metadata, - cache_attrs=cache_attrs, write_empty_chunks=write_empty_chunks) + key_separator = self.dimension_separator + store = FSStoreV3(path, + fs=fs, + key_separator=key_separator, + create=True, + check=True, + missing_exceptions=None) + return store def expected(self): return [ @@ -3271,27 +2978,8 @@ def expected(self): @pytest.mark.skipif(have_fsspec is False, reason="needs fsspec") @pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") -class TestArrayWithFSStoreV3PartialRead(TestArrayWithPathV3, TestArrayWithFSStorePartialRead): - - @staticmethod - def create_array(array_path='arr1', read_only=False, **kwargs): - path = mkdtemp() - atexit.register(shutil.rmtree, path) - store = FSStoreV3(path) - cache_metadata = kwargs.pop("cache_metadata", True) - cache_attrs = kwargs.pop("cache_attrs", True) - write_empty_chunks = kwargs.pop('write_empty_chunks', True) - kwargs.setdefault("compressor", Blosc()) - init_array(store, path=array_path, **kwargs) - 
return Array( - store, - path=array_path, - read_only=read_only, - cache_metadata=cache_metadata, - cache_attrs=cache_attrs, - partial_decompress=True, - write_empty_chunks=write_empty_chunks, - ) +class TestArrayWithFSStoreV3PartialRead(TestArrayWithFSStoreV3): + partial_decompress = True def expected(self): return [ @@ -3306,33 +2994,16 @@ def expected(self): @pytest.mark.skipif(have_fsspec is False, reason="needs fsspec") @pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") @pytest.mark.skipif(not v3_sharding_available, reason="sharding is disabled") -class TestArrayWithFSStoreV3PartialReadUncompressedSharded( - TestArrayWithPathV3, TestArrayWithFSStorePartialRead -): +class TestArrayWithFSStoreV3PartialReadUncompressedSharded(TestArrayWithFSStoreV3): + partial_decompress = True + compressor = None - @staticmethod - def create_array(array_path='arr1', read_only=False, **kwargs): - path = mkdtemp() - atexit.register(shutil.rmtree, path) - store = FSStoreV3(path) - cache_metadata = kwargs.pop("cache_metadata", True) - cache_attrs = kwargs.pop("cache_attrs", True) - write_empty_chunks = kwargs.pop('write_empty_chunks', True) - kwargs.setdefault('compressor', None) - num_dims = 1 if isinstance(kwargs["shape"], int) else len(kwargs["shape"]) + def create_storage_transformers(self, shape) -> Tuple[Any]: + num_dims = 1 if isinstance(shape, int) else len(shape) sharding_transformer = ShardingStorageTransformer( "indexed", chunks_per_shard=(2, ) * num_dims ) - init_array(store, path=array_path, storage_transformers=[sharding_transformer], **kwargs) - return Array( - store, - path=array_path, - read_only=read_only, - cache_metadata=cache_metadata, - cache_attrs=cache_attrs, - partial_decompress=True, - write_empty_chunks=write_empty_chunks, - ) + return (sharding_transformer,) def test_nbytes_stored(self): z = self.create_array(shape=1000, chunks=100) @@ -3359,21 +3030,8 @@ def expected(self): @pytest.mark.skipif(have_fsspec is False, reason="needs fsspec") @pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") -class TestArrayWithFSStoreV3Nested(TestArrayWithPathV3, TestArrayWithFSStoreNested): - - @staticmethod - def create_array(array_path='arr1', read_only=False, **kwargs): - path = mkdtemp() - atexit.register(shutil.rmtree, path) - key_separator = kwargs.pop('key_separator', "/") - store = FSStoreV3(path, key_separator=key_separator, auto_mkdir=True) - cache_metadata = kwargs.pop('cache_metadata', True) - cache_attrs = kwargs.pop('cache_attrs', True) - write_empty_chunks = kwargs.pop('write_empty_chunks', True) - kwargs.setdefault('compressor', Blosc()) - init_array(store, path=array_path, **kwargs) - return Array(store, path=array_path, read_only=read_only, cache_metadata=cache_metadata, - cache_attrs=cache_attrs, write_empty_chunks=write_empty_chunks) +class TestArrayWithFSStoreV3Nested(TestArrayWithFSStoreV3): + dimension_separator = "/" def expected(self): return [ @@ -3387,28 +3045,8 @@ def expected(self): @pytest.mark.skipif(have_fsspec is False, reason="needs fsspec") @pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") -class TestArrayWithFSStoreV3NestedPartialRead(TestArrayWithPathV3, - TestArrayWithFSStoreNestedPartialRead): - @staticmethod - def create_array(array_path='arr1', read_only=False, **kwargs): - path = mkdtemp() - atexit.register(shutil.rmtree, path) - key_separator = kwargs.pop('key_separator', "/") - store = FSStoreV3(path, key_separator=key_separator, auto_mkdir=True) - cache_metadata = kwargs.pop("cache_metadata", True) - 
cache_attrs = kwargs.pop("cache_attrs", True) - write_empty_chunks = kwargs.pop('write_empty_chunks', True) - kwargs.setdefault("compressor", Blosc()) - init_array(store, path=array_path, **kwargs) - return Array( - store, - path=array_path, - read_only=read_only, - cache_metadata=cache_metadata, - cache_attrs=cache_attrs, - partial_decompress=True, - write_empty_chunks=write_empty_chunks, - ) +class TestArrayWithFSStoreV3NestedPartialRead(TestArrayWithFSStoreV3): + dimension_separator = "/" def expected(self): return [ @@ -3423,22 +3061,10 @@ def expected(self): @pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") class TestArrayWithStorageTransformersV3(TestArrayWithChunkStoreV3): - @staticmethod - def create_array(array_path='arr1', read_only=False, **kwargs): - store = KVStoreV3(dict()) - # separate chunk store - chunk_store = KVStoreV3(dict()) - cache_metadata = kwargs.pop('cache_metadata', True) - cache_attrs = kwargs.pop('cache_attrs', True) - write_empty_chunks = kwargs.pop('write_empty_chunks', True) - dummy_storage_transformer = DummyStorageTransfomer( - "dummy_type", test_value=DummyStorageTransfomer.TEST_CONSTANT + def create_storage_transformers(self, shape) -> Tuple[Any]: + return ( + DummyStorageTransfomer("dummy_type", test_value=DummyStorageTransfomer.TEST_CONSTANT), ) - init_array(store, path=array_path, chunk_store=chunk_store, - storage_transformers=[dummy_storage_transformer], **kwargs) - return Array(store, path=array_path, read_only=read_only, - chunk_store=chunk_store, cache_metadata=cache_metadata, - cache_attrs=cache_attrs, write_empty_chunks=write_empty_chunks) def expected(self): return [ @@ -3452,23 +3078,14 @@ def expected(self): @pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") @pytest.mark.skipif(not v3_sharding_available, reason="sharding is disabled") -class TestArrayWithShardingStorageTransformerV3(TestArrayWithPathV3): +class TestArrayWithShardingStorageTransformerV3(TestArrayV3): + compressor = None - @staticmethod - def create_array(array_path='arr1', read_only=False, **kwargs): - store = KVStoreV3(dict()) - cache_metadata = kwargs.pop('cache_metadata', True) - cache_attrs = kwargs.pop('cache_attrs', True) - write_empty_chunks = kwargs.pop('write_empty_chunks', True) - kwargs.setdefault('compressor', None) - num_dims = 1 if isinstance(kwargs["shape"], int) else len(kwargs["shape"]) - sharding_transformer = ShardingStorageTransformer( - "indexed", chunks_per_shard=(2, ) * num_dims + def create_storage_transformers(self, shape) -> Tuple[Any]: + num_dims = (1 if isinstance(shape, int) else len(shape)) + return ( + ShardingStorageTransformer("indexed", chunks_per_shard=(2, ) * num_dims), ) - init_array(store, path=array_path, storage_transformers=[sharding_transformer], **kwargs) - return Array(store, path=array_path, read_only=read_only, - cache_metadata=cache_metadata, - cache_attrs=cache_attrs, write_empty_chunks=write_empty_chunks) def test_nbytes_stored(self): z = self.create_array(shape=1000, chunks=100) diff --git a/zarr/util.py b/zarr/util.py index 6ba20b96c2..efbb86e4c0 100644 --- a/zarr/util.py +++ b/zarr/util.py @@ -15,7 +15,8 @@ Tuple, TypeVar, Union, - Iterable + Iterable, + cast ) import numpy as np @@ -74,7 +75,7 @@ def json_loads(s: Union[bytes, str]) -> Dict[str, Any]: return json.loads(ensure_text(s, 'utf-8')) -def normalize_shape(shape) -> Tuple[int]: +def normalize_shape(shape: Union[int, Tuple[int, ...], None]) -> Tuple[int, ...]: """Convenience function to normalize the `shape` argument.""" if shape is 
None:
@@ -85,6 +86,7 @@ def normalize_shape(shape) -> Tuple[int]:
         shape = (int(shape),)

     # normalize
+    shape = cast(Tuple[int, ...], shape)
     shape = tuple(int(s) for s in shape)

     return shape
From 8fc3b4b7cdd85117ed83a88cb7f30d5691eae1a1 Mon Sep 17 00:00:00 2001
From: Lars
Date: Tue, 18 Jul 2023 18:02:36 +0200
Subject: [PATCH 114/213] Fix typo (#1468)

Just a small err in the spec.
---
 docs/spec/v2.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/spec/v2.rst b/docs/spec/v2.rst
index 45e6afb320..4fcd6ded76 100644
--- a/docs/spec/v2.rst
+++ b/docs/spec/v2.rst
@@ -81,7 +81,7 @@ filters
 The following keys MAY be present within the object:

 dimension_separator
-    If present, either the string ``"."`` or ``"/""`` defining the separator placed
+    If present, either the string ``"."`` or ``"/"`` defining the separator placed
     between the dimensions of a chunk. If the value is not set, then the
     default MUST be assumed to be ``"."``, leading to chunk keys of the form "0.0".
     Arrays defined with ``"/"`` as the dimension separator can be considered to have
From a5d8a958a6821c97739a1547149eb1eede299686 Mon Sep 17 00:00:00 2001
From: Davis Bennett
Date: Thu, 20 Jul 2023 11:41:21 -0400
Subject: [PATCH 115/213] chore: expose codecov token (#1473)

---
 .github/workflows/python-package.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
index f8fe9ab379..6a32793df3 100644
--- a/.github/workflows/python-package.yml
+++ b/.github/workflows/python-package.yml
@@ -80,7 +80,7 @@ jobs:
         pytest --cov=zarr --cov-config=pyproject.toml --doctest-plus --cov-report xml --cov=./ --timeout=300
     - uses: codecov/codecov-action@v3
       with:
-        #token: ${{ secrets.CODECOV_TOKEN }} # not required for public repos
+        token: ${{ secrets.CODECOV_TOKEN }} # not required for public repos
         #files: ./coverage1.xml,./coverage2.xml # optional
         #flags: unittests # optional
         #name: codecov-umbrella # optional
From 9623d5c26a5cefbe92f9ec8af44ee53b75d9dc38 Mon Sep 17 00:00:00 2001
From: Ryan Abernathey
Date: Thu, 20 Jul 2023 12:28:28 -0400
Subject: [PATCH 116/213] updated release notes for 2.16.0 (#1471)

---
 docs/release.rst | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/docs/release.rst b/docs/release.rst
index 31d4ba63c4..c09667e78d 100644
--- a/docs/release.rst
+++ b/docs/release.rst
@@ -18,6 +18,11 @@ Release notes
 Unreleased
 ----------

+.. _release_2.16.0:
+
+2.16.0
+------
+
 Enhancements
 ~~~~~~~~~~~~

@@ -27,8 +32,8 @@ Enhancements
 * Add ``__contains__`` method to ``KVStore``.
   By :user:`Christoph Gohlke ` :issue:`1454`.

- * **Block Indexing**: Implemented blockwise (chunk blocks) indexing to ``zarr.Array``.
-   By :user:`Altay Sansal ` :issue:`1428`
+* **Block Indexing**: Implemented blockwise (chunk blocks) indexing to ``zarr.Array``.
+  By :user:`Altay Sansal ` :issue:`1428`

 Maintenance
 ~~~~~~~~~~~
From 55ccb3a855d52c5cb2ee9b62ecfc82571d754763 Mon Sep 17 00:00:00 2001
From: Josh Moore
Date: Thu, 20 Jul 2023 11:27:31 -0700
Subject: [PATCH 117/213] Add "Generate release notes" to release process (#1476)

cc: @rabernat

Starting a handful of versions ago, GitHub added a "Generate release notes"
button. This generates links to all PRs as well as to new contributors.
I've updated the 2.16.0 release, but I'd be for always clicking this button
when making a release.
---
 docs/contributing.rst | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/docs/contributing.rst b/docs/contributing.rst
index dc6beb0094..e590d15d8f 100644
--- a/docs/contributing.rst
+++ b/docs/contributing.rst
@@ -379,6 +379,8 @@ Set the description of the release to::
 replacing the correct version numbers. For pre-release versions, the URL
 should omit the pre-release suffix, e.g. "a1" or "rc1".

+Click on "Generate release notes" to auto-fill the description.
+
 After creating the release, the documentation will be built on
 https://readthedocs.io. Full releases will be available under
 `/stable `_ while
From f3ba0b19adc74a281489a9df892041a88d3cb7fc Mon Sep 17 00:00:00 2001
From: jakirkham
Date: Thu, 20 Jul 2023 15:19:54 -0700
Subject: [PATCH 118/213] Require `setuptools_scm` version `1.5.4`+ (#1477)

---
 docs/release.rst | 6 ++++++
 pyproject.toml   | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/docs/release.rst b/docs/release.rst
index c09667e78d..f80d4a8c75 100644
--- a/docs/release.rst
+++ b/docs/release.rst
@@ -18,6 +18,12 @@ Release notes
 Unreleased
 ----------

+Maintenance
+~~~~~~~~~~~
+
+* Require ``setuptools_scm`` version ``1.5.4``\+
+  By :user:`John A. Kirkham ` :issue:`1477`.
+
 .. _release_2.16.0:

 2.16.0
diff --git a/pyproject.toml b/pyproject.toml
index 4b293b90e4..1b880cf6fe 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,5 +1,5 @@
 [build-system]
-requires = ["setuptools>=64.0.0", "setuptools-scm"]
+requires = ["setuptools>=64.0.0", "setuptools-scm>1.5.4"]
 build-backend = "setuptools.build_meta"

From 6cb3cf19bc98792e0e3a6cfc7ae626551e7ea8a1 Mon Sep 17 00:00:00 2001
From: Dimitri Papadopoulos Orfanos <3234522+DimitriPapadopoulos@users.noreply.github.com>
Date: Thu, 27 Jul 2023 22:37:25 +0200
Subject: [PATCH 119/213] Delete trailing spaces (#1424)

...and add missing trailing newline for POSIX compliance.
---
 .git-blame-ignore-revs   | 2 +-
 docs/_static/custom.css  | 2 +-
 docs/acknowledgments.rst | 2 +-
 docs/contributing.rst    | 4 ++--
 docs/index.rst           | 4 ++--
 docs/release.rst         | 2 +-
 pyproject.toml           | 2 +-
 7 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs
index 05fe3dbd8c..53bf4633f0 100644
--- a/.git-blame-ignore-revs
+++ b/.git-blame-ignore-revs
@@ -1,2 +1,2 @@
 # lint codebase with black and ruff
-4e348d6b80c96da461fd866576c971b8a659ba15
\ No newline at end of file
+4e348d6b80c96da461fd866576c971b8a659ba15
diff --git a/docs/_static/custom.css b/docs/_static/custom.css
index 487addfbbd..87dd70e347 100644
--- a/docs/_static/custom.css
+++ b/docs/_static/custom.css
@@ -120,4 +120,4 @@ html[data-theme=dark] h1 {

 html[data-theme=dark] h3 {
     color: #0a6774;
-}
\ No newline at end of file
+}
diff --git a/docs/acknowledgments.rst b/docs/acknowledgments.rst
index 36cd1f5646..4fce1e8ae4 100644
--- a/docs/acknowledgments.rst
+++ b/docs/acknowledgments.rst
@@ -73,4 +73,4 @@ documentation, code reviews, comments and/or ideas:
 * :user:`gsakkis`
 * :user:`hailiangzhang `
 * :user:`pmav99 `
-* :user:`sbalmer `
\ No newline at end of file
+* :user:`sbalmer `
diff --git a/docs/contributing.rst b/docs/contributing.rst
index e590d15d8f..46fc39d960 100644
--- a/docs/contributing.rst
+++ b/docs/contributing.rst
@@ -213,9 +213,9 @@ Test coverage
 Zarr maintains 100% test coverage under the latest Python stable release
 (currently Python 3.8). Both unit tests and docstring doctests are included
 when computing coverage.
Running:: - + $ python -m pytest -v --cov=zarr --cov-config=pyproject.toml zarr - + will automatically run the test suite with coverage and produce a coverage report. This should be 100% before code can be accepted into the main code base. diff --git a/docs/index.rst b/docs/index.rst index 50060d10cc..97f5889ca5 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -20,7 +20,7 @@ Zarr-Python **Version**: |version| **Download documentation**: `Zipped HTML `_ - + **Useful links**: `Installation `_ | `Source Repository `_ | @@ -101,4 +101,4 @@ Zarr is a file storage format for chunked, compressed, N-dimensional arrays base :color: dark :click-parent: - To the contributor's guide \ No newline at end of file + To the contributor's guide diff --git a/docs/release.rst b/docs/release.rst index f80d4a8c75..b9a9417059 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -35,7 +35,7 @@ Enhancements * Allow for partial codec specification in V3 array metadata. By :user:`Joe Hamman ` :issue:`1443`. -* Add ``__contains__`` method to ``KVStore``. +* Add ``__contains__`` method to ``KVStore``. By :user:`Christoph Gohlke ` :issue:`1454`. * **Block Indexing**: Implemented blockwise (chunk blocks) indexing to ``zarr.Array``. diff --git a/pyproject.toml b/pyproject.toml index 1b880cf6fe..7d8d275c0e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -116,7 +116,7 @@ follow_imports = "silent" [tool.pytest.ini_options] doctest_optionflags = [ "NORMALIZE_WHITESPACE", - "ELLIPSIS", + "ELLIPSIS", "IGNORE_EXCEPTION_DETAIL", ] addopts = [ From 6ed4d785db71d7087b276f0f6dc5b3f7cfedd7f1 Mon Sep 17 00:00:00 2001 From: Philipp Hanslovsky Date: Thu, 27 Jul 2023 17:17:09 -0400 Subject: [PATCH 120/213] Ensure that chunks is tuple of ints upon array creation (#1470) * Add failing test for creating group with float chunks * Fix flake8 errors * Cast chunks to tuple[int, ...] before returning * Use decorator to cast to int tuple * Fix mypy type issues * Fix black formatting * Add docstring to _as_int_tuple * Document changes in docs/release.rst * Revert to casting to tuple of ints inside normalize_chunks After discussion in #1470, this was selected as the best option --- docs/release.rst | 3 +++ zarr/tests/test_creation.py | 24 ++++++++++++++++++++++++ zarr/util.py | 3 ++- 3 files changed, 29 insertions(+), 1 deletion(-) diff --git a/docs/release.rst b/docs/release.rst index b9a9417059..aa622723e8 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -50,6 +50,9 @@ Maintenance * Style the codebase with ``ruff`` and ``black``. By :user:`Davis Bennett` :issue:`1459` +* Ensure that chunks is tuple of ints upon array creation. + By :user:`Philipp Hanslovsky` :issue:`1461` + .. 
_release_2.15.0: 2.15.0 diff --git a/zarr/tests/test_creation.py b/zarr/tests/test_creation.py index 61710cc221..b44c6379fd 100644 --- a/zarr/tests/test_creation.py +++ b/zarr/tests/test_creation.py @@ -757,3 +757,27 @@ def test_create_with_storage_transformers(at_root): z = create(1000000000, chunks=True, storage_transformers=[transformer], **kwargs) assert isinstance(z.chunk_store, DummyStorageTransfomer) assert z.chunk_store.test_value == DummyStorageTransfomer.TEST_CONSTANT + + +@pytest.mark.parametrize( + ("init_shape", "init_chunks", "shape", "chunks"), + ( + ((1,), (1,), (1,), (1,)), + ((1.0,), (1.0,), (1,), (1,)), + ((1.0,), False, (1,), (1,)), + ((1.0,), True, (1,), (1,)), + ((1.0,), None, (1,), (1,)), + ), +) +def test_shape_chunk_ints(init_shape, init_chunks, shape, chunks): + g = open_group() + array = g.create_dataset("ds", shape=init_shape, chunks=init_chunks, dtype=np.uint8) + + assert all( + isinstance(s, int) for s in array.shape + ), f"Expected shape to be all ints but found {array.shape=}." + assert all( + isinstance(c, int) for c in array.chunks + ), f"Expected chunks to be all ints but found {array.chunks=}." + assert array.shape == shape, f"Expected {shape=} but found {array.shape=}." + assert array.chunks == chunks, f"Expected {chunks=} but found {array.chunks=}." diff --git a/zarr/util.py b/zarr/util.py index acd461cfd5..ea0dd9fcec 100644 --- a/zarr/util.py +++ b/zarr/util.py @@ -175,7 +175,8 @@ def normalize_chunks(chunks: Any, shape: Tuple[int, ...], typesize: int) -> Tupl if -1 in chunks or None in chunks: chunks = tuple(s if c == -1 or c is None else int(c) for s, c in zip(shape, chunks)) - return tuple(chunks) + chunks = tuple(int(c) for c in chunks) + return chunks def normalize_dtype(dtype: Union[str, np.dtype], object_codec) -> Tuple[np.dtype, Any]: From 0cedd9832111c2af0c46fda46b10bec385ea74ab Mon Sep 17 00:00:00 2001 From: jakirkham Date: Fri, 11 Aug 2023 02:53:34 -0700 Subject: [PATCH 121/213] Add `docs` requirements to `pyproject.toml` (#1494) * Add `docs` requirements to `pyproject.toml` * Add release note * Exclude `numpy` version `1.21.0` Move the RTD exclusion to the `numpy` dependency directly. * Require `numcodecs[msgpack]` for `docs` * Update references for installing doc dependencies --- .pyup.yml | 3 --- .readthedocs.yaml | 3 ++- docs/contributing.rst | 4 ++-- docs/release.rst | 3 +++ pyproject.toml | 12 +++++++++++- requirements_rtfd.txt | 12 ------------ 6 files changed, 18 insertions(+), 19 deletions(-) delete mode 100644 requirements_rtfd.txt diff --git a/.pyup.yml b/.pyup.yml index 04937ed0b0..a70df4e3fb 100644 --- a/.pyup.yml +++ b/.pyup.yml @@ -13,6 +13,3 @@ requirements: - requirements_dev_optional.txt: pin: True update: all - - requirements_rtfd.txt: - pin: False - update: False diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 42fc08bca2..2124f77271 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -10,6 +10,7 @@ sphinx: python: install: - - requirements: requirements_rtfd.txt - method: pip path: . + extra_requirements: + - docs diff --git a/docs/contributing.rst b/docs/contributing.rst index 46fc39d960..0420535093 100644 --- a/docs/contributing.rst +++ b/docs/contributing.rst @@ -92,8 +92,8 @@ the repository, you can do something like the following:: $ mkdir -p ~/pyenv/zarr-dev $ python -m venv ~/pyenv/zarr-dev $ source ~/pyenv/zarr-dev/bin/activate - $ pip install -r requirements_dev_minimal.txt -r requirements_dev_numpy.txt -r requirements_rtfd.txt - $ pip install -e . 
+ $ pip install -r requirements_dev_minimal.txt -r requirements_dev_numpy.txt + $ pip install -e .[docs] To verify that your development environment is working, you can run the unit tests:: diff --git a/docs/release.rst b/docs/release.rst index aa622723e8..cf1400d3f8 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -24,6 +24,9 @@ Maintenance * Require ``setuptools_scm`` version ``1.5.4``\+ By :user:`John A. Kirkham ` :issue:`1477`. +* Add ``docs`` requirements to ``pyproject.toml`` + By :user:`John A. Kirkham ` :issue:`1494`. + .. _release_2.16.0: 2.16.0 diff --git a/pyproject.toml b/pyproject.toml index 7d8d275c0e..292bfddded 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,7 +13,7 @@ maintainers = [ requires-python = ">=3.8" dependencies = [ 'asciitree', - 'numpy>=1.20', + 'numpy>=1.20,!=1.21.0', 'fasteners', 'numcodecs>=0.10.0', ] @@ -43,6 +43,16 @@ jupyter = [ 'ipytree>=0.2.2', 'ipywidgets>=8.0.0', ] +docs = [ + 'sphinx', + 'sphinx_design', + 'sphinx-issues', + 'sphinx-copybutton', + 'sphinx-rtd-theme', + 'pydata-sphinx-theme', + 'numpydoc', + 'numcodecs[msgpack]', +] [project.urls] "Bug Tracker" = "https://github.com/zarr-developers/zarr-python/issues" diff --git a/requirements_rtfd.txt b/requirements_rtfd.txt deleted file mode 100644 index 5d7fec369a..0000000000 --- a/requirements_rtfd.txt +++ /dev/null @@ -1,12 +0,0 @@ -asciitree -setuptools -setuptools_scm -sphinx -sphinx_design -sphinx-issues -sphinx-copybutton -sphinx-rtd-theme -pydata-sphinx-theme -numpydoc -numpy!=1.21.0 -msgpack-python==0.5.6 From 12af7f13981ad6d7497e0bb32d3175758e7a109e Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Fri, 11 Aug 2023 16:23:53 +0200 Subject: [PATCH 122/213] `zarr.group` now accept the `meta_array` argument (#1489) * group() now takes the meta_array * added tests --- zarr/hierarchy.py | 9 ++++++++- zarr/tests/test_meta_array.py | 7 ++++--- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/zarr/hierarchy.py b/zarr/hierarchy.py index c7cc5c6fe2..3361969f08 100644 --- a/zarr/hierarchy.py +++ b/zarr/hierarchy.py @@ -1360,7 +1360,8 @@ def group( synchronizer=None, path=None, *, - zarr_version=None + zarr_version=None, + meta_array=None ): """Create a group. @@ -1382,6 +1383,11 @@ def group( Array synchronizer. path : string, optional Group path within store. + meta_array : array-like, optional + An array instance to use for determining arrays to create and return + to users. Use `numpy.empty(())` by default. + + .. 
versionadded:: 2.16.1 Returns ------- @@ -1432,6 +1438,7 @@ def group( synchronizer=synchronizer, path=path, zarr_version=zarr_version, + meta_array=meta_array, ) diff --git a/zarr/tests/test_meta_array.py b/zarr/tests/test_meta_array.py index 39394bd690..2545c6d624 100644 --- a/zarr/tests/test_meta_array.py +++ b/zarr/tests/test_meta_array.py @@ -9,7 +9,7 @@ import zarr.codecs from zarr.core import Array from zarr.creation import array, empty, full, ones, open_array, zeros -from zarr.hierarchy import open_group +from zarr.hierarchy import open_group, group from zarr.storage import DirectoryStore, MemoryStore, Store, ZipStore @@ -234,12 +234,13 @@ def test_full(module, compressor): assert np.all(np.isnan(z[:])) +@pytest.mark.parametrize("group_create_function", [group, open_group]) @pytest.mark.parametrize("module, compressor", param_module_and_compressor) @pytest.mark.parametrize("store_type", [None, DirectoryStore, MemoryStore, ZipStore]) -def test_group(tmp_path, module, compressor, store_type): +def test_group(tmp_path, group_create_function, module, compressor, store_type): xp = ensure_module(module) store = init_store(tmp_path, store_type) - g = open_group(store, meta_array=xp.empty(())) + g = group_create_function(store, meta_array=xp.empty(())) g.ones("data", shape=(10, 11), dtype=int, compressor=compressor) a = g["data"] assert a.shape == (10, 11) From f542fca7d0d42ee050e9a49d57ad0f5346f62de3 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 14 Aug 2023 08:44:12 +0200 Subject: [PATCH 123/213] Bump pypa/gh-action-pypi-publish from 1.8.7 to 1.8.10 (#1498) Bumps [pypa/gh-action-pypi-publish](https://github.com/pypa/gh-action-pypi-publish) from 1.8.7 to 1.8.10. - [Release notes](https://github.com/pypa/gh-action-pypi-publish/releases) - [Commits](https://github.com/pypa/gh-action-pypi-publish/compare/v1.8.7...v1.8.10) --- updated-dependencies: - dependency-name: pypa/gh-action-pypi-publish dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/releases.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/releases.yml b/.github/workflows/releases.yml index a00096bb18..8956cae314 100644 --- a/.github/workflows/releases.yml +++ b/.github/workflows/releases.yml @@ -64,7 +64,7 @@ jobs: with: name: releases path: dist - - uses: pypa/gh-action-pypi-publish@v1.8.7 + - uses: pypa/gh-action-pypi-publish@v1.8.10 with: user: __token__ password: ${{ secrets.pypi_password }} From 7017ce16954f81a209019041c56ad5ca1e568f09 Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Wed, 16 Aug 2023 21:45:13 +0200 Subject: [PATCH 124/213] `LRUStoreCache`: cache "contains" by contains checks (#1499) * cache "contains" by contains checks * updated the test_cache_keys counts * release notes --- docs/release.rst | 3 +++ zarr/_storage/v3.py | 2 +- zarr/storage.py | 10 +++++----- zarr/tests/test_storage.py | 15 +++++++++------ 4 files changed, 18 insertions(+), 12 deletions(-) diff --git a/docs/release.rst b/docs/release.rst index cf1400d3f8..188edd625f 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -27,6 +27,9 @@ Maintenance * Add ``docs`` requirements to ``pyproject.toml`` By :user:`John A. Kirkham ` :issue:`1494`. +* Fixed caching issue in ``LRUStoreCache``. + By :user:`Mads R. B. Kristensen ` :issue:`1499`. + .. 
_release_2.16.0: 2.16.0 diff --git a/zarr/_storage/v3.py b/zarr/_storage/v3.py index 1a50265c11..00dc085dac 100644 --- a/zarr/_storage/v3.py +++ b/zarr/_storage/v3.py @@ -509,7 +509,7 @@ def __init__(self, store, max_size: int): self._max_size = max_size self._current_size = 0 self._keys_cache = None - self._contains_cache = None + self._contains_cache = {} self._listdir_cache: Dict[Path, Any] = dict() self._values_cache: Dict[Path, Any] = OrderedDict() self._mutex = Lock() diff --git a/zarr/storage.py b/zarr/storage.py index 4f7b9905f1..b36f804ebd 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -2393,7 +2393,7 @@ def __init__(self, store: StoreLike, max_size: int): self._max_size = max_size self._current_size = 0 self._keys_cache = None - self._contains_cache = None + self._contains_cache: Dict[Any, Any] = {} self._listdir_cache: Dict[Path, Any] = dict() self._values_cache: Dict[Path, Any] = OrderedDict() self._mutex = Lock() @@ -2434,9 +2434,9 @@ def __iter__(self): def __contains__(self, key): with self._mutex: - if self._contains_cache is None: - self._contains_cache = set(self._keys()) - return key in self._contains_cache + if key not in self._contains_cache: + self._contains_cache[key] = key in self._store + return self._contains_cache[key] def clear(self): self._store.clear() @@ -2506,7 +2506,7 @@ def invalidate_keys(self): def _invalidate_keys(self): self._keys_cache = None - self._contains_cache = None + self._contains_cache.clear() self._listdir_cache.clear() def _invalidate_value(self, key): diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index 9557000472..ca6a6c1a98 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -2196,7 +2196,10 @@ def test_cache_keys(self): assert keys == sorted(cache.keys()) assert 1 == store.counter["keys"] assert foo_key in cache - assert 0 == store.counter["__contains__", foo_key] + assert 1 == store.counter["__contains__", foo_key] + # the next check for `foo_key` is cached + assert foo_key in cache + assert 1 == store.counter["__contains__", foo_key] assert keys == sorted(cache) assert 0 == store.counter["__iter__"] assert 1 == store.counter["keys"] @@ -2215,23 +2218,23 @@ def test_cache_keys(self): keys = sorted(cache.keys()) assert keys == [bar_key, baz_key, foo_key] assert 3 == store.counter["keys"] - assert 0 == store.counter["__contains__", foo_key] + assert 1 == store.counter["__contains__", foo_key] assert 0 == store.counter["__iter__"] cache.invalidate_keys() keys = sorted(cache) assert keys == [bar_key, baz_key, foo_key] assert 4 == store.counter["keys"] - assert 0 == store.counter["__contains__", foo_key] + assert 1 == store.counter["__contains__", foo_key] assert 0 == store.counter["__iter__"] cache.invalidate_keys() assert foo_key in cache - assert 5 == store.counter["keys"] - assert 0 == store.counter["__contains__", foo_key] + assert 4 == store.counter["keys"] + assert 2 == store.counter["__contains__", foo_key] assert 0 == store.counter["__iter__"] # check these would get counted if called directly assert foo_key in store - assert 1 == store.counter["__contains__", foo_key] + assert 3 == store.counter["__contains__", foo_key] assert keys == sorted(store) assert 1 == store.counter["__iter__"] From cb3908b998f692c1ff8a071034cba0554c775e4e Mon Sep 17 00:00:00 2001 From: Ryan Abernathey Date: Fri, 18 Aug 2023 10:27:53 -0400 Subject: [PATCH 125/213] prepare for 2.16.1 release (#1503) --- docs/release.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/docs/release.rst 
b/docs/release.rst index 188edd625f..bf0dcd69e9 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -18,6 +18,11 @@ Release notes Unreleased ---------- +.. _release_2.16.1: + +2.16.1 +------ + Maintenance ~~~~~~~~~~~ From d25110ebe34eaab7eb6a57fd2259c1831ca2c3d1 Mon Sep 17 00:00:00 2001 From: jakirkham Date: Wed, 23 Aug 2023 12:18:55 -0700 Subject: [PATCH 126/213] Disable pre-commit.ci's PR autofixes (#1506) --- .pre-commit-config.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c46115342d..c5152b10b6 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,7 @@ ci: autoupdate_commit_msg: "chore: update pre-commit hooks" autofix_commit_msg: "style: pre-commit fixes" + autofix_prs: false default_stages: [commit, push] default_language_version: python: python3 From 76ba69a21018822a5a0244c03af882a09293ff28 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 25 Aug 2023 11:18:48 +0200 Subject: [PATCH 127/213] Bump redis from 4.5.5 to 4.6.0 (#1446) Bumps [redis](https://github.com/redis/redis-py) from 4.5.5 to 4.6.0. - [Release notes](https://github.com/redis/redis-py/releases) - [Changelog](https://github.com/redis/redis-py/blob/master/CHANGES) - [Commits](https://github.com/redis/redis-py/compare/v4.5.5...v4.6.0) --- updated-dependencies: - dependency-name: redis dependency-type: direct:development update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Joe Hamman --- requirements_dev_optional.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements_dev_optional.txt b/requirements_dev_optional.txt index 0398d8f494..f86eb8c8e3 100644 --- a/requirements_dev_optional.txt +++ b/requirements_dev_optional.txt @@ -8,7 +8,7 @@ ipywidgets==8.0.7 # don't let pyup change pinning for azure-storage-blob, need to pin to older # version to get compatibility with azure storage emulator on appveyor (FIXME) azure-storage-blob==12.16.0 # pyup: ignore -redis==4.5.5 +redis==4.6.0 types-redis types-setuptools pymongo==4.4.0 From 4944e66cd847a6ab5ec3a70c2b7bc0973f707bd6 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 28 Aug 2023 09:09:20 +0200 Subject: [PATCH 128/213] Bump ipywidgets from 8.0.7 to 8.1.0 (#1510) Bumps [ipywidgets](https://github.com/jupyter-widgets/ipywidgets) from 8.0.7 to 8.1.0. - [Release notes](https://github.com/jupyter-widgets/ipywidgets/releases) - [Commits](https://github.com/jupyter-widgets/ipywidgets/compare/8.0.7...8.1.0) --- updated-dependencies: - dependency-name: ipywidgets dependency-type: direct:development update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- requirements_dev_optional.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements_dev_optional.txt b/requirements_dev_optional.txt index f86eb8c8e3..baa2fbc1e9 100644 --- a/requirements_dev_optional.txt +++ b/requirements_dev_optional.txt @@ -3,7 +3,7 @@ lmdb==1.4.1; sys_platform != 'win32' # optional library requirements for Jupyter ipytree==0.2.2 -ipywidgets==8.0.7 +ipywidgets==8.1.0 # optional library requirements for services # don't let pyup change pinning for azure-storage-blob, need to pin to older # version to get compatibility with azure storage emulator on appveyor (FIXME) From 9dea8e34d1c6fa1b90a2e828e1f80bc1baf638b8 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 15 Sep 2023 11:49:05 +0200 Subject: [PATCH 129/213] Bump actions/checkout from 3 to 4 (#1520) Bumps [actions/checkout](https://github.com/actions/checkout) from 3 to 4. - [Release notes](https://github.com/actions/checkout/releases) - [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md) - [Commits](https://github.com/actions/checkout/compare/v3...v4) --- updated-dependencies: - dependency-name: actions/checkout dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/codeql-analysis.yml | 2 +- .github/workflows/minimal.yml | 2 +- .github/workflows/python-package.yml | 2 +- .github/workflows/releases.yml | 2 +- .github/workflows/windows-testing.yml | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index bebe1ee205..7013f1784f 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -38,7 +38,7 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 # Initializes the CodeQL tools for scanning. 
- name: Initialize CodeQL diff --git a/.github/workflows/minimal.yml b/.github/workflows/minimal.yml index 4de5aca273..63bc97d157 100644 --- a/.github/workflows/minimal.yml +++ b/.github/workflows/minimal.yml @@ -13,7 +13,7 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup Miniconda uses: conda-incubator/setup-miniconda@v2.2.0 with: diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 6a32793df3..cdf230bc7c 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -38,7 +38,7 @@ jobs: ports: - 27017:27017 steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: fetch-depth: 0 - name: Setup Miniconda diff --git a/.github/workflows/releases.yml b/.github/workflows/releases.yml index 8956cae314..238c5e457d 100644 --- a/.github/workflows/releases.yml +++ b/.github/workflows/releases.yml @@ -11,7 +11,7 @@ jobs: fail-fast: false steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: submodules: true fetch-depth: 0 diff --git a/.github/workflows/windows-testing.yml b/.github/workflows/windows-testing.yml index b17eece058..3afa8c467e 100644 --- a/.github/workflows/windows-testing.yml +++ b/.github/workflows/windows-testing.yml @@ -18,7 +18,7 @@ jobs: matrix: python-version: ['3.8', '3.9', '3.10', '3.11'] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: fetch-depth: 0 - uses: conda-incubator/setup-miniconda@v2.2.0 From 6ec746ef1242dd9fec26b128cc0b3455d28ad6f0 Mon Sep 17 00:00:00 2001 From: Jack Kelly Date: Wed, 20 Sep 2023 17:35:36 +0100 Subject: [PATCH 130/213] Tiny tweak: Extend copyright notice to 2023 (#1528) --- LICENSE.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/LICENSE.txt b/LICENSE.txt index d672a4f670..850a0d8772 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -1,6 +1,6 @@ The MIT License (MIT) -Copyright (c) 2015-2022 Zarr Developers +Copyright (c) 2015-2023 Zarr Developers Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal From 5a54c95e7438779f66e4fe2491e7a9238b6a43fb Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 3 Oct 2023 10:24:40 -0700 Subject: [PATCH 131/213] Bump pytest from 7.3.2 to 7.4.0 (#1445) Bumps [pytest](https://github.com/pytest-dev/pytest) from 7.3.2 to 7.4.0. - [Release notes](https://github.com/pytest-dev/pytest/releases) - [Changelog](https://github.com/pytest-dev/pytest/blob/main/CHANGELOG.rst) - [Commits](https://github.com/pytest-dev/pytest/compare/7.3.2...7.4.0) --- updated-dependencies: - dependency-name: pytest dependency-type: direct:development update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Joe Hamman --- requirements_dev_minimal.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements_dev_minimal.txt b/requirements_dev_minimal.txt index df1ca11677..4b1208598e 100644 --- a/requirements_dev_minimal.txt +++ b/requirements_dev_minimal.txt @@ -5,4 +5,4 @@ numcodecs==0.11.0 msgpack-python==0.5.6 setuptools-scm==7.1.0 # test requirements -pytest==7.3.2 +pytest==7.4.0 From a9d516bb562da1360f7a2b7ae845046924180674 Mon Sep 17 00:00:00 2001 From: David Stansby Date: Wed, 25 Oct 2023 22:04:25 +0100 Subject: [PATCH 132/213] Allow black to be run on any Python version (#1549) * Allow black to be run on any Python version * docs: add release notes * docs: whitespace --------- Co-authored-by: Davis Vann Bennett --- .pre-commit-config.yaml | 1 - docs/release.rst | 6 ++++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c5152b10b6..f22dc39832 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -17,7 +17,6 @@ repos: rev: 22.12.0 hooks: - id: black - language_version: python3.8 - repo: https://github.com/codespell-project/codespell rev: v2.2.5 hooks: diff --git a/docs/release.rst b/docs/release.rst index bf0dcd69e9..edb5422fa0 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -18,6 +18,12 @@ Release notes Unreleased ---------- +Maintenance +~~~~~~~~~~~ + +* Allow ``black`` code formatter to be run with any Python version. + By :user:`David Stansby ` :issue:`1549` + .. _release_2.16.1: 2.16.1 From 16c2684345a659e70a23152442ab77c0809ab9b0 Mon Sep 17 00:00:00 2001 From: Dimitri Papadopoulos Orfanos <3234522+DimitriPapadopoulos@users.noreply.github.com> Date: Fri, 27 Oct 2023 00:12:46 +0200 Subject: [PATCH 133/213] =?UTF-8?q?Unnecessary=20list=20literal=20?= =?UTF-8?q?=E2=86=92=20set=20literal=20(#1534)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Unnecessary list literal → set literal Why use a list literal to initialise a set? Just use a set literal. * Add release note for previous commit Signed-off-by: Dimitri Papadopoulos <3234522+DimitriPapadopoulos@users.noreply.github.com> --------- Signed-off-by: Dimitri Papadopoulos <3234522+DimitriPapadopoulos@users.noreply.github.com> --- docs/release.rst | 3 +++ zarr/tests/test_storage.py | 6 +++--- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/docs/release.rst b/docs/release.rst index edb5422fa0..12e1449a80 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -21,6 +21,9 @@ Unreleased Maintenance ~~~~~~~~~~~ +* Initialise some sets in tests with set literals instead of list literals. + By :user:`Dimitri Papadopoulos Orfanos ` :issue:`1534`. + * Allow ``black`` code formatter to be run with any Python version. 
By :user:`David Stansby ` :issue:`1549`
diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py
index ca6a6c1a98..e87716fa47 100644
--- a/zarr/tests/test_storage.py
+++ b/zarr/tests/test_storage.py
@@ -1166,9 +1166,9 @@ def test_deep_ndim(self):
         if self.version == 2:
             assert set(store.listdir()) == {".zgroup", "bar"}
         else:
-            assert set(store.listdir()) == set(["data", "meta", "zarr.json"])
-            assert set(store.listdir("meta/root/" + path)) == set(["bar", "bar.group.json"])
-            assert set(store.listdir("data/root/" + path)) == set(["bar"])
+            assert set(store.listdir()) == {"data", "meta", "zarr.json"}
+            assert set(store.listdir("meta/root/" + path)) == {"bar", "bar.group.json"}
+            assert set(store.listdir("data/root/" + path)) == {"bar"}
         assert foo["bar"]["baz"][(0, 0, 0)] == 1

     def test_not_fsspec(self):
From 5eb737b02836799a0544610b87ffbc3e4e69ec01 Mon Sep 17 00:00:00 2001
From: Sanket Verma
Date: Thu, 26 Oct 2023 22:13:29 +0000
Subject: [PATCH 134/213] Minor edits in docs (#1509)

---
 .gitignore       | 2 ++
 docs/conf.py     | 2 +-
 docs/spec.rst    | 2 +-
 docs/spec/v1.rst | 2 +-
 docs/spec/v2.rst | 2 +-
 docs/spec/v3.rst | 6 +++---
 6 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/.gitignore b/.gitignore
index 4f0d523785..a6a456636d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -77,3 +77,5 @@ zarr/version.py
 #doesnotexist
 #test_sync*
 data/*
+
+.DS_Store
diff --git a/docs/conf.py b/docs/conf.py
index f85ecb7454..1ffaeddef4 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -68,7 +68,7 @@
 # General information about the project.
 project = "zarr"
-copyright = "2022, Zarr Developers"
+copyright = "2023, Zarr Developers"
 author = "Zarr Developers"
 version = zarr.__version__
diff --git a/docs/spec.rst b/docs/spec.rst
index 3c06f3228d..8aca0bbd80 100644
--- a/docs/spec.rst
+++ b/docs/spec.rst
@@ -4,7 +4,7 @@ Specifications
 ==============

 .. toctree::
-    :maxdepth: 3
+    :maxdepth: 1

     spec/v3
     spec/v2
diff --git a/docs/spec/v1.rst b/docs/spec/v1.rst
index 8584b24e6d..13f68ef36e 100644
--- a/docs/spec/v1.rst
+++ b/docs/spec/v1.rst
@@ -1,6 +1,6 @@
 .. _spec_v1:

-Zarr storage specification version 1
+Zarr Storage Specification Version 1
 ====================================

 This document provides a technical specification of the protocol and
diff --git a/docs/spec/v2.rst b/docs/spec/v2.rst
index 4fcd6ded76..c1e12e1218 100644
--- a/docs/spec/v2.rst
+++ b/docs/spec/v2.rst
@@ -1,6 +1,6 @@
 .. _spec_v2:

-Zarr storage specification version 2
+Zarr Storage Specification Version 2
 ====================================

 This document provides a technical specification of the protocol and format
diff --git a/docs/spec/v3.rst b/docs/spec/v3.rst
index a448fbc3a1..bd8852707b 100644
--- a/docs/spec/v3.rst
+++ b/docs/spec/v3.rst
@@ -1,7 +1,7 @@
 .. _spec_v3:

-Zarr storage specification version 3 (under development)
-========================================================
+Zarr Storage Specification Version 3
+=======================================================

-The v3 specification has been migrated to its own website,
+The V3 Specification has been migrated to its own website →
 https://zarr-specs.readthedocs.io/.
From 1ed37f5fa7039cbc8849bf9b0e9aae9c890e7ced Mon Sep 17 00:00:00 2001 From: Ziwen Liu <67518483+ziw-liu@users.noreply.github.com> Date: Thu, 26 Oct 2023 16:10:10 -0700 Subject: [PATCH 135/213] Preserve dimension separator metadata when resizing arrays (#1540) * preserve dimension separator when resizing arrays * test dimension separator metadata after resizing * document the change * Update release.rst --------- Co-authored-by: Davis Bennett --- docs/release.rst | 5 ++++- zarr/core.py | 25 ++++++++--------------- zarr/tests/test_core.py | 44 +++++++---------------------------------- 3 files changed, 19 insertions(+), 55 deletions(-) diff --git a/docs/release.rst b/docs/release.rst index 12e1449a80..a361bbc8d9 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -21,11 +21,14 @@ Unreleased Maintenance ~~~~~~~~~~~ +* Preserve ``dimension_separator`` when resizing arrays. + By :user:`Ziwen Liu ` :issue:`1533`. + * Initialise some sets in tests with set literals instead of list literals. By :user:`Dimitri Papadopoulos Orfanos ` :issue:`1534`. * Allow ``black`` code formatter to be run with any Python version. - By :user:`David Stansby ` :issue:`1549` + By :user:`David Stansby ` :issue:`1549`. .. _release_2.16.1: diff --git a/zarr/core.py b/zarr/core.py index 43ccdbaf7d..2177e9055c 100644 --- a/zarr/core.py +++ b/zarr/core.py @@ -253,7 +253,6 @@ def _load_metadata_nosync(self): except KeyError: raise ArrayNotFoundError(self._path) else: - # decode and store metadata as instance members meta = self._store._metadata_class.decode_array_metadata(meta_bytes) self._meta = meta @@ -341,7 +340,14 @@ def _flush_metadata_nosync(self): filters=filters_config, ) if getattr(self._store, "_store_version", 2) == 2: - meta.update(dict(chunks=self._chunks, dtype=self._dtype, order=self._order)) + meta.update( + dict( + chunks=self._chunks, + dtype=self._dtype, + order=self._order, + dimension_separator=self._dimension_separator, + ) + ) else: meta.update( dict( @@ -1358,7 +1364,6 @@ def get_mask_selection(self, selection, out=None, fields=None): return self._get_selection(indexer=indexer, out=out, fields=fields) def _get_selection(self, indexer, out=None, fields=None): - # We iterate over all chunks which overlap the selection and thus contain data # that needs to be extracted. Each chunk is processed in turn, extracting the # necessary data and storing into the correct location in the output array. @@ -1983,7 +1988,6 @@ def _set_basic_selection_nd(self, selection, value, fields=None): self._set_selection(indexer, value, fields=fields) def _set_selection(self, indexer, value, fields=None): - # We iterate over all chunks which overlap the selection and thus contain data # that needs to be replaced. Each chunk is processed in turn, extracting the # necessary data from the value array and storing into the chunk array. @@ -2018,7 +2022,6 @@ def _set_selection(self, indexer, value, fields=None): ): # iterative approach for chunk_coords, chunk_selection, out_selection in indexer: - # extract data to store if sel_shape == (): chunk_value = value @@ -2077,7 +2080,6 @@ def _process_chunk( and not self._filters and self._dtype != object ): - dest = out[out_selection] # Assume that array-like objects that doesn't have a # `writeable` flag is writable. 
@@ -2088,7 +2090,6 @@ def _process_chunk( ) if write_direct: - # optimization: we want the whole chunk, and the destination is # contiguous, so we can decompress directly from the chunk # into the destination array @@ -2321,7 +2322,6 @@ def _process_for_setitem(self, ckey, chunk_selection, value, fields=None): # to access the existing chunk data if is_scalar(value, self._dtype): - # setup array filled with value chunk = np.empty_like( self._meta_array, shape=self._chunks, dtype=self._dtype, order=self._order @@ -2329,7 +2329,6 @@ def _process_for_setitem(self, ckey, chunk_selection, value, fields=None): chunk.fill(value) else: - # ensure array is contiguous chunk = value.astype(self._dtype, order=self._order, copy=False) @@ -2337,12 +2336,10 @@ def _process_for_setitem(self, ckey, chunk_selection, value, fields=None): # partially replace the contents of this chunk try: - # obtain compressed data for chunk cdata = self.chunk_store[ckey] except KeyError: - # chunk not initialized if self._fill_value is not None: chunk = np.empty_like( @@ -2359,7 +2356,6 @@ def _process_for_setitem(self, ckey, chunk_selection, value, fields=None): ) else: - # decode chunk chunk = self._decode_chunk(cdata) if not chunk.flags.writeable: @@ -2429,7 +2425,6 @@ def _decode_chunk(self, cdata, start=None, nitems=None, expected_shape=None): return chunk def _encode_chunk(self, chunk): - # apply filters if self._filters: for f in self._filters: @@ -2619,7 +2614,6 @@ def __setstate__(self, state): self.__init__(**state) def _synchronized_op(self, f, *args, **kwargs): - if self._synchronizer is None: # no synchronization lock = nolock @@ -2636,7 +2630,6 @@ def _synchronized_op(self, f, *args, **kwargs): return result def _write_op(self, f, *args, **kwargs): - # guard condition if self._read_only: raise ReadOnlyError() @@ -2676,7 +2669,6 @@ def resize(self, *args): return self._write_op(self._resize_nosync, *args) def _resize_nosync(self, *args): - # normalize new shape argument old_shape = self._shape new_shape = normalize_resize_args(old_shape, *args) @@ -2755,7 +2747,6 @@ def append(self, data, axis=0): return self._write_op(self._append_nosync, data, axis=axis) def _append_nosync(self, data, axis=0): - # ensure data is array-like if not hasattr(data, "shape"): data = np.asanyarray(data, like=self._meta_array) diff --git a/zarr/tests/test_core.py b/zarr/tests/test_core.py index 77b9415d8b..f3ca73dea8 100644 --- a/zarr/tests/test_core.py +++ b/zarr/tests/test_core.py @@ -132,7 +132,6 @@ def create_array(self, shape: Union[int, Tuple[int, ...]], **kwargs): return Array(store, **access_array_kwargs) def test_array_init(self): - # normal initialization store = self.create_store() init_array(store, shape=100, chunks=10, dtype=" Tuple[Any, ...]: @@ -2583,7 +2558,6 @@ def expected(self): @pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") class TestArrayWithPathV3(TestArrayV3): def test_array_init(self): - store = self.create_store() # can initialize an array without a path init_array(store, shape=100, chunks=10, dtype=" Date: Mon, 30 Oct 2023 12:03:43 +0100 Subject: [PATCH 136/213] =?UTF-8?q?io.open=20=E2=86=92=20open=20(#1421)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In Python 3, io.open() is an alias for the builtin open() function: https://docs.python.org/3/library/io.html#io.open Co-authored-by: Davis Bennett Co-authored-by: Josh Moore --- docs/release.rst | 3 +++ zarr/convenience.py | 5 +++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git 
a/docs/release.rst b/docs/release.rst index a361bbc8d9..2f9b93a361 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -21,6 +21,9 @@ Unreleased Maintenance ~~~~~~~~~~~ +* Change occurrence of ``io.open()`` into ``open()``. + By :user:`Dimitri Papadopoulos Orfanos ` :issue:`1421`. + * Preserve ``dimension_separator`` when resizing arrays. By :user:`Ziwen Liu ` :issue:`1533`. diff --git a/zarr/convenience.py b/zarr/convenience.py index ff236d0df2..0ee8a8d323 100644 --- a/zarr/convenience.py +++ b/zarr/convenience.py @@ -1,5 +1,4 @@ """Convenience functions for storing and loading data.""" -import io import itertools import os import re @@ -29,6 +28,8 @@ StoreLike = Union[BaseStore, MutableMapping, str, None] +_builtin_open = open # builtin open is later shadowed by a local open function + def _check_and_update_path(store: BaseStore, path): if getattr(store, "_store_version", 2) > 2 and not path: @@ -491,7 +492,7 @@ def __init__(self, log): elif callable(log): self.log_func = log elif isinstance(log, str): - self.log_file = io.open(log, mode="w") + self.log_file = _builtin_open(log, mode="w") self.needs_closing = True elif hasattr(log, "write"): self.log_file = log From 2240d612464802e6f4e6bc9c6bf8b19ef71bbc13 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 30 Oct 2023 12:05:13 +0100 Subject: [PATCH 137/213] Bump pymongo from 4.4.0 to 4.5.0 (#1507) Bumps [pymongo](https://github.com/mongodb/mongo-python-driver) from 4.4.0 to 4.5.0. - [Release notes](https://github.com/mongodb/mongo-python-driver/releases) - [Changelog](https://github.com/mongodb/mongo-python-driver/blob/master/doc/changelog.rst) - [Commits](https://github.com/mongodb/mongo-python-driver/compare/4.4.0...4.5.0) --- updated-dependencies: - dependency-name: pymongo dependency-type: direct:development update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- requirements_dev_optional.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements_dev_optional.txt b/requirements_dev_optional.txt index baa2fbc1e9..3ff11d93e5 100644 --- a/requirements_dev_optional.txt +++ b/requirements_dev_optional.txt @@ -11,7 +11,7 @@ azure-storage-blob==12.16.0 # pyup: ignore redis==4.6.0 types-redis types-setuptools -pymongo==4.4.0 +pymongo==4.5.0 # optional test requirements coverage pytest-cov==4.1.0 From 71ce63a5d1bd27053eb59005a868581d550a6866 Mon Sep 17 00:00:00 2001 From: Dimitri Papadopoulos Orfanos <3234522+DimitriPapadopoulos@users.noreply.github.com> Date: Tue, 31 Oct 2023 09:32:31 +0100 Subject: [PATCH 138/213] Fix typo newly found by codespell (#1554) --- docs/tutorial.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/tutorial.rst b/docs/tutorial.rst index e3155acfae..f335db18d0 100644 --- a/docs/tutorial.rst +++ b/docs/tutorial.rst @@ -1315,7 +1315,7 @@ better performance, at least when using the Blosc compression library. The optimal chunk shape will depend on how you want to access the data. E.g., for a 2-dimensional array, if you only ever take slices along the first -dimension, then chunk across the second dimenson. If you know you want to chunk +dimension, then chunk across the second dimension. 
If you know you want to chunk across an entire dimension you can use ``None`` or ``-1`` within the ``chunks`` argument, e.g.:: From 15390a1fb5f76193de048dd277ef674d21d80f41 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 31 Oct 2023 09:58:52 +0100 Subject: [PATCH 139/213] Bump actions/setup-python from 4.6.0 to 4.7.1 (#1541) Bumps [actions/setup-python](https://github.com/actions/setup-python) from 4.6.0 to 4.7.1. - [Release notes](https://github.com/actions/setup-python/releases) - [Commits](https://github.com/actions/setup-python/compare/v4.6.0...v4.7.1) --- updated-dependencies: - dependency-name: actions/setup-python dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/releases.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/releases.yml b/.github/workflows/releases.yml index 238c5e457d..c08bfc6677 100644 --- a/.github/workflows/releases.yml +++ b/.github/workflows/releases.yml @@ -16,7 +16,7 @@ jobs: submodules: true fetch-depth: 0 - - uses: actions/setup-python@v4.6.0 + - uses: actions/setup-python@v4.7.1 name: Install Python with: python-version: '3.8' From 254b585ef4348608dc8e3a91153ff82ca4245bb4 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 31 Oct 2023 10:00:20 +0100 Subject: [PATCH 140/213] Bump pytest-doctestplus from 0.13.0 to 1.0.0 (#1512) Bumps [pytest-doctestplus](https://github.com/scientific-python/pytest-doctestplus) from 0.13.0 to 1.0.0. - [Release notes](https://github.com/scientific-python/pytest-doctestplus/releases) - [Changelog](https://github.com/scientific-python/pytest-doctestplus/blob/main/CHANGES.rst) - [Commits](https://github.com/scientific-python/pytest-doctestplus/compare/v0.13.0...v1.0.0) --- updated-dependencies: - dependency-name: pytest-doctestplus dependency-type: direct:development update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- requirements_dev_optional.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements_dev_optional.txt b/requirements_dev_optional.txt index 3ff11d93e5..fda3fcc6e5 100644 --- a/requirements_dev_optional.txt +++ b/requirements_dev_optional.txt @@ -15,7 +15,7 @@ pymongo==4.5.0 # optional test requirements coverage pytest-cov==4.1.0 -pytest-doctestplus==0.13.0 +pytest-doctestplus==1.0.0 pytest-timeout==2.1.0 h5py==3.9.0 fsspec==2023.6.0 From d756626759a9c0eb8a72f2c073a3bdf883b47cef Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 31 Oct 2023 10:01:02 +0100 Subject: [PATCH 141/213] Bump fasteners from 0.18 to 0.19 (#1553) Bumps [fasteners](https://github.com/harlowja/fasteners) from 0.18 to 0.19. - [Release notes](https://github.com/harlowja/fasteners/releases) - [Changelog](https://github.com/harlowja/fasteners/blob/main/CHANGELOG.md) - [Commits](https://github.com/harlowja/fasteners/compare/0.18...0.19) --- updated-dependencies: - dependency-name: fasteners dependency-type: direct:development update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- requirements_dev_minimal.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements_dev_minimal.txt b/requirements_dev_minimal.txt index 4b1208598e..725e95a8af 100644 --- a/requirements_dev_minimal.txt +++ b/requirements_dev_minimal.txt @@ -1,6 +1,6 @@ # library requirements asciitree==0.3.3 -fasteners==0.18 +fasteners==0.19 numcodecs==0.11.0 msgpack-python==0.5.6 setuptools-scm==7.1.0 From cce501a92a56d5ce22bd6cf5d71a72b2ae9d18fe Mon Sep 17 00:00:00 2001 From: David Stansby Date: Tue, 31 Oct 2023 11:09:17 +0000 Subject: [PATCH 142/213] Add links to numcodecs docs in tutorial (#1535) * Fix numcodecs links * Add release note --- docs/conf.py | 1 + docs/release.rst | 8 ++++++++ docs/tutorial.rst | 17 +++++++++-------- 3 files changed, 18 insertions(+), 8 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 1ffaeddef4..e33d10b2f6 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -331,6 +331,7 @@ def setup(app): intersphinx_mapping = { "python": ("https://docs.python.org/", None), "numpy": ("https://numpy.org/doc/stable/", None), + "numcodecs": ("https://numcodecs.readthedocs.io/en/stable/", None), } diff --git a/docs/release.rst b/docs/release.rst index 2f9b93a361..02552a8f93 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -18,6 +18,12 @@ Release notes Unreleased ---------- +Docs +~~~~ + +* Add links to ``numcodecs`` docs in the tutorial. + By :user:`David Stansby ` :issue:`1535`. + Maintenance ~~~~~~~~~~~ @@ -33,6 +39,8 @@ Maintenance * Allow ``black`` code formatter to be run with any Python version. By :user:`David Stansby ` :issue:`1549`. + + .. _release_2.16.1: 2.16.1 diff --git a/docs/tutorial.rst b/docs/tutorial.rst index f335db18d0..e563c16040 100644 --- a/docs/tutorial.rst +++ b/docs/tutorial.rst @@ -1175,8 +1175,9 @@ A fixed-length unicode dtype is also available, e.g.:: For variable-length strings, the ``object`` dtype can be used, but a codec must be provided to encode the data (see also :ref:`tutorial_objects` below). At the time of writing there are four codecs available that can encode variable length string -objects: :class:`numcodecs.VLenUTF8`, :class:`numcodecs.JSON`, :class:`numcodecs.MsgPack`. -and :class:`numcodecs.Pickle`. E.g. using ``VLenUTF8``:: +objects: :class:`numcodecs.vlen.VLenUTF8`, :class:`numcodecs.json.JSON`, +:class:`numcodecs.msgpacks.MsgPack`. and :class:`numcodecs.pickles.Pickle`. +E.g. using ``VLenUTF8``:: >>> import numcodecs >>> z = zarr.array(text_data, dtype=object, object_codec=numcodecs.VLenUTF8()) @@ -1201,8 +1202,8 @@ is a short-hand for ``dtype=object, object_codec=numcodecs.VLenUTF8()``, e.g.:: 'Helló, világ!', 'Zdravo svete!', 'เฮลโลเวิลด์'], dtype=object) Variable-length byte strings are also supported via ``dtype=object``. Again an -``object_codec`` is required, which can be one of :class:`numcodecs.VLenBytes` or -:class:`numcodecs.Pickle`. For convenience, ``dtype=bytes`` (or ``dtype=str`` on Python +``object_codec`` is required, which can be one of :class:`numcodecs.vlen.VLenBytes` or +:class:`numcodecs.pickles.Pickle`. 
For convenience, ``dtype=bytes`` (or ``dtype=str`` on Python 2.7) can be used as a short-hand for ``dtype=object, object_codec=numcodecs.VLenBytes()``, e.g.:: @@ -1218,7 +1219,7 @@ e.g.:: b'\xe0\xb9\x80\xe0\xb8\xae\xe0\xb8\xa5\xe0\xb9\x82\xe0\xb8\xa5\xe0\xb9\x80\xe0\xb8\xa7\xe0\xb8\xb4\xe0\xb8\xa5\xe0\xb8\x94\xe0\xb9\x8c'], dtype=object) If you know ahead of time all the possible string values that can occur, you could -also use the :class:`numcodecs.Categorize` codec to encode each unique string value as an +also use the :class:`numcodecs.categorize.Categorize` codec to encode each unique string value as an integer. E.g.:: >>> categorize = numcodecs.Categorize(greetings, dtype=object) @@ -1245,7 +1246,7 @@ The best codec to use will depend on what type of objects are present in the arr At the time of writing there are three codecs available that can serve as a general purpose object codec and support encoding of a mixture of object types: -:class:`numcodecs.JSON`, :class:`numcodecs.MsgPack`. and :class:`numcodecs.Pickle`. +:class:`numcodecs.json.JSON`, :class:`numcodecs.msgpacks.MsgPack`. and :class:`numcodecs.pickles.Pickle`. For example, using the JSON codec:: @@ -1258,7 +1259,7 @@ For example, using the JSON codec:: array([42, 'foo', list(['bar', 'baz', 'qux']), {'a': 1, 'b': 2.2}, None], dtype=object) Not all codecs support encoding of all object types. The -:class:`numcodecs.Pickle` codec is the most flexible, supporting encoding any type +:class:`numcodecs.pickles.Pickle` codec is the most flexible, supporting encoding any type of Python object. However, if you are sharing data with anyone other than yourself, then Pickle is not recommended as it is a potential security risk. This is because malicious code can be embedded within pickled data. The JSON and MsgPack codecs do not have any @@ -1270,7 +1271,7 @@ Ragged arrays If you need to store an array of arrays, where each member array can be of any length and stores the same primitive type (a.k.a. a ragged array), the -:class:`numcodecs.VLenArray` codec can be used, e.g.:: +:class:`numcodecs.vlen.VLenArray` codec can be used, e.g.:: >>> z = zarr.empty(4, dtype=object, object_codec=numcodecs.VLenArray(int)) >>> z From c9532b02c59921065a27c98e94070d0acc5cfca1 Mon Sep 17 00:00:00 2001 From: David Stansby Date: Tue, 31 Oct 2023 11:56:22 +0000 Subject: [PATCH 143/213] Fail doc build on warnings (#1548) * Fail doc build on warnings * Add fail on warning to readthedocs * Disable navigation with keys in docs * Add release note --------- Co-authored-by: Josh Moore --- .readthedocs.yaml | 1 + docs/Makefile | 2 +- docs/conf.py | 1 + docs/index.rst | 1 + docs/release.rst | 5 +++++ 5 files changed, 9 insertions(+), 1 deletion(-) diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 2124f77271..e8783abe0d 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -7,6 +7,7 @@ build: sphinx: configuration: docs/conf.py + fail_on_warning: true python: install: diff --git a/docs/Makefile b/docs/Makefile index f279d820c6..e6adc1ca8c 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -2,7 +2,7 @@ # # You can set these variables from the command line. -SPHINXOPTS = +SPHINXOPTS = -W --keep-going SPHINXBUILD = sphinx-build PAPER = BUILDDIR = _build diff --git a/docs/conf.py b/docs/conf.py index e33d10b2f6..e338348eac 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -144,6 +144,7 @@ }, ], "collapse_navigation": True, + "navigation_with_keys": False, } # Add any paths that contain custom themes here, relative to this directory. 
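The ``SPHINXOPTS = -W --keep-going`` change in the Makefile above is what makes the documentation build fail on warnings: ``-W`` turns Sphinx warnings into errors, while ``--keep-going`` still reports every remaining warning before exiting non-zero instead of stopping at the first one, and ``fail_on_warning: true`` in ``.readthedocs.yaml`` mirrors the same behaviour on Read the Docs. An equivalent one-off invocation from the repository root, assuming the standard ``docs/`` layout used in this repository::

    $ sphinx-build -W --keep-going -b html docs docs/_build/html
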
diff --git a/docs/index.rst b/docs/index.rst index 97f5889ca5..28a2d0e6aa 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -60,6 +60,7 @@ Zarr is a file storage format for chunked, compressed, N-dimensional arrays base +++ .. button-ref:: tutorial + :ref-type: ref :expand: :color: dark :click-parent: diff --git a/docs/release.rst b/docs/release.rst index 02552a8f93..90ac434475 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -21,6 +21,9 @@ Unreleased Docs ~~~~ +* The documentation build now fails if there are any warnings. + By :user:`David Stansby ` :issue:`1548`. + * Add links to ``numcodecs`` docs in the tutorial. By :user:`David Stansby ` :issue:`1535`. @@ -169,10 +172,12 @@ Major changes * Improve Zarr V3 support, adding partial store read/write and storage transformers. Add new features from the `v3 spec `_: + * storage transformers * `get_partial_values` and `set_partial_values` * efficient `get_partial_values` implementation for `FSStoreV3` * sharding storage transformer + By :user:`Jonathan Striebel `; :issue:`1096`, :issue:`1111`. * N5 nows supports Blosc. From e771c51a465c1461e8d810c93b51bcbf97a945a2 Mon Sep 17 00:00:00 2001 From: Sanket Verma Date: Tue, 31 Oct 2023 15:28:13 +0000 Subject: [PATCH 144/213] DOC: Enable offline formats (#1560) * Enable offline builds * Add link for documentation downloads --- .readthedocs.yaml | 2 ++ docs/index.rst | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/.readthedocs.yaml b/.readthedocs.yaml index e8783abe0d..08cac8d78d 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -15,3 +15,5 @@ python: path: . extra_requirements: - docs + +formats: all diff --git a/docs/index.rst b/docs/index.rst index 28a2d0e6aa..06f79b7e7c 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -19,7 +19,7 @@ Zarr-Python **Version**: |version| -**Download documentation**: `Zipped HTML `_ +**Download documentation**: `PDF/Zipped HTML/EPUB `_ **Useful links**: `Installation `_ | From 889d2855f288d2d215cfff5289fe86a54019f5bf Mon Sep 17 00:00:00 2001 From: Sanket Verma Date: Fri, 3 Nov 2023 15:39:31 +0530 Subject: [PATCH 145/213] Remove dependency (#1563) * Remove dependency * Update pyproject.toml --------- Co-authored-by: Josh Moore --- pyproject.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 292bfddded..6869cbf834 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -48,7 +48,6 @@ docs = [ 'sphinx_design', 'sphinx-issues', 'sphinx-copybutton', - 'sphinx-rtd-theme', 'pydata-sphinx-theme', 'numpydoc', 'numcodecs[msgpack]', From 4ef322337cfb1a7db06024b007e7c2f37d24bc03 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 13 Nov 2023 16:25:10 +0100 Subject: [PATCH 146/213] Bump setuptools-scm from 7.1.0 to 8.0.4 (#1562) Bumps [setuptools-scm](https://github.com/pypa/setuptools_scm) from 7.1.0 to 8.0.4. - [Release notes](https://github.com/pypa/setuptools_scm/releases) - [Changelog](https://github.com/pypa/setuptools_scm/blob/main/CHANGELOG.md) - [Commits](https://github.com/pypa/setuptools_scm/compare/v7.1.0...v8.0.4) --- updated-dependencies: - dependency-name: setuptools-scm dependency-type: direct:development update-type: version-update:semver-major ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- requirements_dev_minimal.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements_dev_minimal.txt b/requirements_dev_minimal.txt index 725e95a8af..b189726517 100644 --- a/requirements_dev_minimal.txt +++ b/requirements_dev_minimal.txt @@ -3,6 +3,6 @@ asciitree==0.3.3 fasteners==0.19 numcodecs==0.11.0 msgpack-python==0.5.6 -setuptools-scm==7.1.0 +setuptools-scm==8.0.4 # test requirements pytest==7.4.0 From 8a7b41500c5ce739df3f8bb60ed139e4bbac04d9 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 13 Nov 2023 16:25:20 +0100 Subject: [PATCH 147/213] Bump redis from 4.6.0 to 5.0.1 (#1561) Bumps [redis](https://github.com/redis/redis-py) from 4.6.0 to 5.0.1. - [Release notes](https://github.com/redis/redis-py/releases) - [Changelog](https://github.com/redis/redis-py/blob/master/CHANGES) - [Commits](https://github.com/redis/redis-py/compare/v4.6.0...v5.0.1) --- updated-dependencies: - dependency-name: redis dependency-type: direct:development update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- requirements_dev_optional.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements_dev_optional.txt b/requirements_dev_optional.txt index fda3fcc6e5..584d0d838d 100644 --- a/requirements_dev_optional.txt +++ b/requirements_dev_optional.txt @@ -8,7 +8,7 @@ ipywidgets==8.1.0 # don't let pyup change pinning for azure-storage-blob, need to pin to older # version to get compatibility with azure storage emulator on appveyor (FIXME) azure-storage-blob==12.16.0 # pyup: ignore -redis==4.6.0 +redis==5.0.1 types-redis types-setuptools pymongo==4.5.0 From 503efafe85b82d05a438d3ab49bb64708023a300 Mon Sep 17 00:00:00 2001 From: Sanket Verma Date: Thu, 23 Nov 2023 15:03:15 +0530 Subject: [PATCH 148/213] Update release.rst (#1574) --- docs/release.rst | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/docs/release.rst b/docs/release.rst index 90ac434475..9873d62896 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -21,15 +21,31 @@ Unreleased Docs ~~~~ +* Minor correction and changes in documentation. + By :user:`Sanket Verma ` :issue:`1509`. + +* Fix typo in documentation. + By :user:`Dimitri Papadopoulos Orfanos ` :issue:`1554` + * The documentation build now fails if there are any warnings. By :user:`David Stansby ` :issue:`1548`. * Add links to ``numcodecs`` docs in the tutorial. By :user:`David Stansby ` :issue:`1535`. +* Enable offline formats for documentation builds. + By :user:`Sanket Verma ` :issue:`1551`. + +* Minor tweak to advanced indexing tutorial examples. + By :user:`Ross Barnowski ` :issue:`1550`. + + Maintenance ~~~~~~~~~~~ +* Extend copyright notice to 2023. + By :user:`Jack Kelly ` :issue:`1528`. + * Change occurrence of ``io.open()`` into ``open()``. By :user:`Dimitri Papadopoulos Orfanos ` :issue:`1421`. @@ -42,6 +58,8 @@ Maintenance * Allow ``black`` code formatter to be run with any Python version. By :user:`David Stansby ` :issue:`1549`. +* Remove ``sphinx-rtd-theme`` dependency from ``pyproject.toml``. + By :user:`Sanket Verma ` :issue:`1563`. .. 
_release_2.16.1: From 74764af32d870ecca0e92644e2b4985e568779c5 Mon Sep 17 00:00:00 2001 From: David Stansby Date: Fri, 24 Nov 2023 16:31:31 +0000 Subject: [PATCH 149/213] Automatically document Array members (#1547) * Automatically document Array members * Fix Array link --- .gitignore | 1 + docs/api/core.rst | 23 ++----------------- docs/conf.py | 4 ++++ pyproject.toml | 1 + zarr/core.py | 58 ++--------------------------------------------- 5 files changed, 10 insertions(+), 77 deletions(-) diff --git a/.gitignore b/.gitignore index a6a456636d..7de405d8a0 100644 --- a/.gitignore +++ b/.gitignore @@ -51,6 +51,7 @@ coverage.xml # Sphinx documentation docs/_build/ +docs/_autoapi/ # PyBuilder target/ diff --git a/docs/api/core.rst b/docs/api/core.rst index c4075fdb30..b310460e51 100644 --- a/docs/api/core.rst +++ b/docs/api/core.rst @@ -1,24 +1,5 @@ The Array class (``zarr.core``) =============================== -.. module:: zarr.core -.. autoclass:: Array - - .. automethod:: __getitem__ - .. automethod:: __setitem__ - .. automethod:: get_basic_selection - .. automethod:: set_basic_selection - .. automethod:: get_mask_selection - .. automethod:: set_mask_selection - .. automethod:: get_block_selection - .. automethod:: set_block_selection - .. automethod:: get_coordinate_selection - .. automethod:: set_coordinate_selection - .. automethod:: get_orthogonal_selection - .. automethod:: set_orthogonal_selection - .. automethod:: digest - .. automethod:: hexdigest - .. automethod:: resize - .. automethod:: append - .. automethod:: view - .. automethod:: astype +.. automodapi:: zarr.core + :no-heading: diff --git a/docs/conf.py b/docs/conf.py index e338348eac..318843a9fb 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -42,6 +42,7 @@ "sphinx.ext.autosummary", "sphinx.ext.viewcode", "sphinx.ext.intersphinx", + "sphinx_automodapi.automodapi", "numpydoc", "sphinx_issues", "sphinx_copybutton", @@ -52,6 +53,9 @@ numpydoc_class_members_toctree = False issues_github_path = "zarr-developers/zarr-python" +automodapi_inheritance_diagram = False +automodapi_toctreedirnm = "_autoapi" + # Add any paths that contain templates here, relative to this directory. templates_path = ["_templates"] diff --git a/pyproject.toml b/pyproject.toml index 6869cbf834..22ea19f28f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -45,6 +45,7 @@ jupyter = [ ] docs = [ 'sphinx', + 'sphinx-automodapi', 'sphinx_design', 'sphinx-issues', 'sphinx-copybutton', diff --git a/zarr/core.py b/zarr/core.py index 2177e9055c..c07a31e95f 100644 --- a/zarr/core.py +++ b/zarr/core.py @@ -60,6 +60,8 @@ ensure_ndarray_like, ) +__all__ = ["Array"] + # noinspection PyUnresolvedReferences class Array: @@ -110,62 +112,6 @@ class Array: to users. Use `numpy.empty(())` by default. .. 
versionadded:: 2.13 - - - Attributes - ---------- - store - path - name - read_only - chunk_store - shape - chunks - dtype - compression - compression_opts - dimension_separator - fill_value - order - synchronizer - filters - attrs - size - itemsize - nbytes - nbytes_stored - cdata_shape - nchunks - nchunks_initialized - is_view - info - vindex - oindex - blocks - write_empty_chunks - meta_array - - Methods - ------- - __getitem__ - __setitem__ - get_basic_selection - set_basic_selection - get_orthogonal_selection - set_orthogonal_selection - get_mask_selection - set_mask_selection - get_coordinate_selection - set_coordinate_selection - get_block_selection - set_block_selection - digest - hexdigest - resize - append - view - astype - """ def __init__( From b93860a75760dced732273747871fa5502c310e3 Mon Sep 17 00:00:00 2001 From: Ross Barnowski Date: Fri, 24 Nov 2023 09:42:25 -0800 Subject: [PATCH 150/213] DOC: Minor tweak to advanced indexing example in tutorial (#1550) * DOC: Update advanced indexing example. Suggestion to modify the advanced indexing example so that the indices and the values in the array differ. * DOC: Fix malformed doctest comment. * DOC: Rm reference to virtualenv from contributor guide. --------- Co-authored-by: Davis Bennett --- docs/contributing.rst | 6 +++--- docs/tutorial.rst | 26 +++++++++++++------------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/docs/contributing.rst b/docs/contributing.rst index 0420535093..91606b7276 100644 --- a/docs/contributing.rst +++ b/docs/contributing.rst @@ -85,9 +85,9 @@ Creating a development environment To work with the Zarr source code, it is recommended to set up a Python virtual environment and install all Zarr dependencies using the same versions as are used by the core developers and continuous integration services. Assuming you have a Python -3 interpreter already installed, and have also installed the virtualenv package, and -you have cloned the Zarr source code and your current working directory is the root of -the repository, you can do something like the following:: +3 interpreter already installed, and you have cloned the Zarr source code and your +current working directory is the root of the repository, you can do something like +the following:: $ mkdir -p ~/pyenv/zarr-dev $ python -m venv ~/pyenv/zarr-dev diff --git a/docs/tutorial.rst b/docs/tutorial.rst index e563c16040..4099bac1c8 100644 --- a/docs/tutorial.rst +++ b/docs/tutorial.rst @@ -480,17 +480,17 @@ Indexing with coordinate arrays Items from a Zarr array can be extracted by providing an integer array of coordinates. E.g.:: - >>> z = zarr.array(np.arange(10)) + >>> z = zarr.array(np.arange(10) ** 2) >>> z[:] - array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) - >>> z.get_coordinate_selection([1, 4]) - array([1, 4]) + array([ 0, 1, 4, 9, 16, 25, 36, 49, 64, 81]) + >>> z.get_coordinate_selection([2, 5]) + array([ 4, 25]) Coordinate arrays can also be used to update data, e.g.:: - >>> z.set_coordinate_selection([1, 4], [-1, -2]) + >>> z.set_coordinate_selection([2, 5], [-1, -2]) >>> z[:] - array([ 0, -1, 2, 3, -2, 5, 6, 7, 8, 9]) + array([ 0, 1, -1, 9, 16, -2, 36, 49, 64, 81]) For multidimensional arrays, coordinates must be provided for each dimension, e.g.:: @@ -534,17 +534,17 @@ Indexing with a mask array Items can also be extracted by providing a Boolean mask. 
E.g.:: - >>> z = zarr.array(np.arange(10)) + >>> z = zarr.array(np.arange(10) ** 2) >>> z[:] - array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) + array([ 0, 1, 4, 9, 16, 25, 36, 49, 64, 81]) >>> sel = np.zeros_like(z, dtype=bool) - >>> sel[1] = True - >>> sel[4] = True + >>> sel[2] = True + >>> sel[5] = True >>> z.get_mask_selection(sel) - array([1, 4]) + array([ 4, 25]) >>> z.set_mask_selection(sel, [-1, -2]) >>> z[:] - array([ 0, -1, 2, 3, -2, 5, 6, 7, 8, 9]) + array([ 0, 1, -1, 9, 16, -2, 36, 49, 64, 81]) Here's a multidimensional example:: @@ -986,7 +986,7 @@ It is also possible to initialize the filesystem outside of Zarr and then pass it through. This requires creating an :class:`zarr.storage.FSStore` object explicitly. For example:: - >>> import s3fs * doctest: +SKIP + >>> import s3fs # doctest: +SKIP >>> fs = s3fs.S3FileSystem(anon=True) # doctest: +SKIP >>> store = zarr.storage.FSStore('/zarr-demo/store', fs=fs) # doctest: +SKIP >>> g = zarr.open_group(store) # doctest: +SKIP From bdbecc7e67b0bd55b99662d213aa066a46195f1b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 24 Nov 2023 21:11:52 +0100 Subject: [PATCH 151/213] Bump h5py from 3.9.0 to 3.10.0 (#1571) Bumps [h5py](https://github.com/h5py/h5py) from 3.9.0 to 3.10.0. - [Release notes](https://github.com/h5py/h5py/releases) - [Changelog](https://github.com/h5py/h5py/blob/master/docs/release_guide.rst) - [Commits](https://github.com/h5py/h5py/compare/3.9.0...3.10.0) --- updated-dependencies: - dependency-name: h5py dependency-type: direct:development update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- requirements_dev_optional.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements_dev_optional.txt b/requirements_dev_optional.txt index 584d0d838d..ff45c25944 100644 --- a/requirements_dev_optional.txt +++ b/requirements_dev_optional.txt @@ -17,7 +17,7 @@ coverage pytest-cov==4.1.0 pytest-doctestplus==1.0.0 pytest-timeout==2.1.0 -h5py==3.9.0 +h5py==3.10.0 fsspec==2023.6.0 s3fs==2023.6.0 moto[server]>=4.0.8 From d641bbceb67bdf915e0afd2259d2fe6043a8c71b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 24 Nov 2023 22:31:34 +0100 Subject: [PATCH 152/213] Bump fsspec from 2023.6.0 to 2023.10.0 (#1570) * Bump fsspec from 2023.6.0 to 2023.10.0 Bumps [fsspec](https://github.com/fsspec/filesystem_spec) from 2023.6.0 to 2023.10.0. - [Commits](https://github.com/fsspec/filesystem_spec/compare/2023.6.0...2023.10.0) --- updated-dependencies: - dependency-name: fsspec dependency-type: direct:development update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] * Bump s3fs as well --------- Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Josh Moore --- requirements_dev_optional.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements_dev_optional.txt b/requirements_dev_optional.txt index ff45c25944..bf2965a8fa 100644 --- a/requirements_dev_optional.txt +++ b/requirements_dev_optional.txt @@ -18,6 +18,6 @@ pytest-cov==4.1.0 pytest-doctestplus==1.0.0 pytest-timeout==2.1.0 h5py==3.10.0 -fsspec==2023.6.0 -s3fs==2023.6.0 +fsspec==2023.10.0 +s3fs==2023.10.0 moto[server]>=4.0.8 From 87b034c60d98514778cbe75cd94fc0ce7fe6990c Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 27 Nov 2023 11:16:15 +0100 Subject: [PATCH 153/213] Bump pytest-timeout from 2.1.0 to 2.2.0 (#1577) Bumps [pytest-timeout](https://github.com/pytest-dev/pytest-timeout) from 2.1.0 to 2.2.0. - [Commits](https://github.com/pytest-dev/pytest-timeout/compare/2.1.0...2.2.0) --- updated-dependencies: - dependency-name: pytest-timeout dependency-type: direct:development update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- requirements_dev_optional.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements_dev_optional.txt b/requirements_dev_optional.txt index bf2965a8fa..f3ea80a546 100644 --- a/requirements_dev_optional.txt +++ b/requirements_dev_optional.txt @@ -16,7 +16,7 @@ pymongo==4.5.0 coverage pytest-cov==4.1.0 pytest-doctestplus==1.0.0 -pytest-timeout==2.1.0 +pytest-timeout==2.2.0 h5py==3.10.0 fsspec==2023.10.0 s3fs==2023.10.0 From 5696991f7552c21f53ab80bedd9aba33e0c6c78e Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 27 Nov 2023 11:16:34 +0100 Subject: [PATCH 154/213] Bump pytest from 7.4.0 to 7.4.3 (#1576) Bumps [pytest](https://github.com/pytest-dev/pytest) from 7.4.0 to 7.4.3. - [Release notes](https://github.com/pytest-dev/pytest/releases) - [Changelog](https://github.com/pytest-dev/pytest/blob/main/CHANGELOG.rst) - [Commits](https://github.com/pytest-dev/pytest/compare/7.4.0...7.4.3) --- updated-dependencies: - dependency-name: pytest dependency-type: direct:development update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- requirements_dev_minimal.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements_dev_minimal.txt b/requirements_dev_minimal.txt index b189726517..e2be6eb825 100644 --- a/requirements_dev_minimal.txt +++ b/requirements_dev_minimal.txt @@ -5,4 +5,4 @@ numcodecs==0.11.0 msgpack-python==0.5.6 setuptools-scm==8.0.4 # test requirements -pytest==7.4.0 +pytest==7.4.3 From 039d3ba7ac72741b1493dbe8c5649f3cc7e1aab9 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 27 Nov 2023 11:18:19 +0100 Subject: [PATCH 155/213] Bump conda-incubator/setup-miniconda from 2.2.0 to 2.3.0 (#1575) Bumps [conda-incubator/setup-miniconda](https://github.com/conda-incubator/setup-miniconda) from 2.2.0 to 2.3.0. 
- [Release notes](https://github.com/conda-incubator/setup-miniconda/releases) - [Changelog](https://github.com/conda-incubator/setup-miniconda/blob/main/CHANGELOG.md) - [Commits](https://github.com/conda-incubator/setup-miniconda/compare/v2.2.0...v2.3.0) --- updated-dependencies: - dependency-name: conda-incubator/setup-miniconda dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/minimal.yml | 2 +- .github/workflows/python-package.yml | 2 +- .github/workflows/windows-testing.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/minimal.yml b/.github/workflows/minimal.yml index 63bc97d157..2c0cd45ca9 100644 --- a/.github/workflows/minimal.yml +++ b/.github/workflows/minimal.yml @@ -15,7 +15,7 @@ jobs: steps: - uses: actions/checkout@v4 - name: Setup Miniconda - uses: conda-incubator/setup-miniconda@v2.2.0 + uses: conda-incubator/setup-miniconda@v2.3.0 with: channels: conda-forge environment-file: environment.yml diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index cdf230bc7c..aa7158f1cf 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -42,7 +42,7 @@ jobs: with: fetch-depth: 0 - name: Setup Miniconda - uses: conda-incubator/setup-miniconda@v2.2.0 + uses: conda-incubator/setup-miniconda@v2.3.0 with: channels: conda-forge python-version: ${{ matrix.python-version }} diff --git a/.github/workflows/windows-testing.yml b/.github/workflows/windows-testing.yml index 3afa8c467e..78945e97aa 100644 --- a/.github/workflows/windows-testing.yml +++ b/.github/workflows/windows-testing.yml @@ -21,7 +21,7 @@ jobs: - uses: actions/checkout@v4 with: fetch-depth: 0 - - uses: conda-incubator/setup-miniconda@v2.2.0 + - uses: conda-incubator/setup-miniconda@v2.3.0 with: auto-update-conda: true python-version: ${{ matrix.python-version }} From d40bf1270ceb439b5d4229973197cd2f5e8976ea Mon Sep 17 00:00:00 2001 From: Sanket Verma Date: Wed, 29 Nov 2023 15:01:53 +0530 Subject: [PATCH 156/213] Update CoC to the latest version (#1572) * Update CoC * Update email address * Remove CODE_OF_CONDUCT.md --- CODE_OF_CONDUCT.md | 46 ---------------------------------------------- 1 file changed, 46 deletions(-) delete mode 100644 CODE_OF_CONDUCT.md diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md deleted file mode 100644 index f07035c69f..0000000000 --- a/CODE_OF_CONDUCT.md +++ /dev/null @@ -1,46 +0,0 @@ -# Contributor Covenant Code of Conduct - -## Our Pledge - -In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation. 
- -## Our Standards - -Examples of behavior that contributes to creating a positive environment include: - -* Using welcoming and inclusive language -* Being respectful of differing viewpoints and experiences -* Gracefully accepting constructive criticism -* Focusing on what is best for the community -* Showing empathy towards other community members - -Examples of unacceptable behavior by participants include: - -* The use of sexualized language or imagery and unwelcome sexual attention or advances -* Trolling, insulting/derogatory comments, and personal or political attacks -* Public or private harassment -* Publishing others' private information, such as a physical or electronic address, without explicit permission -* Other conduct which could reasonably be considered inappropriate in a professional setting - -## Our Responsibilities - -Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. - -Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. - -## Scope - -This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers. - -## Enforcement - -Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at zarr.conduct@gmail.com. The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. - -Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership. - -## Attribution - -This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at [https://www.contributor-covenant.org/version/1/4][version] - -[homepage]: https://www.contributor-covenant.org -[version]: https://www.contributor-covenant.org/version/1/4 From ea90564e5f5276a37df06f7d3dadf90faa210d7f Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 4 Dec 2023 09:13:27 +0100 Subject: [PATCH 157/213] Bump pypa/gh-action-pypi-publish from 1.8.10 to 1.8.11 (#1586) Bumps [pypa/gh-action-pypi-publish](https://github.com/pypa/gh-action-pypi-publish) from 1.8.10 to 1.8.11. - [Release notes](https://github.com/pypa/gh-action-pypi-publish/releases) - [Commits](https://github.com/pypa/gh-action-pypi-publish/compare/v1.8.10...v1.8.11) --- updated-dependencies: - dependency-name: pypa/gh-action-pypi-publish dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/releases.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/releases.yml b/.github/workflows/releases.yml index c08bfc6677..3bd25bfbf7 100644 --- a/.github/workflows/releases.yml +++ b/.github/workflows/releases.yml @@ -64,7 +64,7 @@ jobs: with: name: releases path: dist - - uses: pypa/gh-action-pypi-publish@v1.8.10 + - uses: pypa/gh-action-pypi-publish@v1.8.11 with: user: __token__ password: ${{ secrets.pypi_password }} From 79e80b36b14c50c6d522f0fe0caaee0bbfbce1a1 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 4 Dec 2023 09:13:52 +0100 Subject: [PATCH 158/213] Bump conda-incubator/setup-miniconda from 2.3.0 to 3.0.1 (#1587) Bumps [conda-incubator/setup-miniconda](https://github.com/conda-incubator/setup-miniconda) from 2.3.0 to 3.0.1. - [Release notes](https://github.com/conda-incubator/setup-miniconda/releases) - [Changelog](https://github.com/conda-incubator/setup-miniconda/blob/main/CHANGELOG.md) - [Commits](https://github.com/conda-incubator/setup-miniconda/compare/v2.3.0...v3.0.1) --- updated-dependencies: - dependency-name: conda-incubator/setup-miniconda dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/minimal.yml | 2 +- .github/workflows/python-package.yml | 2 +- .github/workflows/windows-testing.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/minimal.yml b/.github/workflows/minimal.yml index 2c0cd45ca9..2cc0213781 100644 --- a/.github/workflows/minimal.yml +++ b/.github/workflows/minimal.yml @@ -15,7 +15,7 @@ jobs: steps: - uses: actions/checkout@v4 - name: Setup Miniconda - uses: conda-incubator/setup-miniconda@v2.3.0 + uses: conda-incubator/setup-miniconda@v3.0.1 with: channels: conda-forge environment-file: environment.yml diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index aa7158f1cf..0c3c49d78d 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -42,7 +42,7 @@ jobs: with: fetch-depth: 0 - name: Setup Miniconda - uses: conda-incubator/setup-miniconda@v2.3.0 + uses: conda-incubator/setup-miniconda@v3.0.1 with: channels: conda-forge python-version: ${{ matrix.python-version }} diff --git a/.github/workflows/windows-testing.yml b/.github/workflows/windows-testing.yml index 78945e97aa..eeee5b704d 100644 --- a/.github/workflows/windows-testing.yml +++ b/.github/workflows/windows-testing.yml @@ -21,7 +21,7 @@ jobs: - uses: actions/checkout@v4 with: fetch-depth: 0 - - uses: conda-incubator/setup-miniconda@v2.3.0 + - uses: conda-incubator/setup-miniconda@v3.0.1 with: auto-update-conda: true python-version: ${{ matrix.python-version }} From 25dbeeda7d3a300569b358c157c5bd1c02ddaec3 Mon Sep 17 00:00:00 2001 From: Janick Martinez Esturo Date: Tue, 5 Dec 2023 22:31:03 +0100 Subject: [PATCH 159/213] * Cache result of FSStore._fsspec_installed (#1581) Prevent runtime-overhead in doing this check multiple times --- docs/release.rst | 3 +++ zarr/storage.py | 2 ++ 2 files changed, 5 insertions(+) diff --git a/docs/release.rst b/docs/release.rst index 9873d62896..842c36e290 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -43,6 +43,9 @@ Docs Maintenance 
~~~~~~~~~~~ +* Cache result of ``FSStore._fsspec_installed()``. + By :user:`Janick Martinez Esturo ` :issue:`1581`. + * Extend copyright notice to 2023. By :user:`Jack Kelly ` :issue:`1528`. diff --git a/zarr/storage.py b/zarr/storage.py index b36f804ebd..a7426e5345 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -28,6 +28,7 @@ import zipfile from collections import OrderedDict from collections.abc import MutableMapping +from functools import lru_cache from os import scandir from pickle import PicklingError from threading import Lock, RLock @@ -1540,6 +1541,7 @@ def clear(self): self.map.clear() @classmethod + @lru_cache(maxsize=None) def _fsspec_installed(cls): """Returns true if fsspec is installed""" import importlib.util From 8579e21c80927afbc26153153ca8eedc91a6ff6f Mon Sep 17 00:00:00 2001 From: David Stansby Date: Thu, 7 Dec 2023 21:15:55 +0000 Subject: [PATCH 160/213] Bump version of black in pre-commit (#1559) --- .pre-commit-config.yaml | 2 +- bench/compress_normal.py | 1 - zarr/_storage/absstore.py | 3 +- zarr/_storage/store.py | 1 - zarr/_storage/v3.py | 1 - zarr/attrs.py | 6 ---- zarr/convenience.py | 20 +++-------- zarr/creation.py | 2 -- zarr/hierarchy.py | 12 +++---- zarr/indexing.py | 25 -------------- zarr/meta.py | 1 - zarr/n5.py | 57 -------------------------------- zarr/storage.py | 5 --- zarr/tests/test_attrs.py | 6 ---- zarr/tests/test_convenience.py | 7 ---- zarr/tests/test_creation.py | 9 ----- zarr/tests/test_dim_separator.py | 1 - zarr/tests/test_filters.py | 12 ------- zarr/tests/test_hierarchy.py | 3 -- zarr/tests/test_indexing.py | 35 -------------------- zarr/tests/test_info.py | 1 - zarr/tests/test_meta.py | 19 ----------- zarr/tests/test_storage.py | 20 ----------- zarr/tests/test_storage_v3.py | 10 ------ zarr/tests/test_sync.py | 2 -- zarr/tests/test_util.py | 2 -- zarr/util.py | 6 ---- 27 files changed, 11 insertions(+), 258 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f22dc39832..e985d24000 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -14,7 +14,7 @@ repos: # Respect `exclude` and `extend-exclude` settings. 
args: ["--force-exclude"] - repo: https://github.com/psf/black - rev: 22.12.0 + rev: 23.10.1 hooks: - id: black - repo: https://github.com/codespell-project/codespell diff --git a/bench/compress_normal.py b/bench/compress_normal.py index 9f1655541c..803d54b76b 100644 --- a/bench/compress_normal.py +++ b/bench/compress_normal.py @@ -8,7 +8,6 @@ from zarr import blosc if __name__ == "__main__": - sys.path.insert(0, "..") # setup diff --git a/zarr/_storage/absstore.py b/zarr/_storage/absstore.py index f62529f096..c9a113148c 100644 --- a/zarr/_storage/absstore.py +++ b/zarr/_storage/absstore.py @@ -87,7 +87,7 @@ def __init__( "https://{}.blob.core.windows.net/".format(account_name), container, credential=account_key, - **blob_service_kwargs + **blob_service_kwargs, ) self.client = client @@ -240,7 +240,6 @@ def __setitem__(self, key, value): super().__setitem__(key, value) def rmdir(self, path=None): - if not path: # Currently allowing clear to delete everything as in v2 diff --git a/zarr/_storage/store.py b/zarr/_storage/store.py index 8daedae48f..80e4ad8f75 100644 --- a/zarr/_storage/store.py +++ b/zarr/_storage/store.py @@ -629,7 +629,6 @@ def _rmdir_from_keys(store: StoreLike, path: Optional[str] = None) -> None: def _rmdir_from_keys_v3(store: StoreV3, path: str = "") -> None: - meta_dir = meta_root + path meta_dir = meta_dir.rstrip("/") _rmdir_from_keys(store, meta_dir) diff --git a/zarr/_storage/v3.py b/zarr/_storage/v3.py index 00dc085dac..32e78f7a34 100644 --- a/zarr/_storage/v3.py +++ b/zarr/_storage/v3.py @@ -118,7 +118,6 @@ def _get_files_and_dirs_from_path(store, path): class FSStoreV3(FSStore, StoreV3): - # FSStoreV3 doesn't use this (FSStore uses it within _normalize_key) _META_KEYS = () diff --git a/zarr/attrs.py b/zarr/attrs.py index 01fc617b3c..e967c5b853 100644 --- a/zarr/attrs.py +++ b/zarr/attrs.py @@ -26,7 +26,6 @@ class Attributes(MutableMapping): """ def __init__(self, store, key=".zattrs", read_only=False, cache=True, synchronizer=None): - self._version = getattr(store, "_store_version", 2) _Store = Store if self._version == 2 else StoreV3 self.store = _Store._ensure_store(store) @@ -73,7 +72,6 @@ def __getitem__(self, item): return self.asdict()[item] def _write_op(self, f, *args, **kwargs): - # guard condition if self.read_only: raise PermissionError("attributes are read-only") @@ -89,7 +87,6 @@ def __setitem__(self, item, value): self._write_op(self._setitem_nosync, item, value) def _setitem_nosync(self, item, value): - # load existing data d = self._get_nosync() @@ -106,7 +103,6 @@ def __delitem__(self, item): self._write_op(self._delitem_nosync, item) def _delitem_nosync(self, key): - # load existing data d = self._get_nosync() @@ -128,7 +124,6 @@ def put(self, d): self._write_op(self._put_nosync, dict(attributes=d)) def _put_nosync(self, d): - d_to_check = d if self._version == 2 else d["attributes"] if not all(isinstance(item, str) for item in d_to_check): # TODO: Raise an error for non-string keys @@ -178,7 +173,6 @@ def update(self, *args, **kwargs): self._write_op(self._update_nosync, *args, **kwargs) def _update_nosync(self, *args, **kwargs): - # load existing data d = self._get_nosync() diff --git a/zarr/convenience.py b/zarr/convenience.py index 0ee8a8d323..9c0deeea47 100644 --- a/zarr/convenience.py +++ b/zarr/convenience.py @@ -675,10 +675,8 @@ def copy_store( # setup logging with _LogWriter(log) as log: - # iterate over source keys for source_key in sorted(source.keys()): - # filter to keys under source path if source_store_version == 2: if not 
source_key.startswith(source_path): @@ -757,7 +755,7 @@ def copy( log=None, if_exists="raise", dry_run=False, - **create_kws + **create_kws, ): """Copy the `source` array or group into the `dest` group. @@ -878,7 +876,6 @@ def copy( # setup logging with _LogWriter(log) as log: - # do the copying n_copied, n_skipped, n_bytes_copied = _copy( log, @@ -890,7 +887,7 @@ def copy( without_attrs=without_attrs, if_exists=if_exists, dry_run=dry_run, - **create_kws + **create_kws, ) # log a final message with a summary of what happened @@ -948,12 +945,10 @@ def _copy(log, source, dest, name, root, shallow, without_attrs, if_exists, dry_ # take action if do_copy: - # log a message about what we're going to do log("copy {} {} {}".format(source.name, source.shape, source.dtype)) if not dry_run: - # clear the way if exists: del dest[name] @@ -1038,12 +1033,10 @@ def _copy(log, source, dest, name, root, shallow, without_attrs, if_exists, dry_ # take action if do_copy: - # log action log("copy {}".format(source.name)) if not dry_run: - # clear the way if exists_array: del dest[name] @@ -1056,7 +1049,6 @@ def _copy(log, source, dest, name, root, shallow, without_attrs, if_exists, dry_ grp.attrs.update(source.attrs) else: - # setup for dry run without creating any groups in the # destination if dest is not None: @@ -1076,7 +1068,7 @@ def _copy(log, source, dest, name, root, shallow, without_attrs, if_exists, dry_ without_attrs=without_attrs, if_exists=if_exists, dry_run=dry_run, - **create_kws + **create_kws, ) n_copied += c n_skipped += s @@ -1099,7 +1091,7 @@ def copy_all( log=None, if_exists="raise", dry_run=False, - **create_kws + **create_kws, ): """Copy all children of the `source` group into the `dest` group. @@ -1189,7 +1181,6 @@ def copy_all( # setup logging with _LogWriter(log) as log: - for k in source.keys(): c, s, b = _copy( log, @@ -1201,7 +1192,7 @@ def copy_all( without_attrs=without_attrs, if_exists=if_exists, dry_run=dry_run, - **create_kws + **create_kws, ) n_copied += c n_skipped += s @@ -1262,7 +1253,6 @@ def is_zarr_key(key): return key.endswith(".zarray") or key.endswith(".zgroup") or key.endswith(".zattrs") else: - assert_zarr_v3_api_available() sfx = _get_metadata_suffix(store) # type: ignore diff --git a/zarr/creation.py b/zarr/creation.py index 726d0b5932..6227f90b7b 100644 --- a/zarr/creation.py +++ b/zarr/creation.py @@ -234,7 +234,6 @@ def create( def _kwargs_compat(compressor, fill_value, kwargs): - # to be compatible with h5py, as well as backwards-compatible with Zarr # 1.x, accept 'compression' and 'compression_opts' keyword arguments @@ -697,7 +696,6 @@ def open_array( def _like_args(a, kwargs): - shape, chunks = _get_shape_chunks(a) if shape is not None: kwargs.setdefault("shape", shape) diff --git a/zarr/hierarchy.py b/zarr/hierarchy.py index 3361969f08..1cfea89c81 100644 --- a/zarr/hierarchy.py +++ b/zarr/hierarchy.py @@ -145,7 +145,7 @@ def __init__( synchronizer=None, zarr_version=None, *, - meta_array=None + meta_array=None, ): store: BaseStore = _normalize_store_arg(store, zarr_version=zarr_version) if zarr_version is None: @@ -919,7 +919,6 @@ def tree(self, expand=False, level=None): return TreeViewer(self, expand=expand, level=level) def _write_op(self, f, *args, **kwargs): - # guard condition if self._read_only: raise ReadOnlyError() @@ -1094,7 +1093,6 @@ def create_dataset(self, name, **kwargs): return self._write_op(self._create_dataset_nosync, name, **kwargs) def _create_dataset_nosync(self, name, data=None, **kwargs): - assert "mode" not in kwargs path = 
self._item_path(name) @@ -1138,11 +1136,9 @@ def require_dataset(self, name, shape, dtype=None, exact=False, **kwargs): ) def _require_dataset_nosync(self, name, shape, dtype=None, exact=False, **kwargs): - path = self._item_path(name) if contains_array(self._store, path): - # array already exists at path, validate that it is the right shape and type synchronizer = kwargs.get("synchronizer", self._synchronizer) @@ -1235,7 +1231,7 @@ def _full_nosync(self, name, fill_value, **kwargs): path=path, chunk_store=self._chunk_store, fill_value=fill_value, - **kwargs + **kwargs, ) def array(self, name, data, **kwargs): @@ -1361,7 +1357,7 @@ def group( path=None, *, zarr_version=None, - meta_array=None + meta_array=None, ): """Create a group. @@ -1452,7 +1448,7 @@ def open_group( storage_options=None, *, zarr_version=None, - meta_array=None + meta_array=None, ): """Open a group using file-mode-like semantics. diff --git a/zarr/indexing.py b/zarr/indexing.py index 487cc8b9d9..3042147ebb 100644 --- a/zarr/indexing.py +++ b/zarr/indexing.py @@ -111,7 +111,6 @@ def is_pure_orthogonal_indexing(selection, ndim): def normalize_integer_selection(dim_sel, dim_len): - # normalize type to int dim_sel = int(dim_sel) @@ -145,7 +144,6 @@ def normalize_integer_selection(dim_sel, dim_len): class IntDimIndexer: def __init__(self, dim_sel, dim_len, dim_chunk_len): - # normalize dim_sel = normalize_integer_selection(dim_sel, dim_len) @@ -169,7 +167,6 @@ def ceildiv(a, b): class SliceDimIndexer: def __init__(self, dim_sel, dim_len, dim_chunk_len): - # normalize self.start, self.stop, self.step = dim_sel.indices(dim_len) if self.step < 1: @@ -182,14 +179,12 @@ def __init__(self, dim_sel, dim_len, dim_chunk_len): self.nchunks = ceildiv(self.dim_len, self.dim_chunk_len) def __iter__(self): - # figure out the range of chunks we need to visit dim_chunk_ix_from = self.start // self.dim_chunk_len dim_chunk_ix_to = ceildiv(self.stop, self.dim_chunk_len) # iterate over chunks in range for dim_chunk_ix in range(dim_chunk_ix_from, dim_chunk_ix_to): - # compute offsets for chunk within overall array dim_offset = dim_chunk_ix * self.dim_chunk_len dim_limit = min(self.dim_len, (dim_chunk_ix + 1) * self.dim_chunk_len) @@ -237,7 +232,6 @@ def check_selection_length(selection, shape): def replace_ellipsis(selection, shape): - selection = ensure_tuple(selection) # count number of ellipsis present @@ -330,14 +324,12 @@ def is_basic_selection(selection): # noinspection PyProtectedMember class BasicIndexer: def __init__(self, selection, array): - # handle ellipsis selection = replace_ellipsis(selection, array._shape) # setup per-dimension indexers dim_indexers = [] for dim_sel, dim_len, dim_chunk_len in zip(selection, array._shape, array._chunks): - if is_integer(dim_sel): dim_indexer = IntDimIndexer(dim_sel, dim_len, dim_chunk_len) @@ -358,7 +350,6 @@ def __init__(self, selection, array): def __iter__(self): for dim_projections in itertools.product(*self.dim_indexers): - chunk_coords = tuple(p.dim_chunk_ix for p in dim_projections) chunk_selection = tuple(p.dim_chunk_sel for p in dim_projections) out_selection = tuple( @@ -370,7 +361,6 @@ def __iter__(self): class BoolArrayDimIndexer: def __init__(self, dim_sel, dim_len, dim_chunk_len): - # check number of dimensions if not is_bool_array(dim_sel, 1): raise IndexError( @@ -402,10 +392,8 @@ def __init__(self, dim_sel, dim_len, dim_chunk_len): self.dim_chunk_ixs = np.nonzero(self.chunk_nitems)[0] def __iter__(self): - # iterate over chunks with at least one item for dim_chunk_ix in 
self.dim_chunk_ixs: - # find region in chunk dim_offset = dim_chunk_ix * self.dim_chunk_len dim_chunk_sel = self.dim_sel[dim_offset : dim_offset + self.dim_chunk_len] @@ -472,7 +460,6 @@ def __init__( boundscheck=True, order=Order.UNKNOWN, ): - # ensure 1d array dim_sel = np.asanyarray(dim_sel) if not is_integer_array(dim_sel, 1): @@ -526,9 +513,7 @@ def __init__( self.chunk_nitems_cumsum = np.cumsum(self.chunk_nitems) def __iter__(self): - for dim_chunk_ix in self.dim_chunk_ixs: - # find region in output if dim_chunk_ix == 0: start = 0 @@ -602,7 +587,6 @@ def oindex_set(a, selection, value): # noinspection PyProtectedMember class OrthogonalIndexer: def __init__(self, selection, array): - # handle ellipsis selection = replace_ellipsis(selection, array._shape) @@ -612,7 +596,6 @@ def __init__(self, selection, array): # setup per-dimension indexers dim_indexers = [] for dim_sel, dim_len, dim_chunk_len in zip(selection, array._shape, array._chunks): - if is_integer(dim_sel): dim_indexer = IntDimIndexer(dim_sel, dim_len, dim_chunk_len) @@ -649,7 +632,6 @@ def __init__(self, selection, array): def __iter__(self): for dim_projections in itertools.product(*self.dim_indexers): - chunk_coords = tuple(p.dim_chunk_ix for p in dim_projections) chunk_selection = tuple(p.dim_chunk_sel for p in dim_projections) out_selection = tuple( @@ -658,7 +640,6 @@ def __iter__(self): # handle advanced indexing arrays orthogonally if self.is_advanced: - # N.B., numpy doesn't support orthogonal indexing directly as yet, # so need to work around via np.ix_. Also np.ix_ does not support a # mixture of arrays and slices or integers, so need to convert slices @@ -692,7 +673,6 @@ def __setitem__(self, selection, value): # noinspection PyProtectedMember class BlockIndexer: def __init__(self, selection, array): - # handle ellipsis selection = replace_ellipsis(selection, array._shape) @@ -794,7 +774,6 @@ def is_mask_selection(selection, array): # noinspection PyProtectedMember class CoordinateIndexer: def __init__(self, selection, array): - # some initial normalization selection = ensure_tuple(selection) selection = tuple([i] if is_integer(i) else i for i in selection) @@ -810,7 +789,6 @@ def __init__(self, selection, array): # handle wraparound, boundscheck for dim_sel, dim_len in zip(selection, array.shape): - # handle wraparound wraparound_indices(dim_sel, dim_len) @@ -861,10 +839,8 @@ def __init__(self, selection, array): self.chunk_mixs = np.unravel_index(self.chunk_rixs, array._cdata_shape) def __iter__(self): - # iterate over chunks for i, chunk_rix in enumerate(self.chunk_rixs): - chunk_coords = tuple(m[i] for m in self.chunk_mixs) if chunk_rix == 0: start = 0 @@ -891,7 +867,6 @@ def __iter__(self): # noinspection PyProtectedMember class MaskIndexer(CoordinateIndexer): def __init__(self, selection, array): - # some initial normalization selection = ensure_tuple(selection) selection = replace_lists(selection) diff --git a/zarr/meta.py b/zarr/meta.py index 48791ddf17..f23889f3ea 100644 --- a/zarr/meta.py +++ b/zarr/meta.py @@ -89,7 +89,6 @@ class Metadata2: @classmethod def parse_metadata(cls, s: Union[MappingType, bytes, str]) -> MappingType[str, Any]: - # Here we allow that a store may return an already-parsed metadata object, # or a string of JSON that we will parse here. 
We allow for an already-parsed # object to accommodate a consolidated metadata store, where all the metadata for diff --git a/zarr/n5.py b/zarr/n5.py index 7e73905527..44b44e69e2 100644 --- a/zarr/n5.py +++ b/zarr/n5.py @@ -72,21 +72,18 @@ class N5Store(NestedDirectoryStore): def __getitem__(self, key: str) -> bytes: if key.endswith(zarr_group_meta_key): - key_new = key.replace(zarr_group_meta_key, n5_attrs_key) value = group_metadata_to_zarr(self._load_n5_attrs(key_new)) return json_dumps(value) elif key.endswith(zarr_array_meta_key): - key_new = key.replace(zarr_array_meta_key, n5_attrs_key) top_level = key == zarr_array_meta_key value = array_metadata_to_zarr(self._load_n5_attrs(key_new), top_level=top_level) return json_dumps(value) elif key.endswith(zarr_attrs_key): - key_new = key.replace(zarr_attrs_key, n5_attrs_key) value = attrs_to_zarr(self._load_n5_attrs(key_new)) @@ -104,9 +101,7 @@ def __getitem__(self, key: str) -> bytes: return super().__getitem__(key_new) def __setitem__(self, key: str, value: Any): - if key.endswith(zarr_group_meta_key): - key_new = key.replace(zarr_group_meta_key, n5_attrs_key) n5_attrs = self._load_n5_attrs(key_new) @@ -115,7 +110,6 @@ def __setitem__(self, key: str, value: Any): value = json_dumps(n5_attrs) elif key.endswith(zarr_array_meta_key): - key_new = key.replace(zarr_array_meta_key, n5_attrs_key) top_level = key == zarr_array_meta_key n5_attrs = self._load_n5_attrs(key_new) @@ -123,7 +117,6 @@ def __setitem__(self, key: str, value: Any): value = json_dumps(n5_attrs) elif key.endswith(zarr_attrs_key): - key_new = key.replace(zarr_attrs_key, n5_attrs_key) n5_attrs = self._load_n5_attrs(key_new) @@ -166,9 +159,7 @@ def __delitem__(self, key: str): super().__delitem__(key_new) def __contains__(self, key): - if key.endswith(zarr_group_meta_key): - key_new = key.replace(zarr_group_meta_key, n5_attrs_key) if key_new not in self: return False @@ -176,18 +167,15 @@ def __contains__(self, key): return "dimensions" not in self._load_n5_attrs(key_new) elif key.endswith(zarr_array_meta_key): - key_new = key.replace(zarr_array_meta_key, n5_attrs_key) # array if attributes contain 'dimensions' return "dimensions" in self._load_n5_attrs(key_new) elif key.endswith(zarr_attrs_key): - key_new = key.replace(zarr_attrs_key, n5_attrs_key) return self._contains_attrs(key_new) elif is_chunk_key(key): - key_new = invert_chunk_coords(key) else: key_new = key @@ -198,7 +186,6 @@ def __eq__(self, other): return isinstance(other, N5Store) and self.path == other.path def listdir(self, path: Optional[str] = None): - if path is not None: path = invert_chunk_coords(path) path = cast(str, path) @@ -208,7 +195,6 @@ def listdir(self, path: Optional[str] = None): children = super().listdir(path=path) if self._is_array(path): - # replace n5 attribute file with respective zarr attribute files children.remove(n5_attrs_key) children.append(zarr_array_meta_key) @@ -234,7 +220,6 @@ def listdir(self, path: Optional[str] = None): return sorted(new_children) elif self._is_group(path): - # replace n5 attribute file with respective zarr attribute files children.remove(n5_attrs_key) children.append(zarr_group_meta_key) @@ -244,7 +229,6 @@ def listdir(self, path: Optional[str] = None): return sorted(children) else: - return children def _load_n5_attrs(self, path: str) -> Dict[str, Any]: @@ -255,7 +239,6 @@ def _load_n5_attrs(self, path: str) -> Dict[str, Any]: return {} def _is_group(self, path: str): - if path is None: attrs_key = n5_attrs_key else: @@ -265,7 +248,6 @@ def _is_group(self, 
path: str): return len(n5_attrs) > 0 and "dimensions" not in n5_attrs def _is_array(self, path: str): - if path is None: attrs_key = n5_attrs_key else: @@ -274,7 +256,6 @@ def _is_array(self, path: str): return "dimensions" in self._load_n5_attrs(attrs_key) def _contains_attrs(self, path: str): - if path is None: attrs_key = n5_attrs_key else: @@ -376,21 +357,18 @@ def _normalize_key(self, key: str): def __getitem__(self, key: str) -> bytes: if key.endswith(zarr_group_meta_key): - key_new = key.replace(zarr_group_meta_key, self._group_meta_key) value = group_metadata_to_zarr(self._load_n5_attrs(key_new)) return json_dumps(value) elif key.endswith(zarr_array_meta_key): - key_new = key.replace(zarr_array_meta_key, self._array_meta_key) top_level = key == zarr_array_meta_key value = array_metadata_to_zarr(self._load_n5_attrs(key_new), top_level=top_level) return json_dumps(value) elif key.endswith(zarr_attrs_key): - key_new = key.replace(zarr_attrs_key, self._attrs_key) value = attrs_to_zarr(self._load_n5_attrs(key_new)) @@ -409,7 +387,6 @@ def __getitem__(self, key: str) -> bytes: def __setitem__(self, key: str, value: Any): if key.endswith(zarr_group_meta_key): - key_new = key.replace(zarr_group_meta_key, self._group_meta_key) n5_attrs = self._load_n5_attrs(key_new) @@ -418,7 +395,6 @@ def __setitem__(self, key: str, value: Any): value = json_dumps(n5_attrs) elif key.endswith(zarr_array_meta_key): - key_new = key.replace(zarr_array_meta_key, self._array_meta_key) top_level = key == zarr_array_meta_key n5_attrs = self._load_n5_attrs(key_new) @@ -427,7 +403,6 @@ def __setitem__(self, key: str, value: Any): value = json_dumps(n5_attrs) elif key.endswith(zarr_attrs_key): - key_new = key.replace(zarr_attrs_key, self._attrs_key) n5_attrs = self._load_n5_attrs(key_new) @@ -456,7 +431,6 @@ def __setitem__(self, key: str, value: Any): super().__setitem__(key_new, value) def __delitem__(self, key: str): - if key.endswith(zarr_group_meta_key): key_new = key.replace(zarr_group_meta_key, self._group_meta_key) elif key.endswith(zarr_array_meta_key): @@ -471,7 +445,6 @@ def __delitem__(self, key: str): def __contains__(self, key: Any): if key.endswith(zarr_group_meta_key): - key_new = key.replace(zarr_group_meta_key, self._group_meta_key) if key_new not in self: return False @@ -479,13 +452,11 @@ def __contains__(self, key: Any): return "dimensions" not in self._load_n5_attrs(key_new) elif key.endswith(zarr_array_meta_key): - key_new = key.replace(zarr_array_meta_key, self._array_meta_key) # array if attributes contain 'dimensions' return "dimensions" in self._load_n5_attrs(key_new) elif key.endswith(zarr_attrs_key): - key_new = key.replace(zarr_attrs_key, self._attrs_key) return self._contains_attrs(key_new) @@ -508,7 +479,6 @@ def listdir(self, path: Optional[str] = None): # doesn't provide. 
children = super().listdir(path=path) if self._is_array(path): - # replace n5 attribute file with respective zarr attribute files children.remove(self._array_meta_key) children.append(zarr_array_meta_key) @@ -532,7 +502,6 @@ def listdir(self, path: Optional[str] = None): return sorted(new_children) elif self._is_group(path): - # replace n5 attribute file with respective zarr attribute files children.remove(self._group_meta_key) children.append(zarr_group_meta_key) @@ -550,7 +519,6 @@ def _load_n5_attrs(self, path: str): return {} def _is_group(self, path: Optional[str]): - if path is None: attrs_key = self._attrs_key else: @@ -560,7 +528,6 @@ def _is_group(self, path: Optional[str]): return len(n5_attrs) > 0 and "dimensions" not in n5_attrs def _is_array(self, path: Optional[str]): - if path is None: attrs_key = self._attrs_key else: @@ -569,7 +536,6 @@ def _is_array(self, path: Optional[str]): return "dimensions" in self._load_n5_attrs(attrs_key) def _contains_attrs(self, path: Optional[str]): - if path is None: attrs_key = self._attrs_key else: @@ -712,7 +678,6 @@ def attrs_to_zarr(attrs: Dict[str, Any]) -> Dict[str, Any]: def compressor_config_to_n5(compressor_config: Optional[Dict[str, Any]]) -> Dict[str, Any]: - if compressor_config is None: return {"type": "raw"} else: @@ -726,19 +691,16 @@ def compressor_config_to_n5(compressor_config: Optional[Dict[str, Any]]) -> Dict n5_config = {"type": codec_id} if codec_id == "bz2": - n5_config["type"] = "bzip2" n5_config["blockSize"] = _compressor_config["level"] elif codec_id == "blosc": - n5_config["cname"] = _compressor_config["cname"] n5_config["clevel"] = _compressor_config["clevel"] n5_config["shuffle"] = _compressor_config["shuffle"] n5_config["blocksize"] = _compressor_config["blocksize"] elif codec_id == "lzma": - # Switch to XZ for N5 if we are using the default XZ format. # Note: 4 is the default, which is lzma.CHECK_CRC64. 
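# Illustrative aside (standard-library facts, not part of this patch): the
# constants checked just below come from the stdlib lzma module, e.g.:
#
#     >>> import lzma
#     >>> lzma.FORMAT_XZ, lzma.CHECK_CRC64
#     (1, 4)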
if _compressor_config["format"] == 1 and _compressor_config["check"] in [-1, 4]: @@ -760,50 +722,42 @@ def compressor_config_to_n5(compressor_config: Optional[Dict[str, Any]]) -> Dict n5_config["preset"] = 6 elif codec_id == "zlib": - n5_config["type"] = "gzip" n5_config["level"] = _compressor_config["level"] n5_config["useZlib"] = True elif codec_id == "gzip": - n5_config["type"] = "gzip" n5_config["level"] = _compressor_config["level"] n5_config["useZlib"] = False else: - n5_config.update({k: v for k, v in _compressor_config.items() if k != "type"}) return n5_config def compressor_config_to_zarr(compressor_config: Dict[str, Any]) -> Optional[Dict[str, Any]]: - codec_id = compressor_config["type"] zarr_config = {"id": codec_id} if codec_id == "bzip2": - zarr_config["id"] = "bz2" zarr_config["level"] = compressor_config["blockSize"] elif codec_id == "blosc": - zarr_config["cname"] = compressor_config["cname"] zarr_config["clevel"] = compressor_config["clevel"] zarr_config["shuffle"] = compressor_config["shuffle"] zarr_config["blocksize"] = compressor_config["blocksize"] elif codec_id == "lzma": - zarr_config["format"] = compressor_config["format"] zarr_config["check"] = compressor_config["check"] zarr_config["preset"] = compressor_config["preset"] zarr_config["filters"] = compressor_config["filters"] elif codec_id == "xz": - zarr_config["id"] = "lzma" zarr_config["format"] = 1 # lzma.FORMAT_XZ zarr_config["check"] = -1 @@ -811,7 +765,6 @@ def compressor_config_to_zarr(compressor_config: Dict[str, Any]) -> Optional[Dic zarr_config["filters"] = None elif codec_id == "gzip": - if "useZlib" in compressor_config and compressor_config["useZlib"]: zarr_config["id"] = "zlib" zarr_config["level"] = compressor_config["level"] @@ -820,22 +773,18 @@ def compressor_config_to_zarr(compressor_config: Dict[str, Any]) -> Optional[Dic zarr_config["level"] = compressor_config["level"] elif codec_id == "raw": - return None else: - zarr_config.update({k: v for k, v in compressor_config.items() if k != "type"}) return zarr_config class N5ChunkWrapper(Codec): - codec_id = "n5_wrapper" def __init__(self, dtype, chunk_shape, compressor_config=None, compressor=None): - self.dtype = np.dtype(dtype) self.chunk_shape = tuple(chunk_shape) # is the dtype a little endian format? 
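# Illustrative aside (NumPy behavior, not part of this hunk): byte order can
# be inspected via np.dtype.byteorder, which returns "<" for little-endian,
# ">" for big-endian, "=" for native order, and "|" where order does not apply:
#
#     >>> np.dtype("<i4").byteorder
#     '<'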
@@ -860,7 +809,6 @@ def get_config(self): return config def encode(self, chunk): - assert chunk.flags.c_contiguous header = self._create_header(chunk) @@ -872,12 +820,10 @@ def encode(self, chunk): return header + chunk.tobytes(order="A") def decode(self, chunk, out=None) -> bytes: - len_header, chunk_shape = self._read_header(chunk) chunk = chunk[len_header:] if out is not None: - # out should only be used if we read a complete chunk assert chunk_shape == self.chunk_shape, "Expected chunk of shape {}, found {}".format( self.chunk_shape, chunk_shape @@ -895,7 +841,6 @@ def decode(self, chunk, out=None) -> bytes: return out else: - if self._compressor: chunk = self._compressor.decode(chunk) @@ -915,7 +860,6 @@ def decode(self, chunk, out=None) -> bytes: @staticmethod def _create_header(chunk): - mode = struct.pack(">H", 0) num_dims = struct.pack(">H", len(chunk.shape)) shape = b"".join(struct.pack(">I", d) for d in chunk.shape[::-1]) @@ -924,7 +868,6 @@ def _create_header(chunk): @staticmethod def _read_header(chunk): - num_dims = struct.unpack(">H", chunk[2:4])[0] shape = tuple( struct.unpack(">I", chunk[i : i + 4])[0] for i in range(4, num_dims * 4 + 4, 4) diff --git a/zarr/storage.py b/zarr/storage.py index a7426e5345..585417f59c 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -483,7 +483,6 @@ def _init_array_metadata( dimension_separator=None, storage_transformers=(), ): - store_version = getattr(store, "_store_version", 2) path = normalize_storage_path(path) @@ -688,7 +687,6 @@ def _init_group_metadata( path: Optional[str] = None, chunk_store: Optional[StoreLike] = None, ): - store_version = getattr(store, "_store_version", 2) path = normalize_storage_path(path) @@ -1056,7 +1054,6 @@ class DirectoryStore(Store): """ def __init__(self, path, normalize_keys=False, dimension_separator=None): - # guard conditions path = os.path.abspath(path) if os.path.exists(path) and not os.path.isdir(path): @@ -1416,7 +1413,6 @@ def _normalize_key(self, key): def getitems( self, keys: Sequence[str], *, contexts: Mapping[str, Context] ) -> Mapping[str, Any]: - keys_transformed = [self._normalize_key(key) for key in keys] results = self.map.getitems(keys_transformed, on_error="omit") # The function calling this method may not recognize the transformed keys @@ -1770,7 +1766,6 @@ def __init__( mode="a", dimension_separator=None, ): - # store properties path = os.path.abspath(path) self.path = path diff --git a/zarr/tests/test_attrs.py b/zarr/tests/test_attrs.py index 7dd5b340a2..2d9553971b 100644 --- a/zarr/tests/test_attrs.py +++ b/zarr/tests/test_attrs.py @@ -30,7 +30,6 @@ def init_attributes(self, store, read_only=False, cache=True, zarr_version=2): return Attributes(store, key=root + "attrs", read_only=read_only, cache=cache) def test_storage(self, zarr_version): - store = _init_store(zarr_version) root = ".z" if zarr_version == 2 else meta_root attrs_key = root + "attrs" @@ -50,7 +49,6 @@ def test_storage(self, zarr_version): assert dict(foo="bar", baz=42) == d def test_utf8_encoding(self, zarr_version): - project_root = pathlib.Path(zarr.__file__).resolve().parent.parent fixdir = project_root / "fixture" testdir = fixdir / "utf8attrs" @@ -67,7 +65,6 @@ def test_utf8_encoding(self, zarr_version): assert fixture["utf8attrs"].attrs.asdict() == dict(foo="た") def test_get_set_del_contains(self, zarr_version): - store = _init_store(zarr_version) a = self.init_attributes(store, zarr_version=zarr_version) assert "foo" not in a @@ -84,7 +81,6 @@ def test_get_set_del_contains(self, zarr_version): a["foo"] 
def test_update_put(self, zarr_version): - store = _init_store(zarr_version) a = self.init_attributes(store, zarr_version=zarr_version) assert "foo" not in a @@ -102,7 +98,6 @@ def test_update_put(self, zarr_version): assert "baz" not in a def test_iterators(self, zarr_version): - store = _init_store(zarr_version) a = self.init_attributes(store, zarr_version=zarr_version) assert 0 == len(a) @@ -232,7 +227,6 @@ def test_caching_on(self, zarr_version): assert get_cnt == store.counter["__getitem__", attrs_key] def test_caching_off(self, zarr_version): - # setup store store = CountingDict() if zarr_version == 2 else CountingDictV3() attrs_key = ".zattrs" if zarr_version == 2 else "meta/root/attrs" diff --git a/zarr/tests/test_convenience.py b/zarr/tests/test_convenience.py index 389ce90a9d..7d190adc2c 100644 --- a/zarr/tests/test_convenience.py +++ b/zarr/tests/test_convenience.py @@ -57,7 +57,6 @@ def _init_creation_kwargs(zarr_version): @pytest.mark.parametrize("zarr_version", _VERSIONS) def test_open_array(path_type, zarr_version): - store = tempfile.mkdtemp() atexit.register(atexit_rmtree, store) store = path_type(store) @@ -86,7 +85,6 @@ def test_open_array(path_type, zarr_version): @pytest.mark.parametrize("zarr_version", _VERSIONS) def test_open_group(path_type, zarr_version): - store = tempfile.mkdtemp() atexit.register(atexit_rmtree, store) store = path_type(store) @@ -210,7 +208,6 @@ def test_tree(zarr_version): def test_consolidate_metadata( with_chunk_store, zarr_version, listable, monkeypatch, stores_from_path ): - # setup initial data if stores_from_path: store = tempfile.mkdtemp() @@ -399,7 +396,6 @@ def test_save_array_separator(tmpdir, options): class TestCopyStore(unittest.TestCase): - _version = 2 def setUp(self): @@ -536,7 +532,6 @@ def test_if_exists(self): @pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") class TestCopyStoreV3(TestCopyStore): - _version = 3 def setUp(self): @@ -557,7 +552,6 @@ def test_mismatched_store_versions(self): def check_copied_array(original, copied, without_attrs=False, expect_props=None): - # setup source_h5py = original.__module__.startswith("h5py.") dest_h5py = copied.__module__.startswith("h5py.") @@ -621,7 +615,6 @@ def check_copied_array(original, copied, without_attrs=False, expect_props=None) def check_copied_group(original, copied, without_attrs=False, expect_props=None, shallow=False): - # setup if expect_props is None: expect_props = dict() diff --git a/zarr/tests/test_creation.py b/zarr/tests/test_creation.py index b44c6379fd..8e586abfff 100644 --- a/zarr/tests/test_creation.py +++ b/zarr/tests/test_creation.py @@ -74,7 +74,6 @@ def _init_creation_kwargs(zarr_version, at_root=True): @pytest.mark.parametrize("zarr_version", _VERSIONS) @pytest.mark.parametrize("at_root", [False, True]) def test_array(zarr_version, at_root): - expected_zarr_version = DEFAULT_ZARR_VERSION if zarr_version is None else zarr_version kwargs = _init_creation_kwargs(zarr_version, at_root) @@ -213,7 +212,6 @@ def test_full_additional_dtypes(zarr_version): @pytest.mark.parametrize("zarr_version", _VERSIONS) @pytest.mark.parametrize("at_root", [False, True]) def test_open_array(zarr_version, at_root, dimension_separator): - store = "data/array.zarr" kwargs = _init_creation_kwargs(zarr_version, at_root) @@ -329,7 +327,6 @@ def test_open_array(zarr_version, at_root, dimension_separator): def test_open_array_none(): - # open with both store and zarr_version = None z = open_array(mode="w", shape=100, chunks=10) assert isinstance(z, Array) @@ -339,7 
+336,6 @@ def test_open_array_none(): @pytest.mark.parametrize("dimension_separator", [".", "/", None]) @pytest.mark.parametrize("zarr_version", _VERSIONS2) def test_open_array_infer_separator_from_store(zarr_version, dimension_separator): - if zarr_version == 3: StoreClass = DirectoryStoreV3 path = "data" @@ -370,7 +366,6 @@ def test_open_array_infer_separator_from_store(zarr_version, dimension_separator # TODO: N5 support for v3 @pytest.mark.parametrize("zarr_version", [None, 2]) def test_open_array_n5(zarr_version): - store = "data/array.zarr" kwargs = _init_creation_kwargs(zarr_version) @@ -409,7 +404,6 @@ def test_open_array_n5(zarr_version): @pytest.mark.parametrize("zarr_version", _VERSIONS) @pytest.mark.parametrize("at_root", [False, True]) def test_open_array_dict_store(zarr_version, at_root): - # dict will become a KVStore store = dict() kwargs = _init_creation_kwargs(zarr_version, at_root) @@ -503,7 +497,6 @@ def test_empty_like(zarr_version, at_root): @pytest.mark.parametrize("zarr_version", _VERSIONS) @pytest.mark.parametrize("at_root", [False, True]) def test_zeros_like(zarr_version, at_root): - kwargs = _init_creation_kwargs(zarr_version, at_root) expected_zarr_version = DEFAULT_ZARR_VERSION if zarr_version is None else zarr_version @@ -529,7 +522,6 @@ def test_zeros_like(zarr_version, at_root): @pytest.mark.parametrize("zarr_version", _VERSIONS) @pytest.mark.parametrize("at_root", [False, True]) def test_ones_like(zarr_version, at_root): - kwargs = _init_creation_kwargs(zarr_version, at_root) expected_zarr_version = DEFAULT_ZARR_VERSION if zarr_version is None else zarr_version @@ -556,7 +548,6 @@ def test_ones_like(zarr_version, at_root): @pytest.mark.parametrize("zarr_version", _VERSIONS) @pytest.mark.parametrize("at_root", [False, True]) def test_full_like(zarr_version, at_root): - kwargs = _init_creation_kwargs(zarr_version, at_root) expected_zarr_version = DEFAULT_ZARR_VERSION if zarr_version is None else zarr_version diff --git a/zarr/tests/test_dim_separator.py b/zarr/tests/test_dim_separator.py index 987852dfd0..0a5814e65f 100644 --- a/zarr/tests/test_dim_separator.py +++ b/zarr/tests/test_dim_separator.py @@ -46,7 +46,6 @@ def dataset(tmpdir, request): static = project_root / "fixture" / suffix if not static.exists(): # pragma: no cover - if "nested" in which: # No way to reproduce the nested_legacy file via code generator = NestedDirectoryStore diff --git a/zarr/tests/test_filters.py b/zarr/tests/test_filters.py index d55be9145f..fc63cdca8d 100644 --- a/zarr/tests/test_filters.py +++ b/zarr/tests/test_filters.py @@ -30,7 +30,6 @@ def test_array_with_delta_filter(): - # setup astype = "u1" dtype = "i8" @@ -38,7 +37,6 @@ def test_array_with_delta_filter(): data = np.arange(100, dtype=dtype) for compressor in compressors: - a = array(data, chunks=10, compressor=compressor, filters=filters) # check round-trip @@ -57,7 +55,6 @@ def test_array_with_delta_filter(): def test_array_with_astype_filter(): - # setup encode_dtype = "i1" decode_dtype = "i8" @@ -68,7 +65,6 @@ def test_array_with_astype_filter(): data = np.arange(shape, dtype=decode_dtype) for compressor in compressors: - a = array(data, chunks=chunks, compressor=compressor, filters=filters) # check round-trip @@ -88,7 +84,6 @@ def test_array_with_astype_filter(): def test_array_with_scaleoffset_filter(): - # setup astype = "u1" dtype = "f8" @@ -97,7 +92,6 @@ def test_array_with_scaleoffset_filter(): data = np.linspace(1000, 1001, 34, dtype="f8") for compressor in compressors: - a = array(data, chunks=5, 
compressor=compressor, filters=filters) # check round-trip @@ -116,7 +110,6 @@ def test_array_with_scaleoffset_filter(): def test_array_with_quantize_filter(): - # setup dtype = "f8" digits = 3 @@ -125,7 +118,6 @@ def test_array_with_quantize_filter(): data = np.linspace(0, 1, 34, dtype=dtype) for compressor in compressors: - a = array(data, chunks=5, compressor=compressor, filters=filters) # check round-trip @@ -144,14 +136,12 @@ def test_array_with_quantize_filter(): def test_array_with_packbits_filter(): - # setup flt = PackBits() filters = [flt] data = np.random.randint(0, 2, size=100, dtype=bool) for compressor in compressors: - a = array(data, chunks=5, compressor=compressor, filters=filters) # check round-trip @@ -170,14 +160,12 @@ def test_array_with_packbits_filter(): def test_array_with_categorize_filter(): - # setup data = np.random.choice(["foo", "bar", "baz"], size=100) flt = Categorize(dtype=data.dtype, labels=["foo", "bar", "baz"]) filters = [flt] for compressor in compressors: - a = array(data, chunks=5, compressor=compressor, filters=filters) # check round-trip diff --git a/zarr/tests/test_hierarchy.py b/zarr/tests/test_hierarchy.py index cbf59c55c3..6c08d7b88a 100644 --- a/zarr/tests/test_hierarchy.py +++ b/zarr/tests/test_hierarchy.py @@ -1085,7 +1085,6 @@ def test_paths(self): g1.store.close() def test_pickle(self): - # setup group g = self.create_group() d = g.create_dataset("foo/bar", shape=100, chunks=10) @@ -1113,7 +1112,6 @@ def test_pickle(self): g2.store.close() def test_context_manager(self): - with self.create_group() as g: d = g.create_dataset("foo/bar", shape=100, chunks=10) d[:] = np.arange(100) @@ -1375,7 +1373,6 @@ def create_store(): return store, None def test_context_manager(self): - with self.create_group() as g: store = g.store d = g.create_dataset("foo/bar", shape=100, chunks=10) diff --git a/zarr/tests/test_indexing.py b/zarr/tests/test_indexing.py index 8a34c1e715..f10360e8b7 100644 --- a/zarr/tests/test_indexing.py +++ b/zarr/tests/test_indexing.py @@ -17,7 +17,6 @@ def test_normalize_integer_selection(): - assert 1 == normalize_integer_selection(1, 100) assert 99 == normalize_integer_selection(-1, 100) with pytest.raises(IndexError): @@ -29,7 +28,6 @@ def test_normalize_integer_selection(): def test_replace_ellipsis(): - # 1D, single item assert (0,) == replace_ellipsis(0, (100,)) @@ -68,7 +66,6 @@ def test_replace_ellipsis(): def test_get_basic_selection_0d(): - # setup a = np.array(42) z = zarr.create(shape=a.shape, dtype=a.dtype, fill_value=None) @@ -191,7 +188,6 @@ def _test_get_basic_selection(a, z, selection): # noinspection PyStatementEffect def test_get_basic_selection_1d(): - # setup a = np.arange(1050, dtype=int) z = zarr.create(shape=a.shape, chunks=100, dtype=a.dtype) @@ -264,7 +260,6 @@ def test_get_basic_selection_1d(): # noinspection PyStatementEffect def test_get_basic_selection_2d(): - # setup a = np.arange(10000, dtype=int).reshape(1000, 10) z = zarr.create(shape=a.shape, chunks=(300, 3), dtype=a.dtype) @@ -423,7 +418,6 @@ def test_fancy_indexing_doesnt_mix_with_implicit_slicing(): def test_set_basic_selection_0d(): - # setup v = np.array(42) a = np.zeros_like(v) @@ -479,7 +473,6 @@ def _test_get_orthogonal_selection(a, z, selection): # noinspection PyStatementEffect def test_get_orthogonal_selection_1d_bool(): - # setup a = np.arange(1050, dtype=int) z = zarr.create(shape=a.shape, chunks=100, dtype=a.dtype) @@ -502,7 +495,6 @@ def test_get_orthogonal_selection_1d_bool(): # noinspection PyStatementEffect def 
test_get_orthogonal_selection_1d_int(): - # setup a = np.arange(1050, dtype=int) z = zarr.create(shape=a.shape, chunks=100, dtype=a.dtype) @@ -561,7 +553,6 @@ def _test_get_orthogonal_selection_2d(a, z, ix0, ix1): # noinspection PyStatementEffect def test_get_orthogonal_selection_2d(): - # setup a = np.arange(10000, dtype=int).reshape(1000, 10) z = zarr.create(shape=a.shape, chunks=(300, 3), dtype=a.dtype) @@ -570,7 +561,6 @@ def test_get_orthogonal_selection_2d(): np.random.seed(42) # test with different degrees of sparseness for p in 0.5, 0.1, 0.01: - # boolean arrays ix0 = np.random.binomial(1, p, size=a.shape[0]).astype(bool) ix1 = np.random.binomial(1, 0.5, size=a.shape[1]).astype(bool) @@ -641,7 +631,6 @@ def _test_get_orthogonal_selection_3d(a, z, ix0, ix1, ix2): def test_get_orthogonal_selection_3d(): - # setup a = np.arange(100000, dtype=int).reshape(200, 50, 10) z = zarr.create(shape=a.shape, chunks=(60, 20, 3), dtype=a.dtype) @@ -650,7 +639,6 @@ def test_get_orthogonal_selection_3d(): np.random.seed(42) # test with different degrees of sparseness for p in 0.5, 0.1, 0.01: - # boolean arrays ix0 = np.random.binomial(1, p, size=a.shape[0]).astype(bool) ix1 = np.random.binomial(1, 0.5, size=a.shape[1]).astype(bool) @@ -673,7 +661,6 @@ def test_get_orthogonal_selection_3d(): def test_orthogonal_indexing_edge_cases(): - a = np.arange(6).reshape(1, 2, 3) z = zarr.create(shape=a.shape, chunks=(1, 2, 3), dtype=a.dtype) z[:] = a @@ -706,7 +693,6 @@ def _test_set_orthogonal_selection(v, a, z, selection): def test_set_orthogonal_selection_1d(): - # setup v = np.arange(1050, dtype=int) a = np.empty(v.shape, dtype=int) @@ -715,7 +701,6 @@ def test_set_orthogonal_selection_1d(): # test with different degrees of sparseness np.random.seed(42) for p in 0.5, 0.1, 0.01: - # boolean arrays ix = np.random.binomial(1, p, size=a.shape[0]).astype(bool) _test_set_orthogonal_selection(v, a, z, ix) @@ -734,7 +719,6 @@ def test_set_orthogonal_selection_1d(): def _test_set_orthogonal_selection_2d(v, a, z, ix0, ix1): - selections = [ # index both axes with array (ix0, ix1), @@ -749,7 +733,6 @@ def _test_set_orthogonal_selection_2d(v, a, z, ix0, ix1): def test_set_orthogonal_selection_2d(): - # setup v = np.arange(10000, dtype=int).reshape(1000, 10) a = np.empty_like(v) @@ -758,7 +741,6 @@ def test_set_orthogonal_selection_2d(): np.random.seed(42) # test with different degrees of sparseness for p in 0.5, 0.1, 0.01: - # boolean arrays ix0 = np.random.binomial(1, p, size=a.shape[0]).astype(bool) ix1 = np.random.binomial(1, 0.5, size=a.shape[1]).astype(bool) @@ -780,7 +762,6 @@ def test_set_orthogonal_selection_2d(): def _test_set_orthogonal_selection_3d(v, a, z, ix0, ix1, ix2): - selections = ( # single value (84, 42, 4), @@ -807,7 +788,6 @@ def _test_set_orthogonal_selection_3d(v, a, z, ix0, ix1, ix2): def test_set_orthogonal_selection_3d(): - # setup v = np.arange(100000, dtype=int).reshape(200, 50, 10) a = np.empty_like(v) @@ -816,7 +796,6 @@ def test_set_orthogonal_selection_3d(): np.random.seed(42) # test with different degrees of sparseness for p in 0.5, 0.1, 0.01: - # boolean arrays ix0 = np.random.binomial(1, p, size=a.shape[0]).astype(bool) ix1 = np.random.binomial(1, 0.5, size=a.shape[1]).astype(bool) @@ -888,7 +867,6 @@ def _test_get_coordinate_selection(a, z, selection): # noinspection PyStatementEffect def test_get_coordinate_selection_1d(): - # setup a = np.arange(1050, dtype=int) z = zarr.create(shape=a.shape, chunks=100, dtype=a.dtype) @@ -932,7 +910,6 @@ def test_get_coordinate_selection_1d(): 
def test_get_coordinate_selection_2d(): - # setup a = np.arange(10000, dtype=int).reshape(1000, 10) z = zarr.create(shape=a.shape, chunks=(300, 3), dtype=a.dtype) @@ -1027,7 +1004,6 @@ def test_set_coordinate_selection_1d(): def test_set_coordinate_selection_2d(): - # setup v = np.arange(10000, dtype=int).reshape(1000, 10) a = np.empty_like(v) @@ -1258,7 +1234,6 @@ def _test_get_mask_selection(a, z, selection): # noinspection PyStatementEffect def test_get_mask_selection_1d(): - # setup a = np.arange(1050, dtype=int) z = zarr.create(shape=a.shape, chunks=100, dtype=a.dtype) @@ -1285,7 +1260,6 @@ def test_get_mask_selection_1d(): # noinspection PyStatementEffect def test_get_mask_selection_2d(): - # setup a = np.arange(10000, dtype=int).reshape(1000, 10) z = zarr.create(shape=a.shape, chunks=(300, 3), dtype=a.dtype) @@ -1318,7 +1292,6 @@ def _test_set_mask_selection(v, a, z, selection): def test_set_mask_selection_1d(): - # setup v = np.arange(1050, dtype=int) a = np.empty_like(v) @@ -1338,7 +1311,6 @@ def test_set_mask_selection_1d(): def test_set_mask_selection_2d(): - # setup v = np.arange(10000, dtype=int).reshape(1000, 10) a = np.empty_like(v) @@ -1352,7 +1324,6 @@ def test_set_mask_selection_2d(): def test_get_selection_out(): - # basic selections a = np.arange(1050) z = zarr.create(shape=1050, chunks=100, dtype=a.dtype) @@ -1426,7 +1397,6 @@ def test_get_selection_out(): def test_get_selections_with_fields(): - a = [("aaa", 1, 4.2), ("bbb", 2, 8.4), ("ccc", 3, 12.6)] a = np.array(a, dtype=[("foo", "S3"), ("bar", "i4"), ("baz", "f8")]) z = zarr.create(shape=a.shape, chunks=2, dtype=a.dtype, fill_value=None) @@ -1444,7 +1414,6 @@ def test_get_selections_with_fields(): ] for fields in fields_fixture: - # total selection expect = a[fields] actual = z.get_basic_selection(Ellipsis, fields=fields) @@ -1534,7 +1503,6 @@ def test_get_selections_with_fields(): def test_set_selections_with_fields(): - v = [("aaa", 1, 4.2), ("bbb", 2, 8.4), ("ccc", 3, 12.6)] v = np.array(v, dtype=[("foo", "S3"), ("bar", "i4"), ("baz", "f8")]) a = np.empty_like(v) @@ -1553,7 +1521,6 @@ def test_set_selections_with_fields(): ] for fields in fields_fixture: - # currently multi-field assignment is not supported in numpy, so we won't support # it either if isinstance(fields, list) and len(fields) > 1: @@ -1567,7 +1534,6 @@ def test_set_selections_with_fields(): z.set_mask_selection([True, False, True], v, fields=fields) else: - if isinstance(fields, list) and len(fields) == 1: # work around numpy does not support multi-field assignment even if there # is only one field @@ -1752,7 +1718,6 @@ def test_accessed_chunks(shape, chunks, ops): z = zarr.create(shape=shape, chunks=chunks, store=store) for ii, (optype, slices) in enumerate(ops): - # Resolve the slices into the accessed chunks for each dimension chunks_per_dim = [] for N, C, sl in zip(shape, chunks, slices): diff --git a/zarr/tests/test_info.py b/zarr/tests/test_info.py index 7fb6feb11b..96eae999f4 100644 --- a/zarr/tests/test_info.py +++ b/zarr/tests/test_info.py @@ -7,7 +7,6 @@ @pytest.mark.parametrize("array_size", [10, 15000]) def test_info(array_size): - # setup g = zarr.group(store=dict(), chunk_store=dict(), synchronizer=zarr.ThreadSynchronizer()) g.create_group("foo") diff --git a/zarr/tests/test_meta.py b/zarr/tests/test_meta.py index db50560c8e..3e1e0f9d63 100644 --- a/zarr/tests/test_meta.py +++ b/zarr/tests/test_meta.py @@ -34,7 +34,6 @@ def assert_json_equal(expect, actual): def test_encode_decode_array_1(): - meta = dict( shape=(100,), 
chunks=(10,), @@ -76,7 +75,6 @@ def test_encode_decode_array_1(): def test_encode_decode_array_2(): - # some variations df = Delta(astype=" Tupl def normalize_dtype(dtype: Union[str, np.dtype], object_codec) -> Tuple[np.dtype, Any]: - # convenience API for object arrays if inspect.isclass(dtype): dtype = dtype.__name__ # type: ignore @@ -245,7 +244,6 @@ def is_total_slice(item, shape: Tuple[int]) -> bool: def normalize_resize_args(old_shape, *args): - # normalize new shape argument if len(args) == 1: new_shape = args[0] @@ -294,7 +292,6 @@ def normalize_dimension_separator(sep: Optional[str]) -> Optional[str]: def normalize_fill_value(fill_value, dtype: np.dtype): - if fill_value is None or dtype.hasobject: # no fill value pass @@ -332,7 +329,6 @@ def normalize_fill_value(fill_value, dtype: np.dtype): def normalize_storage_path(path: Union[str, bytes, None]) -> str: - # handle bytes if isinstance(path, bytes): path = str(path, "ascii") @@ -342,7 +338,6 @@ def normalize_storage_path(path: Union[str, bytes, None]) -> str: path = str(path) if path: - # convert backslash to forward slash path = path.replace("\\", "/") @@ -506,7 +501,6 @@ def tree_widget(group, expand, level): class TreeViewer: def __init__(self, group, expand=False, level=None): - self.group = group self.expand = expand self.level = level From 54e31e9814a41cd7fd81255695971ce5e700ee3e Mon Sep 17 00:00:00 2001 From: Dimitri Papadopoulos Orfanos <3234522+DimitriPapadopoulos@users.noreply.github.com> Date: Thu, 7 Dec 2023 22:29:28 +0100 Subject: [PATCH 161/213] Use list comprehension where applicable (#1555) Even if this is only a test, list comprehensions are faster than repeatedly calling append(). Also use a tuple instead of a list when possible. Co-authored-by: Davis Bennett --- zarr/tests/test_indexing.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/zarr/tests/test_indexing.py b/zarr/tests/test_indexing.py index f10360e8b7..af046e9d28 100644 --- a/zarr/tests/test_indexing.py +++ b/zarr/tests/test_indexing.py @@ -1719,17 +1719,15 @@ def test_accessed_chunks(shape, chunks, ops): for ii, (optype, slices) in enumerate(ops): # Resolve the slices into the accessed chunks for each dimension - chunks_per_dim = [] - for N, C, sl in zip(shape, chunks, slices): - chunk_ind = np.arange(N, dtype=int)[sl] // C - chunks_per_dim.append(np.unique(chunk_ind)) + chunks_per_dim = [ + np.unique(np.arange(N, dtype=int)[sl] // C) for N, C, sl in zip(shape, chunks, slices) + ] # Combine and generate the cartesian product to determine the chunks keys that # will be accessed - chunks_accessed = [] - for comb in itertools.product(*chunks_per_dim): - chunks_accessed.append(".".join([str(ci) for ci in comb])) - + chunks_accessed = ( ".".join([str(ci) for ci in comb]) for comb in itertools.product(*chunks_per_dim) ) counts_before = store.counter.copy() # Perform the operation From 7d2c9bf5ce4c998d95630d9a1202e27e58926838 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 7 Dec 2023 21:35:23 +0000 Subject: [PATCH 162/213] Bump numcodecs from 0.11.0 to 0.12.1 (#1580) Bumps [numcodecs](https://github.com/zarr-developers/numcodecs) from 0.11.0 to 0.12.1. 
- [Release notes](https://github.com/zarr-developers/numcodecs/releases) - [Changelog](https://github.com/zarr-developers/numcodecs/blob/main/docs/release.rst) - [Commits](https://github.com/zarr-developers/numcodecs/compare/v0.11.0...v0.12.1) --- updated-dependencies: - dependency-name: numcodecs dependency-type: direct:development update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Joe Hamman --- requirements_dev_minimal.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements_dev_minimal.txt b/requirements_dev_minimal.txt index e2be6eb825..afea816d87 100644 --- a/requirements_dev_minimal.txt +++ b/requirements_dev_minimal.txt @@ -1,7 +1,7 @@ # library requirements asciitree==0.3.3 fasteners==0.19 -numcodecs==0.11.0 +numcodecs==0.12.1 msgpack-python==0.5.6 setuptools-scm==8.0.4 # test requirements From 10dee6ba0c0ce6ab29333e7a50f0afa4f6de06ca Mon Sep 17 00:00:00 2001 From: Dimitri Papadopoulos Orfanos <3234522+DimitriPapadopoulos@users.noreply.github.com> Date: Thu, 7 Dec 2023 22:40:21 +0100 Subject: [PATCH 163/213] Use format specification mini-language to format string (#1558) Co-authored-by: Joe Hamman --- zarr/storage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zarr/storage.py b/zarr/storage.py index 585417f59c..5ba8071395 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -784,7 +784,7 @@ def __len__(self): return len(self._mutable_mapping) def __repr__(self): - return f"<{self.__class__.__name__}: \n{repr(self._mutable_mapping)}\n at {hex(id(self))}>" + return f"<{self.__class__.__name__}: \n{self._mutable_mapping!r}\n at {id(self):#x}>" def __eq__(self, other): if isinstance(other, KVStore): From 40a6e817b17e1fe600b188478ba38fb6978a5273 Mon Sep 17 00:00:00 2001 From: Dimitri Papadopoulos Orfanos <3234522+DimitriPapadopoulos@users.noreply.github.com> Date: Thu, 7 Dec 2023 22:45:50 +0100 Subject: [PATCH 164/213] Single startswith() call instead of multiple ones (#1556) It's faster and probably more readable. Co-authored-by: Davis Bennett Co-authored-by: Joe Hamman --- zarr/_storage/store.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/zarr/_storage/store.py b/zarr/_storage/store.py index 80e4ad8f75..667ca38147 100644 --- a/zarr/_storage/store.py +++ b/zarr/_storage/store.py @@ -221,9 +221,8 @@ def _validate_key(self, key: str): ) if ( - not key.startswith("data/") - and (not key.startswith("meta/")) - and (not key == "zarr.json") + not key.startswith(("data/", "meta/")) + and key != "zarr.json" # TODO: Possibly allow key == ".zmetadata" too if we write a # consolidated metadata spec corresponding to this? ): From 5954ff95803c1343d022f6181ed397c7095f4a0e Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 7 Dec 2023 21:46:14 +0000 Subject: [PATCH 165/213] Bump pymongo from 4.5.0 to 4.6.1 (#1585) Bumps [pymongo](https://github.com/mongodb/mongo-python-driver) from 4.5.0 to 4.6.1. - [Release notes](https://github.com/mongodb/mongo-python-driver/releases) - [Changelog](https://github.com/mongodb/mongo-python-driver/blob/master/doc/changelog.rst) - [Commits](https://github.com/mongodb/mongo-python-driver/compare/4.5.0...4.6.1) --- updated-dependencies: - dependency-name: pymongo dependency-type: direct:development update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Joe Hamman --- requirements_dev_optional.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements_dev_optional.txt b/requirements_dev_optional.txt index f3ea80a546..5a3340a282 100644 --- a/requirements_dev_optional.txt +++ b/requirements_dev_optional.txt @@ -11,7 +11,7 @@ azure-storage-blob==12.16.0 # pyup: ignore redis==5.0.1 types-redis types-setuptools -pymongo==4.5.0 +pymongo==4.6.1 # optional test requirements coverage pytest-cov==4.1.0 From 6ad7b0e2ddabdcc5087e23f003edf123d21e9a25 Mon Sep 17 00:00:00 2001 From: Dimitri Papadopoulos Orfanos <3234522+DimitriPapadopoulos@users.noreply.github.com> Date: Fri, 8 Dec 2023 00:07:10 +0100 Subject: [PATCH 166/213] Move codespell options around (#1196) Starting with codespell 2.2.2, options can be specified in `pyproject.toml` in addition to `setup.cfg`: https://github.com/codespell-project/codespell#using-a-config-file Specifying options in a config file instead of command line options in `.pre-commit-config.yaml` ensures codespell uses the same options when run as a pre-commit hook or from the command line in the repository root directory. --- .pre-commit-config.yaml | 1 - pyproject.toml | 5 +++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index e985d24000..029dcda58f 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -21,7 +21,6 @@ repos: rev: v2.2.5 hooks: - id: codespell - args: ["-L", "ba,ihs,kake,nd,noe,nwo,te,fo,zar", "-S", "fixture"] - repo: https://github.com/pre-commit/pre-commit-hooks rev: v4.4.0 hooks: diff --git a/pyproject.toml b/pyproject.toml index 22ea19f28f..36a0d896ea 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -137,3 +137,8 @@ filterwarnings = [ "ignore:PY_SSIZE_T_CLEAN will be required.*:DeprecationWarning", "ignore:The loop argument is deprecated since Python 3.8.*:DeprecationWarning", ] + + +[tool.codespell] ignore-words-list = "ba,ihs,kake,nd,noe,nwo,te,fo,zar" skip = 'fixture,.git' From cf32382b9a228eaaafe30ab82d05b9303824a783 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 8 Dec 2023 15:19:14 +0100 Subject: [PATCH 167/213] Bump fsspec from 2023.10.0 to 2023.12.1 (#1600) * Bump fsspec from 2023.10.0 to 2023.12.1 Bumps [fsspec](https://github.com/fsspec/filesystem_spec) from 2023.10.0 to 2023.12.1. - [Commits](https://github.com/fsspec/filesystem_spec/compare/2023.10.0...2023.12.1) --- updated-dependencies: - dependency-name: fsspec dependency-type: direct:development update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] * Update s3fs as well * Fix s3fs --------- Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Josh Moore --- requirements_dev_optional.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements_dev_optional.txt b/requirements_dev_optional.txt index 5a3340a282..13385a243a 100644 --- a/requirements_dev_optional.txt +++ b/requirements_dev_optional.txt @@ -18,6 +18,6 @@ pytest-cov==4.1.0 pytest-doctestplus==1.0.0 pytest-timeout==2.2.0 h5py==3.10.0 -fsspec==2023.10.0 -s3fs==2023.10.0 +fsspec==2023.12.1 +s3fs==2023.12.1 moto[server]>=4.0.8 From 4d79cfc84f7f3914a04d9468666685520cc21276 Mon Sep 17 00:00:00 2001 From: David Stansby Date: Fri, 8 Dec 2023 16:41:51 +0000 Subject: [PATCH 168/213] Add type hints to zarr.create (#1536) * Add type hints to zarr.create * Use protocol for MetaArray * Use protocol for Synchronizer * Fix Path typing * Add release note * Fix dim separator typing * Ignore ... in coverage reporting * Fix chunk typing --------- Co-authored-by: Davis Bennett --- docs/release.rst | 6 ++++++ pyproject.toml | 1 + zarr/_storage/store.py | 3 ++- zarr/creation.py | 46 +++++++++++++++++++++++------------------ zarr/storage.py | 2 +- zarr/sync.py | 12 +++++++++-- zarr/tests/test_core.py | 8 ++++--- zarr/types.py | 13 ++++++++++++ zarr/util.py | 5 +++-- 9 files changed, 67 insertions(+), 29 deletions(-) create mode 100644 zarr/types.py diff --git a/docs/release.rst b/docs/release.rst index 842c36e290..c18e0b8c20 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -18,6 +18,12 @@ Release notes Unreleased ---------- +Enhancements +~~~~~~~~~~~~ + +* Added type hints to ``zarr.creation.create()``. + By :user:`David Stansby ` :issue:`1536`. + Docs ~~~~ diff --git a/pyproject.toml b/pyproject.toml index 36a0d896ea..4b7fef6003 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -65,6 +65,7 @@ Homepage = "https://github.com/zarr-developers/zarr-python" exclude_lines = [ "pragma: no cover", "pragma: ${PY_MAJOR_VERSION} no cover", + '.*\.\.\.' # Ignore "..." 
lines ] [tool.coverage.run] diff --git a/zarr/_storage/store.py b/zarr/_storage/store.py index 667ca38147..09f0b68602 100644 --- a/zarr/_storage/store.py +++ b/zarr/_storage/store.py @@ -9,6 +9,7 @@ from zarr.meta import Metadata2, Metadata3 from zarr.util import normalize_storage_path from zarr.context import Context +from zarr.types import ZARR_VERSION # v2 store keys array_meta_key = ".zarray" @@ -19,7 +20,7 @@ meta_root = "meta/root/" data_root = "data/root/" -DEFAULT_ZARR_VERSION = 2 +DEFAULT_ZARR_VERSION: ZARR_VERSION = 2 v3_api_available = os.environ.get("ZARR_V3_EXPERIMENTAL_API", "0").lower() not in ["0", "false"] diff --git a/zarr/creation.py b/zarr/creation.py index 6227f90b7b..d4f570895a 100644 --- a/zarr/creation.py +++ b/zarr/creation.py @@ -1,7 +1,10 @@ -from typing import Optional +from collections.abc import MutableMapping +from typing import Optional, Tuple, Union, Sequence from warnings import warn import numpy as np +import numpy.typing as npt +from numcodecs.abc import Codec from numcodecs.registry import codec_registry from zarr._storage.store import DEFAULT_ZARR_VERSION @@ -19,32 +22,35 @@ normalize_storage_path, normalize_store_arg, ) +from zarr._storage.store import StorageTransformer +from zarr.sync import Synchronizer +from zarr.types import ZARR_VERSION, DIMENSION_SEPARATOR, MEMORY_ORDER, MetaArray, PathLike from zarr.util import normalize_dimension_separator def create( - shape, - chunks=True, - dtype=None, + shape: Union[int, Tuple[int, ...]], + chunks: Union[int, Tuple[int, ...], bool] = True, + dtype: Optional[npt.DTypeLike] = None, compressor="default", fill_value: Optional[int] = 0, - order="C", - store=None, - synchronizer=None, - overwrite=False, - path=None, - chunk_store=None, - filters=None, - cache_metadata=True, - cache_attrs=True, - read_only=False, - object_codec=None, - dimension_separator=None, - write_empty_chunks=True, + order: MEMORY_ORDER = "C", + store: Optional[Union[str, MutableMapping]] = None, + synchronizer: Optional[Synchronizer] = None, + overwrite: bool = False, + path: Optional[PathLike] = None, + chunk_store: Optional[MutableMapping] = None, + filters: Optional[Sequence[Codec]] = None, + cache_metadata: bool = True, + cache_attrs: bool = True, + read_only: bool = False, + object_codec: Optional[Codec] = None, + dimension_separator: Optional[DIMENSION_SEPARATOR] = None, + write_empty_chunks: bool = True, *, - zarr_version=None, - meta_array=None, - storage_transformers=(), + zarr_version: Optional[ZARR_VERSION] = None, + meta_array: Optional[MetaArray] = None, + storage_transformers: Sequence[StorageTransformer] = (), **kwargs, ): """Create an array. 
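As an illustrative aside (not part of this patch), the annotated signature above can be exercised as follows; ``zarr.create`` and the parameter names come from the diff, while the argument values are made up:

    import numpy as np
    import zarr

    # chunks accepts an int, a tuple, or True (automatic chunking); dtype
    # accepts any npt.DTypeLike value; order is restricted to "C" or "F"
    z = zarr.create(shape=(1000, 1000), chunks=(100, 100), dtype=np.float64, order="C")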
diff --git a/zarr/storage.py b/zarr/storage.py index 5ba8071395..1c3b39862a 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -40,6 +40,7 @@ from numcodecs.compat import ensure_bytes, ensure_text, ensure_contiguous_ndarray_like from numcodecs.registry import codec_registry from zarr.context import Context +from zarr.types import PathLike as Path from zarr.errors import ( MetadataError, @@ -105,7 +106,6 @@ default_compressor = Zlib() -Path = Union[str, bytes, None] # allow MutableMapping for backwards compatibility StoreLike = Union[BaseStore, MutableMapping] diff --git a/zarr/sync.py b/zarr/sync.py index 49684a51ee..2e843f6557 100644 --- a/zarr/sync.py +++ b/zarr/sync.py @@ -1,11 +1,19 @@ import os from collections import defaultdict from threading import Lock +from typing import Protocol import fasteners -class ThreadSynchronizer: +class Synchronizer(Protocol): + """Base class for synchronizers.""" + + def __getitem__(self, item): + ... + + +class ThreadSynchronizer(Synchronizer): """Provides synchronization using thread locks.""" def __init__(self): @@ -24,7 +32,7 @@ def __setstate__(self, *args): self.__init__() -class ProcessSynchronizer: +class ProcessSynchronizer(Synchronizer): """Provides synchronization using file locks via the `fasteners `_ package. diff --git a/zarr/tests/test_core.py b/zarr/tests/test_core.py index f3ca73dea8..a3fde4050d 100644 --- a/zarr/tests/test_core.py +++ b/zarr/tests/test_core.py @@ -3,7 +3,7 @@ import sys import pickle import shutil -from typing import Any, Literal, Optional, Tuple, Union +from typing import Any, Literal, Optional, Tuple, Union, Sequence import unittest from itertools import zip_longest from tempfile import mkdtemp @@ -26,6 +26,7 @@ VLenUTF8, Zlib, ) +from numcodecs.abc import Codec from numcodecs.compat import ensure_bytes, ensure_ndarray from numcodecs.tests.common import greetings from numpy.testing import assert_array_almost_equal, assert_array_equal @@ -73,6 +74,7 @@ from zarr.tests.test_storage_v3 import DummyStorageTransfomer from zarr.util import buffer_size from zarr.tests.util import abs_container, skip_test_env_var, have_fsspec, mktemp +from zarr.types import DIMENSION_SEPARATOR # noinspection PyMethodMayBeStatic @@ -82,8 +84,8 @@ class TestArray: root = "" path = "" compressor = Zlib(level=1) - filters = None - dimension_separator: Literal["/", ".", None] = None + filters: Optional[Sequence[Codec]] = None + dimension_separator: Optional[DIMENSION_SEPARATOR] = None cache_metadata = True cache_attrs = True partial_decompress: bool = False diff --git a/zarr/types.py b/zarr/types.py new file mode 100644 index 0000000000..1de270f25c --- /dev/null +++ b/zarr/types.py @@ -0,0 +1,13 @@ +from typing import Literal, Protocol, Union + +ZARR_VERSION = Literal[2, 3] +DIMENSION_SEPARATOR = Literal[".", "/"] +MEMORY_ORDER = Literal["C", "F"] + + +PathLike = Union[str, bytes, None] + + +class MetaArray(Protocol): + def __array_function__(self, func, types, args, kwargs): + ... 
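Since ``Synchronizer`` above is declared as a ``typing.Protocol``, conforming classes are matched structurally rather than by inheritance. A minimal sketch of a hypothetical synchronizer that would satisfy the protocol (illustrative only, not part of this patch):

    from threading import Lock

    class DictSynchronizer:
        # Structurally satisfies zarr.sync.Synchronizer: only __getitem__
        # is required, returning a lock object for the given item.
        def __init__(self):
            self._locks = {}

        def __getitem__(self, item):
            return self._locks.setdefault(item, Lock())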
diff --git a/zarr/util.py b/zarr/util.py index df1cd9d409..f97094b93a 100644 --- a/zarr/util.py +++ b/zarr/util.py @@ -31,6 +31,7 @@ from numcodecs.ndarray_like import NDArrayLike from numcodecs.registry import codec_registry from numcodecs.blosc import cbuffer_sizes, cbuffer_metainfo +from zarr.types import DIMENSION_SEPARATOR KeyType = TypeVar("KeyType") ValueType = TypeVar("ValueType") @@ -284,9 +285,9 @@ def normalize_order(order: str) -> str: return order -def normalize_dimension_separator(sep: Optional[str]) -> Optional[str]: +def normalize_dimension_separator(sep: Optional[str]) -> Optional[DIMENSION_SEPARATOR]: if sep in (".", "/", None): - return sep + return cast(Optional[DIMENSION_SEPARATOR], sep) else: raise ValueError("dimension_separator must be either '.' or '/', found: %r" % sep) From 12abd4e434e816e9b8f19b1ceb89438fa4269737 Mon Sep 17 00:00:00 2001 From: David Stansby Date: Mon, 18 Dec 2023 11:57:44 +0000 Subject: [PATCH 169/213] Remove unused mypy ignore comments (#1602) Co-authored-by: Davis Bennett --- pyproject.toml | 5 +++-- zarr/_storage/store.py | 4 ++-- zarr/_storage/v3_storage_transformers.py | 2 +- zarr/meta.py | 4 ++-- zarr/storage.py | 12 ++++++------ zarr/util.py | 2 +- 6 files changed, 15 insertions(+), 14 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 4b7fef6003..33e8573830 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -120,9 +120,10 @@ exclude = ''' ''' [tool.mypy] -python_version = "3.8" ignore_missing_imports = true -follow_imports = "silent" +warn_unused_configs = true +warn_redundant_casts = true +warn_unused_ignores = true [tool.pytest.ini_options] doctest_optionflags = [ diff --git a/zarr/_storage/store.py b/zarr/_storage/store.py index 09f0b68602..36b596769a 100644 --- a/zarr/_storage/store.py +++ b/zarr/_storage/store.py @@ -642,10 +642,10 @@ def _rmdir_from_keys_v3(store: StoreV3, path: str = "") -> None: sfx = _get_metadata_suffix(store) array_meta_file = meta_dir + ".array" + sfx if array_meta_file in store: - store.erase(array_meta_file) # type: ignore + store.erase(array_meta_file) group_meta_file = meta_dir + ".group" + sfx if group_meta_file in store: - store.erase(group_meta_file) # type: ignore + store.erase(group_meta_file) def _listdir_from_keys(store: BaseStore, path: Optional[str] = None) -> List[str]: diff --git a/zarr/_storage/v3_storage_transformers.py b/zarr/_storage/v3_storage_transformers.py index ff31a7281c..3afc3823a3 100644 --- a/zarr/_storage/v3_storage_transformers.py +++ b/zarr/_storage/v3_storage_transformers.py @@ -351,7 +351,7 @@ def erase_prefix(self, prefix): def rmdir(self, path=None): path = normalize_storage_path(path) - _rmdir_from_keys_v3(self, path) # type: ignore + _rmdir_from_keys_v3(self, path) def __contains__(self, key): if self._is_data_key(key): diff --git a/zarr/meta.py b/zarr/meta.py index f23889f3ea..d9797e4754 100644 --- a/zarr/meta.py +++ b/zarr/meta.py @@ -234,8 +234,8 @@ def decode_fill_value(cls, v: Any, dtype: np.dtype, object_codec: Any = None) -> return np.array(v, dtype=dtype)[()] elif dtype.kind in "c": v = ( - cls.decode_fill_value(v[0], dtype.type().real.dtype), # type: ignore - cls.decode_fill_value(v[1], dtype.type().imag.dtype), # type: ignore + cls.decode_fill_value(v[0], dtype.type().real.dtype), + cls.decode_fill_value(v[1], dtype.type().imag.dtype), ) v = v[0] + 1j * v[1] return np.array(v, dtype=dtype)[()] diff --git a/zarr/storage.py b/zarr/storage.py index 1c3b39862a..aa27e98e6f 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -206,7 +206,7 @@ def 
rmdir(store: StoreLike, path: Path = None): store_version = getattr(store, "_store_version", 2) if hasattr(store, "rmdir") and store.is_erasable(): # type: ignore # pass through - store.rmdir(path) # type: ignore + store.rmdir(path) else: # slow version, delete one key at a time if store_version == 2: @@ -236,7 +236,7 @@ def listdir(store: BaseStore, path: Path = None): path = normalize_storage_path(path) if hasattr(store, "listdir"): # pass through - return store.listdir(path) # type: ignore + return store.listdir(path) else: # slow version, iterate through all keys warnings.warn( @@ -289,7 +289,7 @@ def getsize(store: BaseStore, path: Path = None) -> int: if hasattr(store, "getsize"): # pass through path = normalize_storage_path(path) - return store.getsize(path) # type: ignore + return store.getsize(path) elif isinstance(store, MutableMapping): return _getsize(store, path) else: @@ -627,7 +627,7 @@ def _init_array_metadata( key = _prefix_to_array_key(store, _path_to_prefix(path)) if hasattr(store, "_metadata_class"): - store[key] = store._metadata_class.encode_array_metadata(meta) # type: ignore + store[key] = store._metadata_class.encode_array_metadata(meta) else: store[key] = encode_array_metadata(meta) @@ -730,10 +730,10 @@ def _init_group_metadata( if store_version == 3: meta = {"attributes": {}} # type: ignore else: - meta = {} # type: ignore + meta = {} key = _prefix_to_group_key(store, _path_to_prefix(path)) if hasattr(store, "_metadata_class"): - store[key] = store._metadata_class.encode_group_metadata(meta) # type: ignore + store[key] = store._metadata_class.encode_group_metadata(meta) else: store[key] = encode_group_metadata(meta) diff --git a/zarr/util.py b/zarr/util.py index f97094b93a..54c389db69 100644 --- a/zarr/util.py +++ b/zarr/util.py @@ -183,7 +183,7 @@ def normalize_chunks(chunks: Any, shape: Tuple[int, ...], typesize: int) -> Tupl def normalize_dtype(dtype: Union[str, np.dtype], object_codec) -> Tuple[np.dtype, Any]: # convenience API for object arrays if inspect.isclass(dtype): - dtype = dtype.__name__ # type: ignore + dtype = dtype.__name__ if isinstance(dtype, str): # allow ':' to delimit class from codec arguments tokens = dtype.split(":") From c2f5f0058d25eaa6695334e399b7b3a2d23f7a10 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 19 Dec 2023 20:47:31 -0700 Subject: [PATCH 170/213] Bump actions/setup-python from 4.7.1 to 5.0.0 (#1605) Bumps [actions/setup-python](https://github.com/actions/setup-python) from 4.7.1 to 5.0.0. - [Release notes](https://github.com/actions/setup-python/releases) - [Commits](https://github.com/actions/setup-python/compare/v4.7.1...v5.0.0) --- updated-dependencies: - dependency-name: actions/setup-python dependency-type: direct:production update-type: version-update:semver-major ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Joe Hamman --- .github/workflows/releases.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/releases.yml b/.github/workflows/releases.yml index 3bd25bfbf7..8d8512294d 100644 --- a/.github/workflows/releases.yml +++ b/.github/workflows/releases.yml @@ -16,7 +16,7 @@ jobs: submodules: true fetch-depth: 0 - - uses: actions/setup-python@v4.7.1 + - uses: actions/setup-python@v5.0.0 name: Install Python with: python-version: '3.8' From 490e0fe4e59f234cde85b103252acefa34927184 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 20 Dec 2023 22:01:40 +0100 Subject: [PATCH 171/213] Bump github/codeql-action from 2 to 3 (#1609) Bumps [github/codeql-action](https://github.com/github/codeql-action) from 2 to 3. - [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/v2...v3) --- updated-dependencies: - dependency-name: github/codeql-action dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/codeql-analysis.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 7013f1784f..bb3d433629 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -42,7 +42,7 @@ jobs: # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@v2 + uses: github/codeql-action/init@v3 with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. @@ -56,7 +56,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). # If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@v2 + uses: github/codeql-action/autobuild@v3 # ℹ️ Command-line programs to run using the OS shell. 
# 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun @@ -69,4 +69,4 @@ jobs: # ./location_of_script_within_repo/buildscript.sh - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@v2 + uses: github/codeql-action/analyze@v3 From b5f79ddfe7821cc9387fc4084bd7672f59215400 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 20 Dec 2023 14:38:04 -0700 Subject: [PATCH 172/213] chore: update pre-commit hooks (#1448) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore: update pre-commit hooks updates: - https://github.com/charliermarsh/ruff-pre-commit → https://github.com/astral-sh/ruff-pre-commit - [github.com/astral-sh/ruff-pre-commit: v0.0.224 → v0.1.8](https://github.com/astral-sh/ruff-pre-commit/compare/v0.0.224...v0.1.8) - [github.com/psf/black: 23.10.1 → 23.12.0](https://github.com/psf/black/compare/23.10.1...23.12.0) - [github.com/codespell-project/codespell: v2.2.5 → v2.2.6](https://github.com/codespell-project/codespell/compare/v2.2.5...v2.2.6) - [github.com/pre-commit/pre-commit-hooks: v4.4.0 → v4.5.0](https://github.com/pre-commit/pre-commit-hooks/compare/v4.4.0...v4.5.0) - [github.com/pre-commit/mirrors-mypy: v1.3.0 → v1.7.1](https://github.com/pre-commit/mirrors-mypy/compare/v1.3.0...v1.7.1) * Attempt to fix ruff * Use isinstance --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Josh Moore --- .pre-commit-config.yaml | 14 ++++++-------- zarr/core.py | 2 +- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 029dcda58f..b4e7ab3ccf 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -6,27 +6,25 @@ default_stages: [commit, push] default_language_version: python: python3 repos: - - repo: https://github.com/charliermarsh/ruff-pre-commit + - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. - rev: 'v0.0.224' + rev: 'v0.1.8' hooks: - id: ruff - # Respect `exclude` and `extend-exclude` settings. - args: ["--force-exclude"] - repo: https://github.com/psf/black - rev: 23.10.1 + rev: 23.12.0 hooks: - id: black - repo: https://github.com/codespell-project/codespell - rev: v2.2.5 + rev: v2.2.6 hooks: - id: codespell - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.4.0 + rev: v4.5.0 hooks: - id: check-yaml - repo: https://github.com/pre-commit/mirrors-mypy - rev: v1.3.0 + rev: v1.7.1 hooks: - id: mypy files: zarr diff --git a/zarr/core.py b/zarr/core.py index c07a31e95f..d22a9d79c3 100644 --- a/zarr/core.py +++ b/zarr/core.py @@ -2536,7 +2536,7 @@ def hexdigest(self, hashname="sha1"): checksum = binascii.hexlify(self.digest(hashname=hashname)) # This is a bytes object on Python 3 and we want a str. 
- if type(checksum) is not str: + if not isinstance(checksum, str): checksum = checksum.decode("utf8") return checksum From e09ee149c4525213b07ace9eaf914ca9f552a703 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 26 Dec 2023 10:01:20 -0700 Subject: [PATCH 173/213] chore: update pre-commit hooks (#1618) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/astral-sh/ruff-pre-commit: v0.1.8 → v0.1.9](https://github.com/astral-sh/ruff-pre-commit/compare/v0.1.8...v0.1.9) - [github.com/psf/black: 23.12.0 → 23.12.1](https://github.com/psf/black/compare/23.12.0...23.12.1) - [github.com/pre-commit/mirrors-mypy: v1.7.1 → v1.8.0](https://github.com/pre-commit/mirrors-mypy/compare/v1.7.1...v1.8.0) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index b4e7ab3ccf..80d3439dc7 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -8,11 +8,11 @@ default_language_version: repos: - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. - rev: 'v0.1.8' + rev: 'v0.1.9' hooks: - id: ruff - repo: https://github.com/psf/black - rev: 23.12.0 + rev: 23.12.1 hooks: - id: black - repo: https://github.com/codespell-project/codespell @@ -24,7 +24,7 @@ repos: hooks: - id: check-yaml - repo: https://github.com/pre-commit/mirrors-mypy - rev: v1.7.1 + rev: v1.8.0 hooks: - id: mypy files: zarr From cd139895b45a2d7d347c29b703aa2f6775a1e7c9 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 26 Dec 2023 10:24:59 -0700 Subject: [PATCH 174/213] Bump fsspec from 2023.12.1 to 2023.12.2 (#1606) * Bump fsspec from 2023.12.1 to 2023.12.2 Bumps [fsspec](https://github.com/fsspec/filesystem_spec) from 2023.12.1 to 2023.12.2. - [Commits](https://github.com/fsspec/filesystem_spec/compare/2023.12.1...2023.12.2) --- updated-dependencies: - dependency-name: fsspec dependency-type: direct:development update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] * Update requirements_dev_optional.txt --------- Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Joe Hamman --- requirements_dev_optional.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements_dev_optional.txt b/requirements_dev_optional.txt index 13385a243a..5916083cfc 100644 --- a/requirements_dev_optional.txt +++ b/requirements_dev_optional.txt @@ -18,6 +18,6 @@ pytest-cov==4.1.0 pytest-doctestplus==1.0.0 pytest-timeout==2.2.0 h5py==3.10.0 -fsspec==2023.12.1 -s3fs==2023.12.1 +fsspec==2023.12.2 +s3fs==2023.12.2 moto[server]>=4.0.8 From 5fb420fcfbabd484e663c78e55d04edd4ac9e486 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 27 Dec 2023 11:32:02 +0100 Subject: [PATCH 175/213] Bump pytest-doctestplus from 1.0.0 to 1.1.0 (#1619) Bumps [pytest-doctestplus](https://github.com/scientific-python/pytest-doctestplus) from 1.0.0 to 1.1.0. 
- [Release notes](https://github.com/scientific-python/pytest-doctestplus/releases) - [Changelog](https://github.com/scientific-python/pytest-doctestplus/blob/main/CHANGES.rst) - [Commits](https://github.com/scientific-python/pytest-doctestplus/compare/v1.0.0...v1.1.0) --- updated-dependencies: - dependency-name: pytest-doctestplus dependency-type: direct:development update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- requirements_dev_optional.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements_dev_optional.txt b/requirements_dev_optional.txt index 5916083cfc..b4de5fd515 100644 --- a/requirements_dev_optional.txt +++ b/requirements_dev_optional.txt @@ -15,7 +15,7 @@ pymongo==4.6.1 # optional test requirements coverage pytest-cov==4.1.0 -pytest-doctestplus==1.0.0 +pytest-doctestplus==1.1.0 pytest-timeout==2.2.0 h5py==3.10.0 fsspec==2023.12.2 From 435a7ca7306fc31dc880ed23631e3af61bf53d66 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 1 Jan 2024 11:26:58 -0700 Subject: [PATCH 176/213] Bump pytest from 7.4.3 to 7.4.4 (#1622) --- requirements_dev_minimal.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements_dev_minimal.txt b/requirements_dev_minimal.txt index afea816d87..94d3fff8a6 100644 --- a/requirements_dev_minimal.txt +++ b/requirements_dev_minimal.txt @@ -5,4 +5,4 @@ numcodecs==0.12.1 msgpack-python==0.5.6 setuptools-scm==8.0.4 # test requirements -pytest==7.4.3 +pytest==7.4.4 From 6961fa9fb87ed73c85f979d84bfe65238933b5ae Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 8 Jan 2024 20:31:31 -0800 Subject: [PATCH 177/213] chore: update pre-commit hooks (#1626) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/astral-sh/ruff-pre-commit: v0.1.9 → v0.1.11](https://github.com/astral-sh/ruff-pre-commit/compare/v0.1.9...v0.1.11) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 80d3439dc7..340366ef53 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -8,7 +8,7 @@ default_language_version: repos: - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. 
- rev: 'v0.1.9' + rev: 'v0.1.11' hooks: - id: ruff - repo: https://github.com/psf/black From ee518358d888caaabb6157c0498cb231d2ddb7a7 Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Wed, 10 Jan 2024 06:47:36 -0800 Subject: [PATCH 178/213] Create TEAM.md (#1628) --- TEAM.md | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 TEAM.md diff --git a/TEAM.md b/TEAM.md new file mode 100644 index 0000000000..a95885ebe5 --- /dev/null +++ b/TEAM.md @@ -0,0 +1,25 @@ +## Active core-developers +- @joshmoore (Josh Moore) +- @jni (Juan Nunez-Iglesias) +- @rabernat (Ryan Abernathey) +- @jhamman (Joe Hamman) +- @d-v-b (Davis Bennett) +- @jakirkham (jakirkham) +- @martindurant (Martin Durant) + +## Emeritus core-developers +- @alimanfoo (Alistair Miles) +- @shoyer (Stephan Hoyer) +- @ryan-williams (Ryan Williams) +- @jrbourbeau (James Bourbeau) +- @mzjp2 (Zain Patel) +- @grlee77 (Gregory Lee) + +## Former core-developers +- @jeromekelleher (Jerome Kelleher) +- @tjcrone (Tim Crone) +- @funkey (Jan Funke) +- @shikharsg +- @Carreau (Matthias Bussonnier) +- @dazzag24 +- @WardF (Ward Fisher) From c7d66b4f8d7e9a4d50e5e01e5484ff8df612cb51 Mon Sep 17 00:00:00 2001 From: Josh Moore Date: Wed, 10 Jan 2024 20:52:42 +0100 Subject: [PATCH 179/213] Drop python 3.8 and numpy 1.20 (#1557) * Drop 3.8 and add 3.12 * Try removing line_profiler * Also bump the minimal numpy to 1.21 * Drop 3.12 again * Revert "Try removing line_profiler" This reverts commit 837854bec99a9d25aece2ead9666f01690d228cc. * Update release.rst --------- Co-authored-by: Joe Hamman Co-authored-by: jakirkham --- .github/ISSUE_TEMPLATE/bug_report.yml | 2 +- .github/workflows/python-package.yml | 8 ++++---- .github/workflows/releases.yml | 2 +- .github/workflows/windows-testing.yml | 2 +- docs/release.rst | 3 +++ environment.yml | 2 +- pyproject.toml | 5 ++--- 7 files changed, 13 insertions(+), 11 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml index ba05f23fcc..ec98af029e 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.yml +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -27,7 +27,7 @@ body: attributes: label: Python Version description: Version of Python interpreter - placeholder: 3.8.5, 3.9, 3.10, etc. + placeholder: 3.9, 3.10, 3.11, etc. 
validations: required: true - type: input diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 0c3c49d78d..d74df9ce67 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -15,13 +15,13 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ['3.8', '3.9', '3.10', '3.11'] - numpy_version: ['>=1.22.0', '==1.20.*'] + python-version: ['3.9', '3.10', '3.11'] + numpy_version: ['>=1.22.0', '==1.21.*'] exclude: - python-version: '3.10' - numpy_version: '==1.20.*' + numpy_version: '==1.21.*' - python-version: '3.11' - numpy_version: '==1.20.*' + numpy_version: '==1.21.*' services: redis: image: redis diff --git a/.github/workflows/releases.yml b/.github/workflows/releases.yml index 8d8512294d..31a7e2770c 100644 --- a/.github/workflows/releases.yml +++ b/.github/workflows/releases.yml @@ -19,7 +19,7 @@ jobs: - uses: actions/setup-python@v5.0.0 name: Install Python with: - python-version: '3.8' + python-version: '3.9' - name: Install PyBuild run: | diff --git a/.github/workflows/windows-testing.yml b/.github/workflows/windows-testing.yml index eeee5b704d..5c3252c0ba 100644 --- a/.github/workflows/windows-testing.yml +++ b/.github/workflows/windows-testing.yml @@ -16,7 +16,7 @@ jobs: strategy: fail-fast: True matrix: - python-version: ['3.8', '3.9', '3.10', '3.11'] + python-version: ['3.9', '3.10', '3.11'] steps: - uses: actions/checkout@v4 with: diff --git a/docs/release.rst b/docs/release.rst index c18e0b8c20..a3e0831ba4 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -49,6 +49,9 @@ Docs Maintenance ~~~~~~~~~~~ +* Drop Python 3.8 and NumPy 1.20 + By :user:`Josh Moore `; :issue:`1557`. + * Cache result of ``FSStore._fsspec_installed()``. By :user:`Janick Martinez Esturo ` :issue:`1581`. 
diff --git a/environment.yml b/environment.yml index dc99507427..ff2f9eedef 100644 --- a/environment.yml +++ b/environment.yml @@ -4,7 +4,7 @@ channels: dependencies: - wheel - numcodecs >= 0.6.4 - - numpy >= 1.20 + - numpy >= 1.21 - pip - pip: - asciitree diff --git a/pyproject.toml b/pyproject.toml index 33e8573830..a85e49e82c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,10 +10,10 @@ readme = { file = "README.md", content-type = "text/markdown" } maintainers = [ { name = "Alistair Miles", email = "alimanfoo@googlemail.com" } ] -requires-python = ">=3.8" +requires-python = ">=3.9" dependencies = [ 'asciitree', - 'numpy>=1.20,!=1.21.0', + 'numpy>=1.21.1', 'fasteners', 'numcodecs>=0.10.0', ] @@ -30,7 +30,6 @@ classifiers = [ 'Topic :: Software Development :: Libraries :: Python Modules', 'Operating System :: Unix', 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.8', 'Programming Language :: Python :: 3.9', 'Programming Language :: Python :: 3.10', 'Programming Language :: Python :: 3.11', From 6ad464bb04bffa83b9665dd09caf0f8aaf6b367d Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Wed, 10 Jan 2024 11:59:46 -0800 Subject: [PATCH 180/213] Add Norman Rzepka to core-dev team (#1630) --- TEAM.md | 1 + 1 file changed, 1 insertion(+) diff --git a/TEAM.md b/TEAM.md index a95885ebe5..6a22d83d1f 100644 --- a/TEAM.md +++ b/TEAM.md @@ -6,6 +6,7 @@ - @d-v-b (Davis Bennett) - @jakirkham (jakirkham) - @martindurant (Martin Durant) +- @normanrz (Norman Rzepka) ## Emeritus core-developers - @alimanfoo (Alistair Miles) From a292dc43f8d0181214ded83124ebd4f85db0ff50 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 15 Jan 2024 11:14:02 -0800 Subject: [PATCH 181/213] chore: update pre-commit hooks (#1633) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/astral-sh/ruff-pre-commit: v0.1.11 → v0.1.13](https://github.com/astral-sh/ruff-pre-commit/compare/v0.1.11...v0.1.13) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 340366ef53..7d1f9254ae 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -8,7 +8,7 @@ default_language_version: repos: - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. - rev: 'v0.1.11' + rev: 'v0.1.13' hooks: - id: ruff - repo: https://github.com/psf/black From 68c87bb51d922487647fa6188392caf8c1d9a83c Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 15 Jan 2024 20:33:42 +0100 Subject: [PATCH 182/213] Bump actions/download-artifact from 3 to 4 (#1611) * Bump actions/download-artifact from 3 to 4 Bumps [actions/download-artifact](https://github.com/actions/download-artifact) from 3 to 4. - [Release notes](https://github.com/actions/download-artifact/releases) - [Commits](https://github.com/actions/download-artifact/compare/v3...v4) --- updated-dependencies: - dependency-name: actions/download-artifact dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] * Also bump upload-artifact see https://github.com/actions/download-artifact?tab=readme-ov-file#breaking-changes > Downloading artifacts that were created from action/upload-artifact@v3 and below are not supported. 
--------- Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Joe Hamman Co-authored-by: Josh Moore --- .github/workflows/releases.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/releases.yml b/.github/workflows/releases.yml index 31a7e2770c..250c6112c8 100644 --- a/.github/workflows/releases.yml +++ b/.github/workflows/releases.yml @@ -36,7 +36,7 @@ jobs: else echo "All seem good" fi - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 with: name: releases path: dist @@ -45,7 +45,7 @@ jobs: needs: [build_artifacts] runs-on: ubuntu-latest steps: - - uses: actions/download-artifact@v3 + - uses: actions/download-artifact@v4 with: name: releases path: dist @@ -60,7 +60,7 @@ jobs: runs-on: ubuntu-latest if: github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/v') steps: - - uses: actions/download-artifact@v3 + - uses: actions/download-artifact@v4 with: name: releases path: dist From 1d56da0eb54f64840b1fb0f42c72622233f2f1f6 Mon Sep 17 00:00:00 2001 From: Jeff Peck Date: Tue, 16 Jan 2024 07:00:17 -0500 Subject: [PATCH 183/213] Update tutorial.rst to include section about accessing Zip Files on S3 (#1615) * Update tutorial.rst to include section about accessing Zip Files on S3 Per discussion here, add information about about accessing zip files on s3: https://github.com/zarr-developers/zarr-python/discussions/1613 * Update release.rst * Implement d-v-b's suggestions --------- Co-authored-by: Davis Bennett Co-authored-by: Josh Moore --- docs/release.rst | 2 ++ docs/tutorial.rst | 25 +++++++++++++++++++++++++ 2 files changed, 27 insertions(+) diff --git a/docs/release.rst b/docs/release.rst index a3e0831ba4..ab74a3debd 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -45,6 +45,8 @@ Docs * Minor tweak to advanced indexing tutorial examples. By :user:`Ross Barnowski ` :issue:`1550`. +* Added section about accessing zip files that are on s3. + By :user:`Jeff Peck ` :issue:`1613`. Maintenance ~~~~~~~~~~~ diff --git a/docs/tutorial.rst b/docs/tutorial.rst index 4099bac1c8..351eef064a 100644 --- a/docs/tutorial.rst +++ b/docs/tutorial.rst @@ -1000,6 +1000,31 @@ separately from Zarr. .. _tutorial_copy: +Accessing Zip Files on S3 +~~~~~~~~~~~~~~~~~~~~~~~~~ + +The built-in `ZipStore` will only work with paths on the local file-system, however +it is also possible to access ``.zarr.zip`` data on the cloud. Here is an example of +accessing a zipped Zarr file on s3: + + >>> s3_path = "s3://path/to/my.zarr.zip" + >>> + >>> s3 = s3fs.S3FileSystem() + >>> f = s3.open(s3_path) + >>> fs = ZipFileSystem(f, mode="r") + >>> store = FSMap("", fs, check=False) + >>> + >>> # cache is optional, but may be a good idea depending on the situation + >>> cache = zarr.storage.LRUStoreCache(store, max_size=2**28) + >>> z = zarr.group(store=cache) + +This store can also be generated with ``fsspec``'s handler chaining, like so: + + >>> store = zarr.storage.FSStore(url=f"zip::{s3_path}", mode="r") + +This can be especially useful if you have a very large ``.zarr.zip`` file on s3 +and only need to access a small portion of it. 
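+
+The example above assumes ``s3fs``, ``fsspec`` and ``zarr`` are already
+imported. A self-contained variant (the bucket path here is a placeholder)
+would begin with:
+
+    >>> import s3fs
+    >>> import zarr
+    >>> from fsspec.implementations.zip import ZipFileSystem
+    >>> from fsspec.mapping import FSMap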
+
 Consolidating metadata
 ~~~~~~~~~~~~~~~~~~~~~~

From 8ac8553f25eb338d6044d1232b4a643036979486 Mon Sep 17 00:00:00 2001
From: Joe Hamman
Date: Tue, 16 Jan 2024 09:14:10 -0800
Subject: [PATCH 184/213] doc(v3): add v3 roadmap and design document (#1583)

* doc(v3): add v3 roadmap and design document

* Update v3-roadmap-and-design.md

* updates after latest round of reviews

* Update v3-roadmap-and-design.md

Co-authored-by: Norman Rzepka

* Update v3-roadmap-and-design.md

Co-authored-by: Sanket Verma

---------

Co-authored-by: Norman Rzepka
Co-authored-by: Sanket Verma
---
 v3-roadmap-and-design.md | 429 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 429 insertions(+)
 create mode 100644 v3-roadmap-and-design.md

diff --git a/v3-roadmap-and-design.md b/v3-roadmap-and-design.md
new file mode 100644
index 0000000000..696799e56f
--- /dev/null
+++ b/v3-roadmap-and-design.md
@@ -0,0 +1,429 @@
+# Zarr Python Roadmap
+
+- Status: draft
+- Author: Joe Hamman
+- Created On: October 31, 2023
+- Input from:
+  - Davis Bennett / @d-v-b
+  - Norman Rzepka / @normanrz
+  - Deepak Cherian @dcherian
+  - Brian Davis / @monodeldiablo
+  - Oliver McCormack / @olimcc
+  - Ryan Abernathey / @rabernat
+  - Jack Kelly / @JackKelly
+  - Martin Durant / @martindurant
+
+## Introduction
+
+This document lays out a design proposal for version 3.0 of the [Zarr-Python](https://zarr.readthedocs.io/en/stable/) package. A specific focus of the design is to bring Zarr-Python's API up to date with the [Zarr V3 specification](https://zarr-specs.readthedocs.io/en/latest/v3/core/v3.0.html), with the hope of enabling the development of the many features and extensions that motivated the V3 Spec. The ideas presented here are expected to result in a major release of Zarr-Python (version 3.0) including a significant number of breaking API changes.
+For clarity, "V3" will be used to describe the version of the Zarr specification and "3.0" will be used to describe the release tag of the Zarr-Python project.
+
+### Current status of V3 in Zarr-Python
+
+During the development of the V3 Specification, a [prototype implementation](https://github.com/zarr-developers/zarr-python/pull/898) was added to the Zarr-Python library. Since that implementation, the V3 spec evolved in significant ways and, as a result, the Zarr-Python library is now out of sync with the approved spec. Downstream libraries (e.g. [Xarray](https://github.com/pydata/xarray)) have added support for this implementation and will need to migrate to the accepted spec when it's available in Zarr-Python.
+
+## Goals
+
+- Provide a complete implementation of Zarr V3 through the Zarr-Python API
+- Clear the way for exciting extensions / ZEPs (i.e. [sharding](https://zarr-specs.readthedocs.io/en/latest/v3/codecs/sharding-indexed/v1.0.html), [variable chunking](https://zarr.dev/zeps/draft/ZEP0003.html), etc.)
+- Provide a developer API that can be used to implement and register V3 extensions
+- Improve the performance of Zarr-Python by streamlining the interface between the Store layer and higher level APIs (e.g. Groups and Arrays)
+- Clean up the internal and user facing APIs
+- Improve code quality and robustness (e.g. achieve 100% type hint coverage)
+- Align the Zarr-Python array API with the [array API Standard](https://data-apis.org/array-api/latest/)
+
+## Examples of what 3.0 will enable
+1. Reading and writing V3 spec-compliant groups and arrays
+2. V3 extensions including sharding and variable chunking.
+3. Improved performance by leveraging concurrency when creating/reading/writing to stores (imagine a `create_hierarchy(zarr_objects)` function).
+4. User-developed extensions (e.g. storage-transformers) can be registered with Zarr-Python at runtime
+
+## Non-goals (of this document)
+
+- Implementation of any unaccepted Zarr V3 extensions
+- Major revisions to the Zarr V3 spec
+
+## Requirements
+
+1. Read and write spec compliant V2 and V3 data
+2. Limit unnecessary traffic to/from the store
+3. Cleanly define the Array/Group/Store abstractions
+4. Cleanly define how V2 will be supported going forward
+5. Provide a clear roadmap to help users upgrade to 3.0
+6. Developer tools / hooks for registering extensions
+
+## Design
+
+### Async API
+
+Zarr-Python is an IO library. As such, supporting concurrent action against the storage layer is critical to achieving acceptable performance. Zarr-Python 2 was not designed with asynchronous computation in mind and as a result has struggled to effectively leverage the benefits of concurrency. At one point, `getitems` and `setitems` support was added to the Zarr store model but that is only used for operating on a set of chunks in a single variable.
+
+With Zarr-Python 3.0, we have the opportunity to revisit this design. The proposal here is as follows:
+
+1. The `Store` interface will be entirely async.
+2. On top of the async `Store` interface, we will provide an `AsyncArray` and `AsyncGroup` interface.
+3. Finally, the primary user facing API will be synchronous `Array` and `Group` classes that wrap the async equivalents.
+
+**Examples**
+
+- **Store**
+
+    ```python
+    class Store:
+        ...
+        async def get(self, key: str) -> bytes:
+            ...
+        async def get_partial_values(self, key_ranges: List[Tuple[str, Tuple[int, Optional[int]]]]) -> bytes:
+            ...
+        # (no sync interface here)
+    ```
+- **Array**
+
+    ```python
+    class AsyncArray:
+        ...
+
+        async def getitem(self, selection: Selection) -> np.ndarray:
+            # the core logic for getitem goes here
+
+    class Array:
+        _async_array: AsyncArray
+
+        def __getitem__(self, selection: Selection) -> np.ndarray:
+            return sync(self._async_array.getitem(selection))
+    ```
+- **Group**
+
+    ```python
+    class AsyncGroup:
+        ...
+
+        async def create_group(self, path: str, **kwargs) -> AsyncGroup:
+            # the core logic for create_group goes here
+
+    class Group:
+        _async_group: AsyncGroup
+
+        def create_group(self, path: str, **kwargs) -> Group:
+            return sync(self._async_group.create_group(path, **kwargs))
+    ```
+**Internal Synchronization API**
+
+With the `Store` and core `AsyncArray`/`AsyncGroup` classes being predominantly async, Zarr-Python will need an internal mechanism to provide a synchronous API. The proposal here is to use the approach in [fsspec](https://github.com/fsspec/filesystem_spec/blob/master/fsspec/asyn.py) to provide a high-level `sync` function that takes an `awaitable` and runs it in its managed IO Loop / thread.
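+
+A minimal sketch of such a helper (illustrative only; the module-level loop and the names used here are assumptions rather than the final design):
+
+```python
+# Run a single event loop on a daemon thread and submit coroutines to it
+# from synchronous code, similar in spirit to fsspec's `sync` helper.
+import asyncio
+import threading
+from typing import Any, Coroutine
+
+_loop = asyncio.new_event_loop()
+_io_thread = threading.Thread(target=_loop.run_forever, daemon=True)
+_io_thread.start()
+
+
+def sync(coro: Coroutine[Any, Any, Any]) -> Any:
+    """Run *coro* on the shared IO loop and block until it finishes."""
+    future = asyncio.run_coroutine_threadsafe(coro, _loop)
+    return future.result()
+```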
+
+**FAQ**
+1. Why two levels of Arrays/groups?
+    a. First, this is an intentional decision and departure from the current Zarrita implementation
+    b. The idea is that users rarely want to mix interfaces. Either they are working within an async context (currently quite rare) or they are in a typical synchronous context.
+    c. Splitting the two will allow us to clearly define behavior on the `AsyncObj` and simply wrap it in the `SyncObj`.
+2. What if a store only has a synchronous backend?
+    a. First off, this is expected to be a fairly rare occurrence. Most storage backends have async interfaces.
+    b. But in the event a storage backend doesn't have an async interface, there is nothing wrong with putting synchronous code in `async` methods. There are approaches to enabling concurrent action through wrappers like AsyncIO's `loop.run_in_executor` ([ref 1](https://stackoverflow.com/questions/38865050/is-await-in-python3-cooperative-multitasking), [ref 2](https://stackoverflow.com/a/43263397/732596), [ref 3](https://bbc.github.io/cloudfit-public-docs/asyncio/asyncio-part-5.html), [ref 4](https://docs.python.org/3/library/asyncio-eventloop.html#asyncio.loop.run_in_executor)).
+3. Will Zarr help manage the async contexts encouraged by some libraries (e.g. [AioBotoCore](https://aiobotocore.readthedocs.io/en/latest/tutorial.html#using-botocore))?
+    a. Many async IO libraries require entering an async context before interacting with the API. We expect some experimentation to be needed here but the initial design will follow something close to what fsspec does ([example in s3fs](https://github.com/fsspec/s3fs/blob/949442693ec940b35cda3420c17a864fbe426567/s3fs/core.py#L527)).
+4. Why not provide a synchronous Store interface?
+    a. We could, but this design is simpler. It would mean supporting it in the `AsyncGroup` and `AsyncArray` classes, which may be more trouble than it's worth. Storage backends that do not have an async API will be encouraged to wrap blocking calls in an async wrapper (e.g. `loop.run_in_executor`).
+
+### Store API
+
+The `Store` API is specified directly in the V3 specification. All V3 stores should implement this abstract API, omitting Write and List support as needed. As described above, all stores will be expected to expose the required methods as async methods.
+
+**Example**
+
+```python
+class ReadWriteStore:
+    ...
+    async def get(self, key: str) -> bytes:
+        ...
+
+    async def get_partial_values(self, key_ranges: List[Tuple[str, int, int]]) -> bytes:
+        ...
+
+    async def set(self, key: str, value: Union[bytes, bytearray, memoryview]) -> None:
+        ...  # required for writable stores
+
+    async def set_partial_values(self, key_start_values: List[Tuple[str, int, Union[bytes, bytearray, memoryview]]]) -> None:
+        ...  # required for writable stores
+
+    async def list(self) -> List[str]:
+        ...  # required for listable stores
+
+    async def list_prefix(self, prefix: str) -> List[str]:
+        ...  # required for listable stores
+
+    async def list_dir(self, prefix: str) -> List[str]:
+        ...  # required for listable stores
+
+    # additional (optional methods)
+    async def getsize(self, prefix: str) -> int:
+        ...
+
+    async def rename(self, src: str, dest: str) -> None:
+        ...
+
+```
+
+Recognizing that there are many Zarr applications today that rely on the `MutableMapping` interface supported by Zarr-Python 2, a wrapper store will be developed to allow existing stores to plug directly into this API.
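+
+As a sketch (the class name and method list here are illustrative assumptions, not settled API), such a wrapper could look like:
+
+```python
+# Adapt an existing MutableMapping-style Zarr-Python 2 store to the async
+# Store interface by delegating each coroutine to the wrapped mapping.
+from typing import List, MutableMapping
+
+
+class MutableMappingStoreWrapper:
+    def __init__(self, mapping: MutableMapping[str, bytes]) -> None:
+        self._mapping = mapping
+
+    async def get(self, key: str) -> bytes:
+        return self._mapping[key]
+
+    async def set(self, key: str, value: bytes) -> None:
+        self._mapping[key] = value
+
+    async def list(self) -> List[str]:
+        return list(self._mapping)
+```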
+
+### Array API
+
+The user facing array interface will implement a subset of the [Array API Standard](https://data-apis.org/array-api/latest/). Most of the computational parts of the Array API Standard don't fit into Zarr right now. That's okay. What matters most is that we ensure we can give downstream applications a compliant API.
+
+*Note, Zarr already does most of this so this is more about formalizing the relationship than a substantial change in API.*
+
+| | Included | Not Included | Unknown / Maybe possible? |
+| --- | --- | --- | --- |
+| Attributes | `dtype` | `mT` | `device` |
+| | `ndim` | `T` | |
+| | `shape` | | |
+| | `size` | | |
+| Methods | `__getitem__` | `__array_namespace__` | `to_device` |
+| | `__setitem__` | `__abs__` | `__bool__` |
+| | `__eq__` | `__add__` | `__complex__` |
+| | `__bool__` | `__and__` | `__dlpack__` |
+| | | `__floordiv__` | `__dlpack_device__` |
+| | | `__ge__` | `__float__` |
+| | | `__gt__` | `__index__` |
+| | | `__invert__` | `__int__` |
+| | | `__le__` | |
+| | | `__lshift__` | |
+| | | `__lt__` | |
+| | | `__matmul__` | |
+| | | `__mod__` | |
+| | | `__mul__` | |
+| | | `__ne__` | |
+| | | `__neg__` | |
+| | | `__or__` | |
+| | | `__pos__` | |
+| | | `__pow__` | |
+| | | `__rshift__` | |
+| | | `__sub__` | |
+| | | `__truediv__` | |
+| | | `__xor__` | |
+| Creation functions (`zarr.creation`) | `zeros` | | `arange` |
+| | `zeros_like` | | `asarray` |
+| | `ones` | | `eye` |
+| | `ones_like` | | `from_dlpack` |
+| | `full` | | `linspace` |
+| | `full_like` | | `meshgrid` |
+| | `empty` | | `tril` |
+| | `empty_like` | | `triu` |
+
+In addition to the core array API defined above, the Array class should have the following Zarr specific properties:
+
+- `.metadata` (see Metadata Interface below)
+- `.attrs` - (pull from metadata object)
+- `.info` - (pull from existing property †)
+
+*† In Zarr-Python 2, the info property lists the store to identify initialized chunks. By default this will be turned off in 3.0 but will be configurable.*
+
+**Indexing**
+
+Zarr-Python currently supports `__getitem__` style indexing and the special `oindex` and `vindex` indexers. These are not part of the current Array API standard (see [data-apis/array-api\#669](https://github.com/data-apis/array-api/issues/669)) but they have been [proposed as a NEP](https://numpy.org/neps/nep-0021-advanced-indexing.html). Zarr-Python will maintain these in 3.0.
+
+We are also exploring a new high-level indexing API that will enable optimized batch/concurrent loading of many chunks. We expect this to be important to enable performant loading of data in the context of sharding. See [this discussion](https://github.com/zarr-developers/zarr-python/discussions/1569) for more detail.
+
+Concurrent indexing across multiple arrays will be possible using the AsyncArray API.
+
+**Async and Sync Array APIs**
+
+Most of the logic to support Zarr Arrays will live in the `AsyncArray` class. There are a few notable differences that should be called out.
+
+| Sync Method | Async Method |
+| --- | --- |
+| `__getitem__` | `getitem` |
+| `__setitem__` | `setitem` |
+| `__eq__` | `equals` |
+
+**Metadata interface**
+
+Zarr-Python 2.* closely mirrors the V2 spec metadata schema in the Array and Group classes. In 3.0, we plan to move the underlying metadata representation to a separate interface (e.g. `Array.metadata`). This interface will return either a `V2ArrayMetadata` or `V3ArrayMetadata` object (both will inherit from a parent `ArrayMetadataABC` class). The `V2ArrayMetadata` and `V3ArrayMetadata` classes will be responsible for producing valid JSON representations of their metadata, and yielding a consistent view to the `Array` or `Group` class.
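+
+A rough sketch of this interface (the field list is an illustrative assumption, not the full metadata schema):
+
+```python
+# Both metadata classes share an abstract parent and know how to render
+# themselves as JSON-serializable dictionaries.
+from abc import ABC, abstractmethod
+from dataclasses import dataclass
+from typing import Any, Dict, Tuple
+
+
+class ArrayMetadataABC(ABC):
+    @abstractmethod
+    def to_dict(self) -> Dict[str, Any]:
+        """Return a JSON-serializable view of the metadata."""
+
+
+@dataclass
+class V2ArrayMetadata(ArrayMetadataABC):
+    shape: Tuple[int, ...]
+    chunks: Tuple[int, ...]
+    dtype: str
+
+    def to_dict(self) -> Dict[str, Any]:
+        # zarr_format identifies the spec version in the serialized form
+        return {
+            "zarr_format": 2,
+            "shape": list(self.shape),
+            "chunks": list(self.chunks),
+            "dtype": self.dtype,
+        }
+```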
+
+### Group API
+
+The main question is how closely we should follow the existing Zarr-Python implementation / `MutableMapping` interface. The table below shows the primary `Group` methods in Zarr-Python 2 and attempts to identify if and how they would be implemented in 3.0.
+
+| V2 Group Methods | `AsyncGroup` | `Group` | `h5py_compat.Group` |
+| --- | --- | --- | --- |
+| `__len__` | `length` | `__len__` | `__len__` |
+| `__iter__` | `__aiter__` | `__iter__` | `__iter__` |
+| `__contains__` | `contains` | `__contains__` | `__contains__` |
+| `__getitem__` | `getitem` | `__getitem__` | `__getitem__` |
+| `__enter__` | N/A | N/A | `__enter__` |
+| `__exit__` | N/A | N/A | `__exit__` |
+| `group_keys` | `group_keys` | `group_keys` | N/A |
+| `groups` | `groups` | `groups` | N/A |
+| `array_keys` | `array_key` | `array_keys` | N/A |
+| `arrays` | `arrays`* | `arrays` | N/A |
+| `visit` | ? | ? | `visit` |
+| `visitkeys` | ? | ? | ? |
+| `visitvalues` | ? | ? | ? |
+| `visititems` | ? | ? | `visititems` |
+| `tree` | `tree` | `tree` | `Both` |
+| `create_group` | `create_group` | `create_group` | `create_group` |
+| `require_group` | N/A | N/A | `require_group` |
+| `create_groups` | ? | ? | N/A |
+| `require_groups` | ? | ? | ? |
+| `create_dataset` | N/A | N/A | `create_dataset` |
+| `require_dataset` | N/A | N/A | `require_dataset` |
+| `create` | `create_array` | `create_array` | N/A |
+| `empty` | `empty` | `empty` | N/A |
+| `zeros` | `zeros` | `zeros` | N/A |
+| `ones` | `ones` | `ones` | N/A |
+| `full` | `full` | `full` | N/A |
+| `array` | `create_array` | `create_array` | N/A |
+| `empty_like` | `empty_like` | `empty_like` | N/A |
+| `zeros_like` | `zeros_like` | `zeros_like` | N/A |
+| `ones_like` | `ones_like` | `ones_like` | N/A |
+| `full_like` | `full_like` | `full_like` | N/A |
+| `move` | `move` | `move` | `move` |
+
+**`zarr.h5py_compat.Group`**
+
+Zarr-Python 2.* made an attempt to align its API with that of [h5py](https://docs.h5py.org/en/stable/index.html). With 3.0, we will relax this alignment in favor of providing an explicit compatibility module (`zarr.h5py_compat`). This module will expose the `Group` and `Dataset` APIs that map to Zarr-Python's `Group` and `Array` objects.
+
+### Creation API
+
+Zarr-Python 2.* bundles together the creation and serialization of Zarr objects. Zarr-Python 3.* will make it possible to create objects in memory separate from serializing them. This will specifically enable writing hierarchies of Zarr objects in a single batch step. For example:
+
+```python
+
+arr1 = Array(shape=(10, 10), path="foo/bar", dtype="i4", store=store)
+arr2 = Array(shape=(10, 10), path="foo/spam", dtype="f8", store=store)
+
+arr1.save()
+arr2.save()
+
+# or equivalently
+
+zarr.save_many([arr1, arr2])
+```
+
+*Note: this batch creation API likely needs additional design effort prior to implementation.*
+
+### Plugin API
+
+Zarr V3 was designed to be extensible at multiple layers. Zarr-Python will support these extensions through a combination of [Abstract Base Classes](https://docs.python.org/3/library/abc.html) (ABCs) and [Entrypoints](https://packaging.python.org/en/latest/specifications/entry-points/).
+
+**ABCs**
+
+Zarr V3 will expose Abstract base classes for the following objects:
+
+- `Store`, `ReadStore`, `ReadWriteStore`, `ReadListStore`, and `ReadWriteListStore`
+- `BaseArray`, `SynchronousArray`, and `AsynchronousArray`
+- `BaseGroup`, `SynchronousGroup`, and `AsynchronousGroup`
+- `Codec`, `ArrayArrayCodec`, `ArrayBytesCodec`, `BytesBytesCodec`
+
+**Entrypoints**
+
+There is more thinking to do here, but the idea is to provide entrypoints for `data type`, `chunk grid`, `chunk key encoding`, `codecs`, `storage_transformers` and `stores`. These might look something like:
+
+```
+entry_points="""
+    [zarr.codecs]
+    blosc_codec=codec_plugin:make_blosc_codec
+    zlib_codec=codec_plugin:make_zlib_codec
+"""
+```
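+
+On the consuming side, Zarr-Python could discover registered plugins with `importlib.metadata` (a sketch; only the `zarr.codecs` group name comes from the example above):
+
+```python
+# Load every codec factory registered under the "zarr.codecs" entrypoint
+# group, e.g. blosc_codec -> codec_plugin.make_blosc_codec.
+from importlib.metadata import entry_points
+
+
+def load_codec_plugins() -> dict:
+    plugins = {}
+    # the group keyword is the Python >= 3.10 form of the entry_points API
+    for ep in entry_points(group="zarr.codecs"):
+        plugins[ep.name] = ep.load()
+    return plugins
+```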
+
+### Python type hints and static analysis
+
+Target 100% Mypy coverage in 3.0 source.
+
+### Observability
+
+A persistent problem in Zarr-Python is diagnosing problems that span many parts of the stack. To address this in 3.0, we will add a basic logging framework that can be used to debug behavior at various levels of the stack. We propose to add separate loggers for the following namespaces:
+
+- `array`
+- `group`
+- `store`
+- `codec`
+
+These should be documented such that users know how to activate them and developers know how to use them when developing extensions.
+
+### Dependencies
+
+Today, Zarr-Python has the following required dependencies:
+
+```python
+dependencies = [
+    'asciitree',
+    'numpy>=1.20,!=1.21.0',
+    'fasteners',
+    'numcodecs>=0.10.0',
+]
+```
+
+What other dependencies should be considered?
+
+1. Attrs - Zarrita makes extensive use of the Attrs library
+2. Fsspec - Zarrita has a hard dependency on Fsspec. This could be easily relaxed though.
+
+## Breaking changes relative to Zarr-Python 2.*
+
+1. H5py compat moved to a standalone module?
+2. `Group.__getitem__` support moved to `Group.members.__getitem__`?
+3. Others?
+
+## Open questions
+
+1. How to treat V2
+    a. Note: Zarrita currently implements separate `V2Array` and `V3Array` classes. This feels less than ideal.
+    b. We could easily convert metadata from v2 to the V3 Array, but what about writing?
+    c. Ideally, we don't have completely separate code paths. But if it's too complicated to support both within one interface, it's probably better.
+2. How and when to remove the current implementation of V3.
+    a. It's hidden behind a hard-to-use feature flag so we probably don't need to do anything.
+3. How to model runtime configuration?
+4. Which extensions belong in Zarr-Python and which belong in separate packages?
+    a. We don't need to take a strong position on this here. It's likely that someone will want to put Sharding in. That will be useful to develop in parallel because it will give us a good test case for the plugin interface.
+
+## Testing
+
+Zarr-Python 3.0 adds a major new dimension to Zarr: async support. This also comes with a compatibility risk; we will need to thoroughly test support in key execution environments. Testing plan:
+- Reuse the existing test suite for testing the `v3` API.
+  - `xfail` tests that expose breaking changes with a `3.0 - breaking change` description. This will help identify additional and/or unintentional breaking changes
+  - Rework tests that were only testing internal APIs.
+- Add a set of functional / integration tests targeting real-world workflows in various contexts (e.g. w/ Dask)
+
+## Development process
+
+Zarr-Python 3.0 will introduce a number of new APIs and breaking changes to existing APIs. In order to facilitate ongoing support for Zarr-Python 2.*, we will take on the following development process:
+
+- Create a `v3` branch that can be used for developing the core functionality apart from the `main` branch. This will allow us to support ongoing work and bug fixes on the `main` branch.
+- Put the `3.0` APIs inside a `zarr.v3` module. Imports from this namespace will all be new APIs that users can develop and test against once the `v3` branch is merged to `main`.
+- Kickstart the process by pulling in the current state of `zarrita`, which has many of the features described in this design.
+- Release a series of 2.* releases with the `v3` namespace
+- When `v3` is complete, move contents of `v3` to the package root
+
+**Milestones**
+
+Below is a set of specific milestones leading toward the completion of this process. As work begins, we expect this list to grow in specificity.
+
+1. Port current version of Zarrita to Zarr-Python
+2. Formalize Async interface by splitting `Array` and `Group` objects into Sync and Async versions
+3. Implement "fancy" indexing operations on the `AsyncArray`
+4. Implement an abstract base class for the `Store` interface and a wrapper `Store` to make use of existing `MutableMapping` stores.
+5. Rework the existing unit test suite to use the `v3` namespace.
+6. Develop a plugin interface for extensions
+7. Develop a set of functional and integration tests
+8. Work with downstream libraries (Xarray, Dask, etc.) to test new APIs
+
+## TODOs
+
+The following subjects are not covered in detail above but perhaps should be. Including them here so they are not forgotten.
+
+1. [Store] Should Zarr provide an API for caching objects after first read/list/etc. Read only stores?
+2. [Array] buffer protocol support
+3. [Array] `meta_array` support
+4. [Extensions] Define how Zarr-Python will consume the various plugin types
+5. [Misc] H5py compatibility requires a bit more work and a champion to drive it forward.
+6. [Misc] Define `chunk_store` API in 3.0
+7. [Misc] Define `synchronizer` API in 3.0
+
+## References
+
+1. [Zarr-Python repository](https://github.com/zarr-developers/zarr-python)
+2. [Zarr core specification (version 3.0) — Zarr specs documentation](https://zarr-specs.readthedocs.io/en/latest/v3/core/v3.0.html#)
+3. [Zarrita repository](https://github.com/scalableminds/zarrita)
+4. [Async-Zarr](https://github.com/martindurant/async-zarr)
+5. [Zarr-Python Discussion Topic](https://github.com/zarr-developers/zarr-python/discussions/1569)

From a81db0782535ba04c32c277102a6457d118a73e8 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Mon, 22 Jan 2024 11:50:32 -0800
Subject: [PATCH 185/213] chore: update pre-commit hooks (#1636)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

updates:
- [github.com/astral-sh/ruff-pre-commit: v0.1.11 → v0.1.13](https://github.com/astral-sh/ruff-pre-commit/compare/v0.1.11...v0.1.13)

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 .pre-commit-config.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 340366ef53..7d1f9254ae 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -8,7 +8,7 @@ repos:
   - repo: https://github.com/astral-sh/ruff-pre-commit
     # Ruff version.
- rev: 'v0.1.13' + rev: 'v0.1.14' hooks: - id: ruff - repo: https://github.com/psf/black From 4f2ace4b8708cf91f4bd29ae3ed210a3e66f235c Mon Sep 17 00:00:00 2001 From: Hood Chatham Date: Wed, 14 Feb 2024 04:09:41 -0800 Subject: [PATCH 186/213] Fix zarr sync (#1663) This patch removes fasteners and disables zarr.sync which uses process and thread Co-authored-by: Wei Ouyang --- pyproject.toml | 2 +- zarr/sync.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index a85e49e82c..4da3079808 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,7 +14,7 @@ requires-python = ">=3.9" dependencies = [ 'asciitree', 'numpy>=1.21.1', - 'fasteners', + 'fasteners; sys_platform != "emscripten"', 'numcodecs>=0.10.0', ] dynamic = [ diff --git a/zarr/sync.py b/zarr/sync.py index 2e843f6557..03046a4a32 100644 --- a/zarr/sync.py +++ b/zarr/sync.py @@ -3,8 +3,6 @@ from threading import Lock from typing import Protocol -import fasteners - class Synchronizer(Protocol): """Base class for synchronizers.""" @@ -49,6 +47,8 @@ def __init__(self, path): self.path = path def __getitem__(self, item): + import fasteners + path = os.path.join(self.path, item) lock = fasteners.InterProcessLock(path) return lock From 0b0ac8857a52653fdb500cc9da7b51b0ec8a05b5 Mon Sep 17 00:00:00 2001 From: Sanket Verma Date: Thu, 15 Feb 2024 01:47:27 +0530 Subject: [PATCH 187/213] Update release.rst (#1621) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Update release.rst * Update release.rst * Change 2.16.2 → 2.17.0 * Update moto for test_s3 * Skip bsddb3 tests to prevent warning failure * Fix more user warning tests * Fix even more user warning tests * Skip coverage for importorskips * Move to have_X skip method for deps * Update release.rst (PR#1663) * Fix test_core.py 'compile' issues * Add black formatting * Drop Windows/3.9 build due to unrelated failures * fix typo --------- Co-authored-by: Davis Bennett Co-authored-by: Josh Moore --- .github/workflows/windows-testing.yml | 2 +- docs/release.rst | 40 +++++++++++++++++++++++++++ requirements_dev_optional.txt | 2 +- zarr/tests/test_core.py | 28 +++++++++++++------ zarr/tests/test_storage.py | 2 +- zarr/tests/util.py | 24 ++++++++++++++++ 6 files changed, 87 insertions(+), 11 deletions(-) diff --git a/.github/workflows/windows-testing.yml b/.github/workflows/windows-testing.yml index 5c3252c0ba..0ef7f21758 100644 --- a/.github/workflows/windows-testing.yml +++ b/.github/workflows/windows-testing.yml @@ -16,7 +16,7 @@ jobs: strategy: fail-fast: True matrix: - python-version: ['3.9', '3.10', '3.11'] + python-version: ['3.10', '3.11'] steps: - uses: actions/checkout@v4 with: diff --git a/docs/release.rst b/docs/release.rst index ab74a3debd..0f199aadd2 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -18,12 +18,20 @@ Release notes Unreleased ---------- +.. _release_2.17.0: + +2.17.0 +------ + Enhancements ~~~~~~~~~~~~ * Added type hints to ``zarr.creation.create()``. By :user:`David Stansby ` :issue:`1536`. +* Pyodide support: Don't require fasteners on Emscripten. + By :user:`Hood Chatham ` :issue:`1663`. + Docs ~~~~ @@ -45,9 +53,21 @@ Docs * Minor tweak to advanced indexing tutorial examples. By :user:`Ross Barnowski ` :issue:`1550`. +* Automatically document array members using sphinx-automodapi. + By :user:`David Stansby ` :issue:`1547`. + +* Add a markdown file documenting the current and former core-developer team. + By :user:`Joe Hamman ` :issue:`1628`. 
+ +* Add Norman Rzepka to core-dev team. + By :user:`Joe Hamman ` :issue:`1630`. + * Added section about accessing zip files that are on s3. By :user:`Jeff Peck ` :issue:`1613`. +* Add V3 roadmap and design document. + By :user:`Joe Hamman ` :issue:`1583`. + Maintenance ~~~~~~~~~~~ @@ -75,6 +95,26 @@ Maintenance * Remove ``sphinx-rtd-theme`` dependency from ``pyproject.toml``. By :user:`Sanket Verma ` :issue:`1563`. +* Remove ``CODE_OF_CONDUCT.md`` file from the Zarr-Python repository. + By :user:`Sanket Verma ` :issue:`1572`. + +* Bump version of black in pre-commit. + By :user:`David Stansby ` :issue:`1559`. + +* Use list comprehension where applicable. + By :user:`Dimitri Papadopoulos Orfanos ` :issue:`1555`. + +* Use format specification mini-language to format string. + By :user:`Dimitri Papadopoulos Orfanos ` :issue:`1558`. + +* Single startswith() call instead of multiple ones. + By :user:`Dimitri Papadopoulos Orfanos ` :issue:`1556`. + +* Move codespell options around. + By :user:`Dimitri Papadopoulos Orfanos ` :issue:`1196`. + +* Remove unused mypy ignore comments. + By :user:`David Stansby ` :issue:`1602`. .. _release_2.16.1: diff --git a/requirements_dev_optional.txt b/requirements_dev_optional.txt index b4de5fd515..d1ee5a891d 100644 --- a/requirements_dev_optional.txt +++ b/requirements_dev_optional.txt @@ -20,4 +20,4 @@ pytest-timeout==2.2.0 h5py==3.10.0 fsspec==2023.12.2 s3fs==2023.12.2 -moto[server]>=4.0.8 +moto[server]>=5.0.1 diff --git a/zarr/tests/test_core.py b/zarr/tests/test_core.py index a3fde4050d..cf15703497 100644 --- a/zarr/tests/test_core.py +++ b/zarr/tests/test_core.py @@ -73,7 +73,15 @@ ) from zarr.tests.test_storage_v3 import DummyStorageTransfomer from zarr.util import buffer_size -from zarr.tests.util import abs_container, skip_test_env_var, have_fsspec, mktemp +from zarr.tests.util import ( + abs_container, + have_bsddb3, + have_fsspec, + have_lmdb, + have_sqlite3, + mktemp, + skip_test_env_var, +) from zarr.types import DIMENSION_SEPARATOR # noinspection PyMethodMayBeStatic @@ -2038,9 +2046,11 @@ def test_nbytes_stored(self): pass # not implemented +@pytest.mark.skipif(have_bsddb3 is False, reason="needs bsddb3") class TestArrayWithDBMStoreBerkeleyDB(TestArray): def create_store(self): - bsddb3 = pytest.importorskip("bsddb3") + import bsddb3 + path = mktemp(suffix=".dbm") atexit.register(os.remove, path) store = DBMStore(path, flag="n", open=bsddb3.btopen) @@ -2050,9 +2060,9 @@ def test_nbytes_stored(self): pass # not implemented +@pytest.mark.skipif(have_lmdb is False, reason="needs lmdb") class TestArrayWithLMDBStore(TestArray): def create_store(self): - pytest.importorskip("lmdb") path = mktemp(suffix=".lmdb") atexit.register(atexit_rmtree, path) store = LMDBStore(path, buffers=True) @@ -2065,9 +2075,9 @@ def test_nbytes_stored(self): pass # not implemented +@pytest.mark.skipif(have_lmdb is False, reason="needs lmdb") class TestArrayWithLMDBStoreNoBuffers(TestArray): def create_store(self): - pytest.importorskip("lmdb") path = mktemp(suffix=".lmdb") atexit.register(atexit_rmtree, path) store = LMDBStore(path, buffers=False) @@ -2077,9 +2087,9 @@ def test_nbytes_stored(self): pass # not implemented +@pytest.mark.skipif(have_sqlite3 is False, reason="needs sqlite3") class TestArrayWithSQLiteStore(TestArray): def create_store(self): - pytest.importorskip("sqlite3") path = mktemp(suffix=".db") atexit.register(atexit_rmtree, path) store = SQLiteStore(path) @@ -2758,9 +2768,11 @@ def test_nbytes_stored(self): @pytest.mark.skipif(not v3_api_available, 
reason="V3 is disabled") +@pytest.mark.skipif(have_bsddb3 is False, reason="needs bsddb3") class TestArrayWithDBMStoreV3BerkeleyDB(TestArrayV3): def create_store(self) -> DBMStoreV3: - bsddb3 = pytest.importorskip("bsddb3") + import bsddb3 + path = mktemp(suffix=".dbm") atexit.register(os.remove, path) store = DBMStoreV3(path, flag="n", open=bsddb3.btopen) @@ -2771,11 +2783,11 @@ def test_nbytes_stored(self): @pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") +@pytest.mark.skipif(have_lmdb is False, reason="needs lmdb") class TestArrayWithLMDBStoreV3(TestArrayV3): lmdb_buffers = True def create_store(self) -> LMDBStoreV3: - pytest.importorskip("lmdb") path = mktemp(suffix=".lmdb") atexit.register(atexit_rmtree, path) store = LMDBStoreV3(path, buffers=self.lmdb_buffers) @@ -2797,9 +2809,9 @@ def test_nbytes_stored(self): @pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") +@pytest.mark.skipif(have_sqlite3 is False, reason="needs sqlite3") class TestArrayWithSQLiteStoreV3(TestArrayV3): def create_store(self): - pytest.importorskip("sqlite3") path = mktemp(suffix=".db") atexit.register(atexit_rmtree, path) store = SQLiteStoreV3(path) diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index 25863749d8..e4e3d93f5f 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -1396,7 +1396,7 @@ def s3(request): port = 5555 endpoint_uri = "http://127.0.0.1:%d/" % port proc = subprocess.Popen( - shlex.split("moto_server s3 -p %d" % port), + shlex.split("moto_server -p %d" % port), stderr=subprocess.DEVNULL, stdout=subprocess.DEVNULL, ) diff --git a/zarr/tests/util.py b/zarr/tests/util.py index b4f00f703d..b3c3249cab 100644 --- a/zarr/tests/util.py +++ b/zarr/tests/util.py @@ -69,6 +69,30 @@ def skip_test_env_var(name): have_fsspec = False +try: + import bsddb3 # noqa: F401 + + have_bsddb3 = True +except ImportError: # pragma: no cover + have_bsddb3 = False + + +try: + import lmdb # noqa: F401 + + have_lmdb = True +except ImportError: # pragma: no cover + have_lmdb = False + + +try: + import sqlite3 # noqa: F401 + + have_sqlite3 = True +except ImportError: # pragma: no cover + have_sqlite3 = False + + def abs_container(): from azure.core.exceptions import ResourceExistsError import azure.storage.blob as asb From e50b47196eb4e4071158baba22567713ad012837 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 14 Feb 2024 22:04:51 +0100 Subject: [PATCH 188/213] Bump numpy from 1.24.3 to 1.26.1 (#1543) Bumps [numpy](https://github.com/numpy/numpy) from 1.24.3 to 1.26.1. - [Release notes](https://github.com/numpy/numpy/releases) - [Changelog](https://github.com/numpy/numpy/blob/main/doc/RELEASE_WALKTHROUGH.rst) - [Commits](https://github.com/numpy/numpy/compare/v1.24.3...v1.26.1) --- updated-dependencies: - dependency-name: numpy dependency-type: direct:development update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Davis Bennett Co-authored-by: Josh Moore Co-authored-by: Joe Hamman --- requirements_dev_numpy.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements_dev_numpy.txt b/requirements_dev_numpy.txt index a6135bd831..c8c5f7d7ab 100644 --- a/requirements_dev_numpy.txt +++ b/requirements_dev_numpy.txt @@ -1,4 +1,4 @@ # Break this out into a separate file to allow testing against # different versions of numpy. 
This file should pin to the latest # numpy version. -numpy==1.24.3 +numpy==1.26.1 From 81bbb2e7f28d64335d835523057041f11cdc7843 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 14 Feb 2024 22:05:20 +0100 Subject: [PATCH 189/213] chore: update pre-commit hooks (#1642) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore: update pre-commit hooks updates: - [github.com/astral-sh/ruff-pre-commit: v0.1.14 → v0.2.1](https://github.com/astral-sh/ruff-pre-commit/compare/v0.1.14...v0.2.1) - [github.com/psf/black: 23.12.1 → 24.2.0](https://github.com/psf/black/compare/23.12.1...24.2.0) * run black incl. comments for '...' --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Josh Moore --- .pre-commit-config.yaml | 4 ++-- zarr/convenience.py | 1 + zarr/core.py | 8 +++++--- zarr/indexing.py | 10 +++++----- zarr/n5.py | 1 + zarr/storage.py | 1 + zarr/sync.py | 1 + zarr/types.py | 1 + 8 files changed, 17 insertions(+), 10 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a7f48d7cd6..c7d4f32c68 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -8,11 +8,11 @@ default_language_version: repos: - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. - rev: 'v0.1.14' + rev: 'v0.2.1' hooks: - id: ruff - repo: https://github.com/psf/black - rev: 23.12.1 + rev: 24.2.0 hooks: - id: black - repo: https://github.com/codespell-project/codespell diff --git a/zarr/convenience.py b/zarr/convenience.py index 9c0deeea47..b4b8bb5293 100644 --- a/zarr/convenience.py +++ b/zarr/convenience.py @@ -1,4 +1,5 @@ """Convenience functions for storing and loading data.""" + import itertools import os import re diff --git a/zarr/core.py b/zarr/core.py index d22a9d79c3..5727afa884 100644 --- a/zarr/core.py +++ b/zarr/core.py @@ -2060,9 +2060,11 @@ def _process_chunk( index_selection = PartialChunkIterator(chunk_selection, self.chunks) for start, nitems, partial_out_selection in index_selection: expected_shape = [ - len(range(*partial_out_selection[i].indices(self.chunks[0] + 1))) - if i < len(partial_out_selection) - else dim + ( + len(range(*partial_out_selection[i].indices(self.chunks[0] + 1))) + if i < len(partial_out_selection) + else dim + ) for i, dim in enumerate(self.chunks) ] if isinstance(cdata, UncompressedPartialReadBufferV3): diff --git a/zarr/indexing.py b/zarr/indexing.py index 3042147ebb..5a2b7c0eb4 100644 --- a/zarr/indexing.py +++ b/zarr/indexing.py @@ -545,11 +545,11 @@ def ix_(selection, shape): # replace slice and int as these are not supported by numpy.ix_ selection = [ - slice_to_range(dim_sel, dim_len) - if isinstance(dim_sel, slice) - else [dim_sel] - if is_integer(dim_sel) - else dim_sel + ( + slice_to_range(dim_sel, dim_len) + if isinstance(dim_sel, slice) + else [dim_sel] if is_integer(dim_sel) else dim_sel + ) for dim_sel, dim_len in zip(selection, shape) ] diff --git a/zarr/n5.py b/zarr/n5.py index 44b44e69e2..c50c18f718 100644 --- a/zarr/n5.py +++ b/zarr/n5.py @@ -1,5 +1,6 @@ """This module contains a storage class and codec to support the N5 format. """ + import os import struct import sys diff --git a/zarr/storage.py b/zarr/storage.py index aa27e98e6f..a26dc636db 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -14,6 +14,7 @@ path) and a `getsize` method (return the size in bytes of a given value). 
""" + import atexit import errno import glob diff --git a/zarr/sync.py b/zarr/sync.py index 03046a4a32..ba1c5df5b3 100644 --- a/zarr/sync.py +++ b/zarr/sync.py @@ -8,6 +8,7 @@ class Synchronizer(Protocol): """Base class for synchronizers.""" def __getitem__(self, item): + # see subclasses ... diff --git a/zarr/types.py b/zarr/types.py index 1de270f25c..cc29a350f5 100644 --- a/zarr/types.py +++ b/zarr/types.py @@ -10,4 +10,5 @@ class MetaArray(Protocol): def __array_function__(self, func, types, args, kwargs): + # To be extended ... From 367848836535e02eecd92a11ef734dd944285615 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 14 Feb 2024 22:05:40 +0100 Subject: [PATCH 190/213] Bump ipywidgets from 8.1.0 to 8.1.1 (#1538) Bumps [ipywidgets](https://github.com/jupyter-widgets/ipywidgets) from 8.1.0 to 8.1.1. - [Release notes](https://github.com/jupyter-widgets/ipywidgets/releases) - [Commits](https://github.com/jupyter-widgets/ipywidgets/compare/8.1.0...8.1.1) --- updated-dependencies: - dependency-name: ipywidgets dependency-type: direct:development update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Davis Bennett Co-authored-by: Josh Moore --- requirements_dev_optional.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements_dev_optional.txt b/requirements_dev_optional.txt index d1ee5a891d..0ac4922ce1 100644 --- a/requirements_dev_optional.txt +++ b/requirements_dev_optional.txt @@ -3,7 +3,7 @@ lmdb==1.4.1; sys_platform != 'win32' # optional library requirements for Jupyter ipytree==0.2.2 -ipywidgets==8.1.0 +ipywidgets==8.1.1 # optional library requirements for services # don't let pyup change pinning for azure-storage-blob, need to pin to older # version to get compatibility with azure storage emulator on appveyor (FIXME) From 720bea687b444b2082638eb7edc3bb6a4f8fa805 Mon Sep 17 00:00:00 2001 From: Dimitri Papadopoulos Orfanos <3234522+DimitriPapadopoulos@users.noreply.github.com> Date: Wed, 14 Feb 2024 22:14:43 +0100 Subject: [PATCH 191/213] Proper argument for numpy.reshape (#1425) `numpy.reshape` not only accepts a tuple of ints, but also a simple int. Besides `(10)` is not a tuple and is identical to `10`, unlike `(10,)`. 
---
 zarr/tests/test_indexing.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/zarr/tests/test_indexing.py b/zarr/tests/test_indexing.py
index af046e9d28..a3afc101c5 100644
--- a/zarr/tests/test_indexing.py
+++ b/zarr/tests/test_indexing.py
@@ -1632,7 +1632,7 @@ def test_set_selections_with_fields():
         ),
         (
             (slice(0, 10, 1),),
-            np.arange(0, 10).reshape((10)),
+            np.arange(0, 10).reshape(10),
             [(0, 10, (slice(0, 10, 1),))],
         ),
         ((0,), np.arange(0, 100).reshape((10, 10)), [(0, 10, (slice(0, 1, 1),))]),
@@ -1644,7 +1644,7 @@ def test_set_selections_with_fields():
             np.arange(0, 100).reshape((10, 10)),
             [(0, 1, (slice(0, 1, 1), slice(0, 1, 1)))],
         ),
-        ((0,), np.arange(0, 10).reshape((10)), [(0, 1, (slice(0, 1, 1),))]),
+        ((0,), np.arange(0, 10).reshape(10), [(0, 1, (slice(0, 1, 1),))]),
         pytest.param(
             (slice(5, 8, 1), slice(2, 4, 1), slice(0, 5, 1)),
             np.arange(2, 100002).reshape((10, 1, 10000)),

From 74498538c180855172573f2983207f74674cbc1c Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Thu, 15 Feb 2024 09:22:59 +0100
Subject: [PATCH 192/213] Bump ipywidgets from 8.1.1 to 8.1.2 (#1666)

Bumps [ipywidgets](https://github.com/jupyter-widgets/ipywidgets) from 8.1.1 to 8.1.2.
- [Release notes](https://github.com/jupyter-widgets/ipywidgets/releases)
- [Commits](https://github.com/jupyter-widgets/ipywidgets/compare/8.1.1...8.1.2)

---
updated-dependencies:
- dependency-name: ipywidgets
  dependency-type: direct:development
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 requirements_dev_optional.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements_dev_optional.txt b/requirements_dev_optional.txt
index 0ac4922ce1..e94b814173 100644
--- a/requirements_dev_optional.txt
+++ b/requirements_dev_optional.txt
@@ -3,7 +3,7 @@ lmdb==1.4.1; sys_platform != 'win32'
 # optional library requirements for Jupyter
 ipytree==0.2.2
-ipywidgets==8.1.1
+ipywidgets==8.1.2
 # optional library requirements for services
 # don't let pyup change pinning for azure-storage-blob, need to pin to older
 # version to get compatibility with azure storage emulator on appveyor (FIXME)

From 3db41760e18fb0a69b5066e8c7aba9752a8c474e Mon Sep 17 00:00:00 2001
From: Davis Bennett
Date: Thu, 15 Feb 2024 10:54:23 +0100
Subject: [PATCH 193/213] docs: ZIP-related tweaks (#1641)

* docs: use 'ZIP archive' instead of 'zip file'; clarify utility of caching in s3 + ZIP example; style

* docs: update release notes, correct spelling of greg lee's name in past release notes, and fix markup in past release notes

---
 docs/release.rst  | 20 ++++++++++----------
 docs/tutorial.rst | 27 ++++++++++++++-------------
 2 files changed, 24 insertions(+), 23 deletions(-)

diff --git a/docs/release.rst b/docs/release.rst
index 0f199aadd2..b73dcec34f 100644
--- a/docs/release.rst
+++ b/docs/release.rst
@@ -62,8 +62,8 @@ Docs
 * Add Norman Rzepka to core-dev team.
   By :user:`Joe Hamman ` :issue:`1630`.
 
-* Added section about accessing zip files that are on s3.
-  By :user:`Jeff Peck ` :issue:`1613`.
+* Added section about accessing ZIP archives on s3.
+ By :user:`Jeff Peck ` :issue:`1613`, :issue:`1615`, and :user:`Davis Bennett ` :issue:`1641`. * Add V3 roadmap and design document. By :user:`Joe Hamman ` :issue:`1583`. @@ -157,10 +157,10 @@ Maintenance By :user:`Davis Bennett ` :issue:`1462`. * Style the codebase with ``ruff`` and ``black``. - By :user:`Davis Bennett` :issue:`1459` + By :user:`Davis Bennett ` :issue:`1459` * Ensure that chunks is tuple of ints upon array creation. - By :user:`Philipp Hanslovsky` :issue:`1461` + By :user:`Philipp Hanslovsky ` :issue:`1461` .. _release_2.15.0: @@ -548,7 +548,7 @@ Maintenance By :user:`Saransh Chopra ` :issue:`1079`. * Remove option to return None from _ensure_store. - By :user:`Greggory Lee ` :issue:`1068`. + By :user:`Gregory Lee ` :issue:`1068`. * Fix a typo of "integers". By :user:`Richard Scott ` :issue:`1056`. @@ -566,7 +566,7 @@ Enhancements Since the format is not yet finalized, the classes and functions are not automatically imported into the regular `zarr` name space. Setting the `ZARR_V3_EXPERIMENTAL_API` environment variable will activate them. - By :user:`Greggory Lee `; :issue:`898`, :issue:`1006`, and :issue:`1007` + By :user:`Gregory Lee `; :issue:`898`, :issue:`1006`, and :issue:`1007` as well as by :user:`Josh Moore ` :issue:`1032`. * **Create FSStore from an existing fsspec filesystem**. If you have created @@ -688,7 +688,7 @@ Enhancements higher-level array creation and convenience functions still accept plain Python dicts or other mutable mappings for the ``store`` argument, but will internally convert these to a ``KVStore``. - By :user:`Greggory Lee `; :issue:`839`, :issue:`789`, and :issue:`950`. + By :user:`Gregory Lee `; :issue:`839`, :issue:`789`, and :issue:`950`. * Allow to assign array ``fill_values`` and update metadata accordingly. By :user:`Ryan Abernathey `, :issue:`662`. @@ -835,7 +835,7 @@ Bug fixes ~~~~~~~~~ * Fix FSStore.listdir behavior for nested directories. - By :user:`Greggory Lee `; :issue:`802`. + By :user:`Gregory Lee `; :issue:`802`. .. _release_2.9.4: @@ -919,7 +919,7 @@ Bug fixes By :user:`Josh Moore `; :issue:`781`. * avoid NumPy 1.21.0 due to https://github.com/numpy/numpy/issues/19325 - By :user:`Greggory Lee `; :issue:`791`. + By :user:`Gregory Lee `; :issue:`791`. Maintenance ~~~~~~~~~~~ @@ -931,7 +931,7 @@ Maintenance By :user:`Elliott Sales de Andrade `; :issue:`799`. * TST: add missing assert in test_hexdigest. - By :user:`Greggory Lee `; :issue:`801`. + By :user:`Gregory Lee `; :issue:`801`. .. _release_2.8.3: diff --git a/docs/tutorial.rst b/docs/tutorial.rst index 351eef064a..1f7accab3a 100644 --- a/docs/tutorial.rst +++ b/docs/tutorial.rst @@ -774,7 +774,7 @@ the following code:: Any other compatible storage class could be used in place of :class:`zarr.storage.DirectoryStore` in the code examples above. For example, -here is an array stored directly into a Zip file, via the +here is an array stored directly into a ZIP archive, via the :class:`zarr.storage.ZipStore` class:: >>> store = zarr.ZipStore('data/example.zip', mode='w') @@ -798,12 +798,12 @@ Re-open and check that data have been written:: [42, 42, 42, ..., 42, 42, 42]], dtype=int32) >>> store.close() -Note that there are some limitations on how Zip files can be used, because items -within a Zip file cannot be updated in place. This means that data in the array +Note that there are some limitations on how ZIP archives can be used, because items +within a ZIP archive cannot be updated in place. 
This means that data in the array should only be written once and write operations should be aligned with chunk boundaries. Note also that the ``close()`` method must be called after writing any data to the store, otherwise essential records will not be written to the -underlying zip file. +underlying ZIP archive. Another storage alternative is the :class:`zarr.storage.DBMStore` class, added in Zarr version 2.2. This class allows any DBM-style database to be used for @@ -846,7 +846,7 @@ respectively require the `redis-py `_ and `pymongo `_ packages to be installed. For compatibility with the `N5 `_ data format, Zarr also provides -an N5 backend (this is currently an experimental feature). Similar to the zip storage class, an +an N5 backend (this is currently an experimental feature). Similar to the ZIP storage class, an :class:`zarr.n5.N5Store` can be instantiated directly:: >>> store = zarr.N5Store('data/example.n5') @@ -1000,12 +1000,13 @@ separately from Zarr. .. _tutorial_copy: -Accessing Zip Files on S3 -~~~~~~~~~~~~~~~~~~~~~~~~~ +Accessing ZIP archives on S3 +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The built-in `ZipStore` will only work with paths on the local file-system, however -it is also possible to access ``.zarr.zip`` data on the cloud. Here is an example of -accessing a zipped Zarr file on s3: +The built-in :class:`zarr.storage.ZipStore` will only work with paths on the local file-system; however +it is possible to access ZIP-archived Zarr data on the cloud via the `ZipFileSystem `_ +class from ``fsspec``. The following example demonstrates how to access +a ZIP-archived Zarr group on s3 using `s3fs `_ and ``ZipFileSystem``: >>> s3_path = "s3://path/to/my.zarr.zip" >>> @@ -1014,7 +1015,7 @@ accessing a zipped Zarr file on s3: >>> fs = ZipFileSystem(f, mode="r") >>> store = FSMap("", fs, check=False) >>> - >>> # cache is optional, but may be a good idea depending on the situation + >>> # caching may improve performance when repeatedly reading the same data >>> cache = zarr.storage.LRUStoreCache(store, max_size=2**28) >>> z = zarr.group(store=cache) @@ -1022,7 +1023,7 @@ This store can also be generated with ``fsspec``'s handler chaining, like so: >>> store = zarr.storage.FSStore(url=f"zip::{s3_path}", mode="r") -This can be especially useful if you have a very large ``.zarr.zip`` file on s3 +This can be especially useful if you have a very large ZIP-archived Zarr array or group on s3 and only need to access a small portion of it. Consolidating metadata @@ -1161,7 +1162,7 @@ re-compression, and so should be faster. E.g.:: └── spam (100,) int64 >>> new_root['foo/bar/baz'][:] array([ 0, 1, 2, ..., 97, 98, 99]) - >>> store2.close() # zip stores need to be closed + >>> store2.close() # ZIP stores need to be closed .. _tutorial_strings: From d23683d21728d9be5a978719fbb75b1cb45b4441 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 16 Feb 2024 08:37:26 +0100 Subject: [PATCH 194/213] Bump numpy from 1.26.1 to 1.26.4 (#1669) Bumps [numpy](https://github.com/numpy/numpy) from 1.26.1 to 1.26.4. - [Release notes](https://github.com/numpy/numpy/releases) - [Changelog](https://github.com/numpy/numpy/blob/main/doc/RELEASE_WALKTHROUGH.rst) - [Commits](https://github.com/numpy/numpy/compare/v1.26.1...v1.26.4) --- updated-dependencies: - dependency-name: numpy dependency-type: direct:development update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- requirements_dev_numpy.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements_dev_numpy.txt b/requirements_dev_numpy.txt index c8c5f7d7ab..d8d6c3d097 100644 --- a/requirements_dev_numpy.txt +++ b/requirements_dev_numpy.txt @@ -1,4 +1,4 @@ # Break this out into a separate file to allow testing against # different versions of numpy. This file should pin to the latest # numpy version. -numpy==1.26.1 +numpy==1.26.4 From 003ff33e70ce0a28411a7e9fde608354b1b8ee9b Mon Sep 17 00:00:00 2001 From: Dimitri Papadopoulos Orfanos <3234522+DimitriPapadopoulos@users.noreply.github.com> Date: Fri, 16 Feb 2024 20:45:43 +0100 Subject: [PATCH 195/213] Change occurrences of % and format() to f-strings (#1423) Co-authored-by: Joe Hamman Co-authored-by: Josh Moore --- docs/release.rst | 3 + zarr/_storage/absstore.py | 6 +- zarr/_storage/store.py | 2 +- zarr/_storage/v3.py | 2 +- zarr/convenience.py | 44 +++++------ zarr/core.py | 20 ++--- zarr/creation.py | 4 +- zarr/errors.py | 4 +- zarr/hierarchy.py | 14 ++-- zarr/indexing.py | 37 +++++----- zarr/meta.py | 10 +-- zarr/meta_v1.py | 4 +- zarr/n5.py | 6 +- zarr/storage.py | 10 +-- zarr/tests/test_core.py | 10 +-- zarr/tests/test_meta.py | 146 ++++++++++++++----------------------- zarr/tests/test_storage.py | 6 +- zarr/util.py | 59 +++++++-------- 18 files changed, 167 insertions(+), 220 deletions(-) diff --git a/docs/release.rst b/docs/release.rst index b73dcec34f..8ce4b2e33c 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -18,6 +18,9 @@ Release notes Unreleased ---------- +* Change occurrences of % and format() to f-strings. + By :user:`Dimitri Papadopoulos Orfanos ` :issue:`1423`. + .. _release_2.17.0: 2.17.0 diff --git a/zarr/_storage/absstore.py b/zarr/_storage/absstore.py index c9a113148c..b6b386f468 100644 --- a/zarr/_storage/absstore.py +++ b/zarr/_storage/absstore.py @@ -84,7 +84,7 @@ def __init__( blob_service_kwargs = blob_service_kwargs or {} client = ContainerClient( - "https://{}.blob.core.windows.net/".format(account_name), + f"https://{account_name}.blob.core.windows.net/", container, credential=account_key, **blob_service_kwargs, @@ -141,7 +141,7 @@ def __getitem__(self, key): try: return self.client.download_blob(blob_name).readall() except ResourceNotFoundError: - raise KeyError("Blob %s not found" % blob_name) + raise KeyError(f"Blob {blob_name} not found") def __setitem__(self, key, value): value = ensure_bytes(value) @@ -154,7 +154,7 @@ def __delitem__(self, key): try: self.client.delete_blob(self._append_path_to_prefix(key)) except ResourceNotFoundError: - raise KeyError("Blob %s not found" % key) + raise KeyError(f"Blob {key} not found") def __eq__(self, other): return ( diff --git a/zarr/_storage/store.py b/zarr/_storage/store.py index 36b596769a..209f118534 100644 --- a/zarr/_storage/store.py +++ b/zarr/_storage/store.py @@ -227,7 +227,7 @@ def _validate_key(self, key: str): # TODO: Possibly allow key == ".zmetadata" too if we write a # consolidated metadata spec corresponding to this? 
): - raise ValueError("keys starts with unexpected value: `{}`".format(key)) + raise ValueError(f"key starts with unexpected value: `{key}`") if key.endswith("/"): raise ValueError("keys may not end in /") diff --git a/zarr/_storage/v3.py b/zarr/_storage/v3.py index 32e78f7a34..56bae74361 100644 --- a/zarr/_storage/v3.py +++ b/zarr/_storage/v3.py @@ -569,7 +569,7 @@ def __init__(self, store: StoreLike, metadata_key=meta_root + "consolidated/.zme consolidated_format = meta.get("zarr_consolidated_format", None) if consolidated_format != 1: raise MetadataError( - "unsupported zarr consolidated metadata format: %s" % consolidated_format + f"unsupported zarr consolidated metadata format: {consolidated_format}" ) # decode metadata diff --git a/zarr/convenience.py b/zarr/convenience.py index b4b8bb5293..7ca5d426f0 100644 --- a/zarr/convenience.py +++ b/zarr/convenience.py @@ -259,7 +259,7 @@ def save_group(store: StoreLike, *args, zarr_version=None, path=None, **kwargs): try: grp = _create_group(_store, path=path, overwrite=True, zarr_version=zarr_version) for i, arr in enumerate(args): - k = "arr_{}".format(i) + k = f"arr_{i}" grp.create_dataset(k, data=arr, overwrite=True, zarr_version=zarr_version) for k, arr in kwargs.items(): grp.create_dataset(k, data=arr, overwrite=True, zarr_version=zarr_version) @@ -499,7 +499,7 @@ def __init__(self, log): self.log_file = log else: raise TypeError( - "log must be a callable function, file path or " "file-like object, found %r" % log + f"log must be a callable function, file path or file-like object, found {log!r}" ) def __enter__(self): @@ -526,9 +526,9 @@ def _log_copy_summary(log, dry_run, n_copied, n_skipped, n_bytes_copied): message = "dry run: " else: message = "all done: " - message += "{:,} copied, {:,} skipped".format(n_copied, n_skipped) + message += f"{n_copied:,} copied, {n_skipped:,} skipped" if not dry_run: - message += ", {:,} bytes copied".format(n_bytes_copied) + message += f", {n_bytes_copied:,} bytes copied" log(message) @@ -657,9 +657,7 @@ def copy_store( # check if_exists parameter valid_if_exists = ["raise", "replace", "skip"] if if_exists not in valid_if_exists: - raise ValueError( - "if_exists must be one of {!r}; found {!r}".format(valid_if_exists, if_exists) - ) + raise ValueError(f"if_exists must be one of {valid_if_exists!r}; found {if_exists!r}") # setup counting variables n_copied = n_skipped = n_bytes_copied = 0 @@ -720,20 +718,20 @@ def copy_store( if if_exists != "replace": if dest_key in dest: if if_exists == "raise": - raise CopyError("key {!r} exists in destination".format(dest_key)) + raise CopyError(f"key {dest_key!r} exists in destination") elif if_exists == "skip": do_copy = False # take action if do_copy: - log("copy {}".format(descr)) + log(f"copy {descr}") if not dry_run: data = source[source_key] n_bytes_copied += buffer_size(data) dest[dest_key] = data n_copied += 1 else: - log("skip {}".format(descr)) + log(f"skip {descr}") n_skipped += 1 # log a final message with a summary of what happened @@ -744,7 +742,7 @@ def copy_store( def _check_dest_is_group(dest): if not hasattr(dest, "create_dataset"): - raise ValueError("dest must be a group, got {!r}".format(dest)) + raise ValueError(f"dest must be a group, got {dest!r}") def copy( @@ -910,11 +908,9 @@ def _copy(log, source, dest, name, root, shallow, without_attrs, if_exists, dry_ # check if_exists parameter valid_if_exists = ["raise", "replace", "skip", "skip_initialized"] if if_exists not in valid_if_exists: - raise ValueError( - "if_exists must be one of 
{!r}; found {!r}".format(valid_if_exists, if_exists) - ) + raise ValueError(f"if_exists must be one of {valid_if_exists!r}; found {if_exists!r}") if dest_h5py and if_exists == "skip_initialized": - raise ValueError("{!r} can only be used when copying to zarr".format(if_exists)) + raise ValueError(f"{if_exists!r} can only be used when copying to zarr") # determine name to copy to if name is None: @@ -934,9 +930,7 @@ def _copy(log, source, dest, name, root, shallow, without_attrs, if_exists, dry_ exists = dest is not None and name in dest if exists: if if_exists == "raise": - raise CopyError( - "an object {!r} already exists in destination " "{!r}".format(name, dest.name) - ) + raise CopyError(f"an object {name!r} already exists in destination {dest.name!r}") elif if_exists == "skip": do_copy = False elif if_exists == "skip_initialized": @@ -947,7 +941,7 @@ def _copy(log, source, dest, name, root, shallow, without_attrs, if_exists, dry_ # take action if do_copy: # log a message about what we're going to do - log("copy {} {} {}".format(source.name, source.shape, source.dtype)) + log(f"copy {source.name} {source.shape} {source.dtype}") if not dry_run: # clear the way @@ -1015,7 +1009,7 @@ def _copy(log, source, dest, name, root, shallow, without_attrs, if_exists, dry_ n_copied += 1 else: - log("skip {} {} {}".format(source.name, source.shape, source.dtype)) + log(f"skip {source.name} {source.shape} {source.dtype}") n_skipped += 1 elif root or not shallow: @@ -1026,16 +1020,14 @@ def _copy(log, source, dest, name, root, shallow, without_attrs, if_exists, dry_ exists_array = dest is not None and name in dest and hasattr(dest[name], "shape") if exists_array: if if_exists == "raise": - raise CopyError( - "an array {!r} already exists in destination " "{!r}".format(name, dest.name) - ) + raise CopyError(f"an array {name!r} already exists in destination {dest.name!r}") elif if_exists == "skip": do_copy = False # take action if do_copy: # log action - log("copy {}".format(source.name)) + log(f"copy {source.name}") if not dry_run: # clear the way @@ -1078,7 +1070,7 @@ def _copy(log, source, dest, name, root, shallow, without_attrs, if_exists, dry_ n_copied += 1 else: - log("skip {}".format(source.name)) + log(f"skip {source.name}") n_skipped += 1 return n_copied, n_skipped, n_bytes_copied @@ -1327,7 +1319,7 @@ def open_consolidated(store: StoreLike, metadata_key=".zmetadata", mode="r+", ** store, storage_options=kwargs.get("storage_options"), mode=mode, zarr_version=zarr_version ) if mode not in {"r", "r+"}: - raise ValueError("invalid mode, expected either 'r' or 'r+'; found {!r}".format(mode)) + raise ValueError(f"invalid mode, expected either 'r' or 'r+'; found {mode!r}") path = kwargs.pop("path", None) if store._store_version == 2: diff --git a/zarr/core.py b/zarr/core.py index 5727afa884..c3184c6652 100644 --- a/zarr/core.py +++ b/zarr/core.py @@ -2396,11 +2396,11 @@ def _encode_chunk(self, chunk): def __repr__(self): t = type(self) - r = "<{}.{}".format(t.__module__, t.__name__) + r = f"<{t.__module__}.{t.__name__}" if self.name: - r += " %r" % self.name - r += " %s" % str(self.shape) - r += " %s" % self.dtype + r += f" {self.name!r}" + r += f" {str(self.shape)}" + r += f" {self.dtype}" if self._read_only: r += " read-only" r += ">" @@ -2436,11 +2436,11 @@ def info_items(self): def _info_items_nosync(self): def typestr(o): - return "{}.{}".format(type(o).__module__, type(o).__name__) + return f"{type(o).__module__}.{type(o).__name__}" def bytestr(n): if n > 2**10: - return "{} ({})".format(n, 
human_readable_size(n)) + return f"{n} ({human_readable_size(n)})" else: return str(n) @@ -2451,7 +2451,7 @@ def bytestr(n): items += [("Name", self.name)] items += [ ("Type", typestr(self)), - ("Data type", "%s" % self.dtype), + ("Data type", str(self.dtype)), ("Shape", str(self.shape)), ("Chunk shape", str(self.chunks)), ("Order", self.order), @@ -2461,7 +2461,7 @@ def bytestr(n): # filters if self.filters: for i, f in enumerate(self.filters): - items += [("Filter [%s]" % i, repr(f))] + items += [(f"Filter [{i}]", repr(f))] # compressor items += [("Compressor", repr(self.compressor))] @@ -2478,9 +2478,9 @@ def bytestr(n): if self.nbytes_stored > 0: items += [ ("No. bytes stored", bytestr(self.nbytes_stored)), - ("Storage ratio", "%.1f" % (self.nbytes / self.nbytes_stored)), + ("Storage ratio", f"{self.nbytes / self.nbytes_stored:.1f}"), ] - items += [("Chunks initialized", "{}/{}".format(self.nchunks_initialized, self.nchunks))] + items += [("Chunks initialized", f"{self.nchunks_initialized}/{self.nchunks}")] return items diff --git a/zarr/creation.py b/zarr/creation.py index d4f570895a..264715b040 100644 --- a/zarr/creation.py +++ b/zarr/creation.py @@ -287,7 +287,7 @@ def _kwargs_compat(compressor, fill_value, kwargs): compressor = compression else: - raise ValueError("bad value for compression: %r" % compression) + raise ValueError(f"bad value for compression: {compression!r}") # handle 'fillvalue' if "fillvalue" in kwargs: @@ -297,7 +297,7 @@ def _kwargs_compat(compressor, fill_value, kwargs): # ignore other keyword arguments for k in kwargs: - warn("ignoring keyword argument %r" % k) + warn(f"ignoring keyword argument {k!r}") return compressor, fill_value diff --git a/zarr/errors.py b/zarr/errors.py index 30c9b13d39..85789fbcbf 100644 --- a/zarr/errors.py +++ b/zarr/errors.py @@ -67,9 +67,7 @@ def __init__(self): def err_too_many_indices(selection, shape): - raise IndexError( - "too many indices for array; expected {}, got {}".format(len(shape), len(selection)) - ) + raise IndexError(f"too many indices for array; expected {len(shape)}, got {len(selection)}") class VindexInvalidSelectionError(_BaseZarrIndexError): diff --git a/zarr/hierarchy.py b/zarr/hierarchy.py index 1cfea89c81..44af1d63d1 100644 --- a/zarr/hierarchy.py +++ b/zarr/hierarchy.py @@ -340,9 +340,9 @@ def __len__(self): def __repr__(self): t = type(self) - r = "<{}.{}".format(t.__module__, t.__name__) + r = f"<{t.__module__}.{t.__name__}" if self.name: - r += " %r" % self.name + r += f" {self.name!r}" if self._read_only: r += " read-only" r += ">" @@ -358,7 +358,7 @@ def __exit__(self, exc_type, exc_val, exc_tb): def info_items(self): def typestr(o): - return "{}.{}".format(type(o).__module__, type(o).__name__) + return f"{type(o).__module__}.{type(o).__name__}" items = [] @@ -1157,17 +1157,15 @@ def _require_dataset_nosync(self, name, shape, dtype=None, exact=False, **kwargs shape = normalize_shape(shape) if shape != a.shape: raise TypeError( - "shape do not match existing array; expected {}, got {}".format(a.shape, shape) + f"shape do not match existing array; expected {a.shape}, got {shape}" ) dtype = np.dtype(dtype) if exact: if dtype != a.dtype: - raise TypeError( - "dtypes do not match exactly; expected {}, got {}".format(a.dtype, dtype) - ) + raise TypeError(f"dtypes do not match exactly; expected {a.dtype}, got {dtype}") else: if not np.can_cast(dtype, a.dtype): - raise TypeError("dtypes ({}, {}) cannot be safely cast".format(dtype, a.dtype)) + raise TypeError(f"dtypes ({dtype}, {a.dtype}) cannot be safely 
cast") return a else: diff --git a/zarr/indexing.py b/zarr/indexing.py index 5a2b7c0eb4..9889fcadad 100644 --- a/zarr/indexing.py +++ b/zarr/indexing.py @@ -338,8 +338,8 @@ def __init__(self, selection, array): else: raise IndexError( - "unsupported selection item for basic indexing; " - "expected integer or slice, got {!r}".format(type(dim_sel)) + f"unsupported selection item for basic indexing; " + f"expected integer or slice, got {type(dim_sel)!r}" ) dim_indexers.append(dim_indexer) @@ -370,8 +370,8 @@ def __init__(self, dim_sel, dim_len, dim_chunk_len): # check shape if dim_sel.shape[0] != dim_len: raise IndexError( - "Boolean array has the wrong length for dimension; " - "expected {}, got {}".format(dim_len, dim_sel.shape[0]) + f"Boolean array has the wrong length for dimension; " + f"expected {dim_len}, got { dim_sel.shape[0]}" ) # store attributes @@ -610,9 +610,9 @@ def __init__(self, selection, array): else: raise IndexError( - "unsupported selection item for orthogonal indexing; " - "expected integer, slice, integer array or Boolean " - "array, got {!r}".format(type(dim_sel)) + f"unsupported selection item for orthogonal indexing; " + f"expected integer, slice, integer array or Boolean " + f"array, got {type(dim_sel)!r}" ) dim_indexers.append(dim_indexer) @@ -698,8 +698,8 @@ def __init__(self, selection, array): if dim_sel.step not in {1, None}: raise IndexError( - "unsupported selection item for block indexing; " - "expected integer or slice with step=1, got {!r}".format(type(dim_sel)) + f"unsupported selection item for block indexing; " + f"expected integer or slice with step=1, got {type(dim_sel)!r}" ) # Can't reuse wraparound_indices because it expects a numpy array @@ -715,8 +715,8 @@ def __init__(self, selection, array): else: raise IndexError( - "unsupported selection item for block indexing; " - "expected integer or slice, got {!r}".format(type(dim_sel)) + f"unsupported selection item for block indexing; " + f"expected integer or slice, got {type(dim_sel)!r}" ) dim_indexer = SliceDimIndexer(slice_, dim_len, dim_chunk_size) @@ -782,9 +782,9 @@ def __init__(self, selection, array): # validation if not is_coordinate_selection(selection, array): raise IndexError( - "invalid coordinate selection; expected one integer " - "(coordinate) array per dimension of the target array, " - "got {!r}".format(selection) + f"invalid coordinate selection; expected one integer " + f"(coordinate) array per dimension of the target array, " + f"got {selection!r}" ) # handle wraparound, boundscheck @@ -874,8 +874,8 @@ def __init__(self, selection, array): # validation if not is_mask_selection(selection, array): raise IndexError( - "invalid mask selection; expected one Boolean (mask)" - "array with the same shape as the target array, got {!r}".format(selection) + f"invalid mask selection; expected one Boolean (mask)" + f"array with the same shape as the target array, got {selection!r}" ) # convert to indices @@ -919,8 +919,7 @@ def check_fields(fields, dtype): # check type if not isinstance(fields, (str, list, tuple)): raise IndexError( - "'fields' argument must be a string or list of strings; found " - "{!r}".format(type(fields)) + f"'fields' argument must be a string or list of strings; found " f"{type(fields)!r}" ) if fields: if dtype.names is None: @@ -933,7 +932,7 @@ def check_fields(fields, dtype): # multiple field selection out_dtype = np.dtype([(f, dtype[f]) for f in fields]) except KeyError as e: - raise IndexError("invalid 'fields' argument, field not found: {!r}".format(e)) + raise 
IndexError(f"invalid 'fields' argument, field not found: {e!r}") else: return out_dtype else: diff --git a/zarr/meta.py b/zarr/meta.py index d9797e4754..4b360270de 100644 --- a/zarr/meta.py +++ b/zarr/meta.py @@ -111,7 +111,7 @@ def decode_array_metadata(cls, s: Union[MappingType, bytes, str]) -> MappingType # check metadata format zarr_format = meta.get("zarr_format", None) if zarr_format != cls.ZARR_FORMAT: - raise MetadataError("unsupported zarr format: %s" % zarr_format) + raise MetadataError(f"unsupported zarr format: {zarr_format}") # extract array metadata fields try: @@ -199,7 +199,7 @@ def decode_group_metadata(cls, s: Union[MappingType, bytes, str]) -> MappingType # check metadata format version zarr_format = meta.get("zarr_format", None) if zarr_format != cls.ZARR_FORMAT: - raise MetadataError("unsupported zarr format: %s" % zarr_format) + raise MetadataError(f"unsupported zarr format: {zarr_format}") meta = dict(zarr_format=zarr_format) return meta @@ -346,7 +346,7 @@ def decode_group_metadata(cls, s: Union[MappingType, bytes, str]) -> MappingType # # check metadata format version # zarr_format = meta.get("zarr_format", None) # if zarr_format != cls.ZARR_FORMAT: - # raise MetadataError("unsupported zarr format: %s" % zarr_format) + # raise MetadataError(f"unsupported zarr format: {zarr_format}") assert "attributes" in meta # meta = dict(attributes=meta['attributes']) @@ -383,7 +383,7 @@ def decode_hierarchy_metadata(cls, s: Union[MappingType, bytes, str]) -> Mapping # check metadata format # zarr_format = meta.get("zarr_format", None) # if zarr_format != "https://purl.org/zarr/spec/protocol/core/3.0": - # raise MetadataError("unsupported zarr format: %s" % zarr_format) + # raise MetadataError(f"unsupported zarr format: {zarr_format}") if set(meta.keys()) != { "zarr_format", "metadata_encoding", @@ -518,7 +518,7 @@ def decode_array_metadata(cls, s: Union[MappingType, bytes, str]) -> MappingType meta["storage_transformers"] = storage_transformers except Exception as e: - raise MetadataError("error decoding metadata: %s" % e) + raise MetadataError(f"error decoding metadata: {e}") else: return meta diff --git a/zarr/meta_v1.py b/zarr/meta_v1.py index 4ac381f2ca..65bfd3488e 100644 --- a/zarr/meta_v1.py +++ b/zarr/meta_v1.py @@ -10,7 +10,7 @@ def decode_metadata(b): meta = json.loads(s) zarr_format = meta.get("zarr_format", None) if zarr_format != 1: - raise MetadataError("unsupported zarr format: %s" % zarr_format) + raise MetadataError(f"unsupported zarr format: {zarr_format}") try: meta = dict( zarr_format=meta["zarr_format"], @@ -23,7 +23,7 @@ def decode_metadata(b): order=meta["order"], ) except Exception as e: - raise MetadataError("error decoding metadata: %s" % e) + raise MetadataError(f"error decoding metadata: {e}") else: return meta diff --git a/zarr/n5.py b/zarr/n5.py index c50c18f718..fdd3d5babf 100644 --- a/zarr/n5.py +++ b/zarr/n5.py @@ -826,9 +826,9 @@ def decode(self, chunk, out=None) -> bytes: if out is not None: # out should only be used if we read a complete chunk - assert chunk_shape == self.chunk_shape, "Expected chunk of shape {}, found {}".format( - self.chunk_shape, chunk_shape - ) + assert ( + chunk_shape == self.chunk_shape + ), f"Expected chunk of shape {self.chunk_shape}, found {chunk_shape}" if self._compressor: self._compressor.decode(chunk, out) diff --git a/zarr/storage.py b/zarr/storage.py index a26dc636db..73a6dc9630 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -2700,14 +2700,12 @@ def listdir(self, path=None): path = 
normalize_storage_path(path) sep = "_" if path == "" else "/" keys = self.cursor.execute( - """ + f""" SELECT DISTINCT SUBSTR(m, 0, INSTR(m, "/")) AS l FROM ( SELECT LTRIM(SUBSTR(k, LENGTH(?) + 1), "/") || "/" AS m FROM zarr WHERE k LIKE (? || "{sep}%") ) ORDER BY l ASC - """.format( - sep=sep - ), + """, (path, path), ) keys = list(map(operator.itemgetter(0), keys)) @@ -2863,7 +2861,7 @@ def __init__(self, prefix="zarr", dimension_separator=None, **kwargs): self.client = redis.Redis(**kwargs) def _key(self, key): - return "{prefix}:{key}".format(prefix=self._prefix, key=key) + return f"{self._prefix}:{key}" def __getitem__(self, key): return self.client[self._key(key)] @@ -2948,7 +2946,7 @@ def __init__(self, store: StoreLike, metadata_key=".zmetadata"): consolidated_format = meta.get("zarr_consolidated_format", None) if consolidated_format != 1: raise MetadataError( - "unsupported zarr consolidated metadata format: %s" % consolidated_format + f"unsupported zarr consolidated metadata format: {consolidated_format}" ) # decode metadata diff --git a/zarr/tests/test_core.py b/zarr/tests/test_core.py index cf15703497..d9447c0832 100644 --- a/zarr/tests/test_core.py +++ b/zarr/tests/test_core.py @@ -188,7 +188,7 @@ def test_store_has_text_keys(self): for k in z.chunk_store.keys(): if not isinstance(k, expected_type): # pragma: no cover - pytest.fail("Non-text key: %s" % repr(k)) + pytest.fail(f"Non-text key: {k!r}") z.store.close() @@ -202,7 +202,7 @@ def test_store_has_binary_values(self): try: ensure_ndarray(v) except TypeError: # pragma: no cover - pytest.fail("Non-bytes-like value: %s" % repr(v)) + pytest.fail(f"Non-bytes-like value: {v!r}") z.store.close() @@ -1212,7 +1212,7 @@ def test_dtypes(self): # datetime, timedelta for base_type in "Mm": for resolution in "D", "us", "ns": - dtype = "{}8[{}]".format(base_type, resolution) + dtype = f"{base_type}8[{resolution}]" z = self.create_array(shape=100, dtype=dtype, fill_value=0) assert z.dtype == np.dtype(dtype) a = np.random.randint( @@ -1402,7 +1402,7 @@ def compare_arrays(expected, actual, item_dtype): # convenience API for item_type in "int", " Tuple[np.dtype object_codec = codec_registry[codec_id](*args) except KeyError: # pragma: no cover raise ValueError( - "codec %r for object type %r is not " - "available; please provide an " - "object_codec manually" % (codec_id, key) + f"codec {codec_id!r} for object type {key!r} is not " + f"available; please provide an object_codec manually" ) return dtype, object_codec @@ -241,7 +240,7 @@ def is_total_slice(item, shape: Tuple[int]) -> bool: for it, sh in zip(item, shape) ) else: - raise TypeError("expected slice or tuple of slices, found %r" % item) + raise TypeError(f"expected slice or tuple of slices, found {item!r}") def normalize_resize_args(old_shape, *args): @@ -265,23 +264,23 @@ def normalize_resize_args(old_shape, *args): def human_readable_size(size) -> str: if size < 2**10: - return "%s" % size + return f"{size}" elif size < 2**20: - return "%.1fK" % (size / float(2**10)) + return f"{size / float(2**10):.1f}K" elif size < 2**30: - return "%.1fM" % (size / float(2**20)) + return f"{size / float(2**20):.1f}M" elif size < 2**40: - return "%.1fG" % (size / float(2**30)) + return f"{size / float(2**30):.1f}G" elif size < 2**50: - return "%.1fT" % (size / float(2**40)) + return f"{size / float(2**40):.1f}T" else: - return "%.1fP" % (size / float(2**50)) + return f"{size / float(2**50):.1f}P" def normalize_order(order: str) -> str: order = str(order).upper() if order not in ["C", "F"]: - 
raise ValueError("order must be either 'C' or 'F', found: %r" % order) + raise ValueError(f"order must be either 'C' or 'F', found: {order!r}") return order @@ -289,7 +288,7 @@ def normalize_dimension_separator(sep: Optional[str]) -> Optional[DIMENSION_SEPA if sep in (".", "/", None): return cast(Optional[DIMENSION_SEPARATOR], sep) else: - raise ValueError("dimension_separator must be either '.' or '/', found: %r" % sep) + raise ValueError(f"dimension_separator must be either '.' or '/', found: {sep!r}") def normalize_fill_value(fill_value, dtype: np.dtype): @@ -307,8 +306,8 @@ def normalize_fill_value(fill_value, dtype: np.dtype): if not isinstance(fill_value, str): raise ValueError( - "fill_value {!r} is not valid for dtype {}; must be a " - "unicode string".format(fill_value, dtype) + f"fill_value {fill_value!r} is not valid for dtype {dtype}; " + f"must be a unicode string" ) else: @@ -322,8 +321,8 @@ def normalize_fill_value(fill_value, dtype: np.dtype): except Exception as e: # re-raise with our own error message to be helpful raise ValueError( - "fill_value {!r} is not valid for dtype {}; nested " - "exception: {}".format(fill_value, dtype, e) + f"fill_value {fill_value!r} is not valid for dtype {dtype}; " + f"nested exception: {e}" ) return fill_value @@ -396,10 +395,10 @@ def info_html_report(items) -> str: report += "" for k, v in items: report += ( - "" - '%s' - '%s' - "" % (k, v) + f"" + f'{k}' + f'{v}' + f"" ) report += "" report += "" @@ -435,7 +434,7 @@ def get_children(self): def get_text(self): name = self.obj.name.split("/")[-1] or "/" if hasattr(self.obj, "shape"): - name += " {} {}".format(self.obj.shape, self.obj.dtype) + name += f" {self.obj.shape} {self.obj.dtype}" return name def get_type(self): @@ -463,7 +462,7 @@ def tree_get_icon(stype: str) -> str: elif stype == "Group": return tree_group_icon else: - raise ValueError("Unknown type: %s" % stype) + raise ValueError(f"Unknown type: {stype}") def tree_widget_sublist(node, root=False, expand=False): @@ -487,10 +486,10 @@ def tree_widget(group, expand, level): import ipytree except ImportError as error: raise ImportError( - "{}: Run `pip install zarr[jupyter]` or `conda install ipytree`" - "to get the required ipytree dependency for displaying the tree " - "widget. If using jupyterlab<3, you also need to run " - "`jupyter labextension install ipytree`".format(error) + f"{error}: Run `pip install zarr[jupyter]` or `conda install ipytree`" + f"to get the required ipytree dependency for displaying the tree " + f"widget. 
If using jupyterlab<3, you also need to run " + f"`jupyter labextension install ipytree`" ) result = ipytree.Tree() @@ -549,14 +548,10 @@ def _repr_mimebundle_(self, **kwargs): def check_array_shape(param, array, shape): if not hasattr(array, "shape"): - raise TypeError( - "parameter {!r}: expected an array-like object, got {!r}".format(param, type(array)) - ) + raise TypeError(f"parameter {param!r}: expected an array-like object, got {type(array)!r}") if array.shape != shape: raise ValueError( - "parameter {!r}: expected array with shape {!r}, got {!r}".format( - param, shape, array.shape - ) + f"parameter {param!r}: expected array with shape {shape!r}, got {array.shape!r}" ) From f80f697c2612cf41c5bdb158a602c1ae8a737e70 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 20 Feb 2024 18:28:10 +0530 Subject: [PATCH 196/213] chore: update pre-commit hooks (#1672) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/astral-sh/ruff-pre-commit: v0.2.1 → v0.2.2](https://github.com/astral-sh/ruff-pre-commit/compare/v0.2.1...v0.2.2) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c7d4f32c68..41b65f1d02 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -8,7 +8,7 @@ default_language_version: repos: - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. - rev: 'v0.2.1' + rev: 'v0.2.2' hooks: - id: ruff - repo: https://github.com/psf/black From 54bc90c8682472cc40fba35ec6b313cb1f046c34 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 26 Feb 2024 09:57:06 -0800 Subject: [PATCH 197/213] Bump pymongo from 4.6.1 to 4.6.2 (#1674) Bumps [pymongo](https://github.com/mongodb/mongo-python-driver) from 4.6.1 to 4.6.2. - [Release notes](https://github.com/mongodb/mongo-python-driver/releases) - [Changelog](https://github.com/mongodb/mongo-python-driver/blob/4.6.2/doc/changelog.rst) - [Commits](https://github.com/mongodb/mongo-python-driver/compare/4.6.1...4.6.2) --- updated-dependencies: - dependency-name: pymongo dependency-type: direct:development update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- requirements_dev_optional.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements_dev_optional.txt b/requirements_dev_optional.txt index e94b814173..85f6fccffc 100644 --- a/requirements_dev_optional.txt +++ b/requirements_dev_optional.txt @@ -11,7 +11,7 @@ azure-storage-blob==12.16.0 # pyup: ignore redis==5.0.1 types-redis types-setuptools -pymongo==4.6.1 +pymongo==4.6.2 # optional test requirements coverage pytest-cov==4.1.0 From 70a15bbe595031ad24b82ca5cee9468a8229e775 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 26 Feb 2024 10:09:11 -0800 Subject: [PATCH 198/213] Bump conda-incubator/setup-miniconda from 3.0.1 to 3.0.2 (#1677) Bumps [conda-incubator/setup-miniconda](https://github.com/conda-incubator/setup-miniconda) from 3.0.1 to 3.0.2. 
- [Release notes](https://github.com/conda-incubator/setup-miniconda/releases) - [Changelog](https://github.com/conda-incubator/setup-miniconda/blob/main/CHANGELOG.md) - [Commits](https://github.com/conda-incubator/setup-miniconda/compare/v3.0.1...v3.0.2) --- updated-dependencies: - dependency-name: conda-incubator/setup-miniconda dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/minimal.yml | 2 +- .github/workflows/python-package.yml | 2 +- .github/workflows/windows-testing.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/minimal.yml b/.github/workflows/minimal.yml index 2cc0213781..d95b2bc540 100644 --- a/.github/workflows/minimal.yml +++ b/.github/workflows/minimal.yml @@ -15,7 +15,7 @@ jobs: steps: - uses: actions/checkout@v4 - name: Setup Miniconda - uses: conda-incubator/setup-miniconda@v3.0.1 + uses: conda-incubator/setup-miniconda@v3.0.2 with: channels: conda-forge environment-file: environment.yml diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index d74df9ce67..946b7efa7d 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -42,7 +42,7 @@ jobs: with: fetch-depth: 0 - name: Setup Miniconda - uses: conda-incubator/setup-miniconda@v3.0.1 + uses: conda-incubator/setup-miniconda@v3.0.2 with: channels: conda-forge python-version: ${{ matrix.python-version }} diff --git a/.github/workflows/windows-testing.yml b/.github/workflows/windows-testing.yml index 0ef7f21758..85e5c3e6b6 100644 --- a/.github/workflows/windows-testing.yml +++ b/.github/workflows/windows-testing.yml @@ -21,7 +21,7 @@ jobs: - uses: actions/checkout@v4 with: fetch-depth: 0 - - uses: conda-incubator/setup-miniconda@v3.0.1 + - uses: conda-incubator/setup-miniconda@v3.0.2 with: auto-update-conda: true python-version: ${{ matrix.python-version }} From ec4d2162828c2616a388dda2bdbcf40c8747a36d Mon Sep 17 00:00:00 2001 From: Josh Moore Date: Tue, 27 Feb 2024 15:26:29 +0100 Subject: [PATCH 199/213] Update config.yml with Zulip --- .github/ISSUE_TEMPLATE/config.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml index 9cb5ec9a78..907121f858 100644 --- a/.github/ISSUE_TEMPLATE/config.yml +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -3,8 +3,8 @@ contact_links: - name: ✨ Propose a new major feature url: https://github.com/zarr-developers/zarr-specs about: A new major feature should be discussed in the Zarr specifications repository. - - name: ❓ Discuss something on gitter - url: https://gitter.im/zarr-developers/community + - name: ❓ Discuss something on Zulip + url: https://ossci.zulipchat.com/ about: For questions like "How do I do X with Zarr?", you can move to our Gitter channel. 
- name: ❓ Discuss something on GitHub Discussions url: https://github.com/zarr-developers/zarr-python/discussions From a0e5559c38bf1a9d7c1a70a81f51f5eece5701c2 Mon Sep 17 00:00:00 2001 From: David Stansby Date: Tue, 27 Feb 2024 17:04:50 +0100 Subject: [PATCH 200/213] Type dimension separator (#1620) Co-authored-by: Davis Bennett --- zarr/_storage/absstore.py | 5 ++- zarr/_storage/v3.py | 7 ++-- zarr/_storage/v3_storage_transformers.py | 3 +- zarr/creation.py | 2 +- zarr/storage.py | 45 +++++++++++++++++------- 5 files changed, 44 insertions(+), 18 deletions(-) diff --git a/zarr/_storage/absstore.py b/zarr/_storage/absstore.py index b6b386f468..217b2a29e0 100644 --- a/zarr/_storage/absstore.py +++ b/zarr/_storage/absstore.py @@ -1,9 +1,12 @@ """This module contains storage classes related to Azure Blob Storage (ABS)""" +from typing import Optional import warnings + from numcodecs.compat import ensure_bytes from zarr.util import normalize_storage_path from zarr._storage.store import _get_metadata_suffix, data_root, meta_root, Store, StoreV3 +from zarr.types import DIMENSION_SEPARATOR __doctest_requires__ = { ("ABSStore", "ABSStore.*"): ["azure.storage.blob"], @@ -67,7 +70,7 @@ def __init__( account_name=None, account_key=None, blob_service_kwargs=None, - dimension_separator=None, + dimension_separator: Optional[DIMENSION_SEPARATOR] = None, client=None, ): self._dimension_separator = dimension_separator diff --git a/zarr/_storage/v3.py b/zarr/_storage/v3.py index 56bae74361..4987f820cf 100644 --- a/zarr/_storage/v3.py +++ b/zarr/_storage/v3.py @@ -3,13 +3,14 @@ from collections import OrderedDict from collections.abc import MutableMapping from threading import Lock -from typing import Union, Dict, Any +from typing import Union, Dict, Any, Optional from zarr.errors import ( MetadataError, ReadOnlyError, ) from zarr.util import buffer_size, json_loads, normalize_storage_path +from zarr.types import DIMENSION_SEPARATOR from zarr._storage.absstore import ABSStoreV3 # noqa: F401 from zarr._storage.store import ( # noqa: F401 @@ -224,7 +225,9 @@ def get_partial_values(self, key_ranges): class MemoryStoreV3(MemoryStore, StoreV3): - def __init__(self, root=None, cls=dict, dimension_separator=None): + def __init__( + self, root=None, cls=dict, dimension_separator: Optional[DIMENSION_SEPARATOR] = None + ): if root is None: self.root = cls() else: diff --git a/zarr/_storage/v3_storage_transformers.py b/zarr/_storage/v3_storage_transformers.py index 3afc3823a3..37e56f8ecd 100644 --- a/zarr/_storage/v3_storage_transformers.py +++ b/zarr/_storage/v3_storage_transformers.py @@ -8,6 +8,7 @@ from zarr._storage.store import StorageTransformer, StoreV3, _rmdir_from_keys_v3 from zarr.util import normalize_storage_path +from zarr.types import DIMENSION_SEPARATOR MAX_UINT_64 = 2**64 - 1 @@ -118,7 +119,7 @@ def _copy_for_array(self, array, inner_store): return transformer_copy @property - def dimension_separator(self) -> str: + def dimension_separator(self) -> DIMENSION_SEPARATOR: assert ( self._dimension_separator is not None ), "dimension_separator is not initialized, first get a copy via _copy_for_array." 
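A minimal sketch of what these annotations buy, assuming `DIMENSION_SEPARATOR`
is the `Literal[".", "/"]` alias defined in `zarr/types.py` (`open_store` below
is a hypothetical function, used only for illustration):

    from typing import Literal, Optional

    # Assumed to mirror the alias imported from zarr/types.py in the hunks above.
    DIMENSION_SEPARATOR = Literal[".", "/"]

    def open_store(path: str, dimension_separator: Optional[DIMENSION_SEPARATOR] = None) -> None:
        # A type checker now rejects any separator other than "." or "/".
        ...

    open_store("data.zarr", dimension_separator="/")  # accepted
    open_store("data.zarr", dimension_separator="-")  # flagged by mypy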
diff --git a/zarr/creation.py b/zarr/creation.py index 264715b040..c541531d54 100644 --- a/zarr/creation.py +++ b/zarr/creation.py @@ -470,7 +470,7 @@ def open_array( write_empty_chunks=True, *, zarr_version=None, - dimension_separator=None, + dimension_separator: Optional[DIMENSION_SEPARATOR] = None, meta_array=None, **kwargs, ): diff --git a/zarr/storage.py b/zarr/storage.py index 73a6dc9630..f6903d29b2 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -41,7 +41,8 @@ from numcodecs.compat import ensure_bytes, ensure_text, ensure_contiguous_ndarray_like from numcodecs.registry import codec_registry from zarr.context import Context -from zarr.types import PathLike as Path +from zarr.types import PathLike as Path, DIMENSION_SEPARATOR +from zarr.util import NoLock from zarr.errors import ( MetadataError, @@ -327,7 +328,7 @@ def init_array( chunk_store: Optional[StoreLike] = None, filters=None, object_codec=None, - dimension_separator=None, + dimension_separator: Optional[DIMENSION_SEPARATOR] = None, storage_transformers=(), ): """Initialize an array store with the given configuration. Note that this is a low-level @@ -481,7 +482,7 @@ def _init_array_metadata( chunk_store: Optional[StoreLike] = None, filters=None, object_codec=None, - dimension_separator=None, + dimension_separator: Optional[DIMENSION_SEPARATOR] = None, storage_transformers=(), ): store_version = getattr(store, "_store_version", 2) @@ -1054,7 +1055,9 @@ class DirectoryStore(Store): """ - def __init__(self, path, normalize_keys=False, dimension_separator=None): + def __init__( + self, path, normalize_keys=False, dimension_separator: Optional[DIMENSION_SEPARATOR] = None + ): # guard conditions path = os.path.abspath(path) if os.path.exists(path) and not os.path.isdir(path): @@ -1349,7 +1352,7 @@ def __init__( key_separator=None, mode="w", exceptions=(KeyError, PermissionError, IOError), - dimension_separator=None, + dimension_separator: Optional[DIMENSION_SEPARATOR] = None, fs=None, check=False, create=False, @@ -1568,7 +1571,12 @@ class TempStore(DirectoryStore): # noinspection PyShadowingBuiltins def __init__( - self, suffix="", prefix="zarr", dir=None, normalize_keys=False, dimension_separator=None + self, + suffix="", + prefix="zarr", + dir=None, + normalize_keys=False, + dimension_separator: Optional[DIMENSION_SEPARATOR] = None, ): path = tempfile.mkdtemp(suffix=suffix, prefix=prefix, dir=dir) atexit.register(atexit_rmtree, path) @@ -1652,7 +1660,9 @@ class NestedDirectoryStore(DirectoryStore): """ - def __init__(self, path, normalize_keys=False, dimension_separator="/"): + def __init__( + self, path, normalize_keys=False, dimension_separator: Optional[DIMENSION_SEPARATOR] = "/" + ): super().__init__(path, normalize_keys=normalize_keys) if dimension_separator is None: dimension_separator = "/" @@ -1765,7 +1775,7 @@ def __init__( compression=zipfile.ZIP_STORED, allowZip64=True, mode="a", - dimension_separator=None, + dimension_separator: Optional[DIMENSION_SEPARATOR] = None, ): # store properties path = os.path.abspath(path) @@ -2058,7 +2068,7 @@ def __init__( mode=0o666, open=None, write_lock=True, - dimension_separator=None, + dimension_separator: Optional[DIMENSION_SEPARATOR] = None, **open_kwargs, ): if open is None: @@ -2073,6 +2083,7 @@ def __init__( self.mode = mode self.open = open self.write_lock = write_lock + self.write_mutex: Union[Lock, NoLock] if write_lock: # This may not be required as some dbm implementations manage their own # locks, but err on the side of caution. 
@@ -2229,7 +2240,13 @@ class LMDBStore(Store): """ - def __init__(self, path, buffers=True, dimension_separator=None, **kwargs): + def __init__( + self, + path, + buffers=True, + dimension_separator: Optional[DIMENSION_SEPARATOR] = None, + **kwargs, + ): import lmdb # set default memory map size to something larger than the lmdb default, which is @@ -2580,7 +2597,7 @@ class SQLiteStore(Store): >>> store.close() # don't forget to call this when you're done """ - def __init__(self, path, dimension_separator=None, **kwargs): + def __init__(self, path, dimension_separator: Optional[DIMENSION_SEPARATOR] = None, **kwargs): import sqlite3 self._dimension_separator = dimension_separator @@ -2776,7 +2793,7 @@ def __init__( self, database="mongodb_zarr", collection="zarr_collection", - dimension_separator=None, + dimension_separator: Optional[DIMENSION_SEPARATOR] = None, **kwargs, ): import pymongo @@ -2851,7 +2868,9 @@ class RedisStore(Store): """ - def __init__(self, prefix="zarr", dimension_separator=None, **kwargs): + def __init__( + self, prefix="zarr", dimension_separator: Optional[DIMENSION_SEPARATOR] = None, **kwargs + ): import redis self._prefix = prefix From 99e03c684729b188457024a53afc45cb1b160027 Mon Sep 17 00:00:00 2001 From: Sanket Verma Date: Wed, 28 Feb 2024 18:22:28 +0530 Subject: [PATCH 201/213] Replace Gitter with new Zulip Chat link (#1685) * Replace Gitter with Zulip * Replace Gitter with Zulip in remaining places --- .github/ISSUE_TEMPLATE/config.yml | 2 +- README.md | 6 +++--- docs/index.rst | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml index 907121f858..9ceaab2ae7 100644 --- a/.github/ISSUE_TEMPLATE/config.yml +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -5,7 +5,7 @@ contact_links: about: A new major feature should be discussed in the Zarr specifications repository. - name: ❓ Discuss something on Zulip url: https://ossci.zulipchat.com/ - about: For questions like "How do I do X with Zarr?", you can move to our Gitter channel. + about: For questions like "How do I do X with Zarr?", you can move to our Zulip Chat. - name: ❓ Discuss something on GitHub Discussions url: https://github.com/zarr-developers/zarr-python/discussions about: For questions like "How do I do X with Zarr?", you can move to GitHub Discussions. diff --git a/README.md b/README.md index b035ffa597..e379c9719f 100644 --- a/README.md +++ b/README.md @@ -70,10 +70,10 @@ - Gitter + Zulip - - + + diff --git a/docs/index.rst b/docs/index.rst index 06f79b7e7c..a5dbfbc5bf 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -25,7 +25,7 @@ Zarr-Python `Installation `_ | `Source Repository `_ | `Issue Tracker `_ | -`Gitter `_ +`Zulip Chat `_ Zarr is a file storage format for chunked, compressed, N-dimensional arrays based on an open-source specification. From 67d5d82317451c9072a187efd6d638e718cdaced Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sat, 2 Mar 2024 02:03:35 +0530 Subject: [PATCH 202/213] Bump redis from 5.0.1 to 5.0.2 (#1688) Bumps [redis](https://github.com/redis/redis-py) from 5.0.1 to 5.0.2. - [Release notes](https://github.com/redis/redis-py/releases) - [Changelog](https://github.com/redis/redis-py/blob/master/CHANGES) - [Commits](https://github.com/redis/redis-py/compare/v5.0.1...v5.0.2) --- updated-dependencies: - dependency-name: redis dependency-type: direct:development update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- requirements_dev_optional.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements_dev_optional.txt b/requirements_dev_optional.txt index 85f6fccffc..c3d747a47e 100644 --- a/requirements_dev_optional.txt +++ b/requirements_dev_optional.txt @@ -8,7 +8,7 @@ ipywidgets==8.1.2 # don't let pyup change pinning for azure-storage-blob, need to pin to older # version to get compatibility with azure storage emulator on appveyor (FIXME) azure-storage-blob==12.16.0 # pyup: ignore -redis==5.0.1 +redis==5.0.2 types-redis types-setuptools pymongo==4.6.2 From 9c2a412d70ed717165966bc47615bdef195d68c5 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 4 Mar 2024 15:48:15 +0100 Subject: [PATCH 203/213] Bump pypa/gh-action-pypi-publish from 1.8.11 to 1.8.12 (#1691) Bumps [pypa/gh-action-pypi-publish](https://github.com/pypa/gh-action-pypi-publish) from 1.8.11 to 1.8.12. - [Release notes](https://github.com/pypa/gh-action-pypi-publish/releases) - [Commits](https://github.com/pypa/gh-action-pypi-publish/compare/v1.8.11...v1.8.12) --- updated-dependencies: - dependency-name: pypa/gh-action-pypi-publish dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/releases.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/releases.yml b/.github/workflows/releases.yml index 250c6112c8..6d417042b5 100644 --- a/.github/workflows/releases.yml +++ b/.github/workflows/releases.yml @@ -64,7 +64,7 @@ jobs: with: name: releases path: dist - - uses: pypa/gh-action-pypi-publish@v1.8.11 + - uses: pypa/gh-action-pypi-publish@v1.8.12 with: user: __token__ password: ${{ secrets.pypi_password }} From 237f934f5ac7d7a04c6b144f97e54776eda628c6 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 6 Mar 2024 01:02:48 +0530 Subject: [PATCH 204/213] Bump pytest-doctestplus from 1.1.0 to 1.2.0 (#1693) Bumps [pytest-doctestplus](https://github.com/scientific-python/pytest-doctestplus) from 1.1.0 to 1.2.0. - [Release notes](https://github.com/scientific-python/pytest-doctestplus/releases) - [Changelog](https://github.com/scientific-python/pytest-doctestplus/blob/main/CHANGES.rst) - [Commits](https://github.com/scientific-python/pytest-doctestplus/compare/v1.1.0...v1.2.0) --- updated-dependencies: - dependency-name: pytest-doctestplus dependency-type: direct:development update-type: version-update:semver-minor ... 

Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 requirements_dev_optional.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements_dev_optional.txt b/requirements_dev_optional.txt
index c3d747a47e..0f4493b1d4 100644
--- a/requirements_dev_optional.txt
+++ b/requirements_dev_optional.txt
@@ -15,7 +15,7 @@ pymongo==4.6.2
 # optional test requirements
 coverage
 pytest-cov==4.1.0
-pytest-doctestplus==1.1.0
+pytest-doctestplus==1.2.0
 pytest-timeout==2.2.0
 h5py==3.10.0
 fsspec==2023.12.2

From 240bb824b86a68aa0eb6f03fecbe5439882607ca Mon Sep 17 00:00:00 2001
From: Sanket Verma
Date: Wed, 6 Mar 2024 19:48:22 +0530
Subject: [PATCH 205/213] Fix RTD build (#1694)

---
 .readthedocs.yaml | 4 +++-
 docs/index.rst    | 2 +-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/.readthedocs.yaml b/.readthedocs.yaml
index 08cac8d78d..e45cae1b45 100644
--- a/.readthedocs.yaml
+++ b/.readthedocs.yaml
@@ -16,4 +16,6 @@ python:
     extra_requirements:
       - docs

-formats: all
+formats:
+  - htmlzip
+  - pdf
diff --git a/docs/index.rst b/docs/index.rst
index a5dbfbc5bf..cf54e261af 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -19,7 +19,7 @@ Zarr-Python

 **Version**: |version|

-**Download documentation**: `PDF/Zipped HTML/EPUB `_
+**Download documentation**: `PDF/Zipped HTML `_

 **Useful links**:
 `Installation `_ |

From a1fbedb18c1fc70f026c423fafca6d84ad88ce53 Mon Sep 17 00:00:00 2001
From: Sanket Verma
Date: Thu, 7 Mar 2024 00:53:47 +0530
Subject: [PATCH 206/213] Update release.rst for v2.17.1 (#1673)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Update release.rst for v2.17.1

* Change the copyright year from 2023 → 2024.

* Update release.rst for v2.17.1
---
 LICENSE.txt      |  2 +-
 docs/conf.py     |  2 +-
 docs/release.rst | 32 ++++++++++++++++++++++++++++++++
 3 files changed, 34 insertions(+), 2 deletions(-)

diff --git a/LICENSE.txt b/LICENSE.txt
index 850a0d8772..a4de1c39d3 100644
--- a/LICENSE.txt
+++ b/LICENSE.txt
@@ -1,6 +1,6 @@
 The MIT License (MIT)

-Copyright (c) 2015-2023 Zarr Developers
+Copyright (c) 2015-2024 Zarr Developers

 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
diff --git a/docs/conf.py b/docs/conf.py
index 318843a9fb..048e77f51d 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -72,7 +72,7 @@

 # General information about the project.
 project = "zarr"
-copyright = "2023, Zarr Developers"
+copyright = "2024, Zarr Developers"
 author = "Zarr Developers"

 version = zarr.__version__
diff --git a/docs/release.rst b/docs/release.rst
index 8ce4b2e33c..037432ca58 100644
--- a/docs/release.rst
+++ b/docs/release.rst
@@ -18,9 +18,41 @@ Release notes
 Unreleased
 ----------

+.. _release_2.17.1:
+
+2.17.1
+------
+
+Enhancements
+~~~~~~~~~~~~
+
+* Change occurrences of % and format() to f-strings.
+  By :user:`Dimitri Papadopoulos Orfanos ` :issue:`1423`.
+
+* Proper argument for numpy.reshape.
+  By :user:`Dimitri Papadopoulos Orfanos ` :issue:`1425`.
+
+* Add typing to dimension separator arguments.
+  By :user:`David Stansby ` :issue:`1620`.
+
+Docs
+~~~~
+
+* ZIP related tweaks.
+  By :user:`Davis Bennett ` :issue:`1641`.
+
+Maintenance
+~~~~~~~~~~~
+
+* Update config.yml with Zulip.
+  By :user:`Josh Moore `.
+
+* Replace Gitter with the new Zulip Chat link.
+  By :user:`Sanket Verma ` :issue:`1685`.
+
+* Fix RTD build.
+  By :user:`Sanket Verma ` :issue:`1694`.
+
 .. _release_2.17.0:

 2.17.0
 ------

From d986f8973eafbf847179d8c3f7e16451d0fcd63d Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Fri, 8 Mar 2024 17:40:55 -0800
Subject: [PATCH 207/213] Bump pytest-timeout from 2.2.0 to 2.3.1 (#1697)

Bumps [pytest-timeout](https://github.com/pytest-dev/pytest-timeout) from 2.2.0 to 2.3.1.
- [Commits](https://github.com/pytest-dev/pytest-timeout/compare/2.2.0...2.3.1)

---
updated-dependencies:
- dependency-name: pytest-timeout
  dependency-type: direct:development
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 requirements_dev_optional.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements_dev_optional.txt b/requirements_dev_optional.txt
index 0f4493b1d4..b14381dd6e 100644
--- a/requirements_dev_optional.txt
+++ b/requirements_dev_optional.txt
@@ -16,7 +16,7 @@ pymongo==4.6.2
 coverage
 pytest-cov==4.1.0
 pytest-doctestplus==1.2.0
-pytest-timeout==2.2.0
+pytest-timeout==2.3.1
 h5py==3.10.0
 fsspec==2023.12.2
 s3fs==2023.12.2

From d642da6320793c64dc227cf8062a9936d0fd398e Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Sun, 10 Mar 2024 14:26:24 -0700
Subject: [PATCH 208/213] Bump conda-incubator/setup-miniconda from 3.0.2 to
 3.0.3 (#1690)

Bumps [conda-incubator/setup-miniconda](https://github.com/conda-incubator/setup-miniconda) from 3.0.2 to 3.0.3.
- [Release notes](https://github.com/conda-incubator/setup-miniconda/releases)
- [Changelog](https://github.com/conda-incubator/setup-miniconda/blob/main/CHANGELOG.md)
- [Commits](https://github.com/conda-incubator/setup-miniconda/compare/v3.0.2...v3.0.3)

---
updated-dependencies:
- dependency-name: conda-incubator/setup-miniconda
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Sanket Verma
---
 .github/workflows/minimal.yml         | 2 +-
 .github/workflows/python-package.yml  | 2 +-
 .github/workflows/windows-testing.yml | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/minimal.yml b/.github/workflows/minimal.yml
index d95b2bc540..dba6918514 100644
--- a/.github/workflows/minimal.yml
+++ b/.github/workflows/minimal.yml
@@ -15,7 +15,7 @@ jobs:
     steps:
       - uses: actions/checkout@v4
       - name: Setup Miniconda
-        uses: conda-incubator/setup-miniconda@v3.0.2
+        uses: conda-incubator/setup-miniconda@v3.0.3
         with:
           channels: conda-forge
           environment-file: environment.yml
diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
index 946b7efa7d..fd2603ff95 100644
--- a/.github/workflows/python-package.yml
+++ b/.github/workflows/python-package.yml
@@ -42,7 +42,7 @@ jobs:
       with:
         fetch-depth: 0
     - name: Setup Miniconda
-      uses: conda-incubator/setup-miniconda@v3.0.2
+      uses: conda-incubator/setup-miniconda@v3.0.3
       with:
         channels: conda-forge
         python-version: ${{ matrix.python-version }}
diff --git a/.github/workflows/windows-testing.yml b/.github/workflows/windows-testing.yml
index 85e5c3e6b6..d580ef3f0e 100644
--- a/.github/workflows/windows-testing.yml
+++ b/.github/workflows/windows-testing.yml
@@ -21,7 +21,7 @@ jobs:
     - uses: actions/checkout@v4
       with:
         fetch-depth: 0
-    - uses: conda-incubator/setup-miniconda@v3.0.2
+    - uses: conda-incubator/setup-miniconda@v3.0.3
      with:
        auto-update-conda: true
        python-version: ${{ matrix.python-version }}

From 029cff71b86871cde76c7909cfecd28764953377 Mon Sep 17 00:00:00 2001
From: "Daniel Jahn (dahn)"
Date: Sun, 10 Mar 2024 22:37:36 +0100
Subject: [PATCH 209/213] docs(tutorial.rst): fix link to GCSMap (#1689)

---
 docs/tutorial.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/tutorial.rst b/docs/tutorial.rst
index 1f7accab3a..214dd4f63f 100644
--- a/docs/tutorial.rst
+++ b/docs/tutorial.rst
@@ -868,7 +868,7 @@ implementations of the ``MutableMapping`` interface for Amazon S3 (`S3Map
 Distributed File System (`HDFSMap
 `_) and
 Google Cloud Storage (`GCSMap
-`_), which
+`_), which
 can be used with Zarr. Here is an example using S3Map to read an array
 created previously::

From 9fc4981ddfb0e032fcc76fa6585b5a66dc5d2f06 Mon Sep 17 00:00:00 2001
From: Sanket Verma
Date: Mon, 11 Mar 2024 03:19:28 +0530
Subject: [PATCH 210/213] Update installation.rst stating version support
 policy (#1665)

* Update installation.rst stating version support policy

* Update docs/installation.rst

Co-authored-by: Joe Hamman

* Update docs/installation.rst

---------

Co-authored-by: Joe Hamman
---
 docs/installation.rst | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/docs/installation.rst b/docs/installation.rst
index 8553d451cb..35865c764d 100644
--- a/docs/installation.rst
+++ b/docs/installation.rst
@@ -6,6 +6,11 @@ Zarr depends on NumPy. It is generally best to `install NumPy
 appropriate for your operating system and Python distribution. Other dependencies should be
 installed automatically if using one of the installation methods below.

+Note: Zarr has endorsed `Scientific-Python SPEC 0 `_ and now follows the version support window as outlined below:
+
+- Python: 36 months after initial release
+- Core package dependencies (e.g. NumPy): 24 months after initial release
+
 Install Zarr from PyPI::

     $ pip install zarr

From f58065b221452acd70235902ad59d920da6fb02f Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 11 Mar 2024 08:47:15 -0700
Subject: [PATCH 211/213] Bump pypa/gh-action-pypi-publish from 1.8.12 to
 1.8.14 (#1700)

Bumps [pypa/gh-action-pypi-publish](https://github.com/pypa/gh-action-pypi-publish) from 1.8.12 to 1.8.14.
- [Release notes](https://github.com/pypa/gh-action-pypi-publish/releases)
- [Commits](https://github.com/pypa/gh-action-pypi-publish/compare/v1.8.12...v1.8.14)

---
updated-dependencies:
- dependency-name: pypa/gh-action-pypi-publish
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 .github/workflows/releases.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/releases.yml b/.github/workflows/releases.yml
index 6d417042b5..fe168d2862 100644
--- a/.github/workflows/releases.yml
+++ b/.github/workflows/releases.yml
@@ -64,7 +64,7 @@ jobs:
         with:
           name: releases
           path: dist
-      - uses: pypa/gh-action-pypi-publish@v1.8.12
+      - uses: pypa/gh-action-pypi-publish@v1.8.14
         with:
           user: __token__
           password: ${{ secrets.pypi_password }}

From bbac25472e0a781dc5c0256d26a481eacb27390b Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Wed, 13 Mar 2024 16:58:21 +0100
Subject: [PATCH 212/213] Bump pytest-doctestplus from 1.2.0 to 1.2.1 (#1699)

Bumps [pytest-doctestplus](https://github.com/scientific-python/pytest-doctestplus) from 1.2.0 to 1.2.1.
- [Release notes](https://github.com/scientific-python/pytest-doctestplus/releases)
- [Changelog](https://github.com/scientific-python/pytest-doctestplus/blob/main/CHANGES.rst)
- [Commits](https://github.com/scientific-python/pytest-doctestplus/compare/v1.2.0...v1.2.1)

---
updated-dependencies:
- dependency-name: pytest-doctestplus
  dependency-type: direct:development
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Sanket Verma
---
 requirements_dev_optional.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements_dev_optional.txt b/requirements_dev_optional.txt
index b14381dd6e..62b257ea70 100644
--- a/requirements_dev_optional.txt
+++ b/requirements_dev_optional.txt
@@ -15,7 +15,7 @@ pymongo==4.6.2
 # optional test requirements
 coverage
 pytest-cov==4.1.0
-pytest-doctestplus==1.2.0
+pytest-doctestplus==1.2.1
 pytest-timeout==2.3.1
 h5py==3.10.0
 fsspec==2023.12.2

From 6fe553df925c224fcc0a12ecdd074997ce9e56f7 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Wed, 13 Mar 2024 17:53:29 +0100
Subject: [PATCH 213/213] Bump redis from 5.0.2 to 5.0.3 (#1698)

Bumps [redis](https://github.com/redis/redis-py) from 5.0.2 to 5.0.3.
- [Release notes](https://github.com/redis/redis-py/releases)
- [Changelog](https://github.com/redis/redis-py/blob/master/CHANGES)
- [Commits](https://github.com/redis/redis-py/compare/v5.0.2...v5.0.3)

---
updated-dependencies:
- dependency-name: redis
  dependency-type: direct:development
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Sanket Verma
---
 requirements_dev_optional.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements_dev_optional.txt b/requirements_dev_optional.txt
index 62b257ea70..7ff673cebd 100644
--- a/requirements_dev_optional.txt
+++ b/requirements_dev_optional.txt
@@ -8,7 +8,7 @@ ipywidgets==8.1.2
 # don't let pyup change pinning for azure-storage-blob, need to pin to older
 # version to get compatibility with azure storage emulator on appveyor (FIXME)
 azure-storage-blob==12.16.0  # pyup: ignore
-redis==5.0.2
+redis==5.0.3
 types-redis
 types-setuptools
 pymongo==4.6.2
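
A note on the dimension-separator typing hunks at the start of this section
(the change recorded in the 2.17.1 release notes above as "Add typing to
dimension separator arguments", :issue:`1620`): the diff annotates each store
constructor with ``Optional[DIMENSION_SEPARATOR]`` but does not show the alias
itself. The sketch below is an illustration only, not zarr's actual source. It
assumes ``DIMENSION_SEPARATOR`` is a ``Literal`` alias over the two chunk-key
separators zarr supports, and uses a hypothetical ``ExampleStore`` in place of
the LMDB/SQLite/MongoDB/Redis stores patched above::

    # Illustrative sketch only. Assumptions: DIMENSION_SEPARATOR is a
    # Literal alias over zarr's two chunk-key separators (the real alias
    # lives in zarr's own typing module); ExampleStore is a hypothetical
    # stand-in for the patched store classes.
    from typing import Literal, Optional

    DIMENSION_SEPARATOR = Literal[".", "/"]

    class ExampleStore:
        def __init__(
            self,
            path: str,
            dimension_separator: Optional[DIMENSION_SEPARATOR] = None,
        ) -> None:
            # None defers to the array/store default; "." produces flat
            # chunk keys like "0.0", while "/" produces nested keys "0/0".
            self._dimension_separator = dimension_separator

    store = ExampleStore("data.db", dimension_separator="/")  # accepted
    # ExampleStore("data.db", dimension_separator="-")  # flagged by a type checker

Keeping ``Optional`` preserves ``None`` as the "use the default" sentinel,
while any other separator string is now caught statically by a type checker
rather than surfacing as a runtime key-layout bug.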