From 3960f268c8b5b19145fb330fc84497e52cec77f4 Mon Sep 17 00:00:00 2001 From: Norman Rzepka Date: Tue, 8 Aug 2023 14:35:02 +0200 Subject: [PATCH] update zarrita, relaxes numpy requirement (#932) * update zarrita, relaxes numpy requirement * fix endian_codec * fix codecs * update zarrita * fix zarr3 testdata * fixes in zarrita * new default shard_shapes for Dataset.from_images * fixes chunk_shape --- webknossos/poetry.lock | 69 +++++++++++++++++-- webknossos/pyproject.toml | 2 +- .../simple_zarr3_dataset/color/1/zarr.json | 33 ++++++++- webknossos/tests/dataset/test_dataset.py | 5 +- webknossos/webknossos/dataset/_array.py | 43 +++++++----- webknossos/webknossos/dataset/dataset.py | 5 ++ webknossos/webknossos/dataset/defaults.py | 1 + 7 files changed, 133 insertions(+), 25 deletions(-) diff --git a/webknossos/poetry.lock b/webknossos/poetry.lock index 8d9b8f6cb..288e6d19f 100644 --- a/webknossos/poetry.lock +++ b/webknossos/poetry.lock @@ -3322,13 +3322,13 @@ jupyter = ["ipytree (>=0.2.2)", "ipywidgets (>=8.0.0)", "notebook"] [[package]] name = "zarrita" -version = "0.1.0a12" +version = "0.1.0a18" description = "" optional = false python-versions = ">=3.8,<4.0" files = [ - {file = "zarrita-0.1.0a12-py3-none-any.whl", hash = "sha256:e72e3c8e7a5d3cda169274ec8d69805580d2a82b7018c1c60d5a4cfc9e41f452"}, - {file = "zarrita-0.1.0a12.tar.gz", hash = "sha256:89f6499c93c7f5c617e54573522a622b56e7b7f81ec7fbe216396392d8631e15"}, + {file = "zarrita-0.1.0a18-py3-none-any.whl", hash = "sha256:0a12a7f64680d0fbb05e79c5f94a0f80a2f48fa714ebcbb275b977ee30afccc9"}, + {file = "zarrita-0.1.0a18.tar.gz", hash = "sha256:083e0e197fe51bee4477867452d2781f376249ca8423b1f9c01edfc49b46f101"}, ] [package.dependencies] @@ -3337,7 +3337,8 @@ cattrs = ">=22.2.0" crc32c = ">=2.3" fsspec = ">=2022.0.0" numcodecs = ">=0.11.0,<0.12.0" -numpy = ">=1.24.2,<2.0.0" +numpy = ">=1.22,<2.0" +zstandard = ">=0.21.0,<0.22.0" [[package]] name = "zipp" @@ -3354,6 +3355,64 @@ files = [ docs = ["furo", "jaraco.packaging 
(>=9)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] testing = ["big-O", "flake8 (<5)", "jaraco.functools", "jaraco.itertools", "more-itertools", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)"] +[[package]] +name = "zstandard" +version = "0.21.0" +description = "Zstandard bindings for Python" +optional = false +python-versions = ">=3.7" +files = [ + {file = "zstandard-0.21.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:649a67643257e3b2cff1c0a73130609679a5673bf389564bc6d4b164d822a7ce"}, + {file = "zstandard-0.21.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:144a4fe4be2e747bf9c646deab212666e39048faa4372abb6a250dab0f347a29"}, + {file = "zstandard-0.21.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b72060402524ab91e075881f6b6b3f37ab715663313030d0ce983da44960a86f"}, + {file = "zstandard-0.21.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8257752b97134477fb4e413529edaa04fc0457361d304c1319573de00ba796b1"}, + {file = "zstandard-0.21.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:c053b7c4cbf71cc26808ed67ae955836232f7638444d709bfc302d3e499364fa"}, + {file = "zstandard-0.21.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2769730c13638e08b7a983b32cb67775650024632cd0476bf1ba0e6360f5ac7d"}, + {file = "zstandard-0.21.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:7d3bc4de588b987f3934ca79140e226785d7b5e47e31756761e48644a45a6766"}, + {file = "zstandard-0.21.0-cp310-cp310-win32.whl", hash = "sha256:67829fdb82e7393ca68e543894cd0581a79243cc4ec74a836c305c70a5943f07"}, + {file = "zstandard-0.21.0-cp310-cp310-win_amd64.whl", hash = 
"sha256:e6048a287f8d2d6e8bc67f6b42a766c61923641dd4022b7fd3f7439e17ba5a4d"}, + {file = "zstandard-0.21.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:7f2afab2c727b6a3d466faee6974a7dad0d9991241c498e7317e5ccf53dbc766"}, + {file = "zstandard-0.21.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ff0852da2abe86326b20abae912d0367878dd0854b8931897d44cfeb18985472"}, + {file = "zstandard-0.21.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d12fa383e315b62630bd407477d750ec96a0f438447d0e6e496ab67b8b451d39"}, + {file = "zstandard-0.21.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f1b9703fe2e6b6811886c44052647df7c37478af1b4a1a9078585806f42e5b15"}, + {file = "zstandard-0.21.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:df28aa5c241f59a7ab524f8ad8bb75d9a23f7ed9d501b0fed6d40ec3064784e8"}, + {file = "zstandard-0.21.0-cp311-cp311-win32.whl", hash = "sha256:0aad6090ac164a9d237d096c8af241b8dcd015524ac6dbec1330092dba151657"}, + {file = "zstandard-0.21.0-cp311-cp311-win_amd64.whl", hash = "sha256:48b6233b5c4cacb7afb0ee6b4f91820afbb6c0e3ae0fa10abbc20000acdf4f11"}, + {file = "zstandard-0.21.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:e7d560ce14fd209db6adacce8908244503a009c6c39eee0c10f138996cd66d3e"}, + {file = "zstandard-0.21.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1e6e131a4df2eb6f64961cea6f979cdff22d6e0d5516feb0d09492c8fd36f3bc"}, + {file = "zstandard-0.21.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e1e0c62a67ff425927898cf43da2cf6b852289ebcc2054514ea9bf121bec10a5"}, + {file = "zstandard-0.21.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:1545fb9cb93e043351d0cb2ee73fa0ab32e61298968667bb924aac166278c3fc"}, + {file = 
"zstandard-0.21.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fe6c821eb6870f81d73bf10e5deed80edcac1e63fbc40610e61f340723fd5f7c"}, + {file = "zstandard-0.21.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:ddb086ea3b915e50f6604be93f4f64f168d3fc3cef3585bb9a375d5834392d4f"}, + {file = "zstandard-0.21.0-cp37-cp37m-win32.whl", hash = "sha256:57ac078ad7333c9db7a74804684099c4c77f98971c151cee18d17a12649bc25c"}, + {file = "zstandard-0.21.0-cp37-cp37m-win_amd64.whl", hash = "sha256:1243b01fb7926a5a0417120c57d4c28b25a0200284af0525fddba812d575f605"}, + {file = "zstandard-0.21.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:ea68b1ba4f9678ac3d3e370d96442a6332d431e5050223626bdce748692226ea"}, + {file = "zstandard-0.21.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:8070c1cdb4587a8aa038638acda3bd97c43c59e1e31705f2766d5576b329e97c"}, + {file = "zstandard-0.21.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4af612c96599b17e4930fe58bffd6514e6c25509d120f4eae6031b7595912f85"}, + {file = "zstandard-0.21.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cff891e37b167bc477f35562cda1248acc115dbafbea4f3af54ec70821090965"}, + {file = "zstandard-0.21.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:a9fec02ce2b38e8b2e86079ff0b912445495e8ab0b137f9c0505f88ad0d61296"}, + {file = "zstandard-0.21.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0bdbe350691dec3078b187b8304e6a9c4d9db3eb2d50ab5b1d748533e746d099"}, + {file = "zstandard-0.21.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:b69cccd06a4a0a1d9fb3ec9a97600055cf03030ed7048d4bcb88c574f7895773"}, + {file = "zstandard-0.21.0-cp38-cp38-win32.whl", hash = 
"sha256:9980489f066a391c5572bc7dc471e903fb134e0b0001ea9b1d3eff85af0a6f1b"}, + {file = "zstandard-0.21.0-cp38-cp38-win_amd64.whl", hash = "sha256:0e1e94a9d9e35dc04bf90055e914077c80b1e0c15454cc5419e82529d3e70728"}, + {file = "zstandard-0.21.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:d2d61675b2a73edcef5e327e38eb62bdfc89009960f0e3991eae5cc3d54718de"}, + {file = "zstandard-0.21.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:25fbfef672ad798afab12e8fd204d122fca3bc8e2dcb0a2ba73bf0a0ac0f5f07"}, + {file = "zstandard-0.21.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:62957069a7c2626ae80023998757e27bd28d933b165c487ab6f83ad3337f773d"}, + {file = "zstandard-0.21.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:14e10ed461e4807471075d4b7a2af51f5234c8f1e2a0c1d37d5ca49aaaad49e8"}, + {file = "zstandard-0.21.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:9cff89a036c639a6a9299bf19e16bfb9ac7def9a7634c52c257166db09d950e7"}, + {file = "zstandard-0.21.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:52b2b5e3e7670bd25835e0e0730a236f2b0df87672d99d3bf4bf87248aa659fb"}, + {file = "zstandard-0.21.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:b1367da0dde8ae5040ef0413fb57b5baeac39d8931c70536d5f013b11d3fc3a5"}, + {file = "zstandard-0.21.0-cp39-cp39-win32.whl", hash = "sha256:db62cbe7a965e68ad2217a056107cc43d41764c66c895be05cf9c8b19578ce9c"}, + {file = "zstandard-0.21.0-cp39-cp39-win_amd64.whl", hash = "sha256:a8d200617d5c876221304b0e3fe43307adde291b4a897e7b0617a61611dfff6a"}, + {file = "zstandard-0.21.0.tar.gz", hash = "sha256:f08e3a10d01a247877e4cb61a82a319ea746c356a3786558bed2481e6c405546"}, +] + +[package.dependencies] +cffi = {version = ">=1.11", markers = "platform_python_implementation == \"PyPy\""} + +[package.extras] +cffi = ["cffi 
(>=1.11)"] + [extras] all = ["JPype1", "imagecodecs", "pims", "pylibCZIrw", "tifffile", "zarrita"] bioformats = ["JPype1", "pims"] @@ -3365,4 +3424,4 @@ tifffile = ["pims", "tifffile"] [metadata] lock-version = "2.0" python-versions = ">=3.8,<3.12" -content-hash = "74647410aeb5f42f69d7861f7d63a66e1ba7d9eb34d1301f92ffd9aae56fbbad" +content-hash = "a90645bdcf6b31f32a430e1df0692a97b1290aa976b272b6ab9cc9c282c2126e" diff --git a/webknossos/pyproject.toml b/webknossos/pyproject.toml index 11f15794f..f3b6644ef 100644 --- a/webknossos/pyproject.toml +++ b/webknossos/pyproject.toml @@ -63,7 +63,7 @@ JPype1 = { version = "^1.3.0", optional = true } pims = { version = "^0.6.0", optional = true } tifffile = { version = ">=2021.11.2", optional = true } pylibCZIrw = { version = "3.4.0", source = "scm", optional = true } -zarrita = "0.1.0a12" +zarrita = "0.1.0a18" [tool.poetry.extras] pims = ["pims"] diff --git a/webknossos/testdata/simple_zarr3_dataset/color/1/zarr.json b/webknossos/testdata/simple_zarr3_dataset/color/1/zarr.json index 52c43d299..a05848719 100644 --- a/webknossos/testdata/simple_zarr3_dataset/color/1/zarr.json +++ b/webknossos/testdata/simple_zarr3_dataset/color/1/zarr.json @@ -1 +1,32 @@ -{"shape": [3, 24, 24, 24], "data_type": "uint8", "chunk_grid": {"configuration": {"chunk_shape": [3, 32, 32, 32]}, "name": "regular"}, "chunk_key_encoding": {"configuration": {"separator": "/"}, "name": "default"}, "fill_value": 0, "attributes": {}, "codecs": [{"configuration": {"chunk_shape": [3, 16, 16, 16], "codecs": []}, "name": "sharding_indexed"}], "dimension_names": null, "zarr_format": 3, "node_type": "array"} \ No newline at end of file +{ + "shape": [3, 24, 24, 24], + "data_type": "uint8", + "chunk_grid": { + "configuration": { "chunk_shape": [3, 32, 32, 32] }, + "name": "regular" + }, + "chunk_key_encoding": { + "configuration": { "separator": "/" }, + "name": "default" + }, + "fill_value": 0, + "attributes": {}, + "codecs": [ + { + "configuration": { + 
"chunk_shape": [3, 16, 16, 16], + "codecs": [ + { "name": "endian", "configuration": { "endian": "little" } } + ], + "index_codecs": [ + { "name": "endian", "configuration": { "endian": "little" } }, + { "name": "crc32c" } + ] + }, + "name": "sharding_indexed" + } + ], + "dimension_names": null, + "zarr_format": 3, + "node_type": "array" +} diff --git a/webknossos/tests/dataset/test_dataset.py b/webknossos/tests/dataset/test_dataset.py index e4f805181..e4c586ad9 100644 --- a/webknossos/tests/dataset/test_dataset.py +++ b/webknossos/tests/dataset/test_dataset.py @@ -859,6 +859,7 @@ def test_chunking_wk(data_format: DataFormat, output_path: Path) -> None: ds_path = prepare_dataset_path(data_format, output_path) ds = Dataset(ds_path, voxel_size=(2, 2, 1)) chunk_shape, chunks_per_shard = default_chunk_config(data_format, 8) + shard_shape = chunk_shape * chunks_per_shard layer = ds.add_layer("color", COLOR_CATEGORY, data_format=data_format) mag = layer.add_mag( @@ -874,7 +875,7 @@ def test_chunking_wk(data_format: DataFormat, output_path: Path) -> None: with get_executor_for_args(None) as executor: mag.for_each_chunk( chunk_job, - chunk_shape=(64, 64, 64), + chunk_shape=shard_shape, executor=executor, ) assert np.array_equal(original_data + 50, mag.get_view().read()[0]) @@ -885,7 +886,7 @@ def test_chunking_wk(data_format: DataFormat, output_path: Path) -> None: # Test without executor mag.for_each_chunk( chunk_job, - chunk_shape=(64, 64, 64), + chunk_shape=shard_shape, ) assert np.array_equal(original_data + 50, mag.get_view().read()[0]) diff --git a/webknossos/webknossos/dataset/_array.py b/webknossos/webknossos/dataset/_array.py index 72e215703..3d81c9878 100644 --- a/webknossos/webknossos/dataset/_array.py +++ b/webknossos/webknossos/dataset/_array.py @@ -485,16 +485,17 @@ def open(cls, path: Path) -> "ZarritaArray": Array.open_auto(store=path) # check that everything exists return cls(path) except Exception as exc: - raise ArrayException( - f"Could not open Zarr 
array at {path}. `.zarray` not found." - ) from exc + raise ArrayException(f"Could not open Zarr array at {path}.") from exc @staticmethod def _has_compression_codecs(codecs: List["zarrita.codecs.Codec"]) -> bool: - from zarrita.codecs import BloscCodec, GzipCodec + from zarrita.codecs import BloscCodec, GzipCodec, ZstdCodec return any( - isinstance(c, BloscCodec) or isinstance(c, GzipCodec) for c in codecs + isinstance(c, BloscCodec) + or isinstance(c, GzipCodec) + or isinstance(c, ZstdCodec) + for c in codecs ) @property @@ -504,26 +505,30 @@ def info(self) -> ArrayInfo: zarray = self._zarray if isinstance(zarray, Array): - if len(zarray.codecs) == 1 and isinstance(zarray.codecs[0], ShardingCodec): - sharding_codec = zarray.codecs[0] + if len(zarray.codec_pipeline.codecs) == 1 and isinstance( + zarray.codec_pipeline.codecs[0], ShardingCodec + ): + sharding_codec = zarray.codec_pipeline.codecs[0] + shard_shape = zarray.metadata.chunk_grid.configuration.chunk_shape + chunk_shape = sharding_codec.configuration.chunk_shape return ArrayInfo( data_format=DataFormat.Zarr3, num_channels=zarray.metadata.shape[0], voxel_type=zarray.metadata.dtype, compression_mode=self._has_compression_codecs( - sharding_codec.codecs + sharding_codec.codec_pipeline.codecs ), - chunk_shape=Vec3Int(sharding_codec.configuration.chunk_shape[1:4]), - chunks_per_shard=Vec3Int( - zarray.metadata.chunk_grid.configuration.chunk_shape[1:4] - ) - // Vec3Int(sharding_codec.configuration.chunk_shape[1:4]), + chunk_shape=Vec3Int(chunk_shape[1:4]), + chunks_per_shard=Vec3Int(shard_shape[1:4]) + // Vec3Int(chunk_shape[1:4]), ) return ArrayInfo( data_format=DataFormat.Zarr3, num_channels=zarray.metadata.shape[0], voxel_type=zarray.metadata.dtype, - compression_mode=self._has_compression_codecs(zarray.codecs), + compression_mode=self._has_compression_codecs( + zarray.codec_pipeline.codecs + ), chunk_shape=Vec3Int( zarray.metadata.chunk_grid.configuration.chunk_shape[1:4] ) @@ -560,10 +565,16 @@ def 
create(cls, path: Path, array_info: ArrayInfo) -> "ZarritaArray": + array_info.chunk_shape.to_tuple(), codecs=[ zarrita.codecs.transpose_codec("F"), - zarrita.codecs.blosc_codec(), + zarrita.codecs.endian_codec(), + zarrita.codecs.blosc_codec( + typesize=array_info.voxel_type.itemsize + ), ] if array_info.compression_mode - else [zarrita.codecs.transpose_codec("F")], + else [ + zarrita.codecs.transpose_codec("F"), + zarrita.codecs.endian_codec(), + ], ) ], ) diff --git a/webknossos/webknossos/dataset/dataset.py b/webknossos/webknossos/dataset/dataset.py index 5f301d3ba..e45f53bd8 100644 --- a/webknossos/webknossos/dataset/dataset.py +++ b/webknossos/webknossos/dataset/dataset.py @@ -37,6 +37,7 @@ from webknossos.dataset.defaults import ( DEFAULT_CHUNK_SHAPE, + DEFAULT_CHUNKS_PER_SHARD_FROM_IMAGES, DEFAULT_CHUNKS_PER_SHARD_ZARR, ) @@ -1167,6 +1168,10 @@ def add_layer_from_images( chunk_shape = DEFAULT_CHUNK_SHAPE.with_z(1) if chunks_per_shard is None: chunks_per_shard = DEFAULT_CHUNKS_PER_SHARD_ZARR.with_z(1) + + if chunks_per_shard is None and layer.data_format == DataFormat.Zarr3: + chunks_per_shard = DEFAULT_CHUNKS_PER_SHARD_FROM_IMAGES + mag_view = layer.add_mag( mag=mag, chunk_shape=chunk_shape, diff --git a/webknossos/webknossos/dataset/defaults.py b/webknossos/webknossos/dataset/defaults.py index fa1dcbb25..86413038f 100644 --- a/webknossos/webknossos/dataset/defaults.py +++ b/webknossos/webknossos/dataset/defaults.py @@ -7,3 +7,4 @@ DEFAULT_CHUNK_SHAPE = Vec3Int.full(32) DEFAULT_CHUNKS_PER_SHARD = Vec3Int.full(32) DEFAULT_CHUNKS_PER_SHARD_ZARR = Vec3Int.full(1) +DEFAULT_CHUNKS_PER_SHARD_FROM_IMAGES = Vec3Int(128, 128, 1)