update zarrita, relaxes numpy requirement (#932)

* update zarrita, relaxes numpy requirement
* fix endian_codec
* fix codecs
* update zarrita
* fix zarr3 testdata
* fixes in zarrita
* new default shard_shapes for Dataset.from_images
* fixes chunk_shape
scalableminds · Aug 8, 2023 · 3960f26 · 3960f26
1 parent b2d156c
commit 3960f26
Show file tree

Hide file tree

Showing 7 changed files with 133 additions and 25 deletions.
diff --git a/webknossos/poetry.lock b/webknossos/poetry.lock
diff --git a/webknossos/pyproject.toml b/webknossos/pyproject.toml
@@ -63,7 +63,7 @@ JPype1 = { version = "^1.3.0", optional = true }
 pims = { version = "^0.6.0", optional = true }
 tifffile = { version = ">=2021.11.2", optional = true }
 pylibCZIrw = { version = "3.4.0", source = "scm", optional = true }
-zarrita = "0.1.0a12"
+zarrita = "0.1.0a18"
 
 [tool.poetry.extras]
 pims = ["pims"]

diff --git a/webknossos/testdata/simple_zarr3_dataset/color/1/zarr.json b/webknossos/testdata/simple_zarr3_dataset/color/1/zarr.json
@@ -1 +1,32 @@
-{"shape": [3, 24, 24, 24], "data_type": "uint8", "chunk_grid": {"configuration": {"chunk_shape": [3, 32, 32, 32]}, "name": "regular"}, "chunk_key_encoding": {"configuration": {"separator": "/"}, "name": "default"}, "fill_value": 0, "attributes": {}, "codecs": [{"configuration": {"chunk_shape": [3, 16, 16, 16], "codecs": []}, "name": "sharding_indexed"}], "dimension_names": null, "zarr_format": 3, "node_type": "array"}
+{
+  "shape": [3, 24, 24, 24],
+  "data_type": "uint8",
+  "chunk_grid": {
+    "configuration": { "chunk_shape": [3, 32, 32, 32] },
+    "name": "regular"
+  },
+  "chunk_key_encoding": {
+    "configuration": { "separator": "/" },
+    "name": "default"
+  },
+  "fill_value": 0,
+  "attributes": {},
+  "codecs": [
+    {
+      "configuration": {
+        "chunk_shape": [3, 16, 16, 16],
+        "codecs": [
+          { "name": "endian", "configuration": { "endian": "little" } }
+        ],
+        "index_codecs": [
+          { "name": "endian", "configuration": { "endian": "little" } },
+          { "name": "crc32c" }
+        ]
+      },
+      "name": "sharding_indexed"
+    }
+  ],
+  "dimension_names": null,
+  "zarr_format": 3,
+  "node_type": "array"
+}
diff --git a/webknossos/tests/dataset/test_dataset.py b/webknossos/tests/dataset/test_dataset.py
@@ -859,6 +859,7 @@ def test_chunking_wk(data_format: DataFormat, output_path: Path) -> None:
     ds_path = prepare_dataset_path(data_format, output_path)
     ds = Dataset(ds_path, voxel_size=(2, 2, 1))
     chunk_shape, chunks_per_shard = default_chunk_config(data_format, 8)
+    shard_shape = chunk_shape * chunks_per_shard
 
     layer = ds.add_layer("color", COLOR_CATEGORY, data_format=data_format)
     mag = layer.add_mag(
@@ -874,7 +875,7 @@ def test_chunking_wk(data_format: DataFormat, output_path: Path) -> None:
     with get_executor_for_args(None) as executor:
         mag.for_each_chunk(
             chunk_job,
-            chunk_shape=(64, 64, 64),
+            chunk_shape=shard_shape,
             executor=executor,
         )
     assert np.array_equal(original_data + 50, mag.get_view().read()[0])
@@ -885,7 +886,7 @@ def test_chunking_wk(data_format: DataFormat, output_path: Path) -> None:
     # Test without executor
     mag.for_each_chunk(
         chunk_job,
-        chunk_shape=(64, 64, 64),
+        chunk_shape=shard_shape,
     )
     assert np.array_equal(original_data + 50, mag.get_view().read()[0])
 

diff --git a/webknossos/webknossos/dataset/_array.py b/webknossos/webknossos/dataset/_array.py
@@ -485,16 +485,17 @@ def open(cls, path: Path) -> "ZarritaArray":
             Array.open_auto(store=path)  # check that everything exists
             return cls(path)
         except Exception as exc:
-            raise ArrayException(
-                f"Could not open Zarr array at {path}. `.zarray` not found."
-            ) from exc
+            raise ArrayException(f"Could not open Zarr array at {path}.") from exc
 
     @staticmethod
     def _has_compression_codecs(codecs: List["zarrita.codecs.Codec"]) -> bool:
-        from zarrita.codecs import BloscCodec, GzipCodec
+        from zarrita.codecs import BloscCodec, GzipCodec, ZstdCodec
 
         return any(
-            isinstance(c, BloscCodec) or isinstance(c, GzipCodec) for c in codecs
+            isinstance(c, BloscCodec)
+            or isinstance(c, GzipCodec)
+            or isinstance(c, ZstdCodec)
+            for c in codecs
         )
 
     @property
@@ -504,26 +505,30 @@ def info(self) -> ArrayInfo:
 
         zarray = self._zarray
         if isinstance(zarray, Array):
-            if len(zarray.codecs) == 1 and isinstance(zarray.codecs[0], ShardingCodec):
-                sharding_codec = zarray.codecs[0]
+            if len(zarray.codec_pipeline.codecs) == 1 and isinstance(
+                zarray.codec_pipeline.codecs[0], ShardingCodec
+            ):
+                sharding_codec = zarray.codec_pipeline.codecs[0]
+                shard_shape = zarray.metadata.chunk_grid.configuration.chunk_shape
+                chunk_shape = sharding_codec.configuration.chunk_shape
                 return ArrayInfo(
                     data_format=DataFormat.Zarr3,
                     num_channels=zarray.metadata.shape[0],
                     voxel_type=zarray.metadata.dtype,
                     compression_mode=self._has_compression_codecs(
-                        sharding_codec.codecs
+                        sharding_codec.codec_pipeline.codecs
                     ),
-                    chunk_shape=Vec3Int(sharding_codec.configuration.chunk_shape[1:4]),
-                    chunks_per_shard=Vec3Int(
-                        zarray.metadata.chunk_grid.configuration.chunk_shape[1:4]
-                    )
-                    // Vec3Int(sharding_codec.configuration.chunk_shape[1:4]),
+                    chunk_shape=Vec3Int(chunk_shape[1:4]),
+                    chunks_per_shard=Vec3Int(shard_shape[1:4])
+                    // Vec3Int(chunk_shape[1:4]),
                 )
             return ArrayInfo(
                 data_format=DataFormat.Zarr3,
                 num_channels=zarray.metadata.shape[0],
                 voxel_type=zarray.metadata.dtype,
-                compression_mode=self._has_compression_codecs(zarray.codecs),
+                compression_mode=self._has_compression_codecs(
+                    zarray.codec_pipeline.codecs
+                ),
                 chunk_shape=Vec3Int(
                     zarray.metadata.chunk_grid.configuration.chunk_shape[1:4]
                 )
@@ -560,10 +565,16 @@ def create(cls, path: Path, array_info: ArrayInfo) -> "ZarritaArray":
                         + array_info.chunk_shape.to_tuple(),
                         codecs=[
                             zarrita.codecs.transpose_codec("F"),
-                            zarrita.codecs.blosc_codec(),
+                            zarrita.codecs.endian_codec(),
+                            zarrita.codecs.blosc_codec(
+                                typesize=array_info.voxel_type.itemsize
+                            ),
                         ]
                         if array_info.compression_mode
-                        else [zarrita.codecs.transpose_codec("F")],
+                        else [
+                            zarrita.codecs.transpose_codec("F"),
+                            zarrita.codecs.endian_codec(),
+                        ],
                     )
                 ],
             )

diff --git a/webknossos/webknossos/dataset/dataset.py b/webknossos/webknossos/dataset/dataset.py
@@ -37,6 +37,7 @@
 
 from webknossos.dataset.defaults import (
     DEFAULT_CHUNK_SHAPE,
+    DEFAULT_CHUNKS_PER_SHARD_FROM_IMAGES,
     DEFAULT_CHUNKS_PER_SHARD_ZARR,
 )
 
@@ -1167,6 +1168,10 @@ def add_layer_from_images(
                     chunk_shape = DEFAULT_CHUNK_SHAPE.with_z(1)
                 if chunks_per_shard is None:
                     chunks_per_shard = DEFAULT_CHUNKS_PER_SHARD_ZARR.with_z(1)
+
+            if chunks_per_shard is None and layer.data_format == DataFormat.Zarr3:
+                chunks_per_shard = DEFAULT_CHUNKS_PER_SHARD_FROM_IMAGES
+
             mag_view = layer.add_mag(
                 mag=mag,
                 chunk_shape=chunk_shape,

diff --git a/webknossos/webknossos/dataset/defaults.py b/webknossos/webknossos/dataset/defaults.py
@@ -7,3 +7,4 @@
 DEFAULT_CHUNK_SHAPE = Vec3Int.full(32)
 DEFAULT_CHUNKS_PER_SHARD = Vec3Int.full(32)
 DEFAULT_CHUNKS_PER_SHARD_ZARR = Vec3Int.full(1)
+DEFAULT_CHUNKS_PER_SHARD_FROM_IMAGES = Vec3Int(128, 128, 1)