Skip to content

Commit

Permalink
update zarrita, relaxes numpy requirement (#932)
Browse files Browse the repository at this point in the history
* update zarrita, relaxes numpy requirement

* fix endian_codec

* fix codecs

* update zarrita

* fix zarr3 testdata

* fixes in zarrita

* new default shard_shapes for Dataset.from_images

* fixes chunk_shape
  • Loading branch information
normanrz authored Aug 8, 2023
1 parent b2d156c commit 3960f26
Show file tree
Hide file tree
Showing 7 changed files with 133 additions and 25 deletions.
69 changes: 64 additions & 5 deletions webknossos/poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion webknossos/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ JPype1 = { version = "^1.3.0", optional = true }
pims = { version = "^0.6.0", optional = true }
tifffile = { version = ">=2021.11.2", optional = true }
pylibCZIrw = { version = "3.4.0", source = "scm", optional = true }
zarrita = "0.1.0a12"
zarrita = "0.1.0a18"

[tool.poetry.extras]
pims = ["pims"]
Expand Down
33 changes: 32 additions & 1 deletion webknossos/testdata/simple_zarr3_dataset/color/1/zarr.json
Original file line number Diff line number Diff line change
@@ -1 +1,32 @@
{"shape": [3, 24, 24, 24], "data_type": "uint8", "chunk_grid": {"configuration": {"chunk_shape": [3, 32, 32, 32]}, "name": "regular"}, "chunk_key_encoding": {"configuration": {"separator": "/"}, "name": "default"}, "fill_value": 0, "attributes": {}, "codecs": [{"configuration": {"chunk_shape": [3, 16, 16, 16], "codecs": []}, "name": "sharding_indexed"}], "dimension_names": null, "zarr_format": 3, "node_type": "array"}
{
"shape": [3, 24, 24, 24],
"data_type": "uint8",
"chunk_grid": {
"configuration": { "chunk_shape": [3, 32, 32, 32] },
"name": "regular"
},
"chunk_key_encoding": {
"configuration": { "separator": "/" },
"name": "default"
},
"fill_value": 0,
"attributes": {},
"codecs": [
{
"configuration": {
"chunk_shape": [3, 16, 16, 16],
"codecs": [
{ "name": "endian", "configuration": { "endian": "little" } }
],
"index_codecs": [
{ "name": "endian", "configuration": { "endian": "little" } },
{ "name": "crc32c" }
]
},
"name": "sharding_indexed"
}
],
"dimension_names": null,
"zarr_format": 3,
"node_type": "array"
}
5 changes: 3 additions & 2 deletions webknossos/tests/dataset/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -859,6 +859,7 @@ def test_chunking_wk(data_format: DataFormat, output_path: Path) -> None:
ds_path = prepare_dataset_path(data_format, output_path)
ds = Dataset(ds_path, voxel_size=(2, 2, 1))
chunk_shape, chunks_per_shard = default_chunk_config(data_format, 8)
shard_shape = chunk_shape * chunks_per_shard

layer = ds.add_layer("color", COLOR_CATEGORY, data_format=data_format)
mag = layer.add_mag(
Expand All @@ -874,7 +875,7 @@ def test_chunking_wk(data_format: DataFormat, output_path: Path) -> None:
with get_executor_for_args(None) as executor:
mag.for_each_chunk(
chunk_job,
chunk_shape=(64, 64, 64),
chunk_shape=shard_shape,
executor=executor,
)
assert np.array_equal(original_data + 50, mag.get_view().read()[0])
Expand All @@ -885,7 +886,7 @@ def test_chunking_wk(data_format: DataFormat, output_path: Path) -> None:
# Test without executor
mag.for_each_chunk(
chunk_job,
chunk_shape=(64, 64, 64),
chunk_shape=shard_shape,
)
assert np.array_equal(original_data + 50, mag.get_view().read()[0])

Expand Down
43 changes: 27 additions & 16 deletions webknossos/webknossos/dataset/_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -485,16 +485,17 @@ def open(cls, path: Path) -> "ZarritaArray":
Array.open_auto(store=path) # check that everything exists
return cls(path)
except Exception as exc:
raise ArrayException(
f"Could not open Zarr array at {path}. `.zarray` not found."
) from exc
raise ArrayException(f"Could not open Zarr array at {path}.") from exc

@staticmethod
def _has_compression_codecs(codecs: List["zarrita.codecs.Codec"]) -> bool:
from zarrita.codecs import BloscCodec, GzipCodec
from zarrita.codecs import BloscCodec, GzipCodec, ZstdCodec

return any(
isinstance(c, BloscCodec) or isinstance(c, GzipCodec) for c in codecs
isinstance(c, BloscCodec)
or isinstance(c, GzipCodec)
or isinstance(c, ZstdCodec)
for c in codecs
)

@property
Expand All @@ -504,26 +505,30 @@ def info(self) -> ArrayInfo:

zarray = self._zarray
if isinstance(zarray, Array):
if len(zarray.codecs) == 1 and isinstance(zarray.codecs[0], ShardingCodec):
sharding_codec = zarray.codecs[0]
if len(zarray.codec_pipeline.codecs) == 1 and isinstance(
zarray.codec_pipeline.codecs[0], ShardingCodec
):
sharding_codec = zarray.codec_pipeline.codecs[0]
shard_shape = zarray.metadata.chunk_grid.configuration.chunk_shape
chunk_shape = sharding_codec.configuration.chunk_shape
return ArrayInfo(
data_format=DataFormat.Zarr3,
num_channels=zarray.metadata.shape[0],
voxel_type=zarray.metadata.dtype,
compression_mode=self._has_compression_codecs(
sharding_codec.codecs
sharding_codec.codec_pipeline.codecs
),
chunk_shape=Vec3Int(sharding_codec.configuration.chunk_shape[1:4]),
chunks_per_shard=Vec3Int(
zarray.metadata.chunk_grid.configuration.chunk_shape[1:4]
)
// Vec3Int(sharding_codec.configuration.chunk_shape[1:4]),
chunk_shape=Vec3Int(chunk_shape[1:4]),
chunks_per_shard=Vec3Int(shard_shape[1:4])
// Vec3Int(chunk_shape[1:4]),
)
return ArrayInfo(
data_format=DataFormat.Zarr3,
num_channels=zarray.metadata.shape[0],
voxel_type=zarray.metadata.dtype,
compression_mode=self._has_compression_codecs(zarray.codecs),
compression_mode=self._has_compression_codecs(
zarray.codec_pipeline.codecs
),
chunk_shape=Vec3Int(
zarray.metadata.chunk_grid.configuration.chunk_shape[1:4]
)
Expand Down Expand Up @@ -560,10 +565,16 @@ def create(cls, path: Path, array_info: ArrayInfo) -> "ZarritaArray":
+ array_info.chunk_shape.to_tuple(),
codecs=[
zarrita.codecs.transpose_codec("F"),
zarrita.codecs.blosc_codec(),
zarrita.codecs.endian_codec(),
zarrita.codecs.blosc_codec(
typesize=array_info.voxel_type.itemsize
),
]
if array_info.compression_mode
else [zarrita.codecs.transpose_codec("F")],
else [
zarrita.codecs.transpose_codec("F"),
zarrita.codecs.endian_codec(),
],
)
],
)
Expand Down
5 changes: 5 additions & 0 deletions webknossos/webknossos/dataset/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@

from webknossos.dataset.defaults import (
DEFAULT_CHUNK_SHAPE,
DEFAULT_CHUNKS_PER_SHARD_FROM_IMAGES,
DEFAULT_CHUNKS_PER_SHARD_ZARR,
)

Expand Down Expand Up @@ -1167,6 +1168,10 @@ def add_layer_from_images(
chunk_shape = DEFAULT_CHUNK_SHAPE.with_z(1)
if chunks_per_shard is None:
chunks_per_shard = DEFAULT_CHUNKS_PER_SHARD_ZARR.with_z(1)

if chunks_per_shard is None and layer.data_format == DataFormat.Zarr3:
chunks_per_shard = DEFAULT_CHUNKS_PER_SHARD_FROM_IMAGES

mag_view = layer.add_mag(
mag=mag,
chunk_shape=chunk_shape,
Expand Down
1 change: 1 addition & 0 deletions webknossos/webknossos/dataset/defaults.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,4 @@
DEFAULT_CHUNK_SHAPE = Vec3Int.full(32)
DEFAULT_CHUNKS_PER_SHARD = Vec3Int.full(32)
DEFAULT_CHUNKS_PER_SHARD_ZARR = Vec3Int.full(1)
DEFAULT_CHUNKS_PER_SHARD_FROM_IMAGES = Vec3Int(128, 128, 1)

0 comments on commit 3960f26

Please sign in to comment.