Skip to content

Commit

Permalink
automatically add filter
Browse files (browse the repository at this point in the history)
  • Loading branch information
TomAugspurger committed Oct 10, 2024
1 parent 4b0a39e commit d8f24a8
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 5 deletions.
18 changes: 13 additions & 5 deletions src/zarr/core/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,16 +223,16 @@ async def create(
) -> AsyncArray:
store_path = await make_store_path(store)

dtype = parse_dtype(dtype, zarr_format)
dtype_parsed = parse_dtype(dtype, zarr_format)
shape = parse_shapelike(shape)

if chunks is not None and chunk_shape is not None:
raise ValueError("Only one of chunk_shape or chunks can be provided.")

if chunks:
_chunks = normalize_chunks(chunks, shape, dtype.itemsize)
_chunks = normalize_chunks(chunks, shape, dtype_parsed.itemsize)
else:
_chunks = normalize_chunks(chunk_shape, shape, dtype.itemsize)
_chunks = normalize_chunks(chunk_shape, shape, dtype_parsed.itemsize)

if zarr_format == 3:
if dimension_separator is not None:
Expand All @@ -254,7 +254,7 @@ async def create(
result = await cls._create_v3(
store_path,
shape=shape,
dtype=dtype,
dtype=dtype_parsed,
chunk_shape=_chunks,
fill_value=fill_value,
chunk_key_encoding=chunk_key_encoding,
Expand All @@ -264,6 +264,14 @@ async def create(
exists_ok=exists_ok,
)
elif zarr_format == 2:
if dtype is str or dtype == "str":
# another special case: zarr v2 added the vlen-utf8 codec
vlen_codec: dict[str, JSON] = {"id": "vlen-utf8"}
if filters and not any(x["id"] == "vlen-utf8" for x in filters):
filters = list(filters) + [vlen_codec]
else:
filters = [vlen_codec]

if codecs is not None:
raise ValueError(
"codecs cannot be used for arrays with version 2. Use filters and compressor instead."
Expand All @@ -277,7 +285,7 @@ async def create(
result = await cls._create_v2(
store_path,
shape=shape,
dtype=dtype,
dtype=dtype_parsed,
chunks=_chunks,
dimension_separator=dimension_separator,
fill_value=fill_value,
Expand Down
2 changes: 2 additions & 0 deletions tests/v3/test_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from collections.abc import Iterator
from typing import Any

import numcodecs.vlen
import numpy as np
import pytest
from numcodecs import Delta
Expand Down Expand Up @@ -118,3 +119,4 @@ async def test_create_dtype_str(dtype: Any) -> None:
arr = zarr.create(shape=10, dtype=dtype, zarr_format=2)
assert arr.dtype.kind == "O"
assert arr.metadata.to_dict()["dtype"] == "|O"
assert arr.metadata.filters == (numcodecs.vlen.VLenUTF8(),)

0 comments on commit d8f24a8

Please sign in to comment.