Skip to content

Commit

Permalink
Merge branch 'v3' into tom/fix/dtype-str-special-case
Browse files Browse the repository at this point in the history
  • Loading branch information
jhamman authored Oct 10, 2024
2 parents 7e76e9e + 395604d commit df92bad
Show file tree
Hide file tree
Showing 21 changed files with 209 additions and 70 deletions.
4 changes: 4 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,7 @@ repos:
hooks:
- id: rst-directive-colons
- id: rst-inline-touching-normal
- repo: https://github.com/numpy/numpydoc
rev: v1.8.0
hooks:
- id: numpydoc-validation
4 changes: 4 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -319,3 +319,7 @@ ignore = [
"PC111", # fix Python code in documentation - enable later
"PC180", # for JavaScript - not interested
]

[tool.numpydoc_validation]
# See https://numpydoc.readthedocs.io/en/latest/validation.html#built-in-validation-checks for the full list of checks
checks = ["GL06", "GL07", "GL10", "PR03", "PR05", "PR06"]
2 changes: 1 addition & 1 deletion src/zarr/abc/codec.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,11 @@
from zarr.core.indexing import SelectorTuple

__all__ = [
"BaseCodec",
"ArrayArrayCodec",
"ArrayBytesCodec",
"ArrayBytesCodecPartialDecodeMixin",
"ArrayBytesCodecPartialEncodeMixin",
"BaseCodec",
"BytesBytesCodec",
"CodecInput",
"CodecOutput",
Expand Down
2 changes: 1 addition & 1 deletion src/zarr/abc/store.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ class Store(ABC):
_mode: AccessMode
_is_open: bool

def __init__(self, mode: AccessModeLiteral = "r", *args: Any, **kwargs: Any) -> None:
def __init__(self, *args: Any, mode: AccessModeLiteral = "r", **kwargs: Any) -> None:
self._is_open = False
self._mode = AccessMode.from_literal(mode)

Expand Down
38 changes: 22 additions & 16 deletions src/zarr/api/asynchronous.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from zarr.core.group import AsyncGroup
from zarr.core.metadata.v2 import ArrayV2Metadata
from zarr.core.metadata.v3 import ArrayV3Metadata
from zarr.errors import NodeTypeValidationError
from zarr.storage import (
StoreLike,
StorePath,
Expand Down Expand Up @@ -159,7 +160,7 @@ async def load(
Parameters
----------
store : Store or string
store : Store or str
Store or path to directory in file system or name of zip file.
path : str or None, optional
The path within the store from which to load.
Expand Down Expand Up @@ -203,7 +204,7 @@ async def open(
Parameters
----------
store : Store or string, optional
store : Store or str, optional
Store or path to directory in file system or name of zip file.
mode : {'r', 'r+', 'a', 'w', 'w-'}, optional
Persistence mode: 'r' means read only (must exist); 'r+' means
Expand Down Expand Up @@ -247,7 +248,10 @@ async def open(

try:
return await open_array(store=store_path, zarr_format=zarr_format, **kwargs)
except KeyError:
except (KeyError, NodeTypeValidationError):
# KeyError for a missing key
# NodeTypeValidationError for failing to parse node metadata as an array when it's
# actually a group
return await open_group(store=store_path, zarr_format=zarr_format, **kwargs)


Expand All @@ -267,7 +271,7 @@ async def save(
Parameters
----------
store : Store or string
store : Store or str
Store or path to directory in file system or name of zip file.
args : ndarray
NumPy arrays with data to save.
Expand Down Expand Up @@ -303,7 +307,7 @@ async def save_array(
Parameters
----------
store : Store or string
store : Store or str
Store or path to directory in file system or name of zip file.
arr : ndarray
NumPy array with data to save.
Expand Down Expand Up @@ -351,7 +355,7 @@ async def save_group(
Parameters
----------
store : Store or string
store : Store or str
Store or path to directory in file system or name of zip file.
args : ndarray
NumPy arrays with data to save.
Expand Down Expand Up @@ -467,7 +471,7 @@ async def group(
Parameters
----------
store : Store or string, optional
store : Store or str, optional
Store or path to directory in file system.
overwrite : bool, optional
If True, delete any pre-existing data in `store` at `path` before
Expand All @@ -481,7 +485,7 @@ async def group(
to all attribute read operations.
synchronizer : object, optional
Array synchronizer.
path : string, optional
path : str, optional
Group path within store.
meta_array : array-like, optional
An array instance to use for determining arrays to create and return
Expand Down Expand Up @@ -547,7 +551,7 @@ async def open_group(
Parameters
----------
store : Store, string, or mapping, optional
store : Store, str, or mapping, optional
Store or path to directory in file system or name of zip file.
Strings are interpreted as paths on the local file system
Expand All @@ -570,16 +574,18 @@ async def open_group(
to all attribute read operations.
synchronizer : object, optional
Array synchronizer.
path : string, optional
path : str, optional
Group path within store.
chunk_store : Store or string, optional
chunk_store : Store or str, optional
Store or path to directory in file system or name of zip file.
storage_options : dict
If using an fsspec URL to create the store, these will be passed to
the backend implementation. Ignored otherwise.
meta_array : array-like, optional
An array instance to use for determining arrays to create and return
to users. Use `numpy.empty(())` by default.
attributes : dict
A dictionary of JSON-serializable values with user-defined attributes.
Returns
-------
Expand Down Expand Up @@ -664,22 +670,22 @@ async def create(
False, will be set to `shape`, i.e., single chunk for the whole array.
If an int, the chunk size in each dimension will be given by the value
of `chunks`. Default is True.
dtype : string or dtype, optional
dtype : str or dtype, optional
NumPy dtype.
compressor : Codec, optional
Primary compressor.
fill_value : object
Default value to use for uninitialized portions of the array.
order : {'C', 'F'}, optional
Memory layout to be used within each chunk.
store : Store or string
store : Store or str
Store or path to directory in file system or name of zip file.
synchronizer : object, optional
Array synchronizer.
overwrite : bool, optional
If True, delete all pre-existing data in `store` at `path` before
creating the array.
path : string, optional
path : str, optional
Path under which array is stored.
chunk_store : MutableMapping, optional
Separate storage for chunks. If not provided, `store` will be used
Expand Down Expand Up @@ -937,11 +943,11 @@ async def open_array(
Parameters
----------
store : Store or string
store : Store or str
Store or path to directory in file system or name of zip file.
zarr_format : {2, 3, None}, optional
The zarr format to use when saving.
path : string, optional
path : str, optional
Path in store to array.
storage_options : dict
If using an fsspec URL to create the store, these will be passed to
Expand Down
2 changes: 2 additions & 0 deletions src/zarr/api/synchronous.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,7 @@ def open_group(
zarr_version: ZarrFormat | None = None, # deprecated
zarr_format: ZarrFormat | None = None,
meta_array: Any | None = None, # not used in async api
attributes: dict[str, JSON] | None = None,
) -> Group:
return Group(
sync(
Expand All @@ -221,6 +222,7 @@ def open_group(
zarr_version=zarr_version,
zarr_format=zarr_format,
meta_array=meta_array,
attributes=attributes,
)
)
)
Expand Down
2 changes: 1 addition & 1 deletion src/zarr/codecs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@
"ShardingCodec",
"ShardingCodecIndexLocation",
"TransposeCodec",
"VLenUTF8Codec",
"VLenBytesCodec",
"VLenUTF8Codec",
"ZstdCodec",
]

Expand Down
18 changes: 16 additions & 2 deletions src/zarr/codecs/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from zarr.core.common import ChunkCoords, concurrent_map
from zarr.core.config import config
from zarr.core.indexing import SelectorTuple, is_scalar, is_total_slice
from zarr.core.metadata.v2 import _default_fill_value
from zarr.registry import register_pipeline

if TYPE_CHECKING:
Expand Down Expand Up @@ -247,7 +248,17 @@ async def read_batch(
if chunk_array is not None:
out[out_selection] = chunk_array
else:
out[out_selection] = chunk_spec.fill_value
fill_value = chunk_spec.fill_value

if fill_value is None:
# Zarr V2 allowed `fill_value` to be null in the metadata.
# Zarr V3 requires it to be set. This has already been
# validated when decoding the metadata, but because we also
# read Zarr V2 data we must still handle a fill_value of
# None here.
fill_value = _default_fill_value(dtype=chunk_spec.dtype)

out[out_selection] = fill_value
else:
chunk_bytes_batch = await concurrent_map(
[
Expand All @@ -274,7 +285,10 @@ async def read_batch(
tmp = tmp.squeeze(axis=drop_axes)
out[out_selection] = tmp
else:
out[out_selection] = chunk_spec.fill_value
fill_value = chunk_spec.fill_value
if fill_value is None:
fill_value = _default_fill_value(dtype=chunk_spec.dtype)
out[out_selection] = fill_value

def _merge_chunk_array(
self,
Expand Down
31 changes: 16 additions & 15 deletions src/zarr/core/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@
from zarr.core.metadata.v2 import ArrayV2Metadata
from zarr.core.metadata.v3 import ArrayV3Metadata
from zarr.core.sync import collect_aiterator, sync
from zarr.errors import MetadataValidationError
from zarr.registry import get_pipeline_class
from zarr.storage import StoreLike, make_store_path
from zarr.storage.common import StorePath, ensure_no_existing_node
Expand Down Expand Up @@ -145,7 +146,7 @@ async def get_array_metadata(
else:
zarr_format = 2
else:
raise ValueError(f"unexpected zarr_format: {zarr_format}")
raise MetadataValidationError("zarr_format", "2, 3, or None", zarr_format)

metadata_dict: dict[str, Any]
if zarr_format == 2:
Expand Down Expand Up @@ -382,7 +383,7 @@ async def _create_v2(
chunks=chunks,
order=order,
dimension_separator=dimension_separator,
fill_value=0 if fill_value is None else fill_value,
fill_value=fill_value,
compressor=compressor,
filters=filters,
attributes=attributes,
Expand Down Expand Up @@ -1290,11 +1291,11 @@ def get_basic_selection(
array. May be any combination of int and/or slice or ellipsis for multidimensional arrays.
out : NDBuffer, optional
If given, load the selected data directly into this buffer.
prototype : BufferPrototype, optional
The prototype of the buffer to use for the output data. If not provided, the default buffer prototype is used.
fields : str or sequence of str, optional
For arrays with a structured dtype, one or more fields can be specified to
extract data for.
prototype : BufferPrototype, optional
The prototype of the buffer to use for the output data. If not provided, the default buffer prototype is used.
Returns
-------
Expand Down Expand Up @@ -2286,6 +2287,17 @@ def resize(self, new_shape: ChunkCoords) -> Array:
This method does not modify the original Array object. Instead, it returns a new Array
with the specified shape.
Notes
-----
When resizing an array, the data are not rearranged in any way.
If one or more dimensions are shrunk, any chunks falling outside the
new array shape will be deleted from the underlying store.
However, note that boundary chunks (chunks partially falling inside the
new array shape) remain intact, and therefore the data falling outside
the new array but inside a boundary chunk can be restored by a
subsequent resize operation that grows the array again.
Examples
--------
>>> import zarr
Expand All @@ -2303,17 +2315,6 @@ def resize(self, new_shape: ChunkCoords) -> Array:
(20000, 1000)
>>> z2.shape
(50, 50)
Notes
-----
When resizing an array, the data are not rearranged in any way.
If one or more dimensions are shrunk, any chunks falling outside the
new array shape will be deleted from the underlying store.
However, it is noteworthy that the chunks partially falling inside the new array
(i.e. boundary chunks) will remain intact, and therefore,
the data falling outside the new array but inside the boundary chunks
would be restored by a subsequent resize operation that grows the array size.
"""
return type(self)(
sync(
Expand Down
Loading

0 comments on commit df92bad

Please sign in to comment.