diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index a4b86b85a..c3d6fca54 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -77,7 +77,7 @@ T_ArrayMetadata, ) from zarr.core.metadata.v3 import parse_node_type_array -from zarr.core.sync import collect_aiterator, sync +from zarr.core.sync import sync from zarr.errors import MetadataValidationError from zarr.registry import get_pipeline_class from zarr.storage import StoreLike, make_store_path @@ -829,17 +829,31 @@ def nchunks(self) -> int: """ return product(self.cdata_shape) - @property - def nchunks_initialized(self) -> int: + async def nchunks_initialized(self) -> int: """ - The number of chunks that have been persisted in storage. + Calculate the number of chunks that have been initialized, i.e. the number of chunks that have + been persisted to the storage backend. Returns ------- - int - The number of initialized chunks in the array. + nchunks_initialized : int + The number of chunks that have been initialized. + + Notes + ----- + On :class:`AsyncArray` this is an asynchronous method, unlike the (synchronous) + property :attr:`Array.nchunks_initialized`. + + Examples + -------- + >>> arr = await zarr.api.asynchronous.create(shape=(10,), chunks=(2,)) + >>> await arr.nchunks_initialized() + 0 + >>> await arr.setitem(slice(5), 1) + >>> await arr.nchunks_initialized() + 3 """ - return nchunks_initialized(self) + return len(await chunks_initialized(self)) def _iter_chunk_coords( self, *, origin: Sequence[int] | None = None, selection_shape: Sequence[int] | None = None @@ -1492,9 +1506,29 @@ def nbytes(self) -> int: @property def nchunks_initialized(self) -> int: """ - The number of chunks that have been initialized in the stored representation of this array. + Calculate the number of chunks that have been initialized, i.e. the number of chunks that have + been persisted to the storage backend. + + Returns + ------- + nchunks_initialized : int + The number of chunks that have been initialized. + + Notes + ----- + On :class:`Array` this is a (synchronous) property, unlike asynchronous function + :meth:`AsyncArray.nchunks_initialized`. + + Examples + -------- + >>> arr = await zarr.create(shape=(10,), chunks=(2,)) + >>> arr.nchunks_initialized + 0 + >>> arr[:5] = 1 + >>> arr.nchunks_initialized + 3 """ - return self._async_array.nchunks_initialized + return sync(self._async_array.nchunks_initialized()) def _iter_chunk_keys( self, origin: Sequence[int] | None = None, selection_shape: Sequence[int] | None = None @@ -2905,39 +2939,15 @@ def info(self) -> None: ) -def nchunks_initialized( - array: AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata] | Array, -) -> int: - """ - Calculate the number of chunks that have been initialized, i.e. the number of chunks that have - been persisted to the storage backend. - - Parameters - ---------- - array : Array - The array to inspect. - - Returns - ------- - nchunks_initialized : int - The number of chunks that have been initialized. - - See Also - -------- - chunks_initialized - """ - return len(chunks_initialized(array)) - - -def chunks_initialized( - array: Array | AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata], +async def chunks_initialized( + array: AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata], ) -> tuple[str, ...]: """ Return the keys of the chunks that have been persisted to the storage backend. Parameters ---------- - array : Array + array : AsyncArray The array to inspect. Returns @@ -2950,10 +2960,9 @@ def chunks_initialized( nchunks_initialized """ - # TODO: make this compose with the underlying async iterator - store_contents = list( - collect_aiterator(array.store_path.store.list_prefix(prefix=array.store_path.path)) - ) + store_contents = [ + x async for x in array.store_path.store.list_prefix(prefix=array.store_path.path) + ] return tuple(chunk_key for chunk_key in array._iter_chunk_keys() if chunk_key in store_contents) diff --git a/tests/test_array.py b/tests/test_array.py index 6451c7fe5..7c70fbca2 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -323,7 +323,7 @@ def test_nchunks(test_cls: type[Array] | type[AsyncArray[Any]], nchunks: int) -> @pytest.mark.parametrize("test_cls", [Array, AsyncArray[Any]]) -def test_nchunks_initialized(test_cls: type[Array] | type[AsyncArray[Any]]) -> None: +async def test_nchunks_initialized(test_cls: type[Array] | type[AsyncArray[Any]]) -> None: """ Test that nchunks_initialized accurately returns the number of stored chunks. """ @@ -337,7 +337,7 @@ def test_nchunks_initialized(test_cls: type[Array] | type[AsyncArray[Any]]) -> N if test_cls == Array: observed = arr.nchunks_initialized else: - observed = arr._async_array.nchunks_initialized + observed = await arr._async_array.nchunks_initialized() assert observed == expected # delete chunks @@ -346,13 +346,13 @@ def test_nchunks_initialized(test_cls: type[Array] | type[AsyncArray[Any]]) -> N if test_cls == Array: observed = arr.nchunks_initialized else: - observed = arr._async_array.nchunks_initialized + observed = await arr._async_array.nchunks_initialized() expected = arr.nchunks - idx - 1 assert observed == expected @pytest.mark.parametrize("test_cls", [Array, AsyncArray[Any]]) -def test_chunks_initialized(test_cls: type[Array] | type[AsyncArray[Any]]) -> None: +async def test_chunks_initialized(test_cls: type[Array] | type[AsyncArray[Any]]) -> None: """ Test that chunks_initialized accurately returns the keys of stored chunks. """ @@ -366,9 +366,9 @@ def test_chunks_initialized(test_cls: type[Array] | type[AsyncArray[Any]]) -> No arr[region] = 1 if test_cls == Array: - observed = sorted(chunks_initialized(arr)) + observed = sorted(await chunks_initialized(arr)) # Why doesn't mypy error here? else: - observed = sorted(chunks_initialized(arr._async_array)) + observed = sorted(await chunks_initialized(arr._async_array)) expected = sorted(keys) assert observed == expected