diff --git a/src/zarr/abc/store.py b/src/zarr/abc/store.py index 045da7e84..eefe04d50 100644 --- a/src/zarr/abc/store.py +++ b/src/zarr/abc/store.py @@ -342,8 +342,8 @@ def list(self) -> AsyncGenerator[str, None]: @abstractmethod def list_prefix(self, prefix: str) -> AsyncGenerator[str, None]: """ - Retrieve all keys in the store that begin with a given prefix. Keys are returned with the - common leading prefix removed. + Retrieve all keys in the store that begin with a given prefix. Keys are returned relative + to the root of the store. Parameters ---------- @@ -371,6 +371,20 @@ def list_dir(self, prefix: str) -> AsyncGenerator[str, None]: """ ... + async def delete_dir(self, prefix: str) -> None: + """ + Remove all keys and prefixes in the store that begin with a given prefix. + """ + if not self.supports_deletes: + raise NotImplementedError + if not self.supports_listing: + raise NotImplementedError + self._check_writable() + if not prefix.endswith("/"): + prefix += "/" + async for key in self.list_prefix(prefix): + await self.delete(key) + def close(self) -> None: """Close the store.""" self._is_open = False diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index a4b86b85a..34d59bc3d 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -553,7 +553,12 @@ async def _create_v3( attributes: dict[str, JSON] | None = None, exists_ok: bool = False, ) -> AsyncArray[ArrayV3Metadata]: - if not exists_ok: + if exists_ok: + if store_path.store.supports_deletes: + await store_path.delete_dir() + else: + await ensure_no_existing_node(store_path, zarr_format=3) + else: await ensure_no_existing_node(store_path, zarr_format=3) shape = parse_shapelike(shape) @@ -605,7 +610,12 @@ async def _create_v2( attributes: dict[str, JSON] | None = None, exists_ok: bool = False, ) -> AsyncArray[ArrayV2Metadata]: - if not exists_ok: + if exists_ok: + if store_path.store.supports_deletes: + await store_path.delete_dir() + else: + await ensure_no_existing_node(store_path, zarr_format=2) + else: await ensure_no_existing_node(store_path, zarr_format=2) if order is None: diff --git a/src/zarr/core/group.py b/src/zarr/core/group.py index 46f37700e..86cf191ca 100644 --- a/src/zarr/core/group.py +++ b/src/zarr/core/group.py @@ -404,7 +404,13 @@ async def from_store( zarr_format: ZarrFormat = 3, ) -> AsyncGroup: store_path = await make_store_path(store) - if not exists_ok: + + if exists_ok: + if store_path.store.supports_deletes: + await store_path.delete_dir() + else: + await ensure_no_existing_node(store_path, zarr_format=zarr_format) + else: await ensure_no_existing_node(store_path, zarr_format=zarr_format) attributes = attributes or {} group = cls( @@ -727,19 +733,8 @@ def _getitem_consolidated( async def delitem(self, key: str) -> None: store_path = self.store_path / key - if self.metadata.zarr_format == 3: - await (store_path / ZARR_JSON).delete() - - elif self.metadata.zarr_format == 2: - await asyncio.gather( - (store_path / ZGROUP_JSON).delete(), # TODO: missing_ok=False - (store_path / ZARRAY_JSON).delete(), # TODO: missing_ok=False - (store_path / ZATTRS_JSON).delete(), # TODO: missing_ok=True - ) - - else: - raise ValueError(f"unexpected zarr_format: {self.metadata.zarr_format}") + await store_path.delete_dir() if self.metadata.consolidated_metadata: self.metadata.consolidated_metadata.metadata.pop(key, None) await self._save_metadata() diff --git a/src/zarr/storage/common.py b/src/zarr/storage/common.py index 9ed8c274d..337fbc59a 100644 --- a/src/zarr/storage/common.py +++ b/src/zarr/storage/common.py @@ -101,6 +101,15 @@ async def delete(self) -> None: """ await self.store.delete(self.path) + async def delete_dir(self) -> None: + """ + Delete all keys with the given prefix from the store. + """ + path = self.path + if not path.endswith("/"): + path += "/" + await self.store.delete_dir(path) + async def set_if_not_exists(self, default: Buffer) -> None: """ Store a key to ``value`` if the key is not already present. diff --git a/src/zarr/storage/local.py b/src/zarr/storage/local.py index ba13c2c66..331c9857c 100644 --- a/src/zarr/storage/local.py +++ b/src/zarr/storage/local.py @@ -226,9 +226,8 @@ async def list(self) -> AsyncGenerator[str, None]: async def list_prefix(self, prefix: str) -> AsyncGenerator[str, None]: # docstring inherited - to_strip = ( - (self.root / prefix).as_posix() + "/" - ) # TODO: fixme in https://github.com/zarr-developers/zarr-python/issues/2438 + to_strip = self.root.as_posix() + "/" + prefix = prefix.rstrip("/") for p in (self.root / prefix).rglob("*"): if p.is_file(): yield p.as_posix().replace(to_strip, "") diff --git a/src/zarr/storage/logging.py b/src/zarr/storage/logging.py index 66fd1687e..be259579e 100644 --- a/src/zarr/storage/logging.py +++ b/src/zarr/storage/logging.py @@ -222,6 +222,11 @@ async def list_dir(self, prefix: str) -> AsyncGenerator[str, None]: async for key in self._store.list_dir(prefix=prefix): yield key + async def delete_dir(self, prefix: str) -> None: + # docstring inherited + with self.log(prefix): + await self._store.delete_dir(prefix=prefix) + def with_mode(self, mode: AccessModeLiteral) -> Self: # docstring inherited with self.log(mode): diff --git a/src/zarr/storage/memory.py b/src/zarr/storage/memory.py index f942d57b9..fa4ede2a8 100644 --- a/src/zarr/storage/memory.py +++ b/src/zarr/storage/memory.py @@ -1,5 +1,6 @@ from __future__ import annotations +from logging import getLogger from typing import TYPE_CHECKING, Self from zarr.abc.store import ByteRangeRequest, Store @@ -14,6 +15,9 @@ from zarr.core.common import AccessModeLiteral +logger = getLogger(__name__) + + class MemoryStore(Store): """ In-memory store for testing purposes. @@ -137,7 +141,7 @@ async def delete(self, key: str) -> None: try: del self._store_dict[key] except KeyError: - pass + logger.debug("Key %s does not exist.", key) async def set_partial_values(self, key_start_values: Iterable[tuple[str, int, bytes]]) -> None: # docstring inherited @@ -150,9 +154,10 @@ async def list(self) -> AsyncGenerator[str, None]: async def list_prefix(self, prefix: str) -> AsyncGenerator[str, None]: # docstring inherited - for key in self._store_dict: + # note: we materialize all dict keys into a list here so we can mutate the dict in-place (e.g. in delete_prefix) + for key in list(self._store_dict): if key.startswith(prefix): - yield key.removeprefix(prefix) + yield key async def list_dir(self, prefix: str) -> AsyncGenerator[str, None]: # docstring inherited diff --git a/src/zarr/storage/remote.py b/src/zarr/storage/remote.py index 1f7d5f7a1..ca7a010bd 100644 --- a/src/zarr/storage/remote.py +++ b/src/zarr/storage/remote.py @@ -340,6 +340,7 @@ async def list_dir(self, prefix: str) -> AsyncGenerator[str, None]: async def list_prefix(self, prefix: str) -> AsyncGenerator[str, None]: # docstring inherited - find_str = f"{self.path}/{prefix}" - for onefile in await self.fs._find(find_str, detail=False, maxdepth=None, withdirs=False): - yield onefile.removeprefix(find_str) + for onefile in await self.fs._find( + f"{self.path}/{prefix}", detail=False, maxdepth=None, withdirs=False + ): + yield onefile.removeprefix(f"{self.path}/") diff --git a/src/zarr/storage/zip.py b/src/zarr/storage/zip.py index d9e1aa130..cf9b338cf 100644 --- a/src/zarr/storage/zip.py +++ b/src/zarr/storage/zip.py @@ -244,7 +244,7 @@ async def list_prefix(self, prefix: str) -> AsyncGenerator[str, None]: # docstring inherited async for key in self.list(): if key.startswith(prefix): - yield key.removeprefix(prefix) + yield key async def list_dir(self, prefix: str) -> AsyncGenerator[str, None]: # docstring inherited diff --git a/src/zarr/testing/store.py b/src/zarr/testing/store.py index b4da75b06..3aece0f4a 100644 --- a/src/zarr/testing/store.py +++ b/src/zarr/testing/store.py @@ -213,11 +213,26 @@ async def test_exists(self, store: S) -> None: assert await store.exists("foo/zarr.json") async def test_delete(self, store: S) -> None: + if not store.supports_deletes: + pytest.skip("store does not support deletes") await store.set("foo/zarr.json", self.buffer_cls.from_bytes(b"bar")) assert await store.exists("foo/zarr.json") await store.delete("foo/zarr.json") assert not await store.exists("foo/zarr.json") + async def test_delete_dir(self, store: S) -> None: + if not store.supports_deletes: + pytest.skip("store does not support deletes") + await store.set("zarr.json", self.buffer_cls.from_bytes(b"root")) + await store.set("foo-bar/zarr.json", self.buffer_cls.from_bytes(b"root")) + await store.set("foo/zarr.json", self.buffer_cls.from_bytes(b"bar")) + await store.set("foo/c/0", self.buffer_cls.from_bytes(b"chunk")) + await store.delete_dir("foo") + assert await store.exists("zarr.json") + assert await store.exists("foo-bar/zarr.json") + assert not await store.exists("foo/zarr.json") + assert not await store.exists("foo/c/0") + async def test_empty(self, store: S) -> None: assert await store.empty() await self.set( @@ -249,8 +264,7 @@ async def test_list(self, store: S) -> None: async def test_list_prefix(self, store: S) -> None: """ Test that the `list_prefix` method works as intended. Given a prefix, it should return - all the keys in storage that start with this prefix. Keys should be returned with the shared - prefix removed. + all the keys in storage that start with this prefix. """ prefixes = ("", "a/", "a/b/", "a/b/c/") data = self.buffer_cls.from_bytes(b"") @@ -264,7 +278,7 @@ async def test_list_prefix(self, store: S) -> None: expected: tuple[str, ...] = () for key in store_dict: if key.startswith(prefix): - expected += (key.removeprefix(prefix),) + expected += (key,) expected = tuple(sorted(expected)) assert observed == expected diff --git a/tests/test_array.py b/tests/test_array.py index 6451c7fe5..96475d43e 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -51,6 +51,8 @@ def test_array_creation_existing_node( new_dtype = "float32" if exists_ok: + if not store.supports_deletes: + pytest.skip("store does not support deletes") arr_new = Array.create( spath / "extant", shape=new_shape, diff --git a/tests/test_group.py b/tests/test_group.py index bcdc6ff0d..6bacca488 100644 --- a/tests/test_group.py +++ b/tests/test_group.py @@ -290,6 +290,10 @@ def test_group_open(store: Store, zarr_format: ZarrFormat, exists_ok: bool) -> N with pytest.raises(ContainsGroupError): Group.from_store(store, attributes=attrs, zarr_format=zarr_format, exists_ok=exists_ok) else: + if not store.supports_deletes: + pytest.skip( + "Store does not support deletes but `exists_ok` is True, requiring deletes to override a group" + ) group_created_again = Group.from_store( store, attributes=new_attrs, zarr_format=zarr_format, exists_ok=exists_ok ) @@ -720,6 +724,8 @@ def test_group_creation_existing_node( new_attributes = {"new": True} if exists_ok: + if not store.supports_deletes: + pytest.skip("store does not support deletes but exists_ok is True") node_new = Group.from_store( spath / "extant", attributes=new_attributes, @@ -1092,7 +1098,9 @@ async def test_require_group(store: LocalStore | MemoryStore, zarr_format: ZarrF assert foo_group.attrs == {"foo": 100} # test that we can get the group using require_group and overwrite=True - foo_group = await root.require_group("foo", overwrite=True) + if store.supports_deletes: + foo_group = await root.require_group("foo", overwrite=True) + assert foo_group.attrs == {} _ = await foo_group.create_array( "bar", shape=(10,), dtype="uint8", chunk_shape=(2,), attributes={"foo": 100} @@ -1371,3 +1379,16 @@ def test_group_deprecated_positional_args(method: str) -> None: with pytest.warns(FutureWarning, match=r"Pass name=.*, data=.* as keyword args."): arr = getattr(root, method)("foo_like", data, **kwargs) assert arr.shape == data.shape + + +@pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"]) +def test_delitem_removes_children(store: Store, zarr_format: ZarrFormat) -> None: + # https://github.com/zarr-developers/zarr-python/issues/2191 + g1 = zarr.group(store=store, zarr_format=zarr_format) + g1.create_group("0") + g1.create_group("0/0") + arr = g1.create_array("0/0/0", shape=(1,)) + arr[:] = 1 + del g1["0"] + with pytest.raises(KeyError): + g1["0/0"] diff --git a/tests/test_store/test_logging.py b/tests/test_store/test_logging.py index 0258244c5..50db5c1c5 100644 --- a/tests/test_store/test_logging.py +++ b/tests/test_store/test_logging.py @@ -45,10 +45,15 @@ async def test_logging_store_counter(store: Store) -> None: arr[:] = 1 assert wrapped.counter["set"] == 2 - assert wrapped.counter["get"] == 0 # 1 if overwrite=False assert wrapped.counter["list"] == 0 assert wrapped.counter["list_dir"] == 0 assert wrapped.counter["list_prefix"] == 0 + if store.supports_deletes: + assert wrapped.counter["get"] == 0 # 1 if overwrite=False + assert wrapped.counter["delete_dir"] == 1 + else: + assert wrapped.counter["get"] == 1 + assert wrapped.counter["delete_dir"] == 0 async def test_with_mode(): diff --git a/tests/test_store/test_zip.py b/tests/test_store/test_zip.py index d05422ecd..8dee47449 100644 --- a/tests/test_store/test_zip.py +++ b/tests/test_store/test_zip.py @@ -14,7 +14,6 @@ from zarr.testing.store import StoreTests if TYPE_CHECKING: - from collections.abc import Coroutine from typing import Any @@ -65,9 +64,6 @@ def test_store_supports_partial_writes(self, store: ZipStore) -> None: def test_store_supports_listing(self, store: ZipStore) -> None: assert store.supports_listing - def test_delete(self, store: ZipStore) -> Coroutine[Any, Any, None]: - pass - def test_api_integration(self, store: ZipStore) -> None: root = zarr.open_group(store=store) @@ -103,4 +99,4 @@ async def test_with_mode(self, store: ZipStore) -> None: @pytest.mark.parametrize("mode", ["a", "w"]) async def test_store_open_mode(self, store_kwargs: dict[str, Any], mode: str) -> None: - super().test_store_open_mode(store_kwargs, mode) + await super().test_store_open_mode(store_kwargs, mode)