Skip to content

Commit

Permalink
wip - restructured the IO
Browse files Browse the repository at this point in the history
  • Loading branch information
TomAugspurger committed Oct 8, 2024
1 parent 3878f24 commit 80a2821
Show file tree
Hide file tree
Showing 2 changed files with 120 additions and 0 deletions.
91 changes: 91 additions & 0 deletions src/zarr/core/metadata/_io.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
from __future__ import annotations

import asyncio
from typing import Literal

from zarr.core.common import ZARR_JSON, ZARRAY_JSON, ZATTRS_JSON, ZGROUP_JSON, ZarrFormat
from zarr.storage.common import StorePath


def _build_paths(
node_type: Literal["array", "group"] | None = None,
zarr_format: ZarrFormat | None = None,
) -> list[str]:
state = (node_type, zarr_format)
paths = []

match state:
case (_, 3):
paths.extend([ZARR_JSON])
case ("array", 2):
paths.extend([ZARRAY_JSON, ZATTRS_JSON])
case ("array", None):
paths.extend([ZARR_JSON, ZATTRS_JSON, ZARRAY_JSON])
case ("group", 2):
paths.extend([ZGROUP_JSON, ZATTRS_JSON])
case ("group", None):
paths.extend([ZGROUP_JSON, ZATTRS_JSON, ZARR_JSON])
case (None, 2):
paths.extend([ZARRAY_JSON, ZATTRS_JSON, ZGROUP_JSON])
case (None, None):
paths.extend([ZARRAY_JSON, ZATTRS_JSON, ZGROUP_JSON, ZARR_JSON])
case _:
raise ValueError(f"Unhandled state: {state}")

return paths


async def _gather_documents(
    store_path: StorePath,
    node_type: Literal["array", "group"] | None = None,
    zarr_format: ZarrFormat | None = None,
):
    """Read the metadata documents needed to open the node at ``store_path``.

    This function is purely concerned with doing the I/O to read the
    minimum set of documents needed. Two key pieces of information may be
    unknown when it is called:

    1. ``node_type``, which determines

       - the keys to read for Zarr v2
       - the metadata structure to parse a Zarr v3 document into

    2. ``zarr_format``, which determines whether the v2 keys, the v3 key,
       or both are read.

    When both are unknown, all four documents are read:

    1. ``ZGROUP_JSON``: for Zarr v2 groups
    2. ``ZARRAY_JSON``: for Zarr v2 arrays
    3. ``ZATTRS_JSON``: for Zarr v2 groups and arrays
    4. ``ZARR_JSON``: for Zarr v3 groups and arrays

    - If the format is known to be v2, only 1-3 are read.
    - If the format is known to be v3, only 4 is read.
    - If the node is known to be a group, 2 is skipped.
    - If the node is known to be an array, 1 is skipped.

    The exact key selection is delegated to ``_build_paths``.

    Parameters
    ----------
    store_path
        Location of the node; each key is read via ``(store_path / key).get()``.
    node_type
        ``"array"``, ``"group"``, or ``None`` when unknown.
    zarr_format
        ``2``, ``3``, or ``None`` when unknown.

    Returns
    -------
    dict
        Maps every key that was read to the result of its ``get()`` call.
        A value of ``None`` is treated as "document missing".

    Raises
    ------
    FileNotFoundError
        If ``zarr_format`` is 3 and the ``ZARR_JSON`` document is missing.
    ValueError
        Propagated from ``_build_paths`` for an unhandled combination of
        ``node_type`` and ``zarr_format``.
    """
    paths = _build_paths(node_type, zarr_format)

    # Issue all reads concurrently. asyncio.gather returns results in the
    # order of its arguments, so they line up with `paths`.
    results = await asyncio.gather(*[(store_path / path).get() for path in paths])
    items = dict(zip(paths, results, strict=True))

    # With a known v3 format there is exactly one candidate document, so its
    # absence means there is nothing to open.
    if zarr_format == 3 and items[ZARR_JSON] is None:
        raise FileNotFoundError(f"No {ZARR_JSON} document found at {store_path}")

    # TODO: for the remaining states no validation is done here; missing
    # documents are simply reported as None values. Inferring the node type
    # and format from the returned documents is left to the caller.
    return items
29 changes: 29 additions & 0 deletions tests/v3/test_metadata/test_io.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
from typing import Literal

import pytest

from zarr.core.common import ZARR_JSON, ZARRAY_JSON, ZATTRS_JSON, ZGROUP_JSON
from zarr.core.metadata._io import _build_paths


@pytest.mark.parametrize(
    ("node_type", "zarr_format", "expected"),
    [
        (None, None, [ZARR_JSON, ZATTRS_JSON, ZGROUP_JSON, ZARRAY_JSON]),
        (None, 2, [ZATTRS_JSON, ZGROUP_JSON, ZARRAY_JSON]),
        (None, 3, [ZARR_JSON]),
        ("array", None, [ZARR_JSON, ZATTRS_JSON, ZARRAY_JSON]),
        ("array", 2, [ZATTRS_JSON, ZARRAY_JSON]),
        ("array", 3, [ZARR_JSON]),
        ("group", None, [ZARR_JSON, ZATTRS_JSON, ZGROUP_JSON]),
        ("group", 2, [ZATTRS_JSON, ZGROUP_JSON]),
        ("group", 3, [ZARR_JSON]),
    ],
)
def test_build_paths(
    node_type: Literal["array", "group"] | None,
    zarr_format: Literal[2, 3] | None,
    expected: list[str],
) -> None:
    """Check the keys ``_build_paths`` returns for every known/unknown combination.

    The order of the returned keys is not part of the contract, but each key
    must appear exactly once.
    """
    result = _build_paths(node_type, zarr_format)
    # Compare sorted lists rather than sets: still order-insensitive, but a
    # key returned twice now fails the test instead of being silently
    # collapsed by set().
    assert sorted(result) == sorted(expected)

0 comments on commit 80a2821

Please sign in to comment.