-
-
Notifications
You must be signed in to change notification settings - Fork 282
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Added info for Group and Array #2400
base: main
Are you sure you want to change the base?
Changes from all commits
a6ef792
b94bff2
73ea1d2
a3b797d
30c4e6a
297a9f3
60a0881
615c025
125129b
19fd7ff
5599da3
d96c202
9118056
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,143 @@ | ||
import dataclasses | ||
import textwrap | ||
from typing import Literal | ||
|
||
|
||
@dataclasses.dataclass(kw_only=True) | ||
class GroupInfo: | ||
""" | ||
Information about a group. | ||
|
||
Attributes | ||
---------- | ||
name : str | ||
The path of the group within the Store | ||
type : "Group" | ||
zarr_format : {2, 3} | ||
The zarr format of the Group. | ||
read_only : bool | ||
Whether the Group's access mode is read only. | ||
store_type : str | ||
The name of the Store class containing this group. | ||
count_members : int, optional | ||
The number of child members below this group. This | ||
will be set when the Group has consolidated metadata | ||
or when using :class:`Group.info_complete`. | ||
count_arrays : int, optional | ||
The number of child arrays below this group. This | ||
will be set when the Group has consolidated metadata | ||
or when using :class:`Group.info_complete`. | ||
count_groups : int, optional | ||
The number of child groups below this group. This | ||
will be set when the Group has consolidated metadata | ||
or when using :class:`Group.info_complete`. | ||
""" | ||
|
||
name: str | ||
type: Literal["Group"] = "Group" | ||
zarr_format: Literal[2, 3] | ||
read_only: bool | ||
store_type: str | ||
count_members: int | None = None | ||
count_arrays: int | None = None | ||
count_groups: int | None = None | ||
|
||
def __repr__(self) -> str: | ||
template = textwrap.dedent("""\ | ||
Name : {name} | ||
Type : {type} | ||
Zarr format : {zarr_format} | ||
Read-only : {read_only} | ||
Store type : {store_type}""") | ||
|
||
if self.count_members is not None: | ||
template += "\nNo. members : {count_members}" | ||
if self.count_arrays is not None: | ||
template += "\nNo. arrays : {count_arrays}" | ||
if self.count_groups is not None: | ||
template += "\nNo. groups : {count_groups}" | ||
return template.format(**dataclasses.asdict(self)) | ||
|
||
|
||
def human_readable_size(size: int) -> str:
    """
    Format a byte count using binary (power-of-1024) unit suffixes.

    Values below 1024 are returned as the plain number. Larger values are
    divided by the matching power of 1024, printed with one decimal place
    and suffixed with K, M, G, T or P. Everything from 2**50 upwards is
    expressed in P.
    """
    if size < 2**10:
        return f"{size}"
    # Each entry is (exponent of the unit, suffix); a unit applies while the
    # value is still below the next unit, i.e. below 2**(exponent + 10).
    for exponent, suffix in ((10, "K"), (20, "M"), (30, "G"), (40, "T")):
        if size < 2 ** (exponent + 10):
            return f"{size / float(2**exponent):.1f}{suffix}"
    return f"{size / float(2**50):.1f}P"
|
||
|
||
def byte_info(size: int) -> str:
    """
    Describe a byte count for display.

    Counts of 1024 or more are shown as the exact number followed by the
    ``human_readable_size`` form in parentheses; smaller counts are shown
    as the bare number.
    """
    if size >= 2**10:
        return f"{size} ({human_readable_size(size)})"
    return str(size)
|
||
|
||
@dataclasses.dataclass(kw_only=True) | ||
class ArrayInfo: | ||
type: Literal["Array"] = "Array" | ||
zarr_format: Literal[2, 3] | ||
data_type: str | ||
shape: tuple[int, ...] | ||
chunk_shape: tuple[int, ...] | None = None | ||
order: Literal["C", "F"] | ||
read_only: bool | ||
store_type: str | ||
compressor: str | None = None | ||
filters: list[str] | None = None | ||
codecs: str | None = None | ||
count_bytes: int | None = None | ||
count_bytes_stored: int | None = None | ||
count_chunks_initialized: int | None = None | ||
|
||
def __repr__(self) -> str: | ||
template = textwrap.dedent("""\ | ||
Type : {type} | ||
Zarr format : {zarr_format} | ||
Data type : {data_type} | ||
Shape : {shape} | ||
Chunk shape : {chunk_shape} | ||
Order : {order} | ||
Read-only : {read_only} | ||
Store type : {store_type}""") | ||
|
||
kwargs = dataclasses.asdict(self) | ||
if self.chunk_shape is None: | ||
# for non-regular chunk grids | ||
kwargs["chunk_shape"] = "<variable>" | ||
if self.compressor is not None: | ||
template += "\nCompressor : {compressor}" | ||
|
||
if self.filters is not None: | ||
template += "\nFilters : {filters}" | ||
|
||
if self.codecs is not None: | ||
template += "\nCodecs : {codecs}" | ||
|
||
if self.count_bytes is not None: | ||
template += "\nNo. bytes : {count_bytes}" | ||
kwargs["count_bytes"] = byte_info(self.count_bytes) | ||
|
||
if self.count_bytes_stored is not None: | ||
template += "\nNo. bytes stored : {count_bytes_stored}" | ||
kwargs["count_stored"] = byte_info(self.count_bytes_stored) | ||
|
||
if ( | ||
self.count_bytes is not None | ||
and self.count_bytes_stored is not None | ||
and self.count_bytes_stored > 0 | ||
): | ||
template += "\nStorage ratio : {storage_ratio}" | ||
kwargs["storage_ratio"] = f"{self.count_bytes / self.count_bytes_stored:.1f}" | ||
|
||
if self.count_chunks_initialized is not None: | ||
template += "\nChunks Initialized : {count_chunks_initialized}" | ||
return template.format(**kwargs) |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,6 +14,7 @@ | |
from zarr.abc.store import Store, set_or_delete | ||
from zarr.codecs import _get_default_array_bytes_codec | ||
from zarr.codecs._v2 import V2Compressor, V2Filters | ||
from zarr.core._info import ArrayInfo | ||
from zarr.core.attributes import Attributes | ||
from zarr.core.buffer import ( | ||
BufferPrototype, | ||
|
@@ -1199,9 +1200,65 @@ async def update_attributes(self, new_attributes: dict[str, JSON]) -> Self: | |
def __repr__(self) -> str:
    """Return a one-line summary showing the store path, shape and dtype."""
    return f"<AsyncArray {self.store_path} shape={self.shape} dtype={self.dtype}>"
|
||
async def info(self) -> None: | ||
@property
def info(self) -> ArrayInfo:
    """
    Return the statically known information for an array.

    This is whatever ``_info`` produces when called without extra values.

    Returns
    -------
    ArrayInfo

    See Also
    --------
    AsyncArray.info_complete
        All information about an array, including dynamic information
        like the number of bytes and chunks written.
    """
    return self._info()
|
||
async def info_complete(self) -> ArrayInfo:
    """
    Return array information including dynamically determined values.

    Collects the dynamic values (currently only ``count_chunks_initialized``)
    and hands them to ``_info`` through its ``extra`` argument.

    Returns
    -------
    ArrayInfo
    """
    # NOTE(review): a reviewer suggested keeping this out of the API until
    # it is fully implemented, since it is new.
    # TODO: get the size of the object from the store.
    extra = {
        "count_chunks_initialized": self.nchunks_initialized,  # this should be async?
        # count_bytes_stored isn't yet implemented.
    }
    return self._info(extra=extra)
|
||
raise NotImplementedError | ||
|
||
def _info(self, extra: dict[str, int] | None = None) -> ArrayInfo:
    """
    Build the ``ArrayInfo`` record for this array.

    Parameters
    ----------
    extra : dict of str to int, optional
        Additional ``ArrayInfo`` fields (for example
        ``count_chunks_initialized``) to include in the result. Keys must
        be ``ArrayInfo`` field names that are not among the fields passed
        explicitly in the final ``ArrayInfo(...)`` call.

    Returns
    -------
    ArrayInfo

    Raises
    ------
    NotImplementedError
        For non-v2 metadata whose chunk grid is not a ``RegularChunkGrid``.
    """
    kwargs: dict[str, Any] = {}
    if self.metadata.zarr_format == 2:
        assert isinstance(self.metadata, ArrayV2Metadata)
        if self.metadata.compressor is not None:
            kwargs["compressor"] = str(self.metadata.compressor)
        if self.metadata.filters is not None:
            kwargs["filters"] = str(self.metadata.filters)
        kwargs["data_type"] = str(self.metadata.dtype)
        kwargs["chunk_shape"] = self.metadata.chunks
    else:
        kwargs["codecs"] = str(self.metadata.codecs)
        kwargs["data_type"] = str(self.metadata.data_type)
        # NOTE(review): only regular chunk grids are handled. Until other
        # chunk grid types exist, reviewers preferred raising here over
        # leaving the field null.
        chunk_grid = self.metadata.chunk_grid
        if isinstance(chunk_grid, RegularChunkGrid):
            kwargs["chunk_shape"] = chunk_grid.chunk_shape
        else:
            # f-string so the offending type actually appears in the message.
            raise NotImplementedError(
                f"'info' is not yet implemented for chunk grids of type {type(chunk_grid)}"
            )

    # Forward caller-supplied values; previously they were silently dropped.
    if extra is not None:
        kwargs.update(extra)

    return ArrayInfo(
        zarr_format=self.metadata.zarr_format,
        shape=self.shape,
        order=self.order,
        read_only=self.store_path.store.mode.readonly,
        store_type=type(self.store_path.store).__name__,
        count_bytes=self.dtype.itemsize * self.size,
        **kwargs,
    )
|
||
|
||
# TODO: Array can be a frozen data class again once property setters (e.g. shape) are removed | ||
@dataclass(frozen=False) | ||
|
@@ -2900,10 +2957,25 @@ def update_attributes(self, new_attributes: dict[str, JSON]) -> Array: | |
def __repr__(self) -> str:
    """Return a one-line summary showing the store path, shape and dtype."""
    return f"<Array {self.store_path} shape={self.shape} dtype={self.dtype}>"
|
||
def info(self) -> None: | ||
return sync( | ||
self._async_array.info(), | ||
) | ||
@property
def info(self) -> ArrayInfo:
    """
    Return the statically known information for an array.

    The value is read from the wrapped async array's ``info`` property.

    Returns
    -------
    ArrayInfo

    See Also
    --------
    Array.info_complete
        All information about an array, including dynamic information
        like the number of bytes and chunks written.
    """
    return self._async_array.info
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. please add docstrings here and in |
||
|
||
def info_complete(self) -> ArrayInfo:
    """
    Return array information including dynamically determined values.

    Passes the wrapped async array's ``info_complete()`` coroutine to
    ``sync`` and returns the result.

    Returns
    -------
    ArrayInfo

    See Also
    --------
    Array.info
        The statically known subset of this information.
    """
    return sync(self._async_array.info_complete())
|
||
|
||
def nchunks_initialized( | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you explain why we want to put this in the user API?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think any object that a user can get through a public API (like
zarr.group().info
) should be part of the API somewhere, if only to enable things like static typing without using private imports. I don't have a preference for whether it's in the top-level API like I've added here, or some submodule.