Skip to content

Commit

Permalink
Check and report the dtype of the SparseNdarray.
Browse files Browse the repository at this point in the history
This is now part of the seed contract, similar to DelayedArray::type.
  • Loading branch information
LTLA committed Aug 20, 2023
1 parent 202522f commit 953f5e2
Show file tree
Hide file tree
Showing 2 changed files with 65 additions and 11 deletions.
63 changes: 53 additions & 10 deletions src/delayedarray/SparseNdarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,10 @@ class SparseNdarray:
representing the sparse contents of the corresponding dimension element.
In effect, this is a tree where the non-leaf nodes are lists and the leaf nodes
are tuples. ``index`` and ``value`` should be :py:class:`~typing.Sequence` of equal
length, where ``index`` is integer, sorted, and contains values less than the extent
of the final dimension.
are tuples. ``index`` should be a :py:class:`~typing.Sequence` of integers where
values are strictly increasing and less than the extent of the final dimension.
``value`` may be any :py:class:`~numpy.ndarray`, but the ``dtype`` should be
consistent across every ``value`` array in the tree.
Any entry of any list may also be None, indicating that the corresponding element
of the dimension contains no non-zero values. In fact, the entire tree may be None,
Expand All @@ -46,6 +47,10 @@ class SparseNdarray:
For `1-dimensional` arrays, a tuple containing a sparse vector.
Alternatively None, if the array is empty.
dtype (numpy.dtype, optional):
Type of the array as a NumPy type.
If None, this is inferred from ``contents``.
"""

def __init__(
Expand All @@ -57,17 +62,28 @@ def __init__(
List,
]
],
check=True,
dtype: Optional[numpy.dtype] = None,
check=True
):
self._shape = shape
self._contents = contents

if dtype is None:
if contents is not None:
if len(shape) > 1:
dtype = _peek_for_type(contents, 0, self._shape)
else:
dtype = contents[1].dtype
if dtype is None:
raise ValueError("'dtype' should be provided if 'contents' is None")
self._dtype = dtype

if check is True and contents is not None:
if len(shape) > 1:
_recursive_check(self._contents, 0, self._shape)
_recursive_check(self._contents, 0, self._shape, self._dtype)
else:
_check_sparse_tuple(
self._contents[0], self._contents[1], self._shape[0]
self._contents[0], self._contents[1], self._shape[0], self._dtype
)

@property
Expand All @@ -80,6 +96,15 @@ def shape(self) -> Tuple[int, ...]:
"""
return self._shape

@property
def dtype(self) -> numpy.dtype:
    """NumPy type of the values held by this sparse array.

    Returns:
        numpy.dtype: The type stored on this object as ``_dtype``, i.e. the
        type of the NumPy arrays containing the non-zero elements.
    """
    stored_dtype = self._dtype
    return stored_dtype

def __get_item__(self, args: Tuple[Union[slice, Sequence], ...]) -> "SparseNdarray":
"""Extract sparse array by slicing this data array.
Expand All @@ -103,10 +128,28 @@ def __get_item__(self, args: Tuple[Union[slice, Sequence], ...]) -> "SparseNdarr
return _extract_sparse_array_from_SparseNdarray(self, args)


def _check_sparse_tuple(indices: Sequence, values: Sequence, max_index: int):
def _peek_for_type(contents: Sequence, dim: int, shape: Tuple[int, ...]):
ndim = len(shape)
if dim == ndim - 2:
for x in contents:
if x is not None:
return x[1].dtype
else:
for x in contents:
if x is not None:
out = _peek_for_type(x, dim + 1, shape)
if out is not None:
return out
return None


def _check_sparse_tuple(indices: Sequence, values: Sequence, max_index: int, dtype: numpy.dtype):
if len(indices) != len(values):
raise ValueError("Length of index and value vectors should be the same.")

if values.dtype != dtype:
raise ValueError("Inconsistent data types for different value vectors.")

for i in range(len(indices)):
if indices[i] < 0 or indices[i] >= max_index:
raise ValueError("Index vectors out of range for the last dimension.")
Expand All @@ -116,7 +159,7 @@ def _check_sparse_tuple(indices: Sequence, values: Sequence, max_index: int):
raise ValueError("Index vectors should be sorted.")


def _recursive_check(contents: Sequence, dim: int, shape: Tuple[int, ...]):
def _recursive_check(contents: Sequence, dim: int, shape: Tuple[int, ...], dtype: numpy.dtype):
if len(contents) != shape[dim]:
raise ValueError(
"Length of 'contents' or its components should match the extent of the corresponding dimension."
Expand All @@ -126,11 +169,11 @@ def _recursive_check(contents: Sequence, dim: int, shape: Tuple[int, ...]):
if dim == ndim - 2:
for x in contents:
if x is not None:
_check_sparse_tuple(x[0], x[1], shape[ndim - 1])
_check_sparse_tuple(x[0], x[1], shape[ndim - 1], dtype)
else:
for x in contents:
if x is not None:
_recursive_check(x, dim + 1, shape)
_recursive_check(x, dim + 1, shape, dtype)


def _characterize_indices(idx: Sequence):
Expand Down
13 changes: 12 additions & 1 deletion tests/test_SparseNdarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,14 @@
import delayedarray
import pytest
from utils import *

import numpy

def test_SparseNdarray_check():
test_shape = (10, 15, 20)
contents = mock_SparseNdarray_contents(test_shape)
y = delayedarray.SparseNdarray(test_shape, contents)
assert y.shape == test_shape
assert y.dtype == numpy.float64

with pytest.raises(ValueError, match="match the extent"):
y = delayedarray.SparseNdarray((5, 15, 20), contents)
Expand Down Expand Up @@ -48,6 +49,15 @@ def shorten(con, depth):
with pytest.raises(ValueError, match="should be the same"):
y = delayedarray.SparseNdarray(test_shape, contents2)

with pytest.raises(ValueError, match="Inconsistent data type"):
y = delayedarray.SparseNdarray(test_shape, contents, dtype = numpy.int32)

with pytest.raises(ValueError, match="'dtype' should be provided"):
y = delayedarray.SparseNdarray(test_shape, None)

empty = delayedarray.SparseNdarray(test_shape, None, dtype = numpy.int32)
assert empty.shape == test_shape
assert empty.dtype == numpy.int32

def test_SparseNdarray_extract_dense_array_3d():
test_shape = (16, 32, 8)
Expand Down Expand Up @@ -109,6 +119,7 @@ def test_SparseNdarray_extract_dense_array_1d():
test_shape = (99,)
contents = mock_SparseNdarray_contents(test_shape)
y = delayedarray.SparseNdarray(test_shape, contents)
assert y.dtype == numpy.float64

# Full extraction.
output = delayedarray.extract_dense_array(y, (slice(None),))
Expand Down

0 comments on commit 953f5e2

Please sign in to comment.