Skip to content

Commit

Permalink
Cleaned up the internal subsetting utilities.
Browse files Browse the repository at this point in the history
This reduces support for ``None`` entries in the subset argument;
no-op subsets must now be specified as ranges instead.
  • Loading branch information
LTLA committed Feb 1, 2024
1 parent 3117799 commit 3c2381e
Show file tree
Hide file tree
Showing 7 changed files with 44 additions and 37 deletions.
23 changes: 15 additions & 8 deletions src/delayedarray/Grid.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
import bisect
import abc

from ._subset import _is_single_subset_noop


class AbstractGrid(abc.ABC):
"""
Expand Down Expand Up @@ -39,7 +41,7 @@ def transpose(self, perm: Tuple[int, ...]) -> "AbstractGrid":


@abc.abstractmethod
def subset(self, subset: Tuple[Optional[Sequence[int]], ...]) -> "AbstractGrid":
def subset(self, subset: Tuple[Sequence[int], ...]) -> "AbstractGrid":
pass


Expand Down Expand Up @@ -169,7 +171,7 @@ def transpose(self, perm: Tuple[int, ...]) -> "SimpleGrid":
)


def subset(self, subset: Tuple[Optional[Sequence[int]], ...]) -> "SimpleGrid":
def subset(self, subset: Tuple[Sequence[int], ...]) -> "SimpleGrid":
"""
Subset a grid to reflect the same operation on the associated array.
For any given dimension, consecutive elements in the subset are only
Expand All @@ -195,7 +197,7 @@ def subset(self, subset: Tuple[Optional[Sequence[int]], ...]) -> "SimpleGrid":
new_maxgap = []
for i, bounds in enumerate(self._boundaries):
cursub = subset[i]
if cursub is None:
if _is_single_subset_noop(self._shape[i], cursub):
new_boundaries.append(bounds)
new_shape.append(self._shape[i])
new_maxgap.append(self._maxgap[i])
Expand Down Expand Up @@ -504,7 +506,7 @@ def transpose(self, perm: Tuple[int, ...]) -> "CompositeGrid":
)


def subset(self, subset: Tuple[Optional[Sequence[int]], ...]) -> "CompositeGrid":
def subset(self, subset: Tuple[Sequence[int], ...]) -> "CompositeGrid":
"""
Subset a grid to reflect the same operation on the associated array.
This splits up the subset sequence for the ``along`` dimension and
Expand All @@ -515,14 +517,19 @@ def subset(self, subset: Tuple[Optional[Sequence[int]], ...]) -> "CompositeGrid"
Tuple of length equal to the number of grid dimensions. Each
entry should be a (possibly unsorted) sequence of integers,
specifying the subset to apply to each dimension of the grid.
Alternatively, an entry may be None if no subsetting is to be
applied to the corresponding dimension.
Returns:
A new ``CompositeGrid`` object.
"""
if subset[self._along] is None:
new_components = [grid.subset(subset) for grid in self._components]
if len(subset) != len(self._shape):
raise ValueError("'shape' and 'subset' should have the same length")

if _is_single_subset_noop(self._shape[self._along], subset[self._along]):
new_components = []
new_subset = list(subset)
for grid in self._components:
new_subset[self._along] = range(grid.shape[self._along])
new_components.append(grid.subset((*new_subset,)))
return CompositeGrid(new_components, self._along)

component_limits = []
Expand Down
5 changes: 1 addition & 4 deletions src/delayedarray/Subset.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

from .DelayedOp import DelayedOp
from .SparseNdarray import SparseNdarray
from ._subset import _spawn_indices, _sanitize_subset
from ._subset import _sanitize_subset
from .extract_dense_array import extract_dense_array
from .extract_sparse_array import extract_sparse_array
from .create_dask_array import create_dask_array
Expand Down Expand Up @@ -88,9 +88,6 @@ def subset(self) -> Tuple[Sequence[int], ...]:


def _extract_array(x: Subset, subset: Tuple[Sequence[int], ...], f: Callable):
if subset is None:
subset = _spawn_indices(x.shape)

newsub = list(subset)
expanded = []
is_safe = 0
Expand Down
4 changes: 0 additions & 4 deletions src/delayedarray/Transpose.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

from .DelayedOp import DelayedOp
from .SparseNdarray import SparseNdarray
from ._subset import _spawn_indices
from .extract_dense_array import extract_dense_array
from .extract_sparse_array import extract_sparse_array
from .create_dask_array import create_dask_array
Expand Down Expand Up @@ -96,9 +95,6 @@ def perm(self) -> Tuple[int, ...]:


def _extract_array(x: Transpose, subset: Tuple[Sequence[int], ...], f: Callable):
if subset is None:
subset = _spawn_indices(x.shape)

permsub = [None] * len(subset)
for i, j in enumerate(x._perm):
permsub[j] = subset[i]
Expand Down
3 changes: 0 additions & 3 deletions src/delayedarray/UnaryIsometricOpWithArgs.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
from .DelayedOp import DelayedOp
from .SparseNdarray import SparseNdarray
from ._isometric import ISOMETRIC_OP_WITH_ARGS, _execute, _infer_along_with_args
from ._subset import _spawn_indices
from .extract_dense_array import extract_dense_array
from .extract_sparse_array import extract_sparse_array
from .create_dask_array import create_dask_array
Expand Down Expand Up @@ -149,8 +148,6 @@ def _extract_array(x: UnaryIsometricOpWithArgs, subset: Tuple[Sequence[int], ...

subvalue = x._value
if isinstance(subvalue, ndarray) and not subvalue is numpy.ma.masked:
if subset is None:
subset = _spawn_indices(x.shape)
if len(subvalue.shape) == 1:
subvalue = subvalue[subset[-1]]
else:
Expand Down
40 changes: 25 additions & 15 deletions src/delayedarray/_subset.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,31 +7,41 @@ def _spawn_indices(shape: Tuple[int, ...]) -> Tuple[Sequence[int], ...]:


def _is_subset_consecutive(subset: Sequence):
if isinstance(subset, range):
return subset.step == 1
for s in range(1, len(subset)):
if subset[s] != subset[s-1]+1:
return False
return True


def _is_subset_noop(shape: Tuple[int, ...], subset: Tuple[Sequence, ...]):
if subset is not None:
for i, s in enumerate(shape):
cursub = subset[i]
if len(cursub) != s:
return False
for j in range(s):
if cursub[j] != j:
return False
def _is_single_subset_noop(extent: int, subset: Sequence[int]) -> bool:
if isinstance(subset, range):
return subset == range(extent)
if len(subset) != extent:
return False
for i, s in enumerate(subset):
if s != i:
return False
return True


def _sanitize_subset(subset: Sequence):
okay = True
for i in range(1, len(subset)):
if subset[i] <= subset[i - 1]:
okay = False
break
def _is_subset_noop(shape: Tuple[int, ...], subset: Tuple[Sequence, ...]) -> bool:
for i, s in enumerate(shape):
if not _is_single_subset_noop(s, subset[i]):
return False
return True


def _sanitize_subset(subset: Sequence):
if isinstance(subset, range):
okay = (subset.step > 0)
else:
okay = True
for i in range(1, len(subset)):
if subset[i] <= subset[i - 1]:
okay = False
break
if okay:
return subset, None

Expand Down
2 changes: 1 addition & 1 deletion src/delayedarray/extract_sparse_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from typing import Any, Tuple, Sequence, List, Union
from biocutils.package_utils import is_package_installed

from ._subset import _spawn_indices, _is_subset_noop, _is_subset_consecutive
from ._subset import _is_subset_noop, _is_subset_consecutive
from ._mask import _convert_to_unmasked_1darray, _convert_to_maybe_masked_1darray, _allocate_unmasked_ndarray, _allocate_maybe_masked_ndarray
from .SparseNdarray import SparseNdarray, _extract_sparse_array_from_SparseNdarray

Expand Down
4 changes: 2 additions & 2 deletions tests/test_Grid.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ def test_SimpleGrid_subset():
grid = delayedarray.SimpleGrid((range(10, 51, 10), range(2, 21, 3)), cost_factor=1)

# No-op subsetting.
subgrid = grid.subset((None, None))
subgrid = grid.subset((*(range(s) for s in grid.shape),))
assert subgrid.shape == grid.shape
assert subgrid.boundaries == grid.boundaries
assert subgrid.cost == grid.cost
Expand Down Expand Up @@ -267,7 +267,7 @@ def test_CompositeGrid_subset():
combined = delayedarray.CompositeGrid([grid1, grid2], along=0)

# No-op subsetting.
subcombined = combined.subset((None, None))
subcombined = combined.subset((*(range(s) for s in combined.shape),))
assert combined.shape == subcombined.shape
assert subcombined.boundaries == combined.boundaries
assert subcombined.cost == combined.cost
Expand Down

0 comments on commit 3c2381e

Please sign in to comment.