diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index 3e3ec370..64ee60ba 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -15,7 +15,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [ '3.8', '3.9', '3.10' ] + python-version: [ '3.9', '3.10', '3.11', '3.12' ] steps: - uses: actions/checkout@v2 diff --git a/.gitignore b/.gitignore index 01c1c10d..ed15d620 100644 --- a/.gitignore +++ b/.gitignore @@ -14,6 +14,7 @@ cooltools/__pycache__ # Distribution / packaging .Python env/ +.venv/ build/ develop-eggs/ dist/ @@ -106,4 +107,5 @@ tmp.npz tmp.hdf5 cooltools/sandbox/test.mcool -.vscode/ \ No newline at end of file +.vscode/ +.idea/ \ No newline at end of file diff --git a/cooltools/api/snipping.py b/cooltools/api/snipping.py index 3d02df60..dbc2bd79 100644 --- a/cooltools/api/snipping.py +++ b/cooltools/api/snipping.py @@ -4,24 +4,24 @@ The main user-facing function of this module is `pileup`, it performs pileups using snippers and other functions defined in the module. The concept is the following: -- First, the provided features are annotated with the regions from a view (or simply +- First, the provided features are annotated with the regions from a view (or simply whole chromosomes, if no view is provided). They are assigned to the region that contains it, or the one with the largest overlap. -- Then the features are expanded using the `flank` argument, and aligned to the bins +- Then the features are expanded using the `flank` argument, and aligned to the bins of the cooler -- Depending on the requested operation (whether the normalization to expected is +- Depending on the requested operation (whether the normalization to expected is required), the appropriate snipper object is created -- A snipper can `select` a particular region of a genome-wide matrix, meaning it +- A snipper can `select` a particular region of a genome-wide matrix, meaning it stores its sparse representation in memory. This could be whole chromosomes or chromosome arms, for example -- A snipper can `snip` a small area of a selected region, meaning it will extract +- A snipper can `snip` a small area of a selected region, meaning it will extract and return a dense representation of this area -- For each region present, it is first `select`ed, and then all features within it are +- For each region present, it is first `select`ed, and then all features within it are `snip`ped, creating a stack: a 3D array containing all snippets for this region -- For features that are not assigned to any region, an empty snippet is returned -- All per-region stacks are then combined into one, which then can be averaged to create +- For features that are not assigned to any region, an empty snippet is returned +- All per-region stacks are then combined into one, which then can be averaged to create a single pileup -- The order of snippets in the stack matches the order of features, this way the stack +- The order of snippets in the stack matches the order of features, this way the stack can also be used for analysis of any subsets of original features This procedure achieves a good tradeoff between speed and RAM. Extracting each @@ -390,7 +390,8 @@ def select(self, region1, region2): if self.cooler_opts["sparse"]: matrix = matrix.tocsr() if self.min_diag is not None: - diags = np.arange(np.diff(self.clr.extent(region1_coords)), dtype=np.int32) + lo, hi = self.clr.extent(region1_coords) + diags = np.arange(hi - lo, dtype=np.int32) self.diag_indicators[region1] = LazyToeplitz(-diags, diags) return matrix @@ -600,7 +601,8 @@ def select(self, region1, region2): .values ) if self.min_diag is not None: - diags = np.arange(np.diff(self.clr.extent(region1_coords)), dtype=np.int32) + lo, hi = self.clr.extent(region1_coords) + diags = np.arange(hi - lo, dtype=np.int32) self.diag_indicators[region1] = LazyToeplitz(-diags, diags) return matrix @@ -770,7 +772,8 @@ def select(self, region1, region2): .values ) if self.min_diag is not None: - diags = np.arange(np.diff(self.clr.extent(region1_coords)), dtype=np.int32) + lo, hi = self.clr.extent(region1_coords) + diags = np.arange(hi - lo, dtype=np.int32) self.diag_indicators[region1] = LazyToeplitz(-diags, diags) return self._expected @@ -861,7 +864,7 @@ def pileup( map_functor : callable, optional Map function to dispatch the matrix chunks to workers. If left unspecified, pool_decorator applies the following defaults: if nproc>1 this defaults to multiprocess.Pool; - If nproc=1 this defaults the builtin map. + If nproc=1 this defaults the builtin map. Returns ------- @@ -983,5 +986,5 @@ def pileup( stack = _pileup(features_df, snipper.select, snipper.snip, map=map_functor) if feature_type == "bed": stack = np.fmax(stack, np.transpose(stack, axes=(0, 2, 1))) - + return stack diff --git a/requirements.txt b/requirements.txt index be1624b0..5891c6c2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,7 +7,7 @@ matplotlib multiprocess numba numpy -pandas>=1.5.1,<2 +pandas>=1.5.1 scikit-learn>=1.1.2 scipy scikit-image diff --git a/tests/test_checks.py b/tests/test_checks.py index ccd0eee9..65ffa8ac 100644 --- a/tests/test_checks.py +++ b/tests/test_checks.py @@ -235,13 +235,13 @@ def test_is_track(): assert cooltools.lib.is_track(track) track_incompat = bioframe.sort_bedframe(track.copy()) - track_incompat.iloc[:, 0] = 10 + track_incompat["chrom"] = 10 # not bedframe in first three columns assert cooltools.lib.is_track(track_incompat) is False track_incompat = track.copy() - track_incompat.iloc[:, -1] = ["a", "b", "c", "d"] + track_incompat["value"] = ["a", "b", "c", "d"] # not numeric type in column4 assert cooltools.lib.is_track(track_incompat) is False