From 3db321fd5c6db2601bb231420e44c761bb1a4140 Mon Sep 17 00:00:00 2001 From: PGmajev <46708795+PGmajev@users.noreply.github.com> Date: Tue, 29 Mar 2022 15:32:43 +0200 Subject: [PATCH] n_pcs are no longer ignored for representations other than "X_pca" (#2179) * n_pcs are no longer ignored for non (scanpy-internal) X_pca reductions This change makes it possible to take only the dimensions needed from representations other than X_pca as well. This can be useful, for example, when using harmony or when storing different PCA embeddings in the same object: they no longer need to be stored as X_pca first in order to make full use of the n_pcs parameter. It might make sense to adapt the documentation accordingly. * Added error if n_pcs > dimensions in representation * Fixed typo in variable name * Fixed styling * using consistent quotation marks; all done from my side * first idea for test; check in detail tomorrow * Used wrong quotes before * shortened lines * Improved style * improved style in new function * trying to finish the test today after all * Removing wrong toarray() * Test changes as per pre-commit * Function changes as per pre-commit * Another try at implementing pre-commit changes * Added release note * Attempt to fix reference error Co-authored-by: pmajev --- docs/release-notes/1.9.0.md | 1 + scanpy/tests/test_pca.py | 17 +++++++++++++++++ scanpy/tools/_utils.py | 12 +++++++++--- 3 files changed, 27 insertions(+), 3 deletions(-) diff --git a/docs/release-notes/1.9.0.md b/docs/release-notes/1.9.0.md index 8322e3b10c..8f0370e9ce 100644 --- a/docs/release-notes/1.9.0.md +++ b/docs/release-notes/1.9.0.md @@ -6,3 +6,4 @@ - {func}`~scanpy.tl.filter_rank_genes_groups` now allows to filter with absolute values of log fold change {pr}`1649` {smaller}`S Rybakov` - {func}`~scanpy.pl.embedding_density` now allows more than 10 groups {pr}`1936` {smaller}`A Wolf` - {func}`~scanpy.logging.print_versions` now uses `session_info` {pr}`2089` {smaller}`P Angerer` {smaller}`I Virshup` +- 
`_choose_representation` now subsets the provided representation to n_pcs, regardless of the name of the provided representation (should affect mostly {func}`~scanpy.pp.neighbors`) {pr}`2179` {smaller}`I Virshup` {smaller}`PG Majev` diff --git a/scanpy/tests/test_pca.py b/scanpy/tests/test_pca.py index 7d7837a9a0..7d90d9bf20 100644 --- a/scanpy/tests/test_pca.py +++ b/scanpy/tests/test_pca.py @@ -136,3 +136,20 @@ def test_pca_chunked(pbmc3k_normalized): np.abs(chunked.uns["pca"]["variance_ratio"]), np.abs(default.uns["pca"]["variance_ratio"]), ) + + +def test_pca_n_pcs(pbmc3k_normalized): + """ + Tests that the n_pcs parameter also works for + representations not called "X_pca" + """ + pbmc = pbmc3k_normalized + sc.pp.pca(pbmc, dtype=np.float64) + pbmc.obsm["X_pca_test"] = pbmc.obsm["X_pca"] + original = sc.pp.neighbors(pbmc, n_pcs=5, use_rep="X_pca", copy=True) + renamed = sc.pp.neighbors(pbmc, n_pcs=5, use_rep="X_pca_test", copy=True) + + assert np.allclose(original.obsm["X_pca"], renamed.obsm["X_pca_test"]) + assert np.allclose( + original.obsp["distances"].toarray(), renamed.obsp["distances"].toarray() + ) diff --git a/scanpy/tools/_utils.py b/scanpy/tools/_utils.py index 856a3f1b45..6e548cb435 100644 --- a/scanpy/tools/_utils.py +++ b/scanpy/tools/_utils.py @@ -47,10 +47,16 @@ def _choose_representation(adata, use_rep=None, n_pcs=None, silent=False): logg.info(' using data matrix X directly') X = adata.X else: - if use_rep in adata.obsm.keys(): + if use_rep in adata.obsm.keys() and n_pcs is not None: + if n_pcs > adata.obsm[use_rep].shape[1]: + raise ValueError( + f'{use_rep} does not have enough Dimensions. Provide a ' + 'Representation with equal or more dimensions than' + '`n_pcs` or lower `n_pcs` ' + ) + X = adata.obsm[use_rep][:, :n_pcs] + elif use_rep in adata.obsm.keys() and n_pcs is None: X = adata.obsm[use_rep] - if use_rep == 'X_pca' and n_pcs is not None: - X = adata.obsm[use_rep][:, :n_pcs] elif use_rep == 'X': X = adata.X else: