euxhenh · euxhenh · Nov 25, 2023 · Nov 25, 2023 · Nov 25, 2023
diff --git a/.gitignore b/.gitignore
@@ -17,3 +17,4 @@ figs
 docs/_build
 test-results
 docs
+!tests/adatas/*
diff --git a/src/grinch/pipeline.py b/src/grinch/pipeline.py
@@ -33,7 +33,7 @@ def read(filepath: FilePath) -> AnnData:
         """Reads AnnData from filepath"""
         if filepath.suffix == '.h5':
             return sc.read_10x_h5(filepath)
-        return anndata.read(filepath)
+        return anndata.read_h5ad(filepath)
 
 
 class MultiRead(BaseConfigurable, ReadMixin):

diff --git a/src/grinch/processors/de.py b/src/grinch/processors/de.py
@@ -335,7 +335,7 @@ def _single_test(self, pmv: PartMeanVar, label, *, x, y, m2) -> pd.DataFrame:
 
         pvals, qvals = self.get_pqvals(pvals)
         m1 = pmv.compute([label], ddof=1)[1]  # take label
-        m2 = m2 or pmv.compute([label], ddof=1, exclude=True)[1]  # all but label
+        m2 = m2 if m2 is not None else pmv.compute([label], ddof=1, exclude=True)[1]  # all - label
         log2fc = self.get_log2fc(m1, m2)
 
         return pd.DataFrame(data=dict(

diff --git a/src/grinch/processors/indexer.py b/src/grinch/processors/indexer.py
@@ -91,5 +91,8 @@ def _process_mask(self, adata: AnnData, mask: NP1D_bool) -> None:
         # passing a view
         key = ['obs_indices', 'var_indices'][int(self.cfg.axis)]
         kwargs = {key: mask}
-        logger.info(f"Running '{self.processor.__class__.__name__}'.")
+        logger.info(
+            f"Running '{self.processor.__class__.__name__}' "
+            f"on {mask.sum()} / {mask.size} points."
+        )
         self.processor(adata, **kwargs)
diff --git a/src/grinch/processors/tools.py b/src/grinch/processors/tools.py
@@ -47,7 +47,7 @@
                 gene_names.append(gene_id)
                 not_found += 1
 
-        logger.info(f"Could not convert {not_found} gene IDs.")
+        logger.warning(f"Could not convert {not_found} gene IDs.")
         self.store_item(self.cfg.save_key, np.asarray(gene_names))
         self.store_item(self.cfg.stats_key, not_found)
 

diff --git a/tests/adatas/a1.h5ad b/tests/adatas/a1.h5ad
diff --git a/tests/adatas/a2.h5ad b/tests/adatas/a2.h5ad
diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py
@@ -1,3 +1,5 @@
+import os
+
 import numpy as np
 import pytest
 from anndata import AnnData
@@ -6,7 +8,7 @@
 
 from grinch import OBS, OBSM, DataSplitter
 
-from ._utils import to_view, assert_allclose
+from ._utils import assert_allclose, to_view
 
 X = np.array([
     [2, 2, 0, 0, 0],
@@ -17,7 +19,7 @@
     [0, 1, 5, 3, 1],
 ], dtype=np.float32)
 
-X_mods = [X, to_view(X)]
+X_mods = [X, to_view(X), ]
 
 
 @pytest.mark.parametrize("X", X_mods)
@@ -72,3 +74,25 @@ def test_pipeline_end_to_end_single_dataset(X):
     assert_allclose(train.obs[OBS.KMEANS], [0, 1])
     assert_allclose(train.obs[OBS.LOG_REG], [0, 1])
     assert_allclose(val.obs[OBS.LOG_REG], [0, 1, 1])
+
+
+def test_multi_read():
+    """Smoke-test an empty GRPipeline whose data comes from MultiRead."""
+    # Both fixtures sit in the 'adatas' folder beside this test module.
+    here = os.path.dirname(os.path.realpath(__file__))
+    a1_path = os.path.join(here, 'adatas', 'a1.h5ad')
+    a2_path = os.path.join(here, 'adatas', 'a2.h5ad')
+    reader_spec = OmegaConf.create({
+        "_target_": "src.grinch.MultiRead.Config",
+        "paths": {"a1": a1_path, "a2": a2_path},
+        "id_key": None,
+    })
+    pipeline_spec = OmegaConf.create({
+        "_target_": "src.grinch.GRPipeline.Config",
+        "data_readpath": reader_spec,
+        "processors": [],
+    })
+
+    pipeline_cfg = instantiate(pipeline_spec, _convert_='all')
+    pipeline = pipeline_cfg.create()
+    pipeline()  # empty run: no processors are configured