From 3d9d585ae464ca343fcf73d07fa56ab051239aa3 Mon Sep 17 00:00:00 2001 From: euxhenh Date: Sat, 25 Nov 2023 15:34:27 -0500 Subject: [PATCH 1/2] Fixed minor corner cases --- src/grinch/pipeline.py | 2 +- src/grinch/processors/de.py | 2 +- src/grinch/processors/indexer.py | 5 ++++- src/grinch/processors/tools.py | 2 +- 4 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/grinch/pipeline.py b/src/grinch/pipeline.py index a9be510..a1c9660 100644 --- a/src/grinch/pipeline.py +++ b/src/grinch/pipeline.py @@ -33,7 +33,7 @@ def read(filepath: FilePath) -> AnnData: """Reads AnnData from filepath""" if filepath.suffix == '.h5': return sc.read_10x_h5(filepath) - return anndata.read(filepath) + return anndata.read_h5ad(filepath) class MultiRead(BaseConfigurable, ReadMixin): diff --git a/src/grinch/processors/de.py b/src/grinch/processors/de.py index 05d3dd4..20a9601 100644 --- a/src/grinch/processors/de.py +++ b/src/grinch/processors/de.py @@ -335,7 +335,7 @@ def _single_test(self, pmv: PartMeanVar, label, *, x, y, m2) -> pd.DataFrame: pvals, qvals = self.get_pqvals(pvals) m1 = pmv.compute([label], ddof=1)[1] # take label - m2 = m2 or pmv.compute([label], ddof=1, exclude=True)[1] # all but label + m2 = m2 if m2 is not None else pmv.compute([label], ddof=1, exclude=True)[1] # all - label log2fc = self.get_log2fc(m1, m2) return pd.DataFrame(data=dict( diff --git a/src/grinch/processors/indexer.py b/src/grinch/processors/indexer.py index 3afc229..dabebfc 100644 --- a/src/grinch/processors/indexer.py +++ b/src/grinch/processors/indexer.py @@ -91,5 +91,8 @@ def _process_mask(self, adata: AnnData, mask: NP1D_bool) -> None: # passing a view key = ['obs_indices', 'var_indices'][int(self.cfg.axis)] kwargs = {key: mask} - logger.info(f"Running '{self.processor.__class__.__name__}'.") + logger.info( + f"Running '{self.processor.__class__.__name__}' " + f"on {mask.sum()} / {mask.size} points." + ) self.processor(adata, **kwargs) diff --git a/src/grinch/processors/tools.py b/src/grinch/processors/tools.py index f70ce1f..ab1adbe 100644 --- a/src/grinch/processors/tools.py +++ b/src/grinch/processors/tools.py @@ -47,7 +47,7 @@ def _process(self, adata: AnnData) -> None: gene_names.append(gene_id) not_found += 1 - logger.info(f"Could not convert {not_found} gene IDs.") + logger.warning(f"Could not convert {not_found} gene IDs.") self.store_item(self.cfg.save_key, np.asarray(gene_names)) self.store_item(self.cfg.stats_key, not_found) From 47519e7effe3e186b178ba8f06e26fa9c3858086 Mon Sep 17 00:00:00 2001 From: euxhenh Date: Sat, 25 Nov 2023 15:54:04 -0500 Subject: [PATCH 2/2] Adding coverage tests for multiread --- .gitignore | 1 + tests/adatas/a1.h5ad | Bin 0 -> 24064 bytes tests/adatas/a2.h5ad | Bin 0 -> 24064 bytes tests/test_pipeline.py | 28 ++++++++++++++++++++++++++-- 4 files changed, 27 insertions(+), 2 deletions(-) create mode 100644 tests/adatas/a1.h5ad create mode 100644 tests/adatas/a2.h5ad diff --git a/.gitignore b/.gitignore index 7360d4b..d43b6cb 100644 --- a/.gitignore +++ b/.gitignore @@ -17,3 +17,4 @@ figs docs/_build test-results docs +!tests/adatas/* diff --git a/tests/adatas/a1.h5ad b/tests/adatas/a1.h5ad new file mode 100644 index 0000000000000000000000000000000000000000..238869a24bbb98ce84ad88ccf35cd830a61b524a GIT binary patch literal 24064 zcmeHPdpK2D8{fx0_oQ)2ZbNPnrBIko$jK#yD7hRQI^~v-WXhM3OU;zW7)=tQl1vm) zW2O!o%*b6SnIRO_B*ljkzO&C>OSS7f%{O)CnP&F>e|-7e=ri zk&xN1+l!!uZl?zaVMo2&F7GRL(GVmdf>`??KuL%?%M^(V1F*nHm~CPkz6|5lkPoYm z1bR^LGCP4<;^M}d5keGMJIwC0HWI6zg!t0@0;pj$WEMD30L%#7%mcyPfHsYu28)&V z<@Z${z*NI(*>SC%IXWi+>XGhOE5W(FDprDmfHSrOx`pV1vb_t2qvMQjMSv%$XQ+wc zHvz7Q6%heTYzOM;g0lS@pzt>r=%}IRK3nlKz~xFrUkK5k27GeDpV3PIi5WR)yZGyg zJu9GI&JS?Da0NWV@fio>0AR$E9^^}7IyD>z3at2(awc93Far!==cPZ3Fr8c)@d*wH z4-C={4)vvlqAKioB*8!ecAyYORr`G z)Sw_=Y8aJyWyUsETrqcbb@X&}nG%K%#Hpd7)SXNmT&@v_>*_3K>BBM{26*1E4BKCd zHHXcU%zcP212~Ldhy%K3<`;+?0)05ILEH%7LO?=%2`CHWaD7k~!QqQRSrmusfwCA5 zM>#_`=#Q|S?O~zl)dSMUyb53&v$H9Ql!{cDUDm!X#kv^pMlHM z8*!=yj$`L%3qd{JKeYi4f!CIBq+o6Ih(@?m|%clfM9@NfM9@NfM9@N;3H!ovP0zS zt%hxhT=8>ee`hUq$y{@t?fIoA6GqOs-~Oq^;wR0cIVEe->jr%j0)CL@nex|>O?5HPS&>}f2bFw^WsmY(e=6xId{n5$b?|}1 zKy)?PQKKfk>BEJXb+`{gV^-$Tm0K zrg~TPT9Yn6Y?{1#X&)m`C8vhg?x=D*)9Qzfg%w&W9s~rADMrgTOz=clJ#nj?)qC*P z7T<`JgL51-DL-506b!5>3pyor)H0-EY2GtXFclUnHwQce7MxD*3v&mge&8 zIOFH5fBvtAiE-&k=W7vTIk!vmZZGqUNxFLbpg~t!_JqTM^2tD*h{@(%THRTaQ@rKw z^X{ieKaF%Q9zQAD%G1?YnZyv44c@Uk^^?8gUGp!KW5p)~+C}|GDi&W^R@t+UOQ}D9 zQ)Qpk(Xsj7f*OJ^c2`_98Xx2tjc)1KP#Z0n(iY&*5G8W6v{0d8lU%%?XYN#3o`i2s z@BlfLTRLiksZvNwg=W+V%K?#DPXjhLliI(|tGg@7YpyY5_-Odj`VRGoZ=0pkE+;D~ zkCfXP#+xfdep{(?nxE!Tbt3(KlvAhTiMZ&__~4WaH9b{o-8at}7*|xsTpt|lYE`XL zFKP|jt$#LNNBH+{DPw()CnmX9zC$dPwt8>hpl)~J$0DWJeFaA0`S*8gm!{7>=Iy_w z>1kX_nb7cTo%*(2A)T|F7LBxsJ-=~>5+X&1#1gIw}vV zd5u+(lt;`OkDfn0uv0>e|7qsu_j(pp-_yyy(3Q)VJ23D|YmIr9T6+@Jorm%KRq+1{MzOGg^phwR=74; zLr%IyxxAx$&otvkoo8c7a^DPXf|48S#as5X_lj~;D1dX+im%*@A>x@DHf})EzTt!DV7-U zm-Km*Pc@8i{7PbMME$tK1zNqJzVI=d;ueX#Q+Ao#0@s-x`)zb0IdgT93w_&}smt;U zOyW=7imkg`F%jkp{n`F`Qg@0s-t~}+N-6EtMmJn z+jP!-@9j0F(o(-nJ^Ooz!{Rrvr~P!cJvSMFYdllH|>+w=LCB{H*hShs@#w0^5uXw64a~rcrH18yw7yLuAx*1;ppel`HwBh`RUv=yLlT z4Ts}e0p4`s$e`uTE0e#h{5(_C%xyw^OIr&y=bM;R$E7|GPOqxBuFEl5Gso3($F3$j zWrm$kU#eky_6slHkcdz{Wj=4Y^MWFT^>y>E=XLeE(MtwH z+_TBkH}=F0(L*EU@1EEwd!|TZ?OnC-b=@7Vu|93_(-XVo{a#esE4AGZi)cxZnC90% zy?ivX!^aol66MqF=J#H$VyYmUDfMOJm(~5`-s0>hx0=W0Hb<}3H!@rp&p&+ELyZ5kdd*DatH zxlhWlir3?gVDNMPSrOdupW2V?Li&-OE|#zP=5r(1oz38nVB$FtWaQr^vAjQKpFyGp z31n=+d?xt35K5R&9{qqdDia-tm+9XWBL0;x%{kc3&R$c{ga?~iy|LO(_1Fm4$ zvAjc+6>M{b2^64Ah(E>vyXVy~zXWf^Mg4UI`rC{Hyk1R|2nGlS2nGlS2nGlS{s9bd z;yna3Hygeegnz%|eeWk(`L^ID5zY1H#QCE+;_Tj|xVRh}^dd|k?^@?Y8e@v^z7`xt zh<^zK?B28S0)7N(fbSc?yn70)_>&T@vnZ=E0}Np2W&Hhp-m^h-V&1hLhIunk{srq{ zh6%4J%HmhWPvrtZautt1y)b|{(lUxTMvVrK zID!BO1_%ZS1_%ZS1_%Z|b_O`Hp44QL0@uTe;Kj$4;Qs%8?1rdHsTCSfb}r)M!r0#Cd9v-0sMLxJ{JiAegx@(>tK5byr+cmhyRYq`^F#Zy=Kq} z^e=r*#2@5ehRzyuM0fCoanodI_7 zH?;9}{SO^~&^r8oJJ@?W3yPqIvLv${i0&}UEXgc~fHF%4PSnX?n)^%3@Ksxc_=?W4{d}* zmr+@oyR^h+p98N2*nvgfq=5+iad$GL4h`9LZSP9AX>TRdE)^I%?acHx3} z2)HRpQktX50s09c9XJLY{^f?gYINK6Oos!P9+ZO?<L*Fl6Q{-$ly%TQ8)9Li}Q&)N!o8&K0@V-jXd ztYhQCBjRWmE*=FaBq#?>cglk~r1_xdAMD4g<{hT{F!*VWj2;;-;*h}!{99&p;PGu?Ljr$r3a9c=uZIC>VxZnWXpZtRCP_+x!ENs)2l*%5&2 zfMw@<3vlc&_8-S5@@(fw9QDAz4Ie^)9&8-OTTeFbM1Wpw+?fFSEZiQ5d7F&`k-gbC z5RZDqcG|-QG-Uh31vFyg&|hOV4*l)J#-YD`**NqU?`Li8f&Q9+KaLM@r%YKmxKsUE z<=X*2W^5ez8NkMYpMh)~_!-2;fgf|gv7I>XEm%0v26y%L@eQ<5@38!VHVSS_ZSx-L z=UtXR>d=aXqpqx3IO@sq==f_@vk-pH6ofcVgK%pUArO z&<_HFs~c~eH!Rvdx%zzWkqKw_oEkTz`$kz}%&nsA(kDUwy}o?>M9ZXTO2x3Etr?j= z|GpXNe>TAc-CLVd%0omP9H~Ejp@^nnhR;!3%&{u$kZKRw*SX6 zH=Q&yJLfGm%iqq;vwE=B=%BA-!U9!6>e%`BT)TB0Qt-~R;wIAr`OhxfyZfdaMArV6 z-PpUbZqjjg-R<@UWz+XH#lza#6#iA>J8V9t-;Y(qa*Kqu0c~2kjT#H8^nUYTBak z)$1keH+9qKQ}EH=;l6hD``11R+~K{)bVOOmXWz(E1a&>ThWA;OpK!BBlu1V3q5n?R zUa)drpn3JNhdaO9IcU$0s1oOeGlOjxU%W4yJ*>)SrC02Jk-lZ7i{(1M8{gK>{C1LV z@kFhH*x?^+w|?3he8TqU@1s0+oD7g>HBOflUb`g8RnZChR66}b{ZSd$-9DQs`pMUz zhq-gKFeB^G?>7bt7GF%gm)&gO+v8gwD+9}Q$@)j@N-~}16)rjQMPYB7$&b!nb-P-2 zKF8HcdR6tJe`U#vTSKYDm2T_Pd*tuC;x{A4q^v2u)V}^gyzGAdtRZs?bF7Yp*N?iR zS0C%}yDW6|(}P;`UCUR69jeKHH*If)WsUpYn|n*cES8@-x_MhgLr`g}{{ykt(tz~c z5z}_*dY@`E)tePy8@TW1V_ye2KMocfEL%A)t7)uu^CzEgvYdNYZ@;V9^R_Upm+@!P z-}efB&YPjGcXd})SpN8PZ=W2o=5Sv82AxG;oOZPT$Xlagv!C@3r}ud3xHe{#-L-yV zouYlDE?j@TNqxgpjim!N?r`dz+jq*A3#;_)k9+JsYNUUA zklTZ2{k&4|x6IgJ>@n1GZh@9*Sf#G@u=0DJU9(zT^5YCv7(Qu!I&iT@>(T<}v*$Fj zK1giIH+NjOyz-t-m37qamTzL*4);BHGGSG4jrz9CQGOHeXo!uaH`mSeHYt3ToO9); zbdNI8Oq-&K8CTQ$8bzk}`FhRHlH>D?tJXK&nc9%(SfA{BYW51z#iXtM9&E@vQN zJ4JW#SMrqPdxrYoSw(jV3hv@R(d+oK?{f#Zm;BgNx=%w=v!OVvTk+t@kB)6Tu|ze* z%4pY+g=ZHX{`{d;iMN{PI^!!FgZ?8uo2XuJc>Bomi`Tb(I6U4iZ*@V6?YV8Y?3av8 zs2Z^~w02v|q$RD?EYqmaq)XGoDHR=cVXexvQ3Hor7;RNubvm_ThQ>5XXD~G}>C)8W zLvxkf2(=v{G>Cgrw6!w z{oWDzyD`@^S6@@}zt%lbs?%HAHCU=WRH`+%MMHi^t?)IQ!};d&4E$XNNWA8s-y30h zQSgsp{yh*f@@82Q_owJH1TY97VhiJ$@O>d#j3?uYU+Q?Ap^6M*66h**@oKckW&OQ4 zPA0wq200x|gz;as5HJt|6_L0@nicX3!GH=#^X9KH!1cZw<9gVo{D^@xa016$+W=r^ z;L|(0z99Dgj*dQ0GI6i)kVwavE2I5%Y&zFIied;v zmBE#N)jBT;0#Wg~7P9f?4Pk(5pG_DBGy@N~&jI7vMIGb^*IA-bq#!wz*RH>R&psP^ z&&R9Q!x%S%_8(Xe%OO^ruCi@8)XrbW*TeLB8?RG?H@QGu>)|L2bSG~7-@nKJ*LrvY zNB(BkdKkxB+W_Fm$HP1WJOexfJOexfJOi&U1Ik!W(t9~^J!}daA3fmtzoXBS&##At zih7jMetJ(S*LP|v$B=6+Cx#;HVd_uu@^G3rZ#V<&^)S8{k-|7adT<>ah(HG|>W}>$ zk&fz**#-igp!ZlSLx1!hZ7%(Z{0K2}VtD-l54?Ff16=xR82h5Xvh*kOA@!Vr`Z`m8 z^fwEXp+EYY2VDBAgb7Mcy2a}cc;L;;8Q{{Nly#$1mi~nPq+NkfUuWu%4(U;b{^)Or zaOqDJMEWBqhSwkPz?+vdz@@*2@h|!-OMkNWNj+zvzRuJiy|h$@{^)P=aOtlynDj?Z z46i@nfj2K_fJ=W;7)O{(S^8stm$EbUN59fhhW_Y({c-8fV=7@xP7JR<;DI+UXMjt8 z4U=E=?`-{H>G=Qc*lF<$lqim4tRg*|K2W3?t4K>B&Da=mvf@L(6qO($`LKOqw38Bk zBp$sBC4{my;kg;>Wj{w_yha*y5dCD3J_Gz3KzYsZVLbF>zZV`1erd2?MnCrY*$MsN gxuVUF^_&52|G2^TZfqVpbKa#xtdueD((xaE08nJ--v9sr literal 0 HcmV?d00001 diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py index 0b580af..a2af5da 100644 --- a/tests/test_pipeline.py +++ b/tests/test_pipeline.py @@ -1,3 +1,5 @@ +import os + import numpy as np import pytest from anndata import AnnData @@ -6,7 +8,7 @@ from grinch import OBS, OBSM, DataSplitter -from ._utils import to_view, assert_allclose +from ._utils import assert_allclose, to_view X = np.array([ [2, 2, 0, 0, 0], @@ -17,7 +19,7 @@ [0, 1, 5, 3, 1], ], dtype=np.float32) -X_mods = [X, to_view(X)] +X_mods = [X, to_view(X), ] @pytest.mark.parametrize("X", X_mods) @@ -72,3 +74,25 @@ def test_pipeline_end_to_end_single_dataset(X): assert_allclose(train.obs[OBS.KMEANS], [0, 1]) assert_allclose(train.obs[OBS.LOG_REG], [0, 1]) assert_allclose(val.obs[OBS.LOG_REG], [0, 1, 1]) + + +def test_multi_read(): + curdir = os.path.dirname(os.path.realpath(__file__)) + multiread_cfg = OmegaConf.create({ + "_target_": "src.grinch.MultiRead.Config", + "paths": { + "a1": os.path.join(curdir, 'adatas', 'a1.h5ad'), + "a2": os.path.join(curdir, 'adatas', 'a2.h5ad'), + }, + "id_key": None, + }) + + cfg = OmegaConf.create({ + "_target_": "src.grinch.GRPipeline.Config", + "data_readpath": multiread_cfg, + "processors": [], + }) + + cfg = instantiate(cfg, _convert_='all') + obj = cfg.create() + obj() # empty run