Skip to content

Commit

Permalink
Adding compression, better logfc (#111)
Browse files Browse the repository at this point in the history
  • Loading branch information
euxhenh authored Nov 29, 2023
1 parent a35fc3e commit 4307d75
Show file tree
Hide file tree
Showing 4 changed files with 8 additions and 5 deletions.
1 change: 1 addition & 0 deletions src/grinch/cond_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,7 @@ def _take_k_functional(arr, k: NonNegativeInt, as_mask: bool, top: bool):
"""
if k > (n := len(arr)):
logger.warning(f"Requested {k} items but array has size {n}.")
k = n

idx = np.argpartition(arr, -k if top else k) # linear time
idx = idx[-k:] if top else idx[:k]
Expand Down
4 changes: 3 additions & 1 deletion src/grinch/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ class Config(BaseConfigurable.Config):
processors: List[BaseConfigurable.Config]
verbose: bool = Field(True, exclude=True)
write_key: str = "pipeline"
compression: str | int | None = None
# It may be desirable to write only the columns of adata without
# the data matrix to save memory. In that case, set no_data_write
# to True. This will replace the data matrix with a sparse matrix
Expand Down Expand Up @@ -165,7 +166,8 @@ def __call__(self, adata: AnnData | None = None, **kwargs) -> DataSplitter:
if self.cfg.data_writepath is not None:
logger.info(f"Writting AnnData at '{self.cfg.data_writepath}'...")
ds.write_h5ad(str(self.cfg.data_writepath),
no_data_write=self.cfg.no_data_write)
no_data_write=self.cfg.no_data_write,
compression=self.cfg.compression)
return ds

def _apply(self, ds: DataSplitter, processor: BaseConfigurable) -> None:
Expand Down
6 changes: 3 additions & 3 deletions src/grinch/processors/splitter.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,14 @@ class DataSplitter:
def is_split(self) -> bool:
return any_not_None(self.VAL_SPLIT, self.TEST_SPLIT)

def write_h5ad(self, path: str, no_data_write: bool = False) -> None:
def write_h5ad(self, path: str, no_data_write: bool = False, **kwargs) -> None:
"""Writes anndata to path. If any of VAL or TEST splits are not
None, will instead write both to a folder with the name specified
in path.
"""
if not any_not_None(self.VAL_SPLIT, self.TEST_SPLIT):
to_write = as_empty(self.TRAIN_SPLIT) if no_data_write else self.TRAIN_SPLIT
to_write.write_h5ad(path)
to_write.write_h5ad(path, **kwargs)
return

if path.endswith('.h5ad'):
Expand All @@ -47,7 +47,7 @@ def write_h5ad(self, path: str, no_data_write: bool = False) -> None:
if os.path.exists(path_to_write):
logger.warning(f"Object {path_to_write} exists. This will be overwritten.")
to_write = as_empty(sp) if no_data_write else sp
to_write.write_h5ad(path_to_write)
to_write.write_h5ad(path_to_write, **kwargs)


class Splitter(BaseConfigurable, StorageMixin):
Expand Down
2 changes: 1 addition & 1 deletion src/grinch/utils/stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,7 @@ def _compute_log2fc(mean1, mean2, base='e', is_logged=False):
base = np.e if base == 'e' else float(base)
log2fc *= np.log2(base)
else:
log2fc = np.log2((mean1 + 1) / (mean2 + 1))
log2fc = np.log2(mean1 + 1) - np.log2(mean2 + 1)
return log2fc


Expand Down

0 comments on commit 4307d75

Please sign in to comment.