Skip to content

Commit

Permalink
Make sure gsea step is seeded!
Browse files Browse the repository at this point in the history
  • Loading branch information
SeppeDeWinter committed Jan 10, 2025
1 parent cf1f27d commit e63278b
Show file tree
Hide file tree
Showing 4 changed files with 29 additions and 23 deletions.
8 changes: 6 additions & 2 deletions src/scenicplus/cli/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -817,7 +817,8 @@ def infer_grn(
keep_only_activating: bool,
rho_threshold: float,
min_target_genes: int,
n_cpu: int):
n_cpu: int,
seed: int):
"""
Infer gene regulatory network.
Expand Down Expand Up @@ -869,6 +870,8 @@ def infer_grn(
Minimum number of target genes.
n_cpu : int
Number of parallel processes to run.
seed: int
Random seed to use.
"""
from scenicplus.grn_builder.gsea_approach import build_grn
Expand Down Expand Up @@ -906,7 +909,8 @@ def infer_grn(
min_target_genes=min_target_genes,
n_cpu=n_cpu,
merge_eRegulons=True,
disable_tqdm=False)
disable_tqdm=False,
seed=seed)

log.info("Formatting eGRN as table.")
eRegulon_metadata = _format_egrns(
Expand Down
8 changes: 7 additions & 1 deletion src/scenicplus/cli/scenicplus.py
Original file line number Diff line number Diff line change
Expand Up @@ -880,7 +880,8 @@ def eGRN(arg):
keep_only_activating=arg.keep_only_activating_eRegulons,
rho_threshold=arg.rho_threshold,
min_target_genes=arg.min_target_genes,
n_cpu=arg.n_cpu)
n_cpu=arg.n_cpu,
seed=arg.seed)

parser.set_defaults(func=eGRN)
# Required arguments
Expand Down Expand Up @@ -984,6 +985,11 @@ def eGRN(arg):
action="store", type=int, required=False,
default=1,
help="Number of cores to use. Default is 1.")
parser.add_argument(
"--seed", dest="seed",
action="store", type=int, required=False,
default=666,
help="Seed to use. Default is 666.")

def add_parser_for_aucell(subparser:argparse._SubParsersAction):
parser:argparse.ArgumentParser = subparser.add_parser(
Expand Down
18 changes: 4 additions & 14 deletions src/scenicplus/grn_builder/gsea.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,14 @@
from gseapy.algorithm import enrichment_score
from gseapy.algorithm import gsea_compute
from gseapy.plot import GSEAPlot
import numpy as np
import pandas as pd

seed = 666


def run_gsea(ranked_gene_list: pd.Series,
gene_set: list,
n_perm: int = 1000,
ascending: bool = False,
return_res: bool = False,
name: str = 'gene_set'):
name: str = 'gene_set',
seed: int = 555):
"""
Calculates gene set enrichment score (and estimates its significance) and leading edge for the gene set in the ranked gene list using gseapy prerank.
Expand All @@ -39,7 +35,7 @@ def run_gsea(ranked_gene_list: pd.Series,
gmt = {name: list(gene_set)}
gsea_results, ind, rank_ES, gs = gsea_compute(data=ranked_gene_list, n=n_perm, gmt=gmt,
weighted_score_type=1, permutation_type='gene_set', method=None,
pheno_pos='Pos', pheno_neg='Neg', classes=None, ascending=ascending)
pheno_pos='Pos', pheno_neg='Neg', classes=None, ascending=ascending, seed = seed)

# extract enrichment scores
gseale = list(gsea_results)[0]
Expand All @@ -62,13 +58,7 @@ def run_gsea(ranked_gene_list: pd.Series,
pval = gseale[2]
fdr = gseale[3]
LE = list(map(str, ranked_gene_list.iloc[ldg_pos].index))
if return_res:
res = GSEAPlot(ranked_gene_list, name, ind, nes, pval, fdr, RES,
pheno_pos='', pheno_neg='', figsize=(6, 5.5),
cmap='seismic', ofname=None)
return nes, pval, LE, res
else:
return nes, pval, LE
return nes, pval, LE


def run_enrichr(ranked_gene_list: np.array,
Expand Down
18 changes: 12 additions & 6 deletions src/scenicplus/grn_builder/gsea_approach.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,11 +62,13 @@
logging.basicConfig(level=level, format=format, handlers=handlers)
log = logging.getLogger('GSEA')


def _run_gsea_for_e_module(
e_module:eRegulon,
rnk:pd.Series,
gsea_n_perm:int,
context: frozenset):
context: frozenset,
seed: int):
"""
Helper function to run gsea for single e_module
Expand All @@ -92,11 +94,12 @@ def _run_gsea_for_e_module(
NES, pval, LE_genes = run_gsea(
ranked_gene_list=rnk,
gene_set=gene_set,
n_perm=gsea_n_perm)
n_perm=gsea_n_perm,
seed=seed)
except:
NES = np.nan
pval = np.nan
LE_genes = np.nan
LE_genes = [np.nan]
return eRegulon(
transcription_factor=TF,
cistrome_name=e_module.cistrome_name,
Expand Down Expand Up @@ -134,6 +137,7 @@ def build_grn(
n_cpu=1,
merge_eRegulons=True,
disable_tqdm=False,
seed=555,
**kwargs) -> List[eRegulon]:
log.info('Thresholding region to gene relationships')
# some tfs are missing from tf_to_gene because they are not
Expand Down Expand Up @@ -185,9 +189,10 @@ def build_grn(
e_module=e_module,
rnk=TF_to_ranking_pos[e_module.transcription_factor],
gsea_n_perm=gsea_n_perm,
context=frozenset(['positive tf2g']))
context=frozenset(['positive tf2g']),
seed=seed)
for e_module in tqdm(
e_modules,
e_modules,
total = len(e_modules),
desc="Running for Positive TF to gene")
if e_module.transcription_factor in pos_TFs)
Expand All @@ -198,7 +203,8 @@ def build_grn(
e_module=e_module,
rnk=TF_to_ranking_neg[e_module.transcription_factor],
gsea_n_perm=gsea_n_perm,
context=frozenset(['negative tf2g']))
context=frozenset(['negative tf2g']),
seed=seed)
for e_module in tqdm(
e_modules,
total = len(e_modules),
Expand Down

0 comments on commit e63278b

Please sign in to comment.