Skip to content

Commit

Permalink
Network predictions in GENIE3 format. Other superficial changes.
Browse files Browse the repository at this point in the history
  • Loading branch information
David Merrell committed May 5, 2020
1 parent ac32df8 commit f5c9882
Show file tree
Hide file tree
Showing 4 changed files with 79 additions and 6 deletions.
18 changes: 16 additions & 2 deletions run_ssps/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,12 @@ configfile: "ssps_config.yaml"
# input/output files and directories (values come from the config file)
# time series input file
TIMESERIES = config["ts_file"]
# prior input file
PRIOR = config["prior_file"]
# node name file, passed to json_to_genie.py by rule to_genie
NODE_NAME_FILE = config["node_name_file"]
# final prediction file requested by rule all
OUTFILE = config["prediction_file"]
# directory holding intermediate files (per-chain JSON, predictions.json)
TEMPDIR = config["temp_dir"]
# intermediate JSON predictions; consumed by rule to_genie
PRED_JSON = os.path.join(TEMPDIR, "predictions.json")
# both of the following are resolved relative to the parent directory
JULIA_PROJ_DIR = os.path.join(os.path.pardir, "julia-project")
SCRIPT_DIR = os.path.join(os.path.pardir, "scripts")

# MCMC hyperparameters
MCMC_PARAMS = config["mcmc_hyperparams"]
Expand All @@ -38,16 +41,27 @@ rule all:
input:
OUTFILE


# Convert the intermediate JSON predictions (PRED_JSON) into the final
# prediction file (OUTFILE) by running scripts/json_to_genie.py.
# The node-name file is forwarded as the script's optional third argument.
rule to_genie:
    input:
        nn=NODE_NAME_FILE,
        pred=PRED_JSON
    output:
        OUTFILE
    shell:
        "python {SCRIPT_DIR}/json_to_genie.py {input.pred} {OUTFILE} {input.nn}"


# Run the Julia post-processing script on the per-chain sample files and
# write a single output file.
rule postprocess_mcmc:
input:
# the Julia script that is executed by the shell command below
pp=JULIA_PROJ_DIR+"/postprocess_samples.jl",
# one JSON file per entry of CHAINS, located in TEMPDIR
raw=expand(TEMPDIR+"/{chain}.json", chain=CHAINS)
output:
# NOTE(review): two `out=` entries appear here (OUTFILE, then PRED_JSON);
# this looks like the removed and added sides of a diff -- confirm that
# only `out=PRED_JSON` is intended (rule to_genie reads PRED_JSON).
out=OUTFILE
out=PRED_JSON
resources:
runtime=3600,
threads=1,
# NOTE(review): likewise two `mem_mb=` entries (6000, then 3000) -- confirm
# which value is current.
mem_mb=6000
mem_mb=3000
shell:
"julia --project={JULIA_PROJ_DIR} {input.pp} --chain-samples {input.raw} --output-file {output.out}"

Expand Down
4 changes: 3 additions & 1 deletion run_ssps/ssps_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
# Input and output files
ts_file: "my_timeseries.csv" # Must be TAB SEPARATED (for now)
prior_file: "my_prior.csv" # Must be COMMA SEPARATED (for now)
node_name_file: "node_names.json" # Optional. If included, must be a JSON list.
# Otherwise, set to empty string: "".
prediction_file: "my_predictions.json"

temp_dir: "temp"
Expand All @@ -15,5 +17,5 @@ mcmc_hyperparams:
lambda_prop_std: 3.0
large_indeg: 20
n_chains: 4
timeout: 60
timeout: 300

53 changes: 53 additions & 0 deletions scripts/json_to_genie.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
import json
import pandas as pd
import sys

"""
Converts lists of weighted parent sets into
a dataframe of weighted edges
"""
def psets_to_edgedf(parent_sets, node_names=None):
    """Flatten weighted parent sets into a dataframe of weighted edges.

    Args:
        parent_sets: sequence of sequences of weights. Entry
            ``parent_sets[j][i]`` becomes the weight of the row
            ``(node_names[i], node_names[j])``.
        node_names: optional sequence of node labels indexed by node
            position. When ``None``, labels ``"node_0", "node_1", ...``
            are generated, one per entry of ``parent_sets``.

    Returns:
        A ``pandas.DataFrame`` with integer column labels ``0``, ``1``
        and ``2`` (first node, second node, weight), sorted by weight in
        descending order. Empty input yields an empty frame with the
        same three columns.

    Raises:
        IndexError: if ``node_names`` has fewer entries than the data
            requires.
    """
    if node_names is None:
        node_names = ["node_{}".format(i) for i in range(len(parent_sets))]

    # Gather every row first and build the frame once. Appending to a
    # DataFrame row by row copies the whole frame each time, and
    # DataFrame.append no longer exists in pandas >= 2.0.
    rows = []
    for j, ps in enumerate(parent_sets):
        for i, p_prob in enumerate(ps):
            rows.append((node_names[i], node_names[j], p_prob))

    # Fixing the columns up front keeps the sort below valid even when
    # there are no rows at all.
    edge_df = pd.DataFrame(rows, columns=[0, 1, 2])

    return edge_df.sort_values(2, ascending=False)


if __name__ == "__main__":
    # Usage: python json_to_genie.py PRED_JSON OUT_FILE [NODE_NAME_JSON]

    # get arguments
    pred_file = sys.argv[1]
    out_file = sys.argv[2]
    node_names = None

    # optionally: read in a JSON file of node names.
    # The pipeline config uses an empty string to mean "no node-name file",
    # so an empty third argument is treated the same as a missing one
    # instead of being handed to open().
    if len(sys.argv) > 3 and sys.argv[3]:
        name_file = sys.argv[3]
        with open(name_file, "r") as f:
            node_names = json.load(f)

    # load prediction file
    with open(pred_file) as f:
        preds = json.load(f)

    # convert parent sets to an edge-list dataframe.
    # The prediction JSON stores, under "edge_conf_key", the name of the
    # key that holds the parent sets.
    parent_sets = preds[preds["edge_conf_key"]]
    edge_df = psets_to_edgedf(parent_sets, node_names=node_names)

    # output GENIE3: tab-separated, no header row, no index column
    edge_df.to_csv(out_file, sep="\t", header=False, index=False)


10 changes: 7 additions & 3 deletions scripts/preprocess_dream_prior.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,11 @@ def build_weighted_adj(eda_filename):

print(adj)

return adj, antibody_map
antibody_ls = [0 for i in antibody_map]
for (name, idx) in antibody_map.items():
antibody_ls[idx] = name

return adj, antibody_ls



Expand All @@ -38,11 +42,11 @@ def build_weighted_adj(eda_filename):
parser.add_argument("antibody_file", help="path to output JSON file containing the indices of antibodies")
args = parser.parse_args()

adj_mat, antibody_map = build_weighted_adj(args.eda_file)
adj_mat, antibody_ls = build_weighted_adj(args.eda_file)

df = pd.DataFrame(adj_mat)
df.to_csv(args.output_file, sep=",", index=False, header=False)

json.dump(antibody_map, open(args.antibody_file, "w"))
json.dump(antibody_ls, open(args.antibody_file, "w"))


0 comments on commit f5c9882

Please sign in to comment.