-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Network predictions in GENIE3 format. Other superficial changes.
- Loading branch information
David Merrell
committed
May 5, 2020
1 parent
ac32df8
commit f5c9882
Showing
4 changed files
with
79 additions
and
6 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
import json | ||
import pandas as pd | ||
import sys | ||
|
||
""" | ||
Converts lists of weighted parent sets into | ||
a dataframe of weighted edges | ||
""" | ||
def psets_to_edgedf(parent_sets, node_names=None):
    """
    Convert lists of weighted parent sets into a dataframe of weighted edges.

    Parameters
    ----------
    parent_sets : sequence of sequences
        ``parent_sets[j][i]`` is the weight recorded for the pair
        (node ``i``, node ``j``).
    node_names : sequence of str, optional
        Names used to label nodes by position. When omitted, names of the
        form ``"node_<k>"`` are generated, one per entry of ``parent_sets``.

    Returns
    -------
    pandas.DataFrame
        One row per weight, with integer column labels:
        ``0`` -> ``node_names[i]``, ``1`` -> ``node_names[j]``,
        ``2`` -> the weight. Rows are sorted by column ``2``, largest first.
        An empty ``parent_sets`` yields an empty frame with the same columns.
    """
    if node_names is None:
        node_names = ["node_{}".format(k) for k in range(len(parent_sets))]

    # Collect every row first and build the frame once: DataFrame.append was
    # removed in pandas 2.0, and appending row by row copies the whole frame
    # on each iteration.
    rows = [
        (node_names[i], node_names[j], p_prob)
        for j, ps in enumerate(parent_sets)
        for i, p_prob in enumerate(ps)
    ]

    # Explicit columns keep the frame well-formed (and sortable) even when
    # there are no rows at all.
    edge_df = pd.DataFrame(rows, columns=[0, 1, 2])

    # A stable sort keeps equally weighted edges in a reproducible order.
    edge_df = edge_df.sort_values(2, ascending=False, kind="mergesort")

    return edge_df
|
||
|
||
if __name__ == "__main__":

    # Command line: <prediction JSON> <output path> [<node-name JSON>]
    cli_args = sys.argv[1:]
    pred_file, out_file = cli_args[0], cli_args[1]

    # The third argument, when given, is a JSON file holding the node names.
    node_names = None
    if len(cli_args) > 2:
        name_file = cli_args[2]
        with open(name_file, "r") as names_handle:
            node_names = json.load(names_handle)

    # Read the predictions.
    with open(pred_file) as preds_handle:
        preds = json.load(preds_handle)

    # The "edge_conf_key" entry names the key under which the
    # parent sets are stored in the prediction file.
    conf_key = preds["edge_conf_key"]
    edge_df = psets_to_edgedf(preds[conf_key], node_names=node_names)

    # Write the edge list as tab-separated text, no header and no index.
    edge_df.to_csv(out_file, sep="\t", header=False, index=False)
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters