Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding Header Lines #142

Merged
merged 29 commits into from
Jul 10, 2024
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
2964db1
almost done with adding header files
ntalluri Dec 22, 2023
1b340bb
precommit
ntalluri Dec 22, 2023
5edf8a7
updated summary.py code
ntalluri Dec 23, 2023
733b736
precommit
ntalluri Dec 23, 2023
3964789
added changes to cytoscape
ntalluri Jan 19, 2024
7fc351b
Merge branch 'master' into header
agitter Jan 19, 2024
9d122e1
precommit
ntalluri Jan 24, 2024
ada6cde
update config
ntalluri Feb 9, 2024
0f1cba7
review
ntalluri Feb 9, 2024
a9de3ed
update ml
ntalluri Feb 9, 2024
6a9ea4a
update ml and test cases
ntalluri Feb 9, 2024
f9e989e
update contributing guide
ntalluri Feb 9, 2024
278b761
ml changes
ntalluri Mar 12, 2024
4dfd018
precommit to ml
ntalluri Mar 12, 2024
c6da91a
attempting error checking for empty df read from rpw
ntalluri Mar 12, 2024
c183abf
cleaned up code
ntalluri Mar 13, 2024
01ad342
precommit
ntalluri Mar 13, 2024
129532c
clean up new util func, add new test, add to contributing guide
ntalluri Mar 18, 2024
bbd9075
trying to fix error
ntalluri Mar 18, 2024
0f4510c
testing mcf tester with macos-latest
ntalluri Mar 21, 2024
36d556b
revert
ntalluri Mar 21, 2024
6945db6
Merge branch 'master' into header
agitter Jun 14, 2024
f5b880b
updated contributing guide
ntalluri Jun 14, 2024
322bfa5
updated new ML test files to include headers
ntalluri Jun 14, 2024
1ffe1d7
output docs
ntalluri Jun 17, 2024
7db6ea0
Code review and formatting updates
agitter Jul 4, 2024
cac0638
Resolve merge conflicts
agitter Jul 4, 2024
026e7e0
Bump version to 0.2.0
agitter Jul 4, 2024
d6b019a
Fix post_domino_id_transform
agitter Jul 4, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion docker-wrappers/Cytoscape/cytoscape_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,9 @@ def load_pathways(pathways: List[str], output: str) -> None:
suid = p4c.networks.import_network_from_tabular_file(
file=path,
column_type_list='s,t,x,ea',
delimiters='\t'
delimiters='\t',
first_row_as_column_names=True,

ntalluri marked this conversation as resolved.
Show resolved Hide resolved
)
p4c.networks.rename_network(name, network=suid)

Expand Down
4 changes: 3 additions & 1 deletion spras/allpairs.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,8 @@ def parse_output(raw_pathway_file, standardized_pathway_file):
@param standardized_pathway_file: the same pathway written in the universal format
"""
df = pd.read_csv(raw_pathway_file, sep='\t', header=None)

df['Rank'] = 1 # add a rank column of 1s since the edges are not ranked.
df = reinsert_direction_col_undirected(df)
df.to_csv(standardized_pathway_file, header=False, index=False, sep='\t')
df.columns = ['Node1', 'Node2', 'Rank', 'Direction']
ntalluri marked this conversation as resolved.
Show resolved Hide resolved
df.to_csv(standardized_pathway_file, header=True, index=False, sep='\t')
2 changes: 1 addition & 1 deletion spras/analysis/cytoscape.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def run_cytoscape(pathways: List[Union[str, PurePath]], output_file: str, contai

print('Running Cytoscape with arguments: {}'.format(' '.join(command)), flush=True)

container_suffix = "py4cytoscape:v2"
container_suffix = "py4cytoscape:v3"
ntalluri marked this conversation as resolved.
Show resolved Hide resolved
out = run_container(container_framework,
container_suffix,
command,
Expand Down
8 changes: 4 additions & 4 deletions spras/analysis/graphspace.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,21 +77,21 @@ def load_graph(path: str) -> Tuple[Union[nx.Graph, nx.DiGraph], bool]:
directed = False

try:
pathways = pd.read_csv(path, sep="\t", header=None)
pathways = pd.read_csv(path, sep="\t", header=0)
except pd.errors.EmptyDataError:
print(f"The file {path} is empty.")
return G, directed
pathways.columns = ["Interactor1", "Interactor2", "Rank", "Direction"]

mask_u = pathways['Direction'] == 'U'
mask_d = pathways['Direction'] == 'D'
pathways.drop(columns=["Direction"])

if mask_u.all():
G = nx.from_pandas_edgelist(pathways, "Interactor1", "Interactor2", ["Rank"])
G = nx.from_pandas_edgelist(pathways, "Node1", "Node2", ["Rank"])
directed = False

elif mask_d.all():
G = nx.from_pandas_edgelist(pathways, "Interactor1", "Interactor2", ["Rank"], create_using=nx.DiGraph())
G = nx.from_pandas_edgelist(pathways, "Node1", "Node2", ["Rank"], create_using=nx.DiGraph())
directed = True
else:
print(f"{path} could not be visualized. GraphSpace does not support mixed direction type graphs currently")
Expand Down
7 changes: 5 additions & 2 deletions spras/analysis/ml.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def summarize_networks(file_paths: Iterable[Union[str, PathLike]]) -> pd.DataFra
edges = []
for line in lines:
parts = line.split('\t')
if len(parts) > 0: # in case of empty line in file
if len(parts) >= 4: # in case of empty line in file or line doesn't include all values
ntalluri marked this conversation as resolved.
Show resolved Hide resolved
node1 = parts[0]
node2 = parts[1]
direction = str(parts[3]).strip()
Expand All @@ -55,7 +55,10 @@ def summarize_networks(file_paths: Iterable[Union[str, PathLike]]) -> pd.DataFra
# node order does matter for directed edges
edges.append(DIR_CONST.join([node1, node2]))
else:
ValueError(f"direction is {direction}, rather than U or D")
if direction == 'Direction': # if reading the header
ntalluri marked this conversation as resolved.
Show resolved Hide resolved
continue
else:
raise ValueError(f"direction is {direction}, rather than U or D")

# getting the algorithm name
p = PurePath(file)
Expand Down
9 changes: 7 additions & 2 deletions spras/analysis/summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,13 @@ def summarize_networks(file_paths: Iterable[Path], node_table: pd.DataFrame) ->

# Iterate through each network file path
for file_path in sorted(file_paths):
# Load in the network
nw = nx.read_edgelist(file_path, data=(('weight', float), ('Direction',str)))

lines = None
with open(file_path, 'r') as f:
lines = f.readlines()[1:] # skip the first line

nw = nx.read_edgelist(lines, data=(('weight', float), ('Direction', str)))

# Save the network name, number of nodes, number edges, and number of connected components
nw_name = str(file_path)
number_nodes = nw.number_of_nodes()
Expand Down
6 changes: 4 additions & 2 deletions spras/domino.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,8 +205,10 @@ def parse_output(raw_pathway_file, standardized_pathway_file):
edges_df['source'] = edges_df['source'].apply(post_domino_id_transform)
edges_df['target'] = edges_df['target'].apply(post_domino_id_transform)
edges_df = reinsert_direction_col_undirected(edges_df)

edges_df.to_csv(standardized_pathway_file, sep='\t', header=False, index=False)
edges_df.columns = ['Node1', 'Node2', 'Rank', 'Direction']
edges_df.to_csv(standardized_pathway_file, sep='\t', header=True, index=False)
else:
edges_df.to_csv(standardized_pathway_file, sep='\t', header=None, index=False)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will this output a completely blank file? Should we output the header row now instead? We could do that without pandas if it is hard to do with pandas.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Intermittently, the output file for DOMINO (and only for DOMINO) gets an extra '\n' written to it when header=True. Writing with header=None here was our quick fix to prevent that.



def pre_domino_id_transform(node_id):
Expand Down
6 changes: 3 additions & 3 deletions spras/meo.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,6 @@ def parse_output(raw_pathway_file, standardized_pathway_file):
# Would need to load the paths output file to rank edges correctly
df = add_rank_column(df)
df = reinsert_direction_col_directed(df)

df.to_csv(standardized_pathway_file, columns=['Source', 'Target', 'Rank', "Direction"], header=False,
index=False, sep='\t')
df.drop(columns=['Type', 'Oriented', 'Weight'], inplace = True)
df.columns = ['Node1', 'Node2', 'Rank', "Direction"]
df.to_csv(standardized_pathway_file, header=True, index=False, sep='\t')
3 changes: 2 additions & 1 deletion spras/mincostflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,5 +155,6 @@ def parse_output(raw_pathway_file, standardized_pathway_file):
# TODO update MinCostFlow version to support mixed graphs
# Currently directed edges in the input will be converted to undirected edges in the output
df = reinsert_direction_col_undirected(df)
df.to_csv(standardized_pathway_file, header=False, index=False, sep='\t')
df.columns = ['Node1', 'Node2', 'Rank', "Direction"]
df.to_csv(standardized_pathway_file, header=True, index=False, sep='\t')

6 changes: 3 additions & 3 deletions spras/omicsintegrator1.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,6 @@ def parse_output(raw_pathway_file, standardized_pathway_file):
df.columns = ["Edge1", "InteractionType", "Edge2"]
df = add_rank_column(df)
df = reinsert_direction_col_mixed(df, "InteractionType", "pd", "pp")

df.to_csv(standardized_pathway_file, columns=['Edge1', 'Edge2', 'Rank', "Direction"], header=False, index=False,
sep='\t')
df.drop(columns=['InteractionType'], inplace = True)
df.columns = ['Node1', 'Node2', 'Rank', "Direction"]
df.to_csv(standardized_pathway_file, header=True, index=False, sep='\t')
3 changes: 2 additions & 1 deletion spras/omicsintegrator2.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,4 +157,5 @@ def parse_output(raw_pathway_file, standardized_pathway_file):
df = df.take([0, 1], axis=1)
df = add_rank_column(df)
df = reinsert_direction_col_undirected(df)
df.to_csv(standardized_pathway_file, header=False, index=False, sep='\t')
df.columns = ['Node1', 'Node2', 'Rank', "Direction"]
df.to_csv(standardized_pathway_file, header=True, index=False, sep='\t')
3 changes: 2 additions & 1 deletion spras/pathlinker.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,4 +139,5 @@ def parse_output(raw_pathway_file, standardized_pathway_file):
# What about multiple raw_pathway_files
df = pd.read_csv(raw_pathway_file, sep='\t').take([0, 1, 2], axis=1)
df = reinsert_direction_col_directed(df)
df.to_csv(standardized_pathway_file, header=False, index=False, sep='\t')
df.columns = ['Node1', 'Node2', 'Rank', "Direction"]
df.to_csv(standardized_pathway_file, header=True, index=False, sep='\t')
ntalluri marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
Node1 Node2 Rank Direction
ABI1_HUMAN MK01_HUMAN 1 U
CBLB_HUMAN EGFR_HUMAN 1 U
CBL_HUMAN CD2AP_HUMAN 1 U
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
Node1 Node2 Rank Direction
ABI1_HUMAN MK01_HUMAN 1 U
CBLB_HUMAN EGFR_HUMAN 1 U
CBL_HUMAN CD2AP_HUMAN 1 U
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
Node1 Node2 Rank Direction
ABI1_HUMAN MK01_HUMAN 1 U
CBL_HUMAN CD2AP_HUMAN 1 U
CBL_HUMAN CRKL_HUMAN 1 U
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
Node1 Node2 Rank Direction
EGF_HUMAN EGFR_HUMAN 1 U
EGF_HUMAN S10A4_HUMAN 2 U
S10A4_HUMAN MYH9_HUMAN 2 U
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
Node1 Node2 Rank Direction
EGF_HUMAN EGFR_HUMAN 1 U
EGF_HUMAN S10A4_HUMAN 2 U
S10A4_HUMAN MYH9_HUMAN 2 U
Expand Down
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
Node1 Node2 Rank Direction
A B 1 D
B C 1 D
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
Node1 Node2 Rank Direction
A B 1 U
B C 1 U
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
Node1 Node2 Rank Direction
A B 1 U
B C 1 U
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
Node1 Node2 Rank Direction
B A 1 U
B C 1 U
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
Node1 Node2 Rank Direction
A B 1 D
B C 1 D
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
Node1 Node2 Rank Direction
A B 1 D
B C 1 D
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
Node1 Node2 Rank Direction
A B 1 D
B C 1 D
A D 1 D
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
Node1 Node2 Rank Direction
A D 1 U
G H 1 U
G I 1 U
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
Node1 Node2 Rank Direction
A D 1 U
G H 1 U
G I 1 U
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
Node1 Node2 Rank Direction
C D 1 U
C F 1 U
A D 1 U
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
Node1 Node2 Rank Direction
A B 1 D
B C 1 D
A D 2 D
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
Node1 Node2 Rank Direction
A B 1 D
B C 1 D
A D 2 D
1 change: 1 addition & 0 deletions test/analysis/input/standardized-ranked.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
Node1 Node2 Rank Direction
A B 1 U
A C 3 U
C D 5 U
Expand Down
1 change: 1 addition & 0 deletions test/analysis/input/toy/network1.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
Node1 Node2 Rank Direction
A B 1 U
C D 1 U
E F 1 U
Expand Down
1 change: 1 addition & 0 deletions test/analysis/input/toy/network2.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
Node1 Node2 Rank Direction
A B 1 U
B C 1 U
C D 1 U
Expand Down
1 change: 1 addition & 0 deletions test/analysis/input/toy/network3.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
Node1 Node2 Rank Direction
A B 1 U
A C 1 U
A D 1 U
Expand Down
1 change: 1 addition & 0 deletions test/analysis/input/toy/network4.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
Node1 Node2 Rank Direction
A B 1 U
B C 1 U
D E 1 U
Expand Down
1 change: 1 addition & 0 deletions test/analysis/input/toy/network5.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
Node1 Node2 Rank Direction
A B 1 U
B C 1 U
C D 1 U
Expand Down
1 change: 1 addition & 0 deletions test/ml/input/test-data-longName/longName.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
Node1 Node2 Rank Direction
node1 node2 1 U
node1 node3 1 U
node4 node5 1 U
Expand Down
1 change: 1 addition & 0 deletions test/ml/input/test-data-longName2/longName2.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
Node1 Node2 Rank Direction
node3 node2 1 U
node1 node3 1 U
node5 node4 1 U
Expand Down
1 change: 1 addition & 0 deletions test/ml/input/test-data-s1/s1.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
Node1 Node2 Rank Direction
A B 1 U
C D 1 U
E F 1 U
1 change: 1 addition & 0 deletions test/ml/input/test-data-s2/s2.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
Node1 Node2 Rank Direction
A B 1 U
C D 1 U
E F 1 U
Expand Down
1 change: 1 addition & 0 deletions test/ml/input/test-data-s3/s3.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
Node1 Node2 Rank Direction
L M 1 U
M N 1 U
O P 1 U
Expand Down
1 change: 1 addition & 0 deletions test/ml/input/test-data-spaces/spaces.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
Node1 Node2 Rank Direction
L M 1 U
O P 1 U
nodes with spaces in name 1 U
1 change: 1 addition & 0 deletions test/ml/input/test-mixed-direction/mixed-direction.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
Node1 Node2 Rank Direction
A B 1 D
B A 1 D
C D 1 U
Expand Down
1 change: 1 addition & 0 deletions test/parse-outputs/expected/allpairs-pathway-expected.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
Node1 Node2 Rank Direction
S1 A 1 U
S1 B 1 U
A E 1 U
Expand Down
1 change: 1 addition & 0 deletions test/parse-outputs/expected/domino-pathway-expected.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
Node1 Node2 Rank Direction
ENSG00000122691 ENSG00000138757 1 U
ENSG00000122691 ENSG00000109320 1 U
ENSG00000134954 ENSG00000077150 1 U
Expand Down
1 change: 1 addition & 0 deletions test/parse-outputs/expected/meo-pathway-expected.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
Node1 Node2 Rank Direction
GENEA GENEC 1 D
GENEC GENEB 1 D
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
Node1 Node2 Rank Direction
B A 1 U
D B 1 U
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
Node1 Node2 Rank Direction
A C 1 D
C D 1 U
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
Node1 Node2 Rank Direction
B A 1 U
B C 1 U
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
Node1 Node2 Rank Direction
S2 T3 1 D
A E 2 D
S1 A 2 D
Expand Down
Loading