Reed-CompBio · agitter · Jul 10, 2024 · Dec 22, 2023 · Dec 22, 2023 · Dec 23, 2023
diff --git a/docker-wrappers/Cytoscape/cytoscape_util.py b/docker-wrappers/Cytoscape/cytoscape_util.py
@@ -116,7 +116,9 @@ def load_pathways(pathways: List[str], output: str) -> None:
         suid = p4c.networks.import_network_from_tabular_file(
             file=path,
             column_type_list='s,t,x,ea',
-            delimiters='\t'
+            delimiters='\t',
+            first_row_as_column_names=True,
+
         )
         p4c.networks.rename_network(name, network=suid)
 

diff --git a/spras/allpairs.py b/spras/allpairs.py
@@ -112,6 +112,8 @@ def parse_output(raw_pathway_file, standardized_pathway_file):
         @param standardized_pathway_file: the same pathway written in the universal format
         """
         df = pd.read_csv(raw_pathway_file, sep='\t', header=None)
+
         df['Rank'] = 1  # add a rank column of 1s since the edges are not ranked.
         df = reinsert_direction_col_undirected(df)
-        df.to_csv(standardized_pathway_file, header=False, index=False, sep='\t')
+        df.columns = ['Node1', 'Node2', 'Rank', 'Direction']
+        df.to_csv(standardized_pathway_file, header=True, index=False, sep='\t')
diff --git a/spras/analysis/cytoscape.py b/spras/analysis/cytoscape.py
@@ -48,7 +48,7 @@ def run_cytoscape(pathways: List[Union[str, PurePath]], output_file: str, contai
 
     print('Running Cytoscape with arguments: {}'.format(' '.join(command)), flush=True)
 
-    container_suffix = "py4cytoscape:v2"
+    container_suffix = "py4cytoscape:v3"
     out = run_container(container_framework,
                         container_suffix,
                         command,

diff --git a/spras/analysis/graphspace.py b/spras/analysis/graphspace.py
@@ -77,21 +77,21 @@ def load_graph(path: str) -> Tuple[Union[nx.Graph, nx.DiGraph], bool]:
     directed = False
 
     try:
-        pathways = pd.read_csv(path, sep="\t", header=None)
+        pathways = pd.read_csv(path, sep="\t", header=0)
     except pd.errors.EmptyDataError:
         print(f"The file {path} is empty.")
         return G, directed
-    pathways.columns = ["Interactor1", "Interactor2", "Rank", "Direction"]
+
     mask_u = pathways['Direction'] == 'U'
     mask_d = pathways['Direction'] == 'D'
     pathways.drop(columns=["Direction"])
 
     if mask_u.all():
-        G = nx.from_pandas_edgelist(pathways, "Interactor1", "Interactor2", ["Rank"])
+        G = nx.from_pandas_edgelist(pathways, "Node1", "Node2", ["Rank"])
         directed = False
 
     elif mask_d.all():
-        G = nx.from_pandas_edgelist(pathways, "Interactor1", "Interactor2", ["Rank"], create_using=nx.DiGraph())
+        G = nx.from_pandas_edgelist(pathways, "Node1", "Node2", ["Rank"], create_using=nx.DiGraph())
         directed = True
     else:
         print(f"{path} could not be visualized. GraphSpace does not support mixed direction type graphs currently")

diff --git a/spras/analysis/ml.py b/spras/analysis/ml.py
@@ -44,7 +44,7 @@ def summarize_networks(file_paths: Iterable[Union[str, PathLike]]) -> pd.DataFra
             edges = []
             for line in lines:
                 parts = line.split('\t')
-                if len(parts) > 0:  # in case of empty line in file
+                if len(parts) >= 4:  # skip empty lines and lines missing any of the four expected fields
                     node1 = parts[0]
                     node2 = parts[1]
                     direction = str(parts[3]).strip()
@@ -55,7 +55,10 @@ def summarize_networks(file_paths: Iterable[Union[str, PathLike]]) -> pd.DataFra
                         # node order does matter for directed edges
                         edges.append(DIR_CONST.join([node1, node2]))
                     else:
-                        ValueError(f"direction is {direction}, rather than U or D")
+                        if direction == 'Direction': # if reading the header
+                            continue
+                        else:
+                            raise ValueError(f"direction is {direction}, rather than U or D")
 
             # getting the algorithm name
             p = PurePath(file)

diff --git a/spras/analysis/summary.py b/spras/analysis/summary.py
@@ -33,8 +33,13 @@ def summarize_networks(file_paths: Iterable[Path], node_table: pd.DataFrame) ->
 
     # Iterate through each network file path
     for file_path in sorted(file_paths):
-        # Load in the network
-        nw = nx.read_edgelist(file_path, data=(('weight', float), ('Direction',str)))
+
+        lines = None
+        with open(file_path, 'r') as f:
+            lines = f.readlines()[1:]  # skip the header row (Node1, Node2, Rank, Direction)
+
+        nw = nx.read_edgelist(lines, data=(('weight', float), ('Direction', str)))
+
         # Save the network name, number of nodes, number edges, and number of connected components
         nw_name = str(file_path)
         number_nodes = nw.number_of_nodes()

diff --git a/spras/domino.py b/spras/domino.py
@@ -205,8 +205,10 @@ def parse_output(raw_pathway_file, standardized_pathway_file):
             edges_df['source'] = edges_df['source'].apply(post_domino_id_transform)
             edges_df['target'] = edges_df['target'].apply(post_domino_id_transform)
             edges_df = reinsert_direction_col_undirected(edges_df)
-
-        edges_df.to_csv(standardized_pathway_file, sep='\t', header=False, index=False)
+            edges_df.columns = ['Node1', 'Node2', 'Rank', 'Direction']
+            edges_df.to_csv(standardized_pathway_file, sep='\t', header=True, index=False)
+        else:
+            edges_df.to_csv(standardized_pathway_file, sep='\t', header=None, index=False)
 
 
 def pre_domino_id_transform(node_id):

diff --git a/spras/meo.py b/spras/meo.py
@@ -188,6 +188,6 @@ def parse_output(raw_pathway_file, standardized_pathway_file):
         # Would need to load the paths output file to rank edges correctly
         df = add_rank_column(df)
         df = reinsert_direction_col_directed(df)
-
-        df.to_csv(standardized_pathway_file, columns=['Source', 'Target', 'Rank', "Direction"], header=False,
-                  index=False, sep='\t')
+        df.drop(columns=['Type', 'Oriented', 'Weight'], inplace = True)
+        df.columns = ['Node1', 'Node2', 'Rank', "Direction"]
+        df.to_csv(standardized_pathway_file, header=True, index=False, sep='\t')
diff --git a/spras/mincostflow.py b/spras/mincostflow.py
@@ -155,5 +155,6 @@ def parse_output(raw_pathway_file, standardized_pathway_file):
         # TODO update MinCostFlow version to support mixed graphs
         # Currently directed edges in the input will be converted to undirected edges in the output
         df = reinsert_direction_col_undirected(df)
-        df.to_csv(standardized_pathway_file, header=False, index=False, sep='\t')
+        df.columns = ['Node1', 'Node2', 'Rank', "Direction"]
+        df.to_csv(standardized_pathway_file, header=True, index=False, sep='\t')
 
diff --git a/spras/omicsintegrator1.py b/spras/omicsintegrator1.py
@@ -201,6 +201,6 @@ def parse_output(raw_pathway_file, standardized_pathway_file):
         df.columns = ["Edge1", "InteractionType", "Edge2"]
         df = add_rank_column(df)
         df = reinsert_direction_col_mixed(df, "InteractionType", "pd", "pp")
-
-        df.to_csv(standardized_pathway_file, columns=['Edge1', 'Edge2', 'Rank', "Direction"], header=False, index=False,
-                  sep='\t')
+        df.drop(columns=['InteractionType'], inplace = True)
+        df.columns = ['Node1', 'Node2', 'Rank', "Direction"]
+        df.to_csv(standardized_pathway_file, header=True, index=False, sep='\t')
diff --git a/spras/omicsintegrator2.py b/spras/omicsintegrator2.py
@@ -157,4 +157,5 @@ def parse_output(raw_pathway_file, standardized_pathway_file):
         df = df.take([0, 1], axis=1)
         df = add_rank_column(df)
         df = reinsert_direction_col_undirected(df)
-        df.to_csv(standardized_pathway_file, header=False, index=False, sep='\t')
+        df.columns = ['Node1', 'Node2', 'Rank', "Direction"]
+        df.to_csv(standardized_pathway_file, header=True, index=False, sep='\t')
diff --git a/spras/pathlinker.py b/spras/pathlinker.py
@@ -139,4 +139,5 @@ def parse_output(raw_pathway_file, standardized_pathway_file):
         # What about multiple raw_pathway_files
         df = pd.read_csv(raw_pathway_file, sep='\t').take([0, 1, 2], axis=1)
         df = reinsert_direction_col_directed(df)
-        df.to_csv(standardized_pathway_file, header=False, index=False, sep='\t')
+        df.columns = ['Node1', 'Node2', 'Rank', "Direction"]
+        df.to_csv(standardized_pathway_file, header=True, index=False, sep='\t')
diff --git a/test/analysis/input/egfr/tps-egfr-omicsintegrator1-params-3THRXWW_pathway.txt b/test/analysis/input/egfr/tps-egfr-omicsintegrator1-params-3THRXWW_pathway.txt
@@ -1,3 +1,4 @@
+Node1	Node2	Rank	Direction
 ABI1_HUMAN MK01_HUMAN 1 U 
 CBLB_HUMAN EGFR_HUMAN 1 U
 CBL_HUMAN CD2AP_HUMAN 1 U

diff --git a/test/analysis/input/egfr/tps-egfr-omicsintegrator1-params-5QH767V_pathway.txt b/test/analysis/input/egfr/tps-egfr-omicsintegrator1-params-5QH767V_pathway.txt
@@ -1,3 +1,4 @@
+Node1	Node2	Rank	Direction
 ABI1_HUMAN MK01_HUMAN 1 U
 CBLB_HUMAN EGFR_HUMAN 1 U
 CBL_HUMAN CD2AP_HUMAN 1 U

diff --git a/test/analysis/input/egfr/tps-egfr-omicsintegrator1-params-ITO5EQS_pathway.txt b/test/analysis/input/egfr/tps-egfr-omicsintegrator1-params-ITO5EQS_pathway.txt
@@ -1,3 +1,4 @@
+Node1	Node2	Rank	Direction
 ABI1_HUMAN MK01_HUMAN 1 U
 CBL_HUMAN CD2AP_HUMAN 1 U
 CBL_HUMAN CRKL_HUMAN 1 U

diff --git a/test/analysis/input/egfr/tps-egfr-pathlinker-params-7S4SLU6_pathway.txt b/test/analysis/input/egfr/tps-egfr-pathlinker-params-7S4SLU6_pathway.txt
@@ -1,3 +1,4 @@
+Node1	Node2	Rank	Direction
 EGF_HUMAN EGFR_HUMAN 1 U
 EGF_HUMAN S10A4_HUMAN 2 U
 S10A4_HUMAN MYH9_HUMAN 2 U

diff --git a/test/analysis/input/egfr/tps-egfr-pathlinker-params-TCEMRS7_pathway.txt b/test/analysis/input/egfr/tps-egfr-pathlinker-params-TCEMRS7_pathway.txt
@@ -1,3 +1,4 @@
+Node1	Node2	Rank	Direction
 EGF_HUMAN EGFR_HUMAN 1 U
 EGF_HUMAN S10A4_HUMAN 2 U
 S10A4_HUMAN MYH9_HUMAN 2 U

diff --git a/test/analysis/input/example/data0-meo-params-GKEDDFZ_pathway.txt b/test/analysis/input/example/data0-meo-params-GKEDDFZ_pathway.txt
@@ -1,2 +1,3 @@
+Node1	Node2	Rank	Direction
 A	B	1	D
 B	C	1	D
diff --git a/test/analysis/input/example/data0-omicsintegrator1-params-RQCQ4YN_pathway.txt b/test/analysis/input/example/data0-omicsintegrator1-params-RQCQ4YN_pathway.txt
@@ -1,2 +1,3 @@
+Node1	Node2	Rank	Direction
 A	B	1	U
 B	C	1	U
diff --git a/test/analysis/input/example/data0-omicsintegrator1-params-WY4V42C_pathway.txt b/test/analysis/input/example/data0-omicsintegrator1-params-WY4V42C_pathway.txt
@@ -1,2 +1,3 @@
+Node1	Node2	Rank	Direction
 A	B	1	U
 B	C	1	U
diff --git a/test/analysis/input/example/data0-omicsintegrator2-params-IV3IPCJ_pathway.txt b/test/analysis/input/example/data0-omicsintegrator2-params-IV3IPCJ_pathway.txt
@@ -1,2 +1,3 @@
+Node1	Node2	Rank	Direction
 B	A	1	U
 B	C	1	U
diff --git a/test/analysis/input/example/data0-pathlinker-params-6SWY7JS_pathway.txt b/test/analysis/input/example/data0-pathlinker-params-6SWY7JS_pathway.txt
@@ -1,2 +1,3 @@
+Node1	Node2	Rank	Direction
 A	B	1	D
 B	C	1	D
diff --git a/test/analysis/input/example/data0-pathlinker-params-VQL7BDZ_pathway.txt b/test/analysis/input/example/data0-pathlinker-params-VQL7BDZ_pathway.txt
@@ -1,2 +1,3 @@
+Node1	Node2	Rank	Direction
 A	B	1	D
 B	C	1	D
diff --git a/test/analysis/input/example/data1-meo-params-GKEDDFZ_pathway.txt b/test/analysis/input/example/data1-meo-params-GKEDDFZ_pathway.txt
@@ -1,3 +1,4 @@
+Node1	Node2	Rank	Direction
 A	B	1	D
 B	C	1	D
 A	D	1	D

diff --git a/test/analysis/input/example/data1-omicsintegrator1-params-JAZWLAK_pathway.txt b/test/analysis/input/example/data1-omicsintegrator1-params-JAZWLAK_pathway.txt
@@ -1,3 +1,4 @@
+Node1	Node2	Rank	Direction
 A	D	1	U
 G	H	1	U
 G	I	1	U
diff --git a/test/analysis/input/example/data1-omicsintegrator1-params-PU62FNV_pathway.txt b/test/analysis/input/example/data1-omicsintegrator1-params-PU62FNV_pathway.txt
@@ -1,3 +1,4 @@
+Node1	Node2	Rank	Direction
 A	D	1	U
 G	H	1	U
 G	I	1	U
diff --git a/test/analysis/input/example/data1-omicsintegrator2-params-IV3IPCJ_pathway.txt b/test/analysis/input/example/data1-omicsintegrator2-params-IV3IPCJ_pathway.txt
@@ -1,3 +1,4 @@
+Node1	Node2	Rank	Direction
 C	D	1	U
 C	F	1	U
 A	D	1	U

diff --git a/test/analysis/input/example/data1-pathlinker-params-6SWY7JS_pathway.txt b/test/analysis/input/example/data1-pathlinker-params-6SWY7JS_pathway.txt
@@ -1,3 +1,4 @@
+Node1	Node2	Rank	Direction
 A	B	1	D
 B	C	1	D
 A	D	2	D
diff --git a/test/analysis/input/example/data1-pathlinker-params-VQL7BDZ_pathway.txt b/test/analysis/input/example/data1-pathlinker-params-VQL7BDZ_pathway.txt
@@ -1,3 +1,4 @@
+Node1	Node2	Rank	Direction
 A	B	1	D
 B	C	1	D
 A	D	2	D
diff --git a/test/analysis/input/standardized-ranked.txt b/test/analysis/input/standardized-ranked.txt
@@ -1,3 +1,4 @@
+Node1	Node2	Rank	Direction
 A	B	1	U
 A	C  	3  	U
 C	D 	5	U

diff --git a/test/analysis/input/toy/network1.txt b/test/analysis/input/toy/network1.txt
@@ -1,3 +1,4 @@
+Node1	Node2	Rank	Direction
 A B 1 U
 C D 1 U
 E F 1 U

diff --git a/test/analysis/input/toy/network2.txt b/test/analysis/input/toy/network2.txt
@@ -1,3 +1,4 @@
+Node1	Node2	Rank	Direction
 A B 1 U
 B C 1 U
 C D 1 U

diff --git a/test/analysis/input/toy/network3.txt b/test/analysis/input/toy/network3.txt
@@ -1,3 +1,4 @@
+Node1	Node2	Rank	Direction
 A B 1 U
 A C 1 U
 A D 1 U

diff --git a/test/analysis/input/toy/network4.txt b/test/analysis/input/toy/network4.txt
@@ -1,3 +1,4 @@
+Node1	Node2	Rank	Direction
 A B 1 U
 B C 1 U
 D E 1 U

diff --git a/test/analysis/input/toy/network5.txt b/test/analysis/input/toy/network5.txt
@@ -1,3 +1,4 @@
+Node1	Node2	Rank	Direction
 A B 1 U
 B C 1 U
 C D 1 U

diff --git a/test/ml/input/test-data-longName/longName.txt b/test/ml/input/test-data-longName/longName.txt
@@ -1,3 +1,4 @@
+Node1	Node2	Rank	Direction
 node1	node2	1	U
 node1	node3	1	U
 node4	node5	1	U

diff --git a/test/ml/input/test-data-longName2/longName2.txt b/test/ml/input/test-data-longName2/longName2.txt
@@ -1,3 +1,4 @@
+Node1	Node2	Rank	Direction
 node3	node2	1	U
 node1	node3	1	U
 node5	node4	1	U

diff --git a/test/ml/input/test-data-s1/s1.txt b/test/ml/input/test-data-s1/s1.txt
@@ -1,3 +1,4 @@
+Node1	Node2	Rank	Direction
 A	B	1	U
 C	D	1	U
 E	F	1	U
diff --git a/test/ml/input/test-data-s2/s2.txt b/test/ml/input/test-data-s2/s2.txt
@@ -1,3 +1,4 @@
+Node1	Node2	Rank	Direction
 A	B	1	U
 C	D	1	U
 E	F	1	U

diff --git a/test/ml/input/test-data-s3/s3.txt b/test/ml/input/test-data-s3/s3.txt
@@ -1,3 +1,4 @@
+Node1	Node2	Rank	Direction
 L	M	1	U
 M	N	1	U
 O	P	1	U

diff --git a/test/ml/input/test-data-spaces/spaces.txt b/test/ml/input/test-data-spaces/spaces.txt
@@ -1,3 +1,4 @@
+Node1	Node2	Rank	Direction
 L	M	1	U
 O	P	1	U
 nodes with	spaces in name	1	U
diff --git a/test/ml/input/test-mixed-direction/mixed-direction.txt b/test/ml/input/test-mixed-direction/mixed-direction.txt
@@ -1,3 +1,4 @@
+Node1	Node2	Rank	Direction
 A	B	1	D
 B	A	1	D
 C	D	1	U

diff --git a/test/parse-outputs/expected/allpairs-pathway-expected.txt b/test/parse-outputs/expected/allpairs-pathway-expected.txt
@@ -1,3 +1,4 @@
+Node1	Node2	Rank	Direction
 S1	A	1	U
 S1	B	1	U
 A	E	1	U

diff --git a/test/parse-outputs/expected/domino-pathway-expected.txt b/test/parse-outputs/expected/domino-pathway-expected.txt
@@ -1,3 +1,4 @@
+Node1	Node2	Rank	Direction
 ENSG00000122691	ENSG00000138757	1	U
 ENSG00000122691	ENSG00000109320	1	U
 ENSG00000134954	ENSG00000077150	1	U

diff --git a/test/parse-outputs/expected/meo-pathway-expected.txt b/test/parse-outputs/expected/meo-pathway-expected.txt
@@ -1,2 +1,3 @@
+Node1	Node2	Rank	Direction
 GENEA	GENEC	1	D
 GENEC	GENEB	1	D
diff --git a/test/parse-outputs/expected/mincostflow-pathway-expected.txt b/test/parse-outputs/expected/mincostflow-pathway-expected.txt
@@ -1,2 +1,3 @@
+Node1	Node2	Rank	Direction
 B	A	1	U
 D	B	1	U
diff --git a/test/parse-outputs/expected/omicsintegrator1-pathway-expected.txt b/test/parse-outputs/expected/omicsintegrator1-pathway-expected.txt
@@ -1,2 +1,3 @@
+Node1	Node2	Rank	Direction
 A	C	1	D
 C	D	1	U
diff --git a/test/parse-outputs/expected/omicsintegrator2-pathway-expected.txt b/test/parse-outputs/expected/omicsintegrator2-pathway-expected.txt
@@ -1,2 +1,3 @@
+Node1	Node2	Rank	Direction
 B	A	1	U
 B	C	1	U
diff --git a/test/parse-outputs/expected/pathlinker-pathway-expected.txt b/test/parse-outputs/expected/pathlinker-pathway-expected.txt
@@ -1,3 +1,4 @@
+Node1	Node2	Rank	Direction
 S2	T3	1	D
 A	E	2	D
 S1	A	2	D
-Original file line number
+Diff line change
@@ -1,3 +1,4 @@
+    Node1	Node2	Rank	Direction
     A	B	1	D
     B	C	1	D
     A	D	1	D
@@ Expand Down @@