From 059c0fc806e83a930de2fe0e0d8613b6461d571b Mon Sep 17 00:00:00 2001
From: ntalluri <nehatalluri@live.com>
Date: Tue, 3 Sep 2024 13:30:11 -0500
Subject: [PATCH 01/10] first pass at fixing oi2 error

---
 config/config.yaml                            | 26 +++++++++---------
 spras/omicsintegrator2.py                     | 18 +++++++++----
 .../oi2-expected/oi2-expected-empty.txt       |  1 +
 .../expected/oi2-expected/oi2-expected.txt    |  3 +++
 .../input/oi2-raw-pathways/oi2-correct.txt    |  3 +++
 .../input/oi2-raw-pathways/oi2-empty.txt      |  1 +
 .../oi2-raw-pathways/oi2-miss-insolution.txt  |  3 +++
 .../oi2-raw-pathways/oi2-wrong-order.txt      |  3 +++
 test/parse-outputs/test_parse_outputs.py      | 27 ++++++++++++++++++-
 9 files changed, 66 insertions(+), 19 deletions(-)
 create mode 100644 test/parse-outputs/expected/oi2-expected/oi2-expected-empty.txt
 create mode 100644 test/parse-outputs/expected/oi2-expected/oi2-expected.txt
 create mode 100644 test/parse-outputs/input/oi2-raw-pathways/oi2-correct.txt
 create mode 100644 test/parse-outputs/input/oi2-raw-pathways/oi2-empty.txt
 create mode 100644 test/parse-outputs/input/oi2-raw-pathways/oi2-miss-insolution.txt
 create mode 100644 test/parse-outputs/input/oi2-raw-pathways/oi2-wrong-order.txt

diff --git a/config/config.yaml b/config/config.yaml
index b87bcd45..79a9912a 100644
--- a/config/config.yaml
+++ b/config/config.yaml
@@ -45,13 +45,13 @@ container_registry:
 algorithms:
       - name: "pathlinker"
         params:
-              include: true
+              include: false
               run1:
                   k: range(100,201,100)
 
       - name: "omicsintegrator1"
         params:
-              include: true
+              include: false
               run1:
                   b: [5, 6]
                   w: np.linspace(0,5,2)
@@ -69,7 +69,7 @@ algorithms:
 
       - name: "meo"
         params:
-              include: true
+              include: false
               run1:
                   max_path_length: [3]
                   local_search: ["Yes"]
@@ -77,18 +77,18 @@ algorithms:
 
       - name: "mincostflow"
         params:
-              include: true
+              include: false
               run1:
                   flow: [1] # The flow must be an int
                   capacity: [1]
 
       - name: "allpairs"
         params:
-              include: true
+              include: false
 
       - name: "domino"
         params:
-              include: true
+              include: false
               run1:
                   slice_threshold: [0.3]
                   module_threshold: [0.05]
@@ -149,28 +149,28 @@ reconstruction_settings:
 analysis:
       # Create one summary per pathway file and a single summary table for all pathways for each dataset
       summary:
-        include: true
+        include: false
       # Create output files for each pathway that can be visualized with GraphSpace
       graphspace:
-        include: true
+        include: false
       # Create Cytoscape session file with all pathway graphs for each dataset
       cytoscape:
-        include: true
+        include: false
       # Machine learning analysis (e.g. clustering) of the pathway output files for each dataset
       ml:
         # ml analysis per dataset
-        include: true
+        include: false
         # adds ml analysis per algorithm output
         # only runs for algorithms with multiple parameter combinations chosen
-        aggregate_per_algorithm: true
+        aggregate_per_algorithm: false
         # specify how many principal components to calculate
         components: 2
         # boolean to show the labels on the pca graph
-        labels: true
+        labels: false
         # 'ward', 'complete', 'average', 'single'
         # if linkage: ward, must use metric: euclidean
         linkage: 'ward'
         # 'euclidean', 'manhattan', 'cosine'
         metric: 'euclidean'
       evaluation:
-        include: true
+        include: false
diff --git a/spras/omicsintegrator2.py b/spras/omicsintegrator2.py
index ed0d5b56..1e1e2d20 100644
--- a/spras/omicsintegrator2.py
+++ b/spras/omicsintegrator2.py
@@ -148,14 +148,22 @@ def parse_output(raw_pathway_file, standardized_pathway_file):
         """
         # Omicsintegrator2 returns a single line file if no network is found
         num_lines = sum(1 for line in open(raw_pathway_file))
+        df = pd.read_csv(raw_pathway_file, sep='\t', header=0)
+        print(df)
+        # Omicsintegrator2 has corrupted output, list of correct column names and order
+        correct_columns = ['protein1', 'protein2', 'cost', 'in_solution']
+
         if num_lines < 2:
             df = pd.DataFrame(columns=['Node1', 'Node2', 'Rank', 'Direction'])
         else:
             df = pd.read_csv(raw_pathway_file, sep='\t', header=0)
-            df = df[df['in_solution'] == True]  # Check whether this column can be empty before revising this line
-            df = df.take([0, 1], axis=1)
-            df = add_rank_column(df)
-            df = reinsert_direction_col_undirected(df)
-            df.columns = ['Node1', 'Node2', 'Rank', "Direction"]
+            if (len(df.columns) == len(correct_columns)) and all(df.columns == correct_columns):
+                df = df[df['in_solution'] == True]  # Check whether this column can be empty before revising this line 
+                df = df.take([0, 1], axis=1)
+                df = add_rank_column(df)
+                df = reinsert_direction_col_undirected(df)
+                df.columns = ['Node1', 'Node2', 'Rank', "Direction"]
+            else: 
+                df = pd.DataFrame(columns=['Node1', 'Node2', 'Rank', 'Direction'])
 
         df.to_csv(standardized_pathway_file, header=True, index=False, sep='\t')
diff --git a/test/parse-outputs/expected/oi2-expected/oi2-expected-empty.txt b/test/parse-outputs/expected/oi2-expected/oi2-expected-empty.txt
new file mode 100644
index 00000000..a1a76651
--- /dev/null
+++ b/test/parse-outputs/expected/oi2-expected/oi2-expected-empty.txt
@@ -0,0 +1 @@
+Node1	Node2	Rank	Direction
diff --git a/test/parse-outputs/expected/oi2-expected/oi2-expected.txt b/test/parse-outputs/expected/oi2-expected/oi2-expected.txt
new file mode 100644
index 00000000..e34eeaff
--- /dev/null
+++ b/test/parse-outputs/expected/oi2-expected/oi2-expected.txt
@@ -0,0 +1,3 @@
+Node1	Node2	Rank	Direction
+B	A	1	U
+B	C	1	U
diff --git a/test/parse-outputs/input/oi2-raw-pathways/oi2-correct.txt b/test/parse-outputs/input/oi2-raw-pathways/oi2-correct.txt
new file mode 100644
index 00000000..11bda2af
--- /dev/null
+++ b/test/parse-outputs/input/oi2-raw-pathways/oi2-correct.txt
@@ -0,0 +1,3 @@
+protein1	protein2	cost	in_solution
+B	A	0.52	True
+B	C	0.73	True
\ No newline at end of file
diff --git a/test/parse-outputs/input/oi2-raw-pathways/oi2-empty.txt b/test/parse-outputs/input/oi2-raw-pathways/oi2-empty.txt
new file mode 100644
index 00000000..30a5f772
--- /dev/null
+++ b/test/parse-outputs/input/oi2-raw-pathways/oi2-empty.txt
@@ -0,0 +1 @@
+protein1	protein2
diff --git a/test/parse-outputs/input/oi2-raw-pathways/oi2-miss-insolution.txt b/test/parse-outputs/input/oi2-raw-pathways/oi2-miss-insolution.txt
new file mode 100644
index 00000000..6ed53c89
--- /dev/null
+++ b/test/parse-outputs/input/oi2-raw-pathways/oi2-miss-insolution.txt
@@ -0,0 +1,3 @@
+protein1	protein2	cost
+B	A	0.52
+B	C	0.73
\ No newline at end of file
diff --git a/test/parse-outputs/input/oi2-raw-pathways/oi2-wrong-order.txt b/test/parse-outputs/input/oi2-raw-pathways/oi2-wrong-order.txt
new file mode 100644
index 00000000..bde8f08c
--- /dev/null
+++ b/test/parse-outputs/input/oi2-raw-pathways/oi2-wrong-order.txt
@@ -0,0 +1,3 @@
+protein1	protein2	in_solution	cost
+B	A	True	0.52
+B	C	True	0.73
\ No newline at end of file
diff --git a/test/parse-outputs/test_parse_outputs.py b/test/parse-outputs/test_parse_outputs.py
index 60763d13..b636c2cd 100644
--- a/test/parse-outputs/test_parse_outputs.py
+++ b/test/parse-outputs/test_parse_outputs.py
@@ -6,6 +6,8 @@
 INDIR = "test/parse-outputs/input/"
 OUTDIR = "test/parse-outputs/output/"
 EXPDIR = "test/parse-outputs/expected/"
+RAW_PATHS_INDIR = 'test/parse-outputs/input/oi2-raw-pathways/'
+RAW_PATHS_EXPDIR = 'test/parse-outputs/expected/oi2-expected/'
 
 # DOMINO input is the concatenated module_0.html and module_1.html file from
 # the DOMINO output of the network dip.sif and the nodes tnfa_active_genes_file.txt
@@ -13,7 +15,6 @@
 
 algorithms = ['mincostflow', 'meo', 'omicsintegrator1', 'omicsintegrator2', 'pathlinker', 'allpairs', 'domino']
 
-
 class TestParseOutputs:
     @classmethod
     def setup_class(cls):
@@ -37,3 +38,27 @@ def test_empty_file(self):
 
             runner.parse_output(algo, test_file, out_file)
             assert filecmp.cmp(OUTDIR + f"{algo}-empty-pathway.txt", EXPDIR + f"empty-pathway-expected.txt", shallow=False)
+
+    def test_oi2_correct_parse_output(self):
+        test_file = RAW_PATHS_INDIR + f"oi2-correct.txt"
+        out_file = OUTDIR + f"oi2-correct-pathway.txt"
+        runner.parse_output('omicsintegrator2', test_file, out_file)
+        assert filecmp.cmp(out_file, RAW_PATHS_EXPDIR + f"oi2-expected.txt", shallow=False)
+
+    def test_oi2_empty_parse_output(self):
+        test_file = RAW_PATHS_INDIR + f"oi2-empty.txt"
+        out_file = OUTDIR + f"oi2-empty-pathway.txt"
+        runner.parse_output('omicsintegrator2', test_file, out_file)
+        assert filecmp.cmp(out_file, RAW_PATHS_EXPDIR + f"oi2-expected-empty.txt", shallow=False)
+
+    def test_oi2_miss_insolution_parse_output(self):
+        test_file = RAW_PATHS_INDIR + f"oi2-miss-insolution.txt"
+        out_file = OUTDIR + f"oi2-miss-insolution-pathway.txt"
+        runner.parse_output('omicsintegrator2', test_file, out_file)
+        assert filecmp.cmp(out_file, RAW_PATHS_EXPDIR + f"oi2-expected-empty.txt", shallow=False)
+
+    def test_oi2_wrong_order_parse_output(self):
+        test_file = RAW_PATHS_INDIR + f"oi2-wrong-order.txt"
+        out_file = OUTDIR + f"oi2-wrong-order-pathway.txt"
+        runner.parse_output('omicsintegrator2', test_file, out_file)
+        assert filecmp.cmp(out_file, RAW_PATHS_EXPDIR + f"oi2-expected-empty.txt", shallow=False)
\ No newline at end of file

From d786ec9f600742aa9a742f3e23b6c57a6993d768 Mon Sep 17 00:00:00 2001
From: ntalluri <nehatalluri@live.com>
Date: Tue, 3 Sep 2024 13:33:09 -0500
Subject: [PATCH 02/10] precommit

---
 spras/omicsintegrator2.py                | 4 ++--
 test/parse-outputs/test_parse_outputs.py | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/spras/omicsintegrator2.py b/spras/omicsintegrator2.py
index 1e1e2d20..d02f793c 100644
--- a/spras/omicsintegrator2.py
+++ b/spras/omicsintegrator2.py
@@ -158,12 +158,12 @@ def parse_output(raw_pathway_file, standardized_pathway_file):
         else:
             df = pd.read_csv(raw_pathway_file, sep='\t', header=0)
             if (len(df.columns) == len(correct_columns)) and all(df.columns == correct_columns):
-                df = df[df['in_solution'] == True]  # Check whether this column can be empty before revising this line 
+                df = df[df['in_solution'] == True]  # Check whether this column can be empty before revising this line
                 df = df.take([0, 1], axis=1)
                 df = add_rank_column(df)
                 df = reinsert_direction_col_undirected(df)
                 df.columns = ['Node1', 'Node2', 'Rank', "Direction"]
-            else: 
+            else:
                 df = pd.DataFrame(columns=['Node1', 'Node2', 'Rank', 'Direction'])
 
         df.to_csv(standardized_pathway_file, header=True, index=False, sep='\t')
diff --git a/test/parse-outputs/test_parse_outputs.py b/test/parse-outputs/test_parse_outputs.py
index b636c2cd..5da74ec5 100644
--- a/test/parse-outputs/test_parse_outputs.py
+++ b/test/parse-outputs/test_parse_outputs.py
@@ -61,4 +61,4 @@ def test_oi2_wrong_order_parse_output(self):
         test_file = RAW_PATHS_INDIR + f"oi2-wrong-order.txt"
         out_file = OUTDIR + f"oi2-wrong-order-pathway.txt"
         runner.parse_output('omicsintegrator2', test_file, out_file)
-        assert filecmp.cmp(out_file, RAW_PATHS_EXPDIR + f"oi2-expected-empty.txt", shallow=False)
\ No newline at end of file
+        assert filecmp.cmp(out_file, RAW_PATHS_EXPDIR + f"oi2-expected-empty.txt", shallow=False)

From b202639b8a8bc07d3faab7820089996b6136207c Mon Sep 17 00:00:00 2001
From: ntalluri <nehatalluri@live.com>
Date: Tue, 3 Sep 2024 13:39:34 -0500
Subject: [PATCH 03/10] updated files being checked

---
 .../oi2-expected/oi2-expected-empty.txt       |  1 -
 .../expected/oi2-expected/oi2-expected.txt    |  3 ---
 .../input/oi2-raw-pathways/oi2-correct.txt    |  3 ---
 .../input/oi2-raw-pathways/oi2-empty.txt      |  1 -
 test/parse-outputs/test_parse_outputs.py      | 19 ++++---------------
 5 files changed, 4 insertions(+), 23 deletions(-)
 delete mode 100644 test/parse-outputs/expected/oi2-expected/oi2-expected-empty.txt
 delete mode 100644 test/parse-outputs/expected/oi2-expected/oi2-expected.txt
 delete mode 100644 test/parse-outputs/input/oi2-raw-pathways/oi2-correct.txt
 delete mode 100644 test/parse-outputs/input/oi2-raw-pathways/oi2-empty.txt

diff --git a/test/parse-outputs/expected/oi2-expected/oi2-expected-empty.txt b/test/parse-outputs/expected/oi2-expected/oi2-expected-empty.txt
deleted file mode 100644
index a1a76651..00000000
--- a/test/parse-outputs/expected/oi2-expected/oi2-expected-empty.txt
+++ /dev/null
@@ -1 +0,0 @@
-Node1	Node2	Rank	Direction
diff --git a/test/parse-outputs/expected/oi2-expected/oi2-expected.txt b/test/parse-outputs/expected/oi2-expected/oi2-expected.txt
deleted file mode 100644
index e34eeaff..00000000
--- a/test/parse-outputs/expected/oi2-expected/oi2-expected.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-Node1	Node2	Rank	Direction
-B	A	1	U
-B	C	1	U
diff --git a/test/parse-outputs/input/oi2-raw-pathways/oi2-correct.txt b/test/parse-outputs/input/oi2-raw-pathways/oi2-correct.txt
deleted file mode 100644
index 11bda2af..00000000
--- a/test/parse-outputs/input/oi2-raw-pathways/oi2-correct.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-protein1	protein2	cost	in_solution
-B	A	0.52	True
-B	C	0.73	True
\ No newline at end of file
diff --git a/test/parse-outputs/input/oi2-raw-pathways/oi2-empty.txt b/test/parse-outputs/input/oi2-raw-pathways/oi2-empty.txt
deleted file mode 100644
index 30a5f772..00000000
--- a/test/parse-outputs/input/oi2-raw-pathways/oi2-empty.txt
+++ /dev/null
@@ -1 +0,0 @@
-protein1	protein2
diff --git a/test/parse-outputs/test_parse_outputs.py b/test/parse-outputs/test_parse_outputs.py
index 5da74ec5..b5ee7539 100644
--- a/test/parse-outputs/test_parse_outputs.py
+++ b/test/parse-outputs/test_parse_outputs.py
@@ -7,7 +7,6 @@
 OUTDIR = "test/parse-outputs/output/"
 EXPDIR = "test/parse-outputs/expected/"
 RAW_PATHS_INDIR = 'test/parse-outputs/input/oi2-raw-pathways/'
-RAW_PATHS_EXPDIR = 'test/parse-outputs/expected/oi2-expected/'
 
 # DOMINO input is the concatenated module_0.html and module_1.html file from
 # the DOMINO output of the network dip.sif and the nodes tnfa_active_genes_file.txt
@@ -39,26 +38,16 @@ def test_empty_file(self):
             runner.parse_output(algo, test_file, out_file)
             assert filecmp.cmp(OUTDIR + f"{algo}-empty-pathway.txt", EXPDIR + f"empty-pathway-expected.txt", shallow=False)
 
-    def test_oi2_correct_parse_output(self):
-        test_file = RAW_PATHS_INDIR + f"oi2-correct.txt"
-        out_file = OUTDIR + f"oi2-correct-pathway.txt"
-        runner.parse_output('omicsintegrator2', test_file, out_file)
-        assert filecmp.cmp(out_file, RAW_PATHS_EXPDIR + f"oi2-expected.txt", shallow=False)
-
-    def test_oi2_empty_parse_output(self):
-        test_file = RAW_PATHS_INDIR + f"oi2-empty.txt"
-        out_file = OUTDIR + f"oi2-empty-pathway.txt"
-        runner.parse_output('omicsintegrator2', test_file, out_file)
-        assert filecmp.cmp(out_file, RAW_PATHS_EXPDIR + f"oi2-expected-empty.txt", shallow=False)
-
     def test_oi2_miss_insolution_parse_output(self):
         test_file = RAW_PATHS_INDIR + f"oi2-miss-insolution.txt"
         out_file = OUTDIR + f"oi2-miss-insolution-pathway.txt"
+
         runner.parse_output('omicsintegrator2', test_file, out_file)
-        assert filecmp.cmp(out_file, RAW_PATHS_EXPDIR + f"oi2-expected-empty.txt", shallow=False)
+        assert filecmp.cmp(out_file, EXPDIR + f"empty-pathway-expected.txt", shallow=False)
 
     def test_oi2_wrong_order_parse_output(self):
         test_file = RAW_PATHS_INDIR + f"oi2-wrong-order.txt"
         out_file = OUTDIR + f"oi2-wrong-order-pathway.txt"
+
         runner.parse_output('omicsintegrator2', test_file, out_file)
-        assert filecmp.cmp(out_file, RAW_PATHS_EXPDIR + f"oi2-expected-empty.txt", shallow=False)
+        assert filecmp.cmp(out_file, EXPDIR + f"empty-pathway-expected.txt", shallow=False)

From ea88614ccc732dca00eb94737a1b8ff5a8c334bd Mon Sep 17 00:00:00 2001
From: ntalluri <nehatalluri@live.com>
Date: Tue, 3 Sep 2024 14:24:26 -0500
Subject: [PATCH 04/10] fixed oi2 error

---
 spras/omicsintegrator2.py                | 4 +---
 test/parse-outputs/test_parse_outputs.py | 5 +++--
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/spras/omicsintegrator2.py b/spras/omicsintegrator2.py
index d02f793c..98401f8e 100644
--- a/spras/omicsintegrator2.py
+++ b/spras/omicsintegrator2.py
@@ -148,9 +148,7 @@ def parse_output(raw_pathway_file, standardized_pathway_file):
         """
         # Omicsintegrator2 returns a single line file if no network is found
         num_lines = sum(1 for line in open(raw_pathway_file))
-        df = pd.read_csv(raw_pathway_file, sep='\t', header=0)
-        print(df)
-        # Omicsintegrator2 has corrupted output, list of correct column names and order
+        # Omicsintegrator2 has corrupted output; list of correct column names and order
         correct_columns = ['protein1', 'protein2', 'cost', 'in_solution']
 
         if num_lines < 2:
diff --git a/test/parse-outputs/test_parse_outputs.py b/test/parse-outputs/test_parse_outputs.py
index b5ee7539..ad9c3257 100644
--- a/test/parse-outputs/test_parse_outputs.py
+++ b/test/parse-outputs/test_parse_outputs.py
@@ -14,6 +14,7 @@
 
 algorithms = ['mincostflow', 'meo', 'omicsintegrator1', 'omicsintegrator2', 'pathlinker', 'allpairs', 'domino']
 
+
 class TestParseOutputs:
     @classmethod
     def setup_class(cls):
@@ -38,14 +39,14 @@ def test_empty_file(self):
             runner.parse_output(algo, test_file, out_file)
             assert filecmp.cmp(OUTDIR + f"{algo}-empty-pathway.txt", EXPDIR + f"empty-pathway-expected.txt", shallow=False)
 
-    def test_oi2_miss_insolution_parse_output(self):
+    def test_oi2_miss_insolution(self):
         test_file = RAW_PATHS_INDIR + f"oi2-miss-insolution.txt"
         out_file = OUTDIR + f"oi2-miss-insolution-pathway.txt"
 
         runner.parse_output('omicsintegrator2', test_file, out_file)
         assert filecmp.cmp(out_file, EXPDIR + f"empty-pathway-expected.txt", shallow=False)
 
-    def test_oi2_wrong_order_parse_output(self):
+    def test_oi2_wrong_order(self):
         test_file = RAW_PATHS_INDIR + f"oi2-wrong-order.txt"
         out_file = OUTDIR + f"oi2-wrong-order-pathway.txt"
 

From 5623ebc836b34af9cbd53c511094bca278cbd8f3 Mon Sep 17 00:00:00 2001
From: ntalluri <nehatalluri@live.com>
Date: Tue, 3 Sep 2024 14:53:24 -0500
Subject: [PATCH 05/10] fix config file

---
 config/config.yaml | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/config/config.yaml b/config/config.yaml
index 79a9912a..b87bcd45 100644
--- a/config/config.yaml
+++ b/config/config.yaml
@@ -45,13 +45,13 @@ container_registry:
 algorithms:
       - name: "pathlinker"
         params:
-              include: false
+              include: true
               run1:
                   k: range(100,201,100)
 
       - name: "omicsintegrator1"
         params:
-              include: false
+              include: true
               run1:
                   b: [5, 6]
                   w: np.linspace(0,5,2)
@@ -69,7 +69,7 @@ algorithms:
 
       - name: "meo"
         params:
-              include: false
+              include: true
               run1:
                   max_path_length: [3]
                   local_search: ["Yes"]
@@ -77,18 +77,18 @@ algorithms:
 
       - name: "mincostflow"
         params:
-              include: false
+              include: true
               run1:
                   flow: [1] # The flow must be an int
                   capacity: [1]
 
       - name: "allpairs"
         params:
-              include: false
+              include: true
 
       - name: "domino"
         params:
-              include: false
+              include: true
               run1:
                   slice_threshold: [0.3]
                   module_threshold: [0.05]
@@ -149,28 +149,28 @@ reconstruction_settings:
 analysis:
       # Create one summary per pathway file and a single summary table for all pathways for each dataset
       summary:
-        include: false
+        include: true
       # Create output files for each pathway that can be visualized with GraphSpace
       graphspace:
-        include: false
+        include: true
       # Create Cytoscape session file with all pathway graphs for each dataset
       cytoscape:
-        include: false
+        include: true
       # Machine learning analysis (e.g. clustering) of the pathway output files for each dataset
       ml:
         # ml analysis per dataset
-        include: false
+        include: true
         # adds ml analysis per algorithm output
         # only runs for algorithms with multiple parameter combinations chosen
-        aggregate_per_algorithm: false
+        aggregate_per_algorithm: true
         # specify how many principal components to calculate
         components: 2
         # boolean to show the labels on the pca graph
-        labels: false
+        labels: true
         # 'ward', 'complete', 'average', 'single'
         # if linkage: ward, must use metric: euclidean
         linkage: 'ward'
         # 'euclidean', 'manhattan', 'cosine'
         metric: 'euclidean'
       evaluation:
-        include: false
+        include: true

From cb81e633b1cff8dfc4b0ad88d30544963c18edc8 Mon Sep 17 00:00:00 2001
From: ntalluri <nehatalluri@live.com>
Date: Tue, 3 Sep 2024 15:28:02 -0500
Subject: [PATCH 06/10] aggregate per algo set to false

---
 config/config.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/config/config.yaml b/config/config.yaml
index b87bcd45..2a43ce89 100644
--- a/config/config.yaml
+++ b/config/config.yaml
@@ -162,7 +162,7 @@ analysis:
         include: true
         # adds ml analysis per algorithm output
         # only runs for algorithms with multiple parameter combinations chosen
-        aggregate_per_algorithm: true
+        aggregate_per_algorithm: false
         # specify how many principal components to calculate
         components: 2
         # boolean to show the labels on the pca graph

From 8d9ffead71ed567a70e3b821ebb31be61242ee86 Mon Sep 17 00:00:00 2001
From: ntalluri <nehatalluri@live.com>
Date: Tue, 3 Sep 2024 15:38:43 -0500
Subject: [PATCH 07/10] update agg per algo

---
 config/config.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/config/config.yaml b/config/config.yaml
index 2a43ce89..b87bcd45 100644
--- a/config/config.yaml
+++ b/config/config.yaml
@@ -162,7 +162,7 @@ analysis:
         include: true
         # adds ml analysis per algorithm output
         # only runs for algorithms with multiple parameter combinations chosen
-        aggregate_per_algorithm: false
+        aggregate_per_algorithm: true
         # specify how many principal components to calculate
         components: 2
         # boolean to show the labels on the pca graph

From 0ae2bbe7c9b7c5d14b7831fcf7bfa108dc886236 Mon Sep 17 00:00:00 2001
From: ntalluri <nehatalluri@live.com>
Date: Wed, 4 Sep 2024 17:57:12 -0500
Subject: [PATCH 08/10] updated code to allow for random order heading, updated
 test cases, and cleaned up code

---
 spras/omicsintegrator2.py                            |  8 ++++----
 ...omicsintegrator2-miss-insolution-raw-pathway.txt} |  0
 .../omicsintegrator2-wrong-order-raw-pathway.txt}    |  0
 test/parse-outputs/test_parse_outputs.py             | 12 ++++++------
 4 files changed, 10 insertions(+), 10 deletions(-)
 rename test/parse-outputs/input/{oi2-raw-pathways/oi2-miss-insolution.txt => omicsintegrator-edge-cases/omicsintegrator2-miss-insolution-raw-pathway.txt} (100%)
 rename test/parse-outputs/input/{oi2-raw-pathways/oi2-wrong-order.txt => omicsintegrator-edge-cases/omicsintegrator2-wrong-order-raw-pathway.txt} (100%)

diff --git a/spras/omicsintegrator2.py b/spras/omicsintegrator2.py
index 98401f8e..450aa258 100644
--- a/spras/omicsintegrator2.py
+++ b/spras/omicsintegrator2.py
@@ -148,20 +148,20 @@ def parse_output(raw_pathway_file, standardized_pathway_file):
         """
         # Omicsintegrator2 returns a single line file if no network is found
         num_lines = sum(1 for line in open(raw_pathway_file))
-        # Omicsintegrator2 has corrupted output; list of correct column names and order
-        correct_columns = ['protein1', 'protein2', 'cost', 'in_solution']
+        # Omicsintegrator2 has corrupted output; list of correct column names
+        sorted_correct_column_names = ['cost', 'in_solution', 'protein1', 'protein2']
 
         if num_lines < 2:
             df = pd.DataFrame(columns=['Node1', 'Node2', 'Rank', 'Direction'])
         else:
             df = pd.read_csv(raw_pathway_file, sep='\t', header=0)
-            if (len(df.columns) == len(correct_columns)) and all(df.columns == correct_columns):
+            if sorted(df.columns) == sorted_correct_column_names: # if column header names are all correct
                 df = df[df['in_solution'] == True]  # Check whether this column can be empty before revising this line
                 df = df.take([0, 1], axis=1)
                 df = add_rank_column(df)
                 df = reinsert_direction_col_undirected(df)
                 df.columns = ['Node1', 'Node2', 'Rank', "Direction"]
-            else:
+            else: # corrupted data
                 df = pd.DataFrame(columns=['Node1', 'Node2', 'Rank', 'Direction'])
 
         df.to_csv(standardized_pathway_file, header=True, index=False, sep='\t')
diff --git a/test/parse-outputs/input/oi2-raw-pathways/oi2-miss-insolution.txt b/test/parse-outputs/input/omicsintegrator-edge-cases/omicsintegrator2-miss-insolution-raw-pathway.txt
similarity index 100%
rename from test/parse-outputs/input/oi2-raw-pathways/oi2-miss-insolution.txt
rename to test/parse-outputs/input/omicsintegrator-edge-cases/omicsintegrator2-miss-insolution-raw-pathway.txt
diff --git a/test/parse-outputs/input/oi2-raw-pathways/oi2-wrong-order.txt b/test/parse-outputs/input/omicsintegrator-edge-cases/omicsintegrator2-wrong-order-raw-pathway.txt
similarity index 100%
rename from test/parse-outputs/input/oi2-raw-pathways/oi2-wrong-order.txt
rename to test/parse-outputs/input/omicsintegrator-edge-cases/omicsintegrator2-wrong-order-raw-pathway.txt
diff --git a/test/parse-outputs/test_parse_outputs.py b/test/parse-outputs/test_parse_outputs.py
index ad9c3257..7dfe270e 100644
--- a/test/parse-outputs/test_parse_outputs.py
+++ b/test/parse-outputs/test_parse_outputs.py
@@ -6,7 +6,7 @@
 INDIR = "test/parse-outputs/input/"
 OUTDIR = "test/parse-outputs/output/"
 EXPDIR = "test/parse-outputs/expected/"
-RAW_PATHS_INDIR = 'test/parse-outputs/input/oi2-raw-pathways/'
+OI2_EDGE_CASES_INDIR = 'test/parse-outputs/input/omicsintegrator-edge-cases/'
 
 # DOMINO input is the concatenated module_0.html and module_1.html file from
 # the DOMINO output of the network dip.sif and the nodes tnfa_active_genes_file.txt
@@ -40,15 +40,15 @@ def test_empty_file(self):
             assert filecmp.cmp(OUTDIR + f"{algo}-empty-pathway.txt", EXPDIR + f"empty-pathway-expected.txt", shallow=False)
 
     def test_oi2_miss_insolution(self):
-        test_file = RAW_PATHS_INDIR + f"oi2-miss-insolution.txt"
-        out_file = OUTDIR + f"oi2-miss-insolution-pathway.txt"
+        test_file = OI2_EDGE_CASES_INDIR + f"omicsintegrator2-miss-insolution-raw-pathway.txt"
+        out_file = OUTDIR + f"omicsintegrator2-miss-insolution-pathway.txt"
 
         runner.parse_output('omicsintegrator2', test_file, out_file)
         assert filecmp.cmp(out_file, EXPDIR + f"empty-pathway-expected.txt", shallow=False)
 
     def test_oi2_wrong_order(self):
-        test_file = RAW_PATHS_INDIR + f"oi2-wrong-order.txt"
-        out_file = OUTDIR + f"oi2-wrong-order-pathway.txt"
+        test_file = OI2_EDGE_CASES_INDIR + f"omicsintegrator2-wrong-order-raw-pathway.txt"
+        out_file = OUTDIR + f"omicsintegrator2-wrong-order-pathway.txt"
 
         runner.parse_output('omicsintegrator2', test_file, out_file)
-        assert filecmp.cmp(out_file, EXPDIR + f"empty-pathway-expected.txt", shallow=False)
+        assert filecmp.cmp(out_file, EXPDIR + f"omicsintegrator2-pathway-expected.txt", shallow=False)

From c91373c89b9913a9be38ffa211d4b1f43f2ba894 Mon Sep 17 00:00:00 2001
From: ntalluri <nehatalluri@live.com>
Date: Tue, 17 Sep 2024 17:56:34 -0500
Subject: [PATCH 09/10] updated commenting on oi2 wrapper code

---
 config/config.yaml        | 18 +++++++++---------
 spras/omicsintegrator2.py |  6 +++---
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/config/config.yaml b/config/config.yaml
index b87bcd45..53a3317d 100644
--- a/config/config.yaml
+++ b/config/config.yaml
@@ -45,13 +45,13 @@ container_registry:
 algorithms:
       - name: "pathlinker"
         params:
-              include: true
+              include: false
               run1:
                   k: range(100,201,100)
 
       - name: "omicsintegrator1"
         params:
-              include: true
+              include: false
               run1:
                   b: [5, 6]
                   w: np.linspace(0,5,2)
@@ -69,7 +69,7 @@ algorithms:
 
       - name: "meo"
         params:
-              include: true
+              include: false
               run1:
                   max_path_length: [3]
                   local_search: ["Yes"]
@@ -77,18 +77,18 @@ algorithms:
 
       - name: "mincostflow"
         params:
-              include: true
+              include: false
               run1:
                   flow: [1] # The flow must be an int
                   capacity: [1]
 
       - name: "allpairs"
         params:
-              include: true
+              include: false
 
       - name: "domino"
         params:
-              include: true
+              include: false
               run1:
                   slice_threshold: [0.3]
                   module_threshold: [0.05]
@@ -152,14 +152,14 @@ analysis:
         include: true
       # Create output files for each pathway that can be visualized with GraphSpace
       graphspace:
-        include: true
+        include: false
       # Create Cytoscape session file with all pathway graphs for each dataset
       cytoscape:
-        include: true
+        include: false
       # Machine learning analysis (e.g. clustering) of the pathway output files for each dataset
       ml:
         # ml analysis per dataset
-        include: true
+        include: false
         # adds ml analysis per algorithm output
         # only runs for algorithms with multiple parameter combinations chosen
         aggregate_per_algorithm: true
diff --git a/spras/omicsintegrator2.py b/spras/omicsintegrator2.py
index 450aa258..19a8bd14 100644
--- a/spras/omicsintegrator2.py
+++ b/spras/omicsintegrator2.py
@@ -149,15 +149,15 @@ def parse_output(raw_pathway_file, standardized_pathway_file):
         # Omicsintegrator2 returns a single line file if no network is found
         num_lines = sum(1 for line in open(raw_pathway_file))
         # Omicsintegrator2 has corrupted output; list of correct column names
-        sorted_correct_column_names = ['cost', 'in_solution', 'protein1', 'protein2']
+        sorted_correct_column_names = ['cost', 'in_solution', 'protein1', 'protein2'] # the order of edge attributes in the NetworkX graph is not guaranteed.
 
         if num_lines < 2:
             df = pd.DataFrame(columns=['Node1', 'Node2', 'Rank', 'Direction'])
         else:
             df = pd.read_csv(raw_pathway_file, sep='\t', header=0)
             if sorted(df.columns) == sorted_correct_column_names: # if column header names are all correct
-                df = df[df['in_solution'] == True]  # Check whether this column can be empty before revising this line
-                df = df.take([0, 1], axis=1)
+                df = df[df['in_solution'] == True]  # the 'in_solution' column exists when the forest is not empty.
+                df = df.take([0, 1], axis=1) # the first two columns in the df will be 'protein1' and 'protein2', followed by the edge attributes.
                 df = add_rank_column(df)
                 df = reinsert_direction_col_undirected(df)
                 df.columns = ['Node1', 'Node2', 'Rank', "Direction"]

From 2e2c5c1b64cd075a3fce41347655d18ffc11590f Mon Sep 17 00:00:00 2001
From: ntalluri <nehatalluri@live.com>
Date: Tue, 17 Sep 2024 17:57:34 -0500
Subject: [PATCH 10/10] revert config.yaml

---
 config/config.yaml | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/config/config.yaml b/config/config.yaml
index 53a3317d..b87bcd45 100644
--- a/config/config.yaml
+++ b/config/config.yaml
@@ -45,13 +45,13 @@ container_registry:
 algorithms:
       - name: "pathlinker"
         params:
-              include: false
+              include: true
               run1:
                   k: range(100,201,100)
 
       - name: "omicsintegrator1"
         params:
-              include: false
+              include: true
               run1:
                   b: [5, 6]
                   w: np.linspace(0,5,2)
@@ -69,7 +69,7 @@ algorithms:
 
       - name: "meo"
         params:
-              include: false
+              include: true
               run1:
                   max_path_length: [3]
                   local_search: ["Yes"]
@@ -77,18 +77,18 @@ algorithms:
 
       - name: "mincostflow"
         params:
-              include: false
+              include: true
               run1:
                   flow: [1] # The flow must be an int
                   capacity: [1]
 
       - name: "allpairs"
         params:
-              include: false
+              include: true
 
       - name: "domino"
         params:
-              include: false
+              include: true
               run1:
                   slice_threshold: [0.3]
                   module_threshold: [0.05]
@@ -152,14 +152,14 @@ analysis:
         include: true
       # Create output files for each pathway that can be visualized with GraphSpace
       graphspace:
-        include: false
+        include: true
       # Create Cytoscape session file with all pathway graphs for each dataset
       cytoscape:
-        include: false
+        include: true
       # Machine learning analysis (e.g. clustering) of the pathway output files for each dataset
       ml:
         # ml analysis per dataset
-        include: false
+        include: true
         # adds ml analysis per algorithm output
         # only runs for algorithms with multiple parameter combinations chosen
         aggregate_per_algorithm: true