From 059c0fc806e83a930de2fe0e0d8613b6461d571b Mon Sep 17 00:00:00 2001 From: ntalluri Date: Tue, 3 Sep 2024 13:30:11 -0500 Subject: [PATCH 01/10] first pass at fixing oi2 error --- config/config.yaml | 26 +++++++++--------- spras/omicsintegrator2.py | 18 +++++++++---- .../oi2-expected/oi2-expected-empty.txt | 1 + .../expected/oi2-expected/oi2-expected.txt | 3 +++ .../input/oi2-raw-pathways/oi2-correct.txt | 3 +++ .../input/oi2-raw-pathways/oi2-empty.txt | 1 + .../oi2-raw-pathways/oi2-miss-insolution.txt | 3 +++ .../oi2-raw-pathways/oi2-wrong-order.txt | 3 +++ test/parse-outputs/test_parse_outputs.py | 27 ++++++++++++++++++- 9 files changed, 66 insertions(+), 19 deletions(-) create mode 100644 test/parse-outputs/expected/oi2-expected/oi2-expected-empty.txt create mode 100644 test/parse-outputs/expected/oi2-expected/oi2-expected.txt create mode 100644 test/parse-outputs/input/oi2-raw-pathways/oi2-correct.txt create mode 100644 test/parse-outputs/input/oi2-raw-pathways/oi2-empty.txt create mode 100644 test/parse-outputs/input/oi2-raw-pathways/oi2-miss-insolution.txt create mode 100644 test/parse-outputs/input/oi2-raw-pathways/oi2-wrong-order.txt diff --git a/config/config.yaml b/config/config.yaml index b87bcd45..79a9912a 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -45,13 +45,13 @@ container_registry: algorithms: - name: "pathlinker" params: - include: true + include: false run1: k: range(100,201,100) - name: "omicsintegrator1" params: - include: true + include: false run1: b: [5, 6] w: np.linspace(0,5,2) @@ -69,7 +69,7 @@ algorithms: - name: "meo" params: - include: true + include: false run1: max_path_length: [3] local_search: ["Yes"] @@ -77,18 +77,18 @@ algorithms: - name: "mincostflow" params: - include: true + include: false run1: flow: [1] # The flow must be an int capacity: [1] - name: "allpairs" params: - include: true + include: false - name: "domino" params: - include: true + include: false run1: slice_threshold: [0.3] module_threshold: 
[0.05] @@ -149,28 +149,28 @@ reconstruction_settings: analysis: # Create one summary per pathway file and a single summary table for all pathways for each dataset summary: - include: true + include: false # Create output files for each pathway that can be visualized with GraphSpace graphspace: - include: true + include: false # Create Cytoscape session file with all pathway graphs for each dataset cytoscape: - include: true + include: false # Machine learning analysis (e.g. clustering) of the pathway output files for each dataset ml: # ml analysis per dataset - include: true + include: false # adds ml analysis per algorithm output # only runs for algorithms with multiple parameter combinations chosen - aggregate_per_algorithm: true + aggregate_per_algorithm: false # specify how many principal components to calculate components: 2 # boolean to show the labels on the pca graph - labels: true + labels: false # 'ward', 'complete', 'average', 'single' # if linkage: ward, must use metric: euclidean linkage: 'ward' # 'euclidean', 'manhattan', 'cosine' metric: 'euclidean' evaluation: - include: true + include: false diff --git a/spras/omicsintegrator2.py b/spras/omicsintegrator2.py index ed0d5b56..1e1e2d20 100644 --- a/spras/omicsintegrator2.py +++ b/spras/omicsintegrator2.py @@ -148,14 +148,22 @@ def parse_output(raw_pathway_file, standardized_pathway_file): """ # Omicsintegrator2 returns a single line file if no network is found num_lines = sum(1 for line in open(raw_pathway_file)) + df = pd.read_csv(raw_pathway_file, sep='\t', header=0) + print(df) + # Omicsintegrator2 has corrupted output, list of correct column names and order + correct_columns = ['protein1', 'protein2', 'cost', 'in_solution'] + if num_lines < 2: df = pd.DataFrame(columns=['Node1', 'Node2', 'Rank', 'Direction']) else: df = pd.read_csv(raw_pathway_file, sep='\t', header=0) - df = df[df['in_solution'] == True] # Check whether this column can be empty before revising this line - df = df.take([0, 1], 
axis=1) - df = add_rank_column(df) - df = reinsert_direction_col_undirected(df) - df.columns = ['Node1', 'Node2', 'Rank', "Direction"] + if (len(df.columns) == len(correct_columns)) and all(df.columns == correct_columns): + df = df[df['in_solution'] == True] # Check whether this column can be empty before revising this line + df = df.take([0, 1], axis=1) + df = add_rank_column(df) + df = reinsert_direction_col_undirected(df) + df.columns = ['Node1', 'Node2', 'Rank', "Direction"] + else: + df = pd.DataFrame(columns=['Node1', 'Node2', 'Rank', 'Direction']) df.to_csv(standardized_pathway_file, header=True, index=False, sep='\t') diff --git a/test/parse-outputs/expected/oi2-expected/oi2-expected-empty.txt b/test/parse-outputs/expected/oi2-expected/oi2-expected-empty.txt new file mode 100644 index 00000000..a1a76651 --- /dev/null +++ b/test/parse-outputs/expected/oi2-expected/oi2-expected-empty.txt @@ -0,0 +1 @@ +Node1 Node2 Rank Direction diff --git a/test/parse-outputs/expected/oi2-expected/oi2-expected.txt b/test/parse-outputs/expected/oi2-expected/oi2-expected.txt new file mode 100644 index 00000000..e34eeaff --- /dev/null +++ b/test/parse-outputs/expected/oi2-expected/oi2-expected.txt @@ -0,0 +1,3 @@ +Node1 Node2 Rank Direction +B A 1 U +B C 1 U diff --git a/test/parse-outputs/input/oi2-raw-pathways/oi2-correct.txt b/test/parse-outputs/input/oi2-raw-pathways/oi2-correct.txt new file mode 100644 index 00000000..11bda2af --- /dev/null +++ b/test/parse-outputs/input/oi2-raw-pathways/oi2-correct.txt @@ -0,0 +1,3 @@ +protein1 protein2 cost in_solution +B A 0.52 True +B C 0.73 True \ No newline at end of file diff --git a/test/parse-outputs/input/oi2-raw-pathways/oi2-empty.txt b/test/parse-outputs/input/oi2-raw-pathways/oi2-empty.txt new file mode 100644 index 00000000..30a5f772 --- /dev/null +++ b/test/parse-outputs/input/oi2-raw-pathways/oi2-empty.txt @@ -0,0 +1 @@ +protein1 protein2 diff --git a/test/parse-outputs/input/oi2-raw-pathways/oi2-miss-insolution.txt 
b/test/parse-outputs/input/oi2-raw-pathways/oi2-miss-insolution.txt new file mode 100644 index 00000000..6ed53c89 --- /dev/null +++ b/test/parse-outputs/input/oi2-raw-pathways/oi2-miss-insolution.txt @@ -0,0 +1,3 @@ +protein1 protein2 cost +B A 0.52 +B C 0.73 \ No newline at end of file diff --git a/test/parse-outputs/input/oi2-raw-pathways/oi2-wrong-order.txt b/test/parse-outputs/input/oi2-raw-pathways/oi2-wrong-order.txt new file mode 100644 index 00000000..bde8f08c --- /dev/null +++ b/test/parse-outputs/input/oi2-raw-pathways/oi2-wrong-order.txt @@ -0,0 +1,3 @@ +protein1 protein2 in_solution cost +B A True 0.52 +B C True 0.73 \ No newline at end of file diff --git a/test/parse-outputs/test_parse_outputs.py b/test/parse-outputs/test_parse_outputs.py index 60763d13..b636c2cd 100644 --- a/test/parse-outputs/test_parse_outputs.py +++ b/test/parse-outputs/test_parse_outputs.py @@ -6,6 +6,8 @@ INDIR = "test/parse-outputs/input/" OUTDIR = "test/parse-outputs/output/" EXPDIR = "test/parse-outputs/expected/" +RAW_PATHS_INDIR = 'test/parse-outputs/input/oi2-raw-pathways/' +RAW_PATHS_EXPDIR = 'test/parse-outputs/expected/oi2-expected/' # DOMINO input is the concatenated module_0.html and module_1.html file from # the DOMINO output of the network dip.sif and the nodes tnfa_active_genes_file.txt @@ -13,7 +15,6 @@ algorithms = ['mincostflow', 'meo', 'omicsintegrator1', 'omicsintegrator2', 'pathlinker', 'allpairs', 'domino'] - class TestParseOutputs: @classmethod def setup_class(cls): @@ -37,3 +38,27 @@ def test_empty_file(self): runner.parse_output(algo, test_file, out_file) assert filecmp.cmp(OUTDIR + f"{algo}-empty-pathway.txt", EXPDIR + f"empty-pathway-expected.txt", shallow=False) + + def test_oi2_correct_parse_output(self): + test_file = RAW_PATHS_INDIR + f"oi2-correct.txt" + out_file = OUTDIR + f"oi2-correct-pathway.txt" + runner.parse_output('omicsintegrator2', test_file, out_file) + assert filecmp.cmp(out_file, RAW_PATHS_EXPDIR + f"oi2-expected.txt", shallow=False) + 
+ def test_oi2_empty_parse_output(self): + test_file = RAW_PATHS_INDIR + f"oi2-empty.txt" + out_file = OUTDIR + f"oi2-empty-pathway.txt" + runner.parse_output('omicsintegrator2', test_file, out_file) + assert filecmp.cmp(out_file, RAW_PATHS_EXPDIR + f"oi2-expected-empty.txt", shallow=False) + + def test_oi2_miss_insolution_parse_output(self): + test_file = RAW_PATHS_INDIR + f"oi2-miss-insolution.txt" + out_file = OUTDIR + f"oi2-miss-insolution-pathway.txt" + runner.parse_output('omicsintegrator2', test_file, out_file) + assert filecmp.cmp(out_file, RAW_PATHS_EXPDIR + f"oi2-expected-empty.txt", shallow=False) + + def test_oi2_wrong_order_parse_output(self): + test_file = RAW_PATHS_INDIR + f"oi2-wrong-order.txt" + out_file = OUTDIR + f"oi2-wrong-order-pathway.txt" + runner.parse_output('omicsintegrator2', test_file, out_file) + assert filecmp.cmp(out_file, RAW_PATHS_EXPDIR + f"oi2-expected-empty.txt", shallow=False) \ No newline at end of file From d786ec9f600742aa9a742f3e23b6c57a6993d768 Mon Sep 17 00:00:00 2001 From: ntalluri Date: Tue, 3 Sep 2024 13:33:09 -0500 Subject: [PATCH 02/10] precommit --- spras/omicsintegrator2.py | 4 ++-- test/parse-outputs/test_parse_outputs.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/spras/omicsintegrator2.py b/spras/omicsintegrator2.py index 1e1e2d20..d02f793c 100644 --- a/spras/omicsintegrator2.py +++ b/spras/omicsintegrator2.py @@ -158,12 +158,12 @@ def parse_output(raw_pathway_file, standardized_pathway_file): else: df = pd.read_csv(raw_pathway_file, sep='\t', header=0) if (len(df.columns) == len(correct_columns)) and all(df.columns == correct_columns): - df = df[df['in_solution'] == True] # Check whether this column can be empty before revising this line + df = df[df['in_solution'] == True] # Check whether this column can be empty before revising this line df = df.take([0, 1], axis=1) df = add_rank_column(df) df = reinsert_direction_col_undirected(df) df.columns = ['Node1', 'Node2', 'Rank', "Direction"] 
- else: + else: df = pd.DataFrame(columns=['Node1', 'Node2', 'Rank', 'Direction']) df.to_csv(standardized_pathway_file, header=True, index=False, sep='\t') diff --git a/test/parse-outputs/test_parse_outputs.py b/test/parse-outputs/test_parse_outputs.py index b636c2cd..5da74ec5 100644 --- a/test/parse-outputs/test_parse_outputs.py +++ b/test/parse-outputs/test_parse_outputs.py @@ -61,4 +61,4 @@ def test_oi2_wrong_order_parse_output(self): test_file = RAW_PATHS_INDIR + f"oi2-wrong-order.txt" out_file = OUTDIR + f"oi2-wrong-order-pathway.txt" runner.parse_output('omicsintegrator2', test_file, out_file) - assert filecmp.cmp(out_file, RAW_PATHS_EXPDIR + f"oi2-expected-empty.txt", shallow=False) \ No newline at end of file + assert filecmp.cmp(out_file, RAW_PATHS_EXPDIR + f"oi2-expected-empty.txt", shallow=False) From b202639b8a8bc07d3faab7820089996b6136207c Mon Sep 17 00:00:00 2001 From: ntalluri Date: Tue, 3 Sep 2024 13:39:34 -0500 Subject: [PATCH 03/10] updated files being checked: --- .../oi2-expected/oi2-expected-empty.txt | 1 - .../expected/oi2-expected/oi2-expected.txt | 3 --- .../input/oi2-raw-pathways/oi2-correct.txt | 3 --- .../input/oi2-raw-pathways/oi2-empty.txt | 1 - test/parse-outputs/test_parse_outputs.py | 19 ++++--------------- 5 files changed, 4 insertions(+), 23 deletions(-) delete mode 100644 test/parse-outputs/expected/oi2-expected/oi2-expected-empty.txt delete mode 100644 test/parse-outputs/expected/oi2-expected/oi2-expected.txt delete mode 100644 test/parse-outputs/input/oi2-raw-pathways/oi2-correct.txt delete mode 100644 test/parse-outputs/input/oi2-raw-pathways/oi2-empty.txt diff --git a/test/parse-outputs/expected/oi2-expected/oi2-expected-empty.txt b/test/parse-outputs/expected/oi2-expected/oi2-expected-empty.txt deleted file mode 100644 index a1a76651..00000000 --- a/test/parse-outputs/expected/oi2-expected/oi2-expected-empty.txt +++ /dev/null @@ -1 +0,0 @@ -Node1 Node2 Rank Direction diff --git 
a/test/parse-outputs/expected/oi2-expected/oi2-expected.txt b/test/parse-outputs/expected/oi2-expected/oi2-expected.txt deleted file mode 100644 index e34eeaff..00000000 --- a/test/parse-outputs/expected/oi2-expected/oi2-expected.txt +++ /dev/null @@ -1,3 +0,0 @@ -Node1 Node2 Rank Direction -B A 1 U -B C 1 U diff --git a/test/parse-outputs/input/oi2-raw-pathways/oi2-correct.txt b/test/parse-outputs/input/oi2-raw-pathways/oi2-correct.txt deleted file mode 100644 index 11bda2af..00000000 --- a/test/parse-outputs/input/oi2-raw-pathways/oi2-correct.txt +++ /dev/null @@ -1,3 +0,0 @@ -protein1 protein2 cost in_solution -B A 0.52 True -B C 0.73 True \ No newline at end of file diff --git a/test/parse-outputs/input/oi2-raw-pathways/oi2-empty.txt b/test/parse-outputs/input/oi2-raw-pathways/oi2-empty.txt deleted file mode 100644 index 30a5f772..00000000 --- a/test/parse-outputs/input/oi2-raw-pathways/oi2-empty.txt +++ /dev/null @@ -1 +0,0 @@ -protein1 protein2 diff --git a/test/parse-outputs/test_parse_outputs.py b/test/parse-outputs/test_parse_outputs.py index 5da74ec5..b5ee7539 100644 --- a/test/parse-outputs/test_parse_outputs.py +++ b/test/parse-outputs/test_parse_outputs.py @@ -7,7 +7,6 @@ OUTDIR = "test/parse-outputs/output/" EXPDIR = "test/parse-outputs/expected/" RAW_PATHS_INDIR = 'test/parse-outputs/input/oi2-raw-pathways/' -RAW_PATHS_EXPDIR = 'test/parse-outputs/expected/oi2-expected/' # DOMINO input is the concatenated module_0.html and module_1.html file from # the DOMINO output of the network dip.sif and the nodes tnfa_active_genes_file.txt @@ -39,26 +38,16 @@ def test_empty_file(self): runner.parse_output(algo, test_file, out_file) assert filecmp.cmp(OUTDIR + f"{algo}-empty-pathway.txt", EXPDIR + f"empty-pathway-expected.txt", shallow=False) - def test_oi2_correct_parse_output(self): - test_file = RAW_PATHS_INDIR + f"oi2-correct.txt" - out_file = OUTDIR + f"oi2-correct-pathway.txt" - runner.parse_output('omicsintegrator2', test_file, out_file) - assert 
filecmp.cmp(out_file, RAW_PATHS_EXPDIR + f"oi2-expected.txt", shallow=False) - - def test_oi2_empty_parse_output(self): - test_file = RAW_PATHS_INDIR + f"oi2-empty.txt" - out_file = OUTDIR + f"oi2-empty-pathway.txt" - runner.parse_output('omicsintegrator2', test_file, out_file) - assert filecmp.cmp(out_file, RAW_PATHS_EXPDIR + f"oi2-expected-empty.txt", shallow=False) - def test_oi2_miss_insolution_parse_output(self): test_file = RAW_PATHS_INDIR + f"oi2-miss-insolution.txt" out_file = OUTDIR + f"oi2-miss-insolution-pathway.txt" + runner.parse_output('omicsintegrator2', test_file, out_file) - assert filecmp.cmp(out_file, RAW_PATHS_EXPDIR + f"oi2-expected-empty.txt", shallow=False) + assert filecmp.cmp(out_file, EXPDIR + f"empty-pathway-expected.txt", shallow=False) def test_oi2_wrong_order_parse_output(self): test_file = RAW_PATHS_INDIR + f"oi2-wrong-order.txt" out_file = OUTDIR + f"oi2-wrong-order-pathway.txt" + runner.parse_output('omicsintegrator2', test_file, out_file) - assert filecmp.cmp(out_file, RAW_PATHS_EXPDIR + f"oi2-expected-empty.txt", shallow=False) + assert filecmp.cmp(out_file, EXPDIR + f"empty-pathway-expected.txt", shallow=False) From ea88614ccc732dca00eb94737a1b8ff5a8c334bd Mon Sep 17 00:00:00 2001 From: ntalluri Date: Tue, 3 Sep 2024 14:24:26 -0500 Subject: [PATCH 04/10] fixed oi2 error --- spras/omicsintegrator2.py | 4 +--- test/parse-outputs/test_parse_outputs.py | 5 +++-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/spras/omicsintegrator2.py b/spras/omicsintegrator2.py index d02f793c..98401f8e 100644 --- a/spras/omicsintegrator2.py +++ b/spras/omicsintegrator2.py @@ -148,9 +148,7 @@ def parse_output(raw_pathway_file, standardized_pathway_file): """ # Omicsintegrator2 returns a single line file if no network is found num_lines = sum(1 for line in open(raw_pathway_file)) - df = pd.read_csv(raw_pathway_file, sep='\t', header=0) - print(df) - # Omicsintegrator2 has corrupted output, list of correct column names and order + #
Omicsintegrator2 has corrupted output; list of correct column names and order correct_columns = ['protein1', 'protein2', 'cost', 'in_solution'] if num_lines < 2: diff --git a/test/parse-outputs/test_parse_outputs.py b/test/parse-outputs/test_parse_outputs.py index b5ee7539..ad9c3257 100644 --- a/test/parse-outputs/test_parse_outputs.py +++ b/test/parse-outputs/test_parse_outputs.py @@ -14,6 +14,7 @@ algorithms = ['mincostflow', 'meo', 'omicsintegrator1', 'omicsintegrator2', 'pathlinker', 'allpairs', 'domino'] + class TestParseOutputs: @classmethod def setup_class(cls): @@ -38,14 +39,14 @@ def test_empty_file(self): runner.parse_output(algo, test_file, out_file) assert filecmp.cmp(OUTDIR + f"{algo}-empty-pathway.txt", EXPDIR + f"empty-pathway-expected.txt", shallow=False) - def test_oi2_miss_insolution_parse_output(self): + def test_oi2_miss_insolution(self): test_file = RAW_PATHS_INDIR + f"oi2-miss-insolution.txt" out_file = OUTDIR + f"oi2-miss-insolution-pathway.txt" runner.parse_output('omicsintegrator2', test_file, out_file) assert filecmp.cmp(out_file, EXPDIR + f"empty-pathway-expected.txt", shallow=False) - def test_oi2_wrong_order_parse_output(self): + def test_oi2_wrong_order(self): test_file = RAW_PATHS_INDIR + f"oi2-wrong-order.txt" out_file = OUTDIR + f"oi2-wrong-order-pathway.txt" From 5623ebc836b34af9cbd53c511094bca278cbd8f3 Mon Sep 17 00:00:00 2001 From: ntalluri Date: Tue, 3 Sep 2024 14:53:24 -0500 Subject: [PATCH 05/10] fix config file --- config/config.yaml | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/config/config.yaml b/config/config.yaml index 79a9912a..b87bcd45 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -45,13 +45,13 @@ container_registry: algorithms: - name: "pathlinker" params: - include: false + include: true run1: k: range(100,201,100) - name: "omicsintegrator1" params: - include: false + include: true run1: b: [5, 6] w: np.linspace(0,5,2) @@ -69,7 +69,7 @@ algorithms: - 
name: "meo" params: - include: false + include: true run1: max_path_length: [3] local_search: ["Yes"] @@ -77,18 +77,18 @@ algorithms: - name: "mincostflow" params: - include: false + include: true run1: flow: [1] # The flow must be an int capacity: [1] - name: "allpairs" params: - include: false + include: true - name: "domino" params: - include: false + include: true run1: slice_threshold: [0.3] module_threshold: [0.05] @@ -149,28 +149,28 @@ reconstruction_settings: analysis: # Create one summary per pathway file and a single summary table for all pathways for each dataset summary: - include: false + include: true # Create output files for each pathway that can be visualized with GraphSpace graphspace: - include: false + include: true # Create Cytoscape session file with all pathway graphs for each dataset cytoscape: - include: false + include: true # Machine learning analysis (e.g. clustering) of the pathway output files for each dataset ml: # ml analysis per dataset - include: false + include: true # adds ml analysis per algorithm output # only runs for algorithms with multiple parameter combinations chosen - aggregate_per_algorithm: false + aggregate_per_algorithm: true # specify how many principal components to calculate components: 2 # boolean to show the labels on the pca graph - labels: false + labels: true # 'ward', 'complete', 'average', 'single' # if linkage: ward, must use metric: euclidean linkage: 'ward' # 'euclidean', 'manhattan', 'cosine' metric: 'euclidean' evaluation: - include: false + include: true From cb81e633b1cff8dfc4b0ad88d30544963c18edc8 Mon Sep 17 00:00:00 2001 From: ntalluri Date: Tue, 3 Sep 2024 15:28:02 -0500 Subject: [PATCH 06/10] aggregate per algo set to false --- config/config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/config.yaml b/config/config.yaml index b87bcd45..2a43ce89 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -162,7 +162,7 @@ analysis: include: true # adds ml analysis per 
algorithm output # only runs for algorithms with multiple parameter combinations chosen - aggregate_per_algorithm: true + aggregate_per_algorithm: false # specify how many principal components to calculate components: 2 # boolean to show the labels on the pca graph From 8d9ffead71ed567a70e3b821ebb31be61242ee86 Mon Sep 17 00:00:00 2001 From: ntalluri Date: Tue, 3 Sep 2024 15:38:43 -0500 Subject: [PATCH 07/10] update agg per algo --- config/config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/config.yaml b/config/config.yaml index 2a43ce89..b87bcd45 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -162,7 +162,7 @@ analysis: include: true # adds ml analysis per algorithm output # only runs for algorithms with multiple parameter combinations chosen - aggregate_per_algorithm: false + aggregate_per_algorithm: true # specify how many principal components to calculate components: 2 # boolean to show the labels on the pca graph From 0ae2bbe7c9b7c5d14b7831fcf7bfa108dc886236 Mon Sep 17 00:00:00 2001 From: ntalluri Date: Wed, 4 Sep 2024 17:57:12 -0500 Subject: [PATCH 08/10] updated code to allow for random order heading, updated test cases, and cleaned up code --- spras/omicsintegrator2.py | 8 ++++---- ...omicsintegrator2-miss-insolution-raw-pathway.txt} | 0 .../omicsintegrator2-wrong-order-raw-pathway.txt} | 0 test/parse-outputs/test_parse_outputs.py | 12 ++++++------ 4 files changed, 10 insertions(+), 10 deletions(-) rename test/parse-outputs/input/{oi2-raw-pathways/oi2-miss-insolution.txt => omicsintegrator-edge-cases/omicsintegrator2-miss-insolution-raw-pathway.txt} (100%) rename test/parse-outputs/input/{oi2-raw-pathways/oi2-wrong-order.txt => omicsintegrator-edge-cases/omicsintegrator2-wrong-order-raw-pathway.txt} (100%) diff --git a/spras/omicsintegrator2.py b/spras/omicsintegrator2.py index 98401f8e..450aa258 100644 --- a/spras/omicsintegrator2.py +++ b/spras/omicsintegrator2.py @@ -148,20 +148,20 @@ def 
parse_output(raw_pathway_file, standardized_pathway_file): """ # Omicsintegrator2 returns a single line file if no network is found num_lines = sum(1 for line in open(raw_pathway_file)) - # Omicsintegrator2 has corrupted output; list of correct column names and order - correct_columns = ['protein1', 'protein2', 'cost', 'in_solution'] + # Omicsintegrator2 has corrupted output; list of correct column names + sorted_correct_column_names = ['cost', 'in_solution', 'protein1', 'protein2'] if num_lines < 2: df = pd.DataFrame(columns=['Node1', 'Node2', 'Rank', 'Direction']) else: df = pd.read_csv(raw_pathway_file, sep='\t', header=0) - if (len(df.columns) == len(correct_columns)) and all(df.columns == correct_columns): + if sorted(df.columns) == sorted_correct_column_names: # if column header names are all correct df = df[df['in_solution'] == True] # Check whether this column can be empty before revising this line df = df.take([0, 1], axis=1) df = add_rank_column(df) df = reinsert_direction_col_undirected(df) df.columns = ['Node1', 'Node2', 'Rank', "Direction"] - else: + else: # corrupted data df = pd.DataFrame(columns=['Node1', 'Node2', 'Rank', 'Direction']) df.to_csv(standardized_pathway_file, header=True, index=False, sep='\t') diff --git a/test/parse-outputs/input/oi2-raw-pathways/oi2-miss-insolution.txt b/test/parse-outputs/input/omicsintegrator-edge-cases/omicsintegrator2-miss-insolution-raw-pathway.txt similarity index 100% rename from test/parse-outputs/input/oi2-raw-pathways/oi2-miss-insolution.txt rename to test/parse-outputs/input/omicsintegrator-edge-cases/omicsintegrator2-miss-insolution-raw-pathway.txt diff --git a/test/parse-outputs/input/oi2-raw-pathways/oi2-wrong-order.txt b/test/parse-outputs/input/omicsintegrator-edge-cases/omicsintegrator2-wrong-order-raw-pathway.txt similarity index 100% rename from test/parse-outputs/input/oi2-raw-pathways/oi2-wrong-order.txt rename to 
test/parse-outputs/input/omicsintegrator-edge-cases/omicsintegrator2-wrong-order-raw-pathway.txt diff --git a/test/parse-outputs/test_parse_outputs.py b/test/parse-outputs/test_parse_outputs.py index ad9c3257..7dfe270e 100644 --- a/test/parse-outputs/test_parse_outputs.py +++ b/test/parse-outputs/test_parse_outputs.py @@ -6,7 +6,7 @@ INDIR = "test/parse-outputs/input/" OUTDIR = "test/parse-outputs/output/" EXPDIR = "test/parse-outputs/expected/" -RAW_PATHS_INDIR = 'test/parse-outputs/input/oi2-raw-pathways/' +OI2_EDGE_CASES_INDIR = 'test/parse-outputs/input/omicsintegrator-edge-cases/' # DOMINO input is the concatenated module_0.html and module_1.html file from # the DOMINO output of the network dip.sif and the nodes tnfa_active_genes_file.txt @@ -40,15 +40,15 @@ def test_empty_file(self): assert filecmp.cmp(OUTDIR + f"{algo}-empty-pathway.txt", EXPDIR + f"empty-pathway-expected.txt", shallow=False) def test_oi2_miss_insolution(self): - test_file = RAW_PATHS_INDIR + f"oi2-miss-insolution.txt" - out_file = OUTDIR + f"oi2-miss-insolution-pathway.txt" + test_file = OI2_EDGE_CASES_INDIR + f"omicsintegrator2-miss-insolution-raw-pathway.txt" + out_file = OUTDIR + f"omicsintegrator2-miss-insolution-pathway.txt" runner.parse_output('omicsintegrator2', test_file, out_file) assert filecmp.cmp(out_file, EXPDIR + f"empty-pathway-expected.txt", shallow=False) def test_oi2_wrong_order(self): - test_file = RAW_PATHS_INDIR + f"oi2-wrong-order.txt" - out_file = OUTDIR + f"oi2-wrong-order-pathway.txt" + test_file = OI2_EDGE_CASES_INDIR + f"omicsintegrator2-wrong-order-raw-pathway.txt" + out_file = OUTDIR + f"omicsintegrator2-wrong-order-pathway.txt" runner.parse_output('omicsintegrator2', test_file, out_file) - assert filecmp.cmp(out_file, EXPDIR + f"empty-pathway-expected.txt", shallow=False) + assert filecmp.cmp(out_file, EXPDIR + f"omicsintegrator2-pathway-expected.txt", shallow=False) From c91373c89b9913a9be38ffa211d4b1f43f2ba894 Mon Sep 17 00:00:00 2001 From: ntalluri Date: 
Tue, 17 Sep 2024 17:56:34 -0500 Subject: [PATCH 09/10] updated commenting on oi2 wrapper code --- config/config.yaml | 18 +++++++++--------- spras/omicsintegrator2.py | 6 +++--- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/config/config.yaml b/config/config.yaml index b87bcd45..53a3317d 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -45,13 +45,13 @@ container_registry: algorithms: - name: "pathlinker" params: - include: true + include: false run1: k: range(100,201,100) - name: "omicsintegrator1" params: - include: true + include: false run1: b: [5, 6] w: np.linspace(0,5,2) @@ -69,7 +69,7 @@ algorithms: - name: "meo" params: - include: true + include: false run1: max_path_length: [3] local_search: ["Yes"] @@ -77,18 +77,18 @@ algorithms: - name: "mincostflow" params: - include: true + include: false run1: flow: [1] # The flow must be an int capacity: [1] - name: "allpairs" params: - include: true + include: false - name: "domino" params: - include: true + include: false run1: slice_threshold: [0.3] module_threshold: [0.05] @@ -152,14 +152,14 @@ analysis: include: true # Create output files for each pathway that can be visualized with GraphSpace graphspace: - include: true + include: false # Create Cytoscape session file with all pathway graphs for each dataset cytoscape: - include: true + include: false # Machine learning analysis (e.g. 
clustering) of the pathway output files for each dataset ml: # ml analysis per dataset - include: true + include: false # adds ml analysis per algorithm output # only runs for algorithms with multiple parameter combinations chosen aggregate_per_algorithm: true diff --git a/spras/omicsintegrator2.py b/spras/omicsintegrator2.py index 450aa258..19a8bd14 100644 --- a/spras/omicsintegrator2.py +++ b/spras/omicsintegrator2.py @@ -149,15 +149,15 @@ def parse_output(raw_pathway_file, standardized_pathway_file): # Omicsintegrator2 returns a single line file if no network is found num_lines = sum(1 for line in open(raw_pathway_file)) # Omicsintegrator2 has corrupted output; list of correct column names - sorted_correct_column_names = ['cost', 'in_solution', 'protein1', 'protein2'] + sorted_correct_column_names = ['cost', 'in_solution', 'protein1', 'protein2'] # the order of edge attributes in the NetworkX graph is not guaranteed. if num_lines < 2: df = pd.DataFrame(columns=['Node1', 'Node2', 'Rank', 'Direction']) else: df = pd.read_csv(raw_pathway_file, sep='\t', header=0) if sorted(df.columns) == sorted_correct_column_names: # if column header names are all correct - df = df[df['in_solution'] == True] # Check whether this column can be empty before revising this line - df = df.take([0, 1], axis=1) + df = df[df['in_solution'] == True] # the 'in_solution' column exists when the forest is not empty. + df = df.take([0, 1], axis=1) # the first two columns in the df will be 'protein1' and 'protein2', followed by the edge attributes. 
df = add_rank_column(df) df = reinsert_direction_col_undirected(df) df.columns = ['Node1', 'Node2', 'Rank', "Direction"] From 2e2c5c1b64cd075a3fce41347655d18ffc11590f Mon Sep 17 00:00:00 2001 From: ntalluri Date: Tue, 17 Sep 2024 17:57:34 -0500 Subject: [PATCH 10/10] revert config.yaml --- config/config.yaml | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/config/config.yaml b/config/config.yaml index 53a3317d..b87bcd45 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -45,13 +45,13 @@ container_registry: algorithms: - name: "pathlinker" params: - include: false + include: true run1: k: range(100,201,100) - name: "omicsintegrator1" params: - include: false + include: true run1: b: [5, 6] w: np.linspace(0,5,2) @@ -69,7 +69,7 @@ algorithms: - name: "meo" params: - include: false + include: true run1: max_path_length: [3] local_search: ["Yes"] @@ -77,18 +77,18 @@ algorithms: - name: "mincostflow" params: - include: false + include: true run1: flow: [1] # The flow must be an int capacity: [1] - name: "allpairs" params: - include: false + include: true - name: "domino" params: - include: false + include: true run1: slice_threshold: [0.3] module_threshold: [0.05] @@ -152,14 +152,14 @@ analysis: include: true # Create output files for each pathway that can be visualized with GraphSpace graphspace: - include: false + include: true # Create Cytoscape session file with all pathway graphs for each dataset cytoscape: - include: false + include: true # Machine learning analysis (e.g. clustering) of the pathway output files for each dataset ml: # ml analysis per dataset - include: false + include: true # adds ml analysis per algorithm output # only runs for algorithms with multiple parameter combinations chosen aggregate_per_algorithm: true