diff --git a/marker_alignments/main.py b/marker_alignments/main.py index a2271e6..e02c49d 100644 --- a/marker_alignments/main.py +++ b/marker_alignments/main.py @@ -141,13 +141,13 @@ def main(argv=sys.argv[1:]): header = ["taxon", "coverage"] lines = alignment_store.as_taxon_coverage() elif options.output_type == "taxon_read_and_marker_count": - header = ["taxon", "taxon_num_reads", "taxon_num_markers"] + header = ["taxon", "taxon_num_reads", "taxon_num_markers", "taxon_max_reads_in_marker"] lines = alignment_store.as_taxon_read_and_marker_count() elif options.output_type == "taxon_cpm": header = ["taxon", "cpm"] lines = alignment_store.as_taxon_cpm(options.num_reads) elif options.output_type == "taxon_all": - header = ["taxon", "coverage", "cpm", "taxon_num_reads", "taxon_num_markers"] + header = ["taxon", "coverage", "cpm", "taxon_num_reads", "taxon_num_markers", "taxon_max_reads_in_marker"] lines = alignment_store.as_taxon_all(options.num_reads) field_formats = { @@ -160,6 +160,7 @@ def main(argv=sys.argv[1:]): "coverage" : ":.6f", "taxon_num_reads": ":.6f", "taxon_num_markers": ":d", + "taxon_max_reads_in_marker": ":.6f", } formatter="\t".join(['{' + field_formats[field] +'}' for field in header]) + "\n" with open(options.output_path, 'w') as f: diff --git a/marker_alignments/store.py b/marker_alignments/store.py index c0aba13..2e86694 100644 --- a/marker_alignments/store.py +++ b/marker_alignments/store.py @@ -67,7 +67,7 @@ def end_bulk_write(self): ''' a_cov = "avg(marker_coverage) as coverage" -a_tnm = "sum(marker_read_count) as taxon_num_reads, count(marker) as taxon_num_markers" +a_tnm = "sum(marker_read_count) as taxon_num_reads, count(marker) as taxon_num_markers, max(marker_read_count) as taxon_max_reads_in_marker" a_cpm = "avg(marker_coverage) / (?) * 1000000 as cpm" taxon_coverage_query=taxon_query_template.format(a_cov, marker_coverage_query) taxon_read_and_marker_count_query=taxon_query_template.format(a_tnm, marker_read_count_query) diff --git a/tests/store.py b/tests/store.py index a148c48..68a6c48 100644 --- a/tests/store.py +++ b/tests/store.py @@ -40,10 +40,10 @@ def test_one_read(self): 'marker_coverage': [('taxon_1', 'marker_1', 0.111)], 'marker_cpm': [('taxon_1', 'marker_1', 1.1099999999999999)], 'marker_read_count': [('taxon_1', 'marker_1', 1.0)], - 'taxon_all': [('taxon_1', 0.111, 1.1099999999999999, 1.0, 1)], + 'taxon_all': [('taxon_1', 0.111, 1.1099999999999999, 1.0, 1, 1.0)], 'taxon_coverage': [('taxon_1', 0.111)], 'taxon_cpm': [('taxon_1', 1.1099999999999999)], - 'taxon_read_and_marker_count': [('taxon_1', 1.0, 1)]} + 'taxon_read_and_marker_count': [('taxon_1', 1.0, 1, 1.0)]} ) def test_one_read_two_markers(self): @@ -60,10 +60,10 @@ def test_one_read_two_markers(self): ('taxon_1', 'marker_2', 3.3300000000000005)], 'marker_read_count': [('taxon_1', 'marker_1', 0.25), ('taxon_1', 'marker_2', 0.75)], - 'taxon_all': [('taxon_1', 0.222, 2.2199999999999998, 1.0, 2)], + 'taxon_all': [('taxon_1', 0.222, 2.2199999999999998, 1.0, 2, 0.75)], 'taxon_coverage': [('taxon_1', 0.222)], 'taxon_cpm': [('taxon_1', 2.2199999999999998)], - 'taxon_read_and_marker_count': [('taxon_1', 1.0, 2)]} + 'taxon_read_and_marker_count': [('taxon_1', 1.0, 2, 0.75)]} ) def test_one_read_two_markers_with_extra_read(self): @@ -81,10 +81,10 @@ def test_one_read_two_markers_with_extra_read(self): ('taxon_1', 'marker_2', 3.3300000000000005)], 'marker_read_count': [('taxon_1', 'marker_1', 1.25), ('taxon_1', 'marker_2', 0.75)], - 'taxon_all': [('taxon_1', 0.472, 4.72, 2.0, 2)], + 'taxon_all': [('taxon_1', 0.472, 4.72, 2.0, 2, 1.25)], 'taxon_coverage': [('taxon_1', 0.472)], 'taxon_cpm': [('taxon_1', 4.72)], - 'taxon_read_and_marker_count': [('taxon_1', 2.0, 2)]} + 'taxon_read_and_marker_count': [('taxon_1', 2.0, 2, 1.25)]} ) def test_one_read_weight_does_not_matter(self): @@ -96,10 +96,10 @@ def test_one_read_weight_does_not_matter(self): 'marker_coverage': [('taxon_1', 'marker_1', 0.111)], 'marker_cpm': [('taxon_1', 'marker_1', 1.1099999999999999)], 'marker_read_count': [('taxon_1', 'marker_1', 1.0)], - 'taxon_all': [('taxon_1', 0.111, 1.1099999999999999, 1.0, 1)], + 'taxon_all': [('taxon_1', 0.111, 1.1099999999999999, 1.0, 1, 1.0)], 'taxon_coverage': [('taxon_1', 0.111)], 'taxon_cpm': [('taxon_1', 1.1099999999999999)], - 'taxon_read_and_marker_count': [('taxon_1', 1.0, 1)]} + 'taxon_read_and_marker_count': [('taxon_1', 1.0, 1, 1.0)]} ) def test_two_reads_one_marker(self): @@ -112,10 +112,10 @@ def test_two_reads_one_marker(self): 'marker_coverage': [('taxon_1', 'marker_1', 0.223)], 'marker_cpm': [('taxon_1', 'marker_1', 2.2300000000000004)], 'marker_read_count': [('taxon_1', 'marker_1', 2.0)], - 'taxon_all': [('taxon_1', 0.223, 2.2300000000000004, 2.0, 1)], + 'taxon_all': [('taxon_1', 0.223, 2.2300000000000004, 2.0, 1, 2.0)], 'taxon_coverage': [('taxon_1', 0.223)], 'taxon_cpm': [('taxon_1', 2.2300000000000004)], - 'taxon_read_and_marker_count': [('taxon_1', 2.0, 1)]} + 'taxon_read_and_marker_count': [('taxon_1', 2.0, 1, 2.0)]} ) def test_two_reads_two_markers(self): @@ -132,10 +132,10 @@ def test_two_reads_two_markers(self): ('taxon_1', 'marker_2', 1.22)], 'marker_read_count': [('taxon_1', 'marker_1', 1.0), ('taxon_1', 'marker_2', 1.0)], - 'taxon_all': [('taxon_1', 0.11649999999999999, 1.1649999999999998, 2.0, 2)], + 'taxon_all': [('taxon_1', 0.11649999999999999, 1.1649999999999998, 2.0, 2, 1.0)], 'taxon_coverage': [('taxon_1', 0.11649999999999999)], 'taxon_cpm': [('taxon_1', 1.1649999999999998)], - 'taxon_read_and_marker_count': [('taxon_1', 2.0, 2)]} + 'taxon_read_and_marker_count': [('taxon_1', 2.0, 2, 1.0)]} ) def test_two_reads_two_taxons(self): @@ -152,12 +152,12 @@ def test_two_reads_two_taxons(self): ('taxon_2', 'marker_2', 2.2199999999999998)], 'marker_read_count': [('taxon_1', 'marker_1', 1.0), ('taxon_2', 'marker_2', 1.0)], - 'taxon_all': [('taxon_1', 0.111, 1.1099999999999999, 1.0, 1), - ('taxon_2', 0.222, 2.2199999999999998, 1.0, 1)], + 'taxon_all': [('taxon_1', 0.111, 1.1099999999999999, 1.0, 1, 1.0), + ('taxon_2', 0.222, 2.2199999999999998, 1.0, 1, 1.0)], 'taxon_coverage': [('taxon_1', 0.111), ('taxon_2', 0.222)], 'taxon_cpm': [('taxon_1', 1.1099999999999999), ('taxon_2', 2.2199999999999998)], - 'taxon_read_and_marker_count': [('taxon_1', 1.0, 1), ('taxon_2', 1.0, 1)]} + 'taxon_read_and_marker_count': [('taxon_1', 1.0, 1, 1.0), ('taxon_2', 1.0, 1, 1.0)]} )