Skip to content

Commit

Permalink
Add max reads in marker
Browse files: browse the repository at this point in the history
  • Loading branch information
wbazant committed Aug 24, 2021
1 parent 25cb84b commit d2f44d5
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 18 deletions.
5 changes: 3 additions & 2 deletions marker_alignments/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,13 +141,13 @@ def main(argv=sys.argv[1:]):
header = ["taxon", "coverage"]
lines = alignment_store.as_taxon_coverage()
elif options.output_type == "taxon_read_and_marker_count":
header = ["taxon", "taxon_num_reads", "taxon_num_markers"]
header = ["taxon", "taxon_num_reads", "taxon_num_markers", "taxon_max_reads_in_marker"]
lines = alignment_store.as_taxon_read_and_marker_count()
elif options.output_type == "taxon_cpm":
header = ["taxon", "cpm"]
lines = alignment_store.as_taxon_cpm(options.num_reads)
elif options.output_type == "taxon_all":
header = ["taxon", "coverage", "cpm", "taxon_num_reads", "taxon_num_markers"]
header = ["taxon", "coverage", "cpm", "taxon_num_reads", "taxon_num_markers", "taxon_max_reads_in_marker"]
lines = alignment_store.as_taxon_all(options.num_reads)

field_formats = {
Expand All @@ -160,6 +160,7 @@ def main(argv=sys.argv[1:]):
"coverage" : ":.6f",
"taxon_num_reads": ":.6f",
"taxon_num_markers": ":d",
"taxon_max_reads_in_marker": ":.6f",
}
formatter="\t".join(['{' + field_formats[field] +'}' for field in header]) + "\n"
with open(options.output_path, 'w') as f:
Expand Down
2 changes: 1 addition & 1 deletion marker_alignments/store.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def end_bulk_write(self):
'''

a_cov = "avg(marker_coverage) as coverage"
a_tnm = "sum(marker_read_count) as taxon_num_reads, count(marker) as taxon_num_markers"
a_tnm = "sum(marker_read_count) as taxon_num_reads, count(marker) as taxon_num_markers, max(marker_read_count) as taxon_max_reads_in_marker"
a_cpm = "avg(marker_coverage) / (?) * 1000000 as cpm"
taxon_coverage_query=taxon_query_template.format(a_cov, marker_coverage_query)
taxon_read_and_marker_count_query=taxon_query_template.format(a_tnm, marker_read_count_query)
Expand Down
30 changes: 15 additions & 15 deletions tests/store.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,10 @@ def test_one_read(self):
'marker_coverage': [('taxon_1', 'marker_1', 0.111)],
'marker_cpm': [('taxon_1', 'marker_1', 1.1099999999999999)],
'marker_read_count': [('taxon_1', 'marker_1', 1.0)],
'taxon_all': [('taxon_1', 0.111, 1.1099999999999999, 1.0, 1)],
'taxon_all': [('taxon_1', 0.111, 1.1099999999999999, 1.0, 1, 1.0)],
'taxon_coverage': [('taxon_1', 0.111)],
'taxon_cpm': [('taxon_1', 1.1099999999999999)],
'taxon_read_and_marker_count': [('taxon_1', 1.0, 1)]}
'taxon_read_and_marker_count': [('taxon_1', 1.0, 1, 1.0)]}
)

def test_one_read_two_markers(self):
Expand All @@ -60,10 +60,10 @@ def test_one_read_two_markers(self):
('taxon_1', 'marker_2', 3.3300000000000005)],
'marker_read_count': [('taxon_1', 'marker_1', 0.25),
('taxon_1', 'marker_2', 0.75)],
'taxon_all': [('taxon_1', 0.222, 2.2199999999999998, 1.0, 2)],
'taxon_all': [('taxon_1', 0.222, 2.2199999999999998, 1.0, 2, 0.75)],
'taxon_coverage': [('taxon_1', 0.222)],
'taxon_cpm': [('taxon_1', 2.2199999999999998)],
'taxon_read_and_marker_count': [('taxon_1', 1.0, 2)]}
'taxon_read_and_marker_count': [('taxon_1', 1.0, 2, 0.75)]}
)

def test_one_read_two_markers_with_extra_read(self):
Expand All @@ -81,10 +81,10 @@ def test_one_read_two_markers_with_extra_read(self):
('taxon_1', 'marker_2', 3.3300000000000005)],
'marker_read_count': [('taxon_1', 'marker_1', 1.25),
('taxon_1', 'marker_2', 0.75)],
'taxon_all': [('taxon_1', 0.472, 4.72, 2.0, 2)],
'taxon_all': [('taxon_1', 0.472, 4.72, 2.0, 2, 1.25)],
'taxon_coverage': [('taxon_1', 0.472)],
'taxon_cpm': [('taxon_1', 4.72)],
'taxon_read_and_marker_count': [('taxon_1', 2.0, 2)]}
'taxon_read_and_marker_count': [('taxon_1', 2.0, 2, 1.25)]}
)

def test_one_read_weight_does_not_matter(self):
Expand All @@ -96,10 +96,10 @@ def test_one_read_weight_does_not_matter(self):
'marker_coverage': [('taxon_1', 'marker_1', 0.111)],
'marker_cpm': [('taxon_1', 'marker_1', 1.1099999999999999)],
'marker_read_count': [('taxon_1', 'marker_1', 1.0)],
'taxon_all': [('taxon_1', 0.111, 1.1099999999999999, 1.0, 1)],
'taxon_all': [('taxon_1', 0.111, 1.1099999999999999, 1.0, 1, 1.0)],
'taxon_coverage': [('taxon_1', 0.111)],
'taxon_cpm': [('taxon_1', 1.1099999999999999)],
'taxon_read_and_marker_count': [('taxon_1', 1.0, 1)]}
'taxon_read_and_marker_count': [('taxon_1', 1.0, 1, 1.0)]}
)

def test_two_reads_one_marker(self):
Expand All @@ -112,10 +112,10 @@ def test_two_reads_one_marker(self):
'marker_coverage': [('taxon_1', 'marker_1', 0.223)],
'marker_cpm': [('taxon_1', 'marker_1', 2.2300000000000004)],
'marker_read_count': [('taxon_1', 'marker_1', 2.0)],
'taxon_all': [('taxon_1', 0.223, 2.2300000000000004, 2.0, 1)],
'taxon_all': [('taxon_1', 0.223, 2.2300000000000004, 2.0, 1, 2.0)],
'taxon_coverage': [('taxon_1', 0.223)],
'taxon_cpm': [('taxon_1', 2.2300000000000004)],
'taxon_read_and_marker_count': [('taxon_1', 2.0, 1)]}
'taxon_read_and_marker_count': [('taxon_1', 2.0, 1, 2.0)]}
)

def test_two_reads_two_markers(self):
Expand All @@ -132,10 +132,10 @@ def test_two_reads_two_markers(self):
('taxon_1', 'marker_2', 1.22)],
'marker_read_count': [('taxon_1', 'marker_1', 1.0),
('taxon_1', 'marker_2', 1.0)],
'taxon_all': [('taxon_1', 0.11649999999999999, 1.1649999999999998, 2.0, 2)],
'taxon_all': [('taxon_1', 0.11649999999999999, 1.1649999999999998, 2.0, 2, 1.0)],
'taxon_coverage': [('taxon_1', 0.11649999999999999)],
'taxon_cpm': [('taxon_1', 1.1649999999999998)],
'taxon_read_and_marker_count': [('taxon_1', 2.0, 2)]}
'taxon_read_and_marker_count': [('taxon_1', 2.0, 2, 1.0)]}
)

def test_two_reads_two_taxons(self):
Expand All @@ -152,12 +152,12 @@ def test_two_reads_two_taxons(self):
('taxon_2', 'marker_2', 2.2199999999999998)],
'marker_read_count': [('taxon_1', 'marker_1', 1.0),
('taxon_2', 'marker_2', 1.0)],
'taxon_all': [('taxon_1', 0.111, 1.1099999999999999, 1.0, 1),
('taxon_2', 0.222, 2.2199999999999998, 1.0, 1)],
'taxon_all': [('taxon_1', 0.111, 1.1099999999999999, 1.0, 1, 1.0),
('taxon_2', 0.222, 2.2199999999999998, 1.0, 1, 1.0)],
'taxon_coverage': [('taxon_1', 0.111), ('taxon_2', 0.222)],
'taxon_cpm': [('taxon_1', 1.1099999999999999),
('taxon_2', 2.2199999999999998)],
'taxon_read_and_marker_count': [('taxon_1', 1.0, 1), ('taxon_2', 1.0, 1)]}
'taxon_read_and_marker_count': [('taxon_1', 1.0, 1, 1.0), ('taxon_2', 1.0, 1, 1.0)]}
)


Expand Down

0 comments on commit d2f44d5

Please sign in to comment.