Skip to content

Commit

Permalink
fix: do not report m6A predictions that happen within the first 7 or …
Browse files Browse the repository at this point in the history
…last 7 bp of a read. This is so the ML model only operates on real data. No changes to other calls.
  • Loading branch information
Mitchell R. Vollger committed Nov 13, 2024
1 parent 6357469 commit a48b34a
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 5 deletions.
5 changes: 4 additions & 1 deletion src/subcommands/predict_m6a.rs
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,9 @@ where
positions: &[usize],
base_mod: &str,
) -> basemods::BaseMod {
// do not report predictions for the first and last 7 bases
let min_pos = (WINDOW / 2) as i64;
let max_pos = (record.seq_len() - WINDOW / 2) as i64;
let (modified_probabilities_forward, full_probabilities_forward, modified_bases_forward): (
Vec<u8>,
Vec<f32>,
Expand All @@ -273,7 +276,7 @@ where
.iter()
.zip(positions.iter())
.map(|(&x, &pos)| (self.float_to_u8(x), x, pos as i64))
.filter(|(ml, _, _)| *ml >= self.min_ml_value())
.filter(|(ml, _, pos)| *ml >= self.min_ml_value() && *pos >= min_pos && *pos < max_pos)
.multiunzip();

log::debug!(
Expand Down
18 changes: 14 additions & 4 deletions tests/m6a_prediction_test.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use fibertools_rs::subcommands::predict_m6a::WINDOW;
use fibertools_rs::utils::bio_io;
use fibertools_rs::utils::input_bam::FiberFilters;
use rust_htslib::bam::Reader;
Expand All @@ -7,15 +8,24 @@ fn sum_qual(bam: &mut Reader) -> usize {
let fibers = fibertools_rs::fiber::FiberseqRecords::new(bam, FiberFilters::default());
// sum up all quality scores across all fibers
let mut sum = 0;
let mut count = 0;
for fiber in fibers {
sum += fiber
let min_pos = (WINDOW / 2) as i64;
let max_pos = (fiber.record.seq_len() - WINDOW / 2) as i64;
let mut this_count = 0;
let this_qual_sum = fiber
.m6a
.qual
.into_iter()
.map(|x| x as usize)
.filter(|(st, en, _, _, _)| *st >= min_pos && *en < max_pos)
.map(|(_, _, _, qual, _)| {
this_count += 1;
qual as usize
})
.sum::<usize>();
sum += this_qual_sum;
count += this_count;
}
eprintln!("sum {:?}", sum);
eprintln!("sum {:?}; count {:?}", sum, count);
// now count for the input bam file as well
sum
}
Expand Down

0 comments on commit a48b34a

Please sign in to comment.