diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f189980..f078281 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -14,5 +14,4 @@ repos: rev: v1.0 hooks: - id: fmt - - id: clippy - id: cargo-check diff --git a/Cargo.toml b/Cargo.toml index fdc22f4..a3fde51 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,7 +10,7 @@ license = "MIT" name = "rustybam" readme = "README.md" repository = "https://github.com/mrvollger/rustybam" -version = "0.1.29-alpha.1" +version = "0.1.29" [[bin]] name = "rb" @@ -36,7 +36,7 @@ bio = "0.39.1" bio-types = "0.12.0" calm_io = "0.1.1" chrono = "0.4.19" -clap = {version = "3.0.0", features = ["yaml", "derive"]} +clap = {version = "3.0.0", features = ["derive"]} colored = "2.0.0" env_logger = "0.9.0" flate2 = {version = "~1", default-features = false} @@ -51,7 +51,7 @@ regex = "1.5.4" rust-htslib = "0.38.2" [build-dependencies] -clap = {version = "3.0.0", features = ["yaml", "derive"]} +clap = {version = "3.0.0", features = ["derive"]} clap_generate = "3.0.0" env_logger = "0.9.0" log = "0.4.14" diff --git a/src/cli.rs b/src/cli.rs index 240ecdc..9ca1220 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -1,12 +1,11 @@ use clap::IntoApp; -use clap::{App, AppSettings, Parser, Subcommand}; +use clap::{AppSettings, Parser, Subcommand}; #[derive(Parser, Debug)] #[clap(global_setting(AppSettings::PropagateVersion))] #[clap(global_setting(AppSettings::DeriveDisplayOrder))] #[clap(global_setting(AppSettings::InferSubcommands))] #[clap(global_setting(AppSettings::HelpExpected))] -#[clap(global_setting(AppSettings::UseLongFormatForHelpSubcommand))] #[clap(setting(AppSettings::SubcommandRequiredElseHelp))] #[clap(author, version, about)] pub struct Cli { @@ -133,6 +132,9 @@ pub enum Commands { /// Value subtracted for an insertion or deletion. #[clap(short, long, default_value_t = 1)] indel_score: i32, + /// Remove contained alignments as well as overlaps. + #[clap(short, long)] + remove_contained: bool, }, /// Orient paf records so that most of the bases are in the forward direction. /// @@ -240,6 +242,6 @@ pub fn make_cli_parse() -> Cli { Cli::parse() } -pub fn make_cli_app() -> App<'static> { - Cli::into_app() +pub fn make_cli_app() -> clap::Command<'static> { + Cli::command() } diff --git a/src/liftover.rs b/src/liftover.rs index f9784f7..6fb15e5 100644 --- a/src/liftover.rs +++ b/src/liftover.rs @@ -33,8 +33,6 @@ pub fn trim_paf_rec_to_rgn(rgn: &bed::Region, paf: &PafRecord) -> Option Option end_idx { return None; } + // update the start and end positions + trimmed_paf.t_st = paf.tpos_aln[start_idx]; + trimmed_paf.q_st = paf.qpos_aln[start_idx]; + trimmed_paf.t_en = paf.tpos_aln[end_idx]; + trimmed_paf.q_en = paf.qpos_aln[end_idx]; + // get the cigar opts trimmed_paf.cigar = PafRecord::collapse_long_cigar(&paf.subset_cigar(start_idx, end_idx)); @@ -123,7 +124,7 @@ pub fn trim_helper(name: &str, recs: &[PafRecord], rgns: &[bed::Region]) -> Vec< .iter() .cartesian_product(cur_rgns) // make all pairwise combs .par_bridge() - .filter(|(paf, rgn)| paf.paf_overlaps_rgn(rgn)) //filter to overlaping pairs + .filter(|(paf, rgn)| paf.paf_overlaps_rgn(rgn)) //filter to overlapping pairs .filter_map(|(paf, rgn)| trim_paf_rec_to_rgn(rgn, paf)) .collect(); @@ -135,7 +136,7 @@ pub fn trim_paf_by_rgns( paf_recs: &[PafRecord], invert_query: bool, ) -> Vec { - // swap qeury and ref if needed. + // swap query and ref if needed. let mut newvec = Vec::new(); let recs: &[PafRecord] = if invert_query { for rec in paf_recs.iter() { diff --git a/src/main.rs b/src/main.rs index d24d964..59e7fd9 100644 --- a/src/main.rs +++ b/src/main.rs @@ -243,9 +243,10 @@ pub fn parse_cli() { match_score, diff_score, indel_score, + remove_contained, }) => { let mut paf = paf::Paf::from_file(paf); - paf.overlapping_paf_recs(*match_score, *diff_score, *indel_score); + paf.overlapping_paf_recs(*match_score, *diff_score, *indel_score, *remove_contained); for rec in &paf.records { println!("{}", rec); } diff --git a/src/paf.rs b/src/paf.rs index bef5e20..b67c5a1 100644 --- a/src/paf.rs +++ b/src/paf.rs @@ -208,7 +208,13 @@ impl Paf { } /// Identify overlapping pairs in Paf set - pub fn overlapping_paf_recs(&mut self, match_score: i32, diff_score: i32, indel_score: i32) { + pub fn overlapping_paf_recs( + &mut self, + match_score: i32, + diff_score: i32, + indel_score: i32, + remove_contained: bool, + ) { // remove trailing indels in all cases for rec in &mut self.records { rec.remove_trailing_indels(); @@ -216,7 +222,7 @@ impl Paf { let mut overlap_pairs = Vec::new(); self.records.sort_by_key(|rec| rec.q_name.clone()); - + let mut contained_indexes = vec![false; self.records.len()]; for i in 0..(self.records.len() - 1) { let rec1 = &self.records[i]; let rgn1 = rec1.get_query_as_region(); @@ -231,11 +237,11 @@ impl Paf { j += 1; continue; } else if overlap == (rec2.q_en - rec2.q_st) { - //rec2.contained = true; - log::trace!("{}\n^is contained in another alignment", rec1); + contained_indexes[j] = true; + log::debug!("{}\n^is contained in another alignment", rec2); } else if overlap == (rec1.q_en - rec1.q_st) { - //rec1.contained = true; - log::trace!("{}\n^is contained in another alignment", rec1); + contained_indexes[i] = true; + log::debug!("{}\n^is contained in another alignment", rec1); } else { // put recs in left, right order if rec1.q_st <= rec2.q_st { @@ -274,7 +280,22 @@ impl Paf { if unseen > 0 { // recursively call for next overlap - self.overlapping_paf_recs(match_score, diff_score, indel_score); + self.overlapping_paf_recs(match_score, diff_score, indel_score, remove_contained); + } else if remove_contained { + let n_to_remove = contained_indexes.iter().filter(|&x| *x).count(); + log::info!("Removing {} contained alignments.", n_to_remove); + log::info!("{} total alignments.", self.records.len()); + let mut new_records = Vec::new(); + assert!(self.records.len() == contained_indexes.len()); + for (i, rec) in self.records.iter().enumerate() { + if !contained_indexes[i] { + new_records.push(rec.clone()); + } + } + self.records = new_records; + log::info!("{} total alignments.", self.records.len()); + // remove contained records + //self.records.retain(|rec| !rec.contained); } } @@ -520,13 +541,14 @@ impl PafRecord { /// force the tpos index to be at a match to the right (or left) pub fn tpos_to_idx_match(&self, qpos: u64, search_right: bool) -> Result { let mut idx = self.tpos_to_idx(qpos)?; + let max_idx = self.long_cigar.len(); // find the closes actual matching base if search_right { - while !matches!(self.long_cigar[idx], Match(_) | Diff(_) | Equal(_)) { + while idx < max_idx && !matches!(self.long_cigar[idx], Match(_) | Diff(_) | Equal(_)) { idx += 1; } } else { - while !matches!(self.long_cigar[idx], Match(_) | Diff(_) | Equal(_)) { + while idx > 0 && !matches!(self.long_cigar[idx], Match(_) | Diff(_) | Equal(_)) { idx -= 1; } } @@ -548,13 +570,14 @@ impl PafRecord { /// force the qpos index to be at a match to the right (or left) pub fn qpos_to_idx_match(&self, qpos: u64, search_right: bool) -> Result { let mut idx = self.qpos_to_idx(qpos)?; + let max_idx = self.long_cigar.len(); // find the closes actual matching base if (search_right && self.strand == '+') || (!search_right && self.strand == '-') { - while !matches!(self.long_cigar[idx], Match(_) | Diff(_) | Equal(_)) { + while idx < max_idx && !matches!(self.long_cigar[idx], Match(_) | Diff(_) | Equal(_)) { idx += 1; } } else { - while !matches!(self.long_cigar[idx], Match(_) | Diff(_) | Equal(_)) { + while idx > 0 && !matches!(self.long_cigar[idx], Match(_) | Diff(_) | Equal(_)) { idx -= 1; } } diff --git a/src/trim_overlap.rs b/src/trim_overlap.rs index c82c66d..4014ee2 100644 --- a/src/trim_overlap.rs +++ b/src/trim_overlap.rs @@ -159,7 +159,7 @@ mod tests { paf.records = vec![left, center, right]; pretty_print_paf(&paf); - paf.overlapping_paf_recs(1, 1, 1); + paf.overlapping_paf_recs(1, 1, 1, false); pretty_print_paf(&paf); let expected_cigars = vec!["7=", "2=1X3=1M", "2=2X2="];