Skip to content

Commit

Permalink
Adding a new flag that allows for the removal of contained alignments…
Browse files Browse the repository at this point in the history
… in trim-paf
  • Loading branch information
mrvollger committed Mar 9, 2022
1 parent 2b8d97b commit fed758f
Show file tree
Hide file tree
Showing 7 changed files with 55 additions and 29 deletions.
1 change: 0 additions & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,5 +14,4 @@ repos:
rev: v1.0
hooks:
- id: fmt
- id: clippy
- id: cargo-check
6 changes: 3 additions & 3 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ license = "MIT"
name = "rustybam"
readme = "README.md"
repository = "https://github.com/mrvollger/rustybam"
version = "0.1.29-alpha.1"
version = "0.1.29"

[[bin]]
name = "rb"
Expand All @@ -36,7 +36,7 @@ bio = "0.39.1"
bio-types = "0.12.0"
calm_io = "0.1.1"
chrono = "0.4.19"
clap = {version = "3.0.0", features = ["yaml", "derive"]}
clap = {version = "3.0.0", features = ["derive"]}
colored = "2.0.0"
env_logger = "0.9.0"
flate2 = {version = "~1", default-features = false}
Expand All @@ -51,7 +51,7 @@ regex = "1.5.4"
rust-htslib = "0.38.2"

[build-dependencies]
clap = {version = "3.0.0", features = ["yaml", "derive"]}
clap = {version = "3.0.0", features = ["derive"]}
clap_generate = "3.0.0"
env_logger = "0.9.0"
log = "0.4.14"
10 changes: 6 additions & 4 deletions src/cli.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
use clap::IntoApp;
use clap::{App, AppSettings, Parser, Subcommand};
use clap::{AppSettings, Parser, Subcommand};

#[derive(Parser, Debug)]
#[clap(global_setting(AppSettings::PropagateVersion))]
#[clap(global_setting(AppSettings::DeriveDisplayOrder))]
#[clap(global_setting(AppSettings::InferSubcommands))]
#[clap(global_setting(AppSettings::HelpExpected))]
#[clap(global_setting(AppSettings::UseLongFormatForHelpSubcommand))]
#[clap(setting(AppSettings::SubcommandRequiredElseHelp))]
#[clap(author, version, about)]
pub struct Cli {
Expand Down Expand Up @@ -133,6 +132,9 @@ pub enum Commands {
/// Value subtracted for an insertion or deletion.
#[clap(short, long, default_value_t = 1)]
indel_score: i32,
/// Remove contained alignments as well as overlaps.
#[clap(short, long)]
remove_contained: bool,
},
/// Orient paf records so that most of the bases are in the forward direction.
///
Expand Down Expand Up @@ -240,6 +242,6 @@ pub fn make_cli_parse() -> Cli {
Cli::parse()
}

pub fn make_cli_app() -> App<'static> {
Cli::into_app()
pub fn make_cli_app() -> clap::Command<'static> {
Cli::command()
}
17 changes: 9 additions & 8 deletions src/liftover.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,6 @@ pub fn trim_paf_rec_to_rgn(rgn: &bed::Region, paf: &PafRecord) -> Option<PafReco
trimmed_paf.t_st, rgn, paf
),
};
trimmed_paf.t_st = paf.tpos_aln[start_idx];
trimmed_paf.q_st = paf.qpos_aln[start_idx];

// index at the end of trimmed alignment
trimmed_paf.t_en = cmp::min(rgn.en, paf.t_en);
Expand All @@ -49,15 +47,18 @@ pub fn trim_paf_rec_to_rgn(rgn: &bed::Region, paf: &PafRecord) -> Option<PafReco
paf
),
};
trimmed_paf.t_en = paf.tpos_aln[end_idx];
trimmed_paf.q_en = paf.qpos_aln[end_idx];

// if this is the case then the whole internal region is indel
// TODO check to make sure this is true
// if this is the case then the whole internal regions is an indel
if start_idx > end_idx {
return None;
}

// update the start and end positions
trimmed_paf.t_st = paf.tpos_aln[start_idx];
trimmed_paf.q_st = paf.qpos_aln[start_idx];
trimmed_paf.t_en = paf.tpos_aln[end_idx];
trimmed_paf.q_en = paf.qpos_aln[end_idx];

// get the cigar opts
trimmed_paf.cigar = PafRecord::collapse_long_cigar(&paf.subset_cigar(start_idx, end_idx));

Expand Down Expand Up @@ -123,7 +124,7 @@ pub fn trim_helper(name: &str, recs: &[PafRecord], rgns: &[bed::Region]) -> Vec<
.iter()
.cartesian_product(cur_rgns) // make all pairwise combs
.par_bridge()
.filter(|(paf, rgn)| paf.paf_overlaps_rgn(rgn)) //filter to overlaping pairs
.filter(|(paf, rgn)| paf.paf_overlaps_rgn(rgn)) //filter to overlapping pairs
.filter_map(|(paf, rgn)| trim_paf_rec_to_rgn(rgn, paf))
.collect();

Expand All @@ -135,7 +136,7 @@ pub fn trim_paf_by_rgns(
paf_recs: &[PafRecord],
invert_query: bool,
) -> Vec<PafRecord> {
// swap qeury and ref if needed.
// swap query and ref if needed.
let mut newvec = Vec::new();
let recs: &[PafRecord] = if invert_query {
for rec in paf_recs.iter() {
Expand Down
3 changes: 2 additions & 1 deletion src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -243,9 +243,10 @@ pub fn parse_cli() {
match_score,
diff_score,
indel_score,
remove_contained,
}) => {
let mut paf = paf::Paf::from_file(paf);
paf.overlapping_paf_recs(*match_score, *diff_score, *indel_score);
paf.overlapping_paf_recs(*match_score, *diff_score, *indel_score, *remove_contained);
for rec in &paf.records {
println!("{}", rec);
}
Expand Down
45 changes: 34 additions & 11 deletions src/paf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -208,15 +208,21 @@ impl Paf {
}

/// Identify overlapping pairs in Paf set
pub fn overlapping_paf_recs(&mut self, match_score: i32, diff_score: i32, indel_score: i32) {
pub fn overlapping_paf_recs(
&mut self,
match_score: i32,
diff_score: i32,
indel_score: i32,
remove_contained: bool,
) {
// remove trailing indels in all cases
for rec in &mut self.records {
rec.remove_trailing_indels();
}

let mut overlap_pairs = Vec::new();
self.records.sort_by_key(|rec| rec.q_name.clone());

let mut contained_indexes = vec![false; self.records.len()];
for i in 0..(self.records.len() - 1) {
let rec1 = &self.records[i];
let rgn1 = rec1.get_query_as_region();
Expand All @@ -231,11 +237,11 @@ impl Paf {
j += 1;
continue;
} else if overlap == (rec2.q_en - rec2.q_st) {
//rec2.contained = true;
log::trace!("{}\n^is contained in another alignment", rec1);
contained_indexes[j] = true;
log::debug!("{}\n^is contained in another alignment", rec2);
} else if overlap == (rec1.q_en - rec1.q_st) {
//rec1.contained = true;
log::trace!("{}\n^is contained in another alignment", rec1);
contained_indexes[i] = true;
log::debug!("{}\n^is contained in another alignment", rec1);
} else {
// put recs in left, right order
if rec1.q_st <= rec2.q_st {
Expand Down Expand Up @@ -274,7 +280,22 @@ impl Paf {

if unseen > 0 {
// recursively call for next overlap
self.overlapping_paf_recs(match_score, diff_score, indel_score);
self.overlapping_paf_recs(match_score, diff_score, indel_score, remove_contained);
} else if remove_contained {
let n_to_remove = contained_indexes.iter().filter(|&x| *x).count();
log::info!("Removing {} contained alignments.", n_to_remove);
log::info!("{} total alignments.", self.records.len());
let mut new_records = Vec::new();
assert!(self.records.len() == contained_indexes.len());
for (i, rec) in self.records.iter().enumerate() {
if !contained_indexes[i] {
new_records.push(rec.clone());
}
}
self.records = new_records;
log::info!("{} total alignments.", self.records.len());
// remove contained records
//self.records.retain(|rec| !rec.contained);
}
}

Expand Down Expand Up @@ -520,13 +541,14 @@ impl PafRecord {
/// force the tpos index to be at a match to the right (or left)
pub fn tpos_to_idx_match(&self, qpos: u64, search_right: bool) -> Result<usize, usize> {
let mut idx = self.tpos_to_idx(qpos)?;
let max_idx = self.long_cigar.len();
// find the closes actual matching base
if search_right {
while !matches!(self.long_cigar[idx], Match(_) | Diff(_) | Equal(_)) {
while idx < max_idx && !matches!(self.long_cigar[idx], Match(_) | Diff(_) | Equal(_)) {
idx += 1;
}
} else {
while !matches!(self.long_cigar[idx], Match(_) | Diff(_) | Equal(_)) {
while idx > 0 && !matches!(self.long_cigar[idx], Match(_) | Diff(_) | Equal(_)) {
idx -= 1;
}
}
Expand All @@ -548,13 +570,14 @@ impl PafRecord {
/// force the qpos index to be at a match to the right (or left)
pub fn qpos_to_idx_match(&self, qpos: u64, search_right: bool) -> Result<usize, usize> {
let mut idx = self.qpos_to_idx(qpos)?;
let max_idx = self.long_cigar.len();
// find the closes actual matching base
if (search_right && self.strand == '+') || (!search_right && self.strand == '-') {
while !matches!(self.long_cigar[idx], Match(_) | Diff(_) | Equal(_)) {
while idx < max_idx && !matches!(self.long_cigar[idx], Match(_) | Diff(_) | Equal(_)) {
idx += 1;
}
} else {
while !matches!(self.long_cigar[idx], Match(_) | Diff(_) | Equal(_)) {
while idx > 0 && !matches!(self.long_cigar[idx], Match(_) | Diff(_) | Equal(_)) {
idx -= 1;
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/trim_overlap.rs
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ mod tests {
paf.records = vec![left, center, right];

pretty_print_paf(&paf);
paf.overlapping_paf_recs(1, 1, 1);
paf.overlapping_paf_recs(1, 1, 1, false);
pretty_print_paf(&paf);

let expected_cigars = vec!["7=", "2=1X3=1M", "2=2X2="];
Expand Down

0 comments on commit fed758f

Please sign in to comment.