Skip to content

Commit

Permalink
Merge pull request #45 from pangenome/less_memory_partition
Browse files Browse the repository at this point in the history
Do not store CIGAR strings during partitioning
  • Loading branch information
AndreaGuarracino authored Feb 2, 2025
2 parents a620f08 + 3089553 commit cb622c6
Show file tree
Hide file tree
Showing 4 changed files with 68 additions and 46 deletions.
31 changes: 19 additions & 12 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

66 changes: 38 additions & 28 deletions src/impg.rs
Original file line number Diff line number Diff line change
Expand Up @@ -364,7 +364,8 @@ impl Impg {
let result = project_target_range_through_alignment(
(range_start, range_end),
(metadata.target_start, metadata.target_end, metadata.query_start, metadata.query_end, metadata.strand),
&metadata.get_cigar_ops(&self.paf_file, self.paf_gzi_index.as_ref())
&metadata.get_cigar_ops(&self.paf_file, self.paf_gzi_index.as_ref()),
true
);
if let Some((adjusted_query_start, adjusted_query_end, adjusted_cigar, adjusted_target_start, adjusted_target_end)) = result {
let adjusted_interval = (
Expand Down Expand Up @@ -399,6 +400,7 @@ impl Impg {
max_depth: u16,
min_transitive_region_size: i32,
min_distance_between_ranges: i32,
store_cigar: bool
) -> Vec<AdjustedInterval> {
let mut results = Vec::new();
// Add the input range to the results
Expand All @@ -408,7 +410,7 @@ impl Impg {
last: range_end,
metadata: target_id,
},
vec![CigarOp::new(range_end - range_start, '=')],
if store_cigar { vec![CigarOp::new(range_end - range_start, '=')] } else { Vec::new() },
Interval {
first: range_start,
last: range_end,
Expand Down Expand Up @@ -446,7 +448,8 @@ impl Impg {
let result = project_target_range_through_alignment(
(current_target_start, current_target_end),
(metadata.target_start, metadata.target_end, metadata.query_start, metadata.query_end, metadata.strand),
&metadata.get_cigar_ops(&self.paf_file, self.paf_gzi_index.as_ref())
&metadata.get_cigar_ops(&self.paf_file, self.paf_gzi_index.as_ref()),
store_cigar
);
if let Some((adjusted_query_start, adjusted_query_end, adjusted_cigar, adjusted_target_start, adjusted_target_end)) = result {
let adjusted_interval = (
Expand Down Expand Up @@ -544,7 +547,8 @@ impl Impg {
fn project_target_range_through_alignment(
requested_target_range: (i32, i32),
record: (i32, i32, i32, i32, Strand),
cigar_ops: &[CigarOp]
cigar_ops: &[CigarOp],
store_cigar: bool
) -> Option<(i32, i32, Vec<CigarOp>, i32, i32)> {
let (target_start, target_end, query_start, query_end, strand) = record;

Expand Down Expand Up @@ -639,17 +643,23 @@ fn project_target_range_through_alignment(
// projected_query_start == projected_query_end in deletions in the query
// projected_target_start == projected_target_end in insertions in the query
(found_overlap && projected_query_start != projected_query_end && projected_target_start != projected_target_end).then(|| {
let mut projected_cigar_ops = cigar_ops[first_op_idx..last_op_idx].to_vec();

// Adjust first operation length
if first_op_offset > 0 {
projected_cigar_ops[0].adjust_len(-first_op_offset);
}
let projected_cigar_ops = if store_cigar {
let mut projected_cigar_ops = cigar_ops[first_op_idx..last_op_idx].to_vec();

// Adjust first operation length
if first_op_offset > 0 {
projected_cigar_ops[0].adjust_len(-first_op_offset);
}

// Adjust last operation length
if last_op_remaining < 0 {
projected_cigar_ops[last_op_idx - first_op_idx - 1].adjust_len(last_op_remaining);
}
// Adjust last operation length
if last_op_remaining < 0 {
projected_cigar_ops[last_op_idx - first_op_idx - 1].adjust_len(last_op_remaining);
}

projected_cigar_ops
} else {
Vec::new()
};

(
projected_query_start,
Expand Down Expand Up @@ -776,7 +786,7 @@ mod tests {
let target_range = (100, 200);
let record = (100, 200, 0, 100, Strand::Forward);
let cigar_ops = vec![CigarOp::new(100, '=')];
let result = project_target_range_through_alignment(target_range, record, &cigar_ops).unwrap();
let result = project_target_range_through_alignment(target_range, record, &cigar_ops, true).unwrap();

assert_eq!(result, (0, 100, cigar_ops.clone(), 100, 200));
}
Expand All @@ -786,7 +796,7 @@ mod tests {
let target_range = (100, 200);
let record = (100, 200, 0, 100, Strand::Reverse);
let cigar_ops = vec![CigarOp::new(100, '=')];
let result = project_target_range_through_alignment(target_range, record, &cigar_ops).unwrap();
let result = project_target_range_through_alignment(target_range, record, &cigar_ops, true).unwrap();

assert_eq!(result, (100, 0, cigar_ops.clone(), 100, 200));
}
Expand All @@ -803,15 +813,15 @@ mod tests {
];
let base = (0, 100, 50, 200, Strand::Forward);
{
let result = project_target_range_through_alignment((0, 100), base, &cigar_ops).unwrap();
let result = project_target_range_through_alignment((0, 100), base, &cigar_ops, true).unwrap();
assert_eq!(result, (50, 200, cigar_ops.clone(), 0, 100));
}
{
let result = project_target_range_through_alignment((50, 55), base, &cigar_ops).unwrap();
let result = project_target_range_through_alignment((50, 55), base, &cigar_ops, true).unwrap();
assert_eq!(result, (100, 105, vec![CigarOp::new(5, '=')], 50, 55));
}
{
let result = project_target_range_through_alignment((50, 64), base, &cigar_ops).unwrap();
let result = project_target_range_through_alignment((50, 64), base, &cigar_ops, true).unwrap();
assert_eq!(result, (100, 114, vec![CigarOp::new(14, '=')], 50, 64));
}
// We no longer output empty target ranges
Expand All @@ -820,15 +830,15 @@ mod tests {
// assert_eq!(result, (115, 165, vec![CigarOp::new(50, 'I')], 65, 65));
// }
{
let result = project_target_range_through_alignment((50, 65), base, &cigar_ops).unwrap();
let result = project_target_range_through_alignment((50, 65), base, &cigar_ops, true).unwrap();
let cigar_ops = vec![
CigarOp::new(15, '='),
CigarOp::new(50, 'I')
];
assert_eq!(result, (100, 165, cigar_ops, 50, 65));
}
{
let result = project_target_range_through_alignment((50, 66), base, &cigar_ops).unwrap();
let result = project_target_range_through_alignment((50, 66), base, &cigar_ops, true).unwrap();
let cigar_ops = vec![
CigarOp::new(15, '='),
CigarOp::new(50, 'I'),
Expand All @@ -837,7 +847,7 @@ mod tests {
assert_eq!(result, (100, 166, cigar_ops, 50, 66));
}
{
let result = project_target_range_through_alignment((70, 95), base, &cigar_ops).unwrap();
let result = project_target_range_through_alignment((70, 95), base, &cigar_ops, true).unwrap();
assert_eq!(result, (170, 195, vec![CigarOp::new(25, '=')], 70, 95));
}
}
Expand All @@ -848,7 +858,7 @@ mod tests {
let target_range = (100, 200);
let record = (100, 200, 100, 200, Strand::Forward);
let cigar_ops = vec![CigarOp::new(100, '=')];
let (query_start, query_end, cigar, target_start, target_end) = project_target_range_through_alignment(target_range, record, &cigar_ops).unwrap();
let (query_start, query_end, cigar, target_start, target_end) = project_target_range_through_alignment(target_range, record, &cigar_ops, true).unwrap();
assert_eq!((query_start, query_end, cigar, target_start, target_end), (100, 200, vec![CigarOp::new(100, '=')], 100, 200));
}

Expand All @@ -858,7 +868,7 @@ mod tests {
let target_range = (100, 200);
let record = (100, 200, 100, 200, Strand::Reverse);
let cigar_ops = vec![CigarOp::new(100, '=')];
let (query_start, query_end, cigar, target_start, target_end) = project_target_range_through_alignment(target_range, record, &cigar_ops).unwrap();
let (query_start, query_end, cigar, target_start, target_end) = project_target_range_through_alignment(target_range, record, &cigar_ops, true).unwrap();
assert_eq!((query_start, query_end, cigar, target_start, target_end), (200, 100, vec![CigarOp::new(100, '=')], 100, 200)); // Adjust for reverse calculation
}

Expand All @@ -872,7 +882,7 @@ mod tests {
CigarOp::new(10, 'I'), // Insertion
CigarOp::new(50, '='), // Match
];
let (start, end, cigar, _, _) = project_target_range_through_alignment(target_range, record, &cigar_ops).unwrap();
let (start, end, cigar, _, _) = project_target_range_through_alignment(target_range, record, &cigar_ops, true).unwrap();
assert_eq!((start, end, cigar), (50, 160, cigar_ops));
}

Expand All @@ -886,7 +896,7 @@ mod tests {
CigarOp::new(10, 'D'), // Deletion
CigarOp::new(40, '='), // Match
];
let (start, end, cigar, _, _) = project_target_range_through_alignment(target_range, record, &cigar_ops).unwrap();
let (start, end, cigar, _, _) = project_target_range_through_alignment(target_range, record, &cigar_ops, true).unwrap();
assert_eq!((start, end, cigar), (50, 140, cigar_ops));
}

Expand All @@ -901,7 +911,7 @@ mod tests {
CigarOp::new(10, 'I'), // 150, 250
CigarOp::new(40, '='), // 150, 250
];
let (start, end, cigar, _, _) = project_target_range_through_alignment(target_range, record, &cigar_ops).unwrap();
let (start, end, cigar, _, _) = project_target_range_through_alignment(target_range, record, &cigar_ops, true).unwrap();
let cigar_ops = vec![
CigarOp::new(10, 'D'), // 150, 260
CigarOp::new(10, 'I'), // 150, 250
Expand All @@ -924,7 +934,7 @@ mod tests {
CigarOp::new(10, 'I'), // Insertion in query
CigarOp::new(10, '='), // Match
];
let (query_start, query_end, cigar, target_start, target_end) = project_target_range_through_alignment(target_range, record, &cigar_ops).unwrap();
let (query_start, query_end, cigar, target_start, target_end) = project_target_range_through_alignment(target_range, record, &cigar_ops, true).unwrap();
assert_eq!((query_start, query_end, cigar, target_start, target_end), (0, 10, vec![CigarOp::new(10, '=')], 0, 10));
}

Expand Down
2 changes: 1 addition & 1 deletion src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -341,7 +341,7 @@ fn perform_query(
panic!("Target range end ({}) exceeds the target sequence length ({})", target_end, target_length);
}
if transitive {
impg.query_transitive(target_id, target_start, target_end, None, max_depth, min_transitive_region_size, min_distance_between_ranges)
impg.query_transitive(target_id, target_start, target_end, None, max_depth, min_transitive_region_size, min_distance_between_ranges, true)
} else {
impg.query(target_id, target_start, target_end)
}
Expand Down
15 changes: 10 additions & 5 deletions src/partition.rs
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ pub fn partition_alignments(
info!("Partitioning");

while !windows.is_empty() {
for (seq_id, start, end) in windows.iter() {
for (seq_id, start, end) in windows.iter() {
let chrom = impg.seq_index.get_name(*seq_id).unwrap();

if debug {
Expand Down Expand Up @@ -125,7 +125,12 @@ pub fn partition_alignments(

// Query overlaps for current window
//let query_start = Instant::now();
let mut overlaps = impg.query_transitive(*seq_id, *start, *end, Some(&masked_regions), max_depth, min_transitive_region_size, min_distance_between_ranges);
let mut overlaps = impg.query_transitive(
*seq_id, *start, *end,
Some(&masked_regions),
max_depth, min_transitive_region_size, min_distance_between_ranges,
false // Don't store CIGAR strings during partitioning
);
//let query_time = query_start.elapsed();
debug!(" Collected {} query overlaps", overlaps.len());

Expand Down Expand Up @@ -248,7 +253,7 @@ fn subtract_masked_regions(
) -> Vec<(Interval<u32>, Vec<CigarOp>, Interval<u32>)> {
let mut result = Vec::new();

for (query_interval, cigar, target_interval) in overlaps.drain(..) {
for (query_interval, _, target_interval) in overlaps.drain(..) {
// Get masked regions for this sequence
if let Some(masks) = masked_regions.get(&query_interval.metadata) {
let (start, end) = if query_interval.first <= query_interval.last {
Expand Down Expand Up @@ -306,11 +311,11 @@ fn subtract_masked_regions(
metadata: target_interval.metadata,
};

result.push((new_query, cigar.clone(), new_target));
result.push((new_query, Vec::new(), new_target));
}
} else {
// No masks for this sequence - keep original interval
result.push((query_interval, cigar, target_interval));
result.push((query_interval, Vec::new(), target_interval));
}
}

Expand Down

0 comments on commit cb622c6

Please sign in to comment.