Skip to content

Commit

Permalink
Merge pull request #531 from pangenome/odgi_extract_fix
Browse files Browse the repository at this point in the history
`odgi extract`: manage input path range boundaries
  • Loading branch information
AndreaGuarracino authored Sep 23, 2023
2 parents 2a3e813 + 0193f2f commit de70fcd
Showing 1 changed file with 31 additions and 5 deletions.
36 changes: 31 additions & 5 deletions src/subcommand/extract_main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -477,11 +477,37 @@ namespace odgi {
if (show_progress) {
progress->increment(1);
}
algorithms::for_handle_in_path_range(
source, path_handle, path_range.begin.offset, path_range.end.offset,
[&](const handle_t& handle) {
keep_bv.set(source.get_id(handle) - shift);
});

// The extraction does not cut nodes, so the input path ranges have to be
// extended if their ranges (start, end) fall in the middle of the nodes.
bool first = true;
uint64_t new_start = 0;
uint64_t new_end = 0;

const uint64_t start = path_range.begin.offset;
const uint64_t end = path_range.end.offset;

uint64_t walked = 0;
const auto path_end = source.path_end(path_handle);
for (step_handle_t cur_step = source.path_begin(path_handle);
cur_step != path_end && walked < end; cur_step = source.get_next_step(cur_step)) {
const handle_t cur_handle = source.get_handle_of_step(cur_step);
walked += source.get_length(cur_handle);
if (walked > start) {
keep_bv.set(source.get_id(cur_handle) - shift);

if (first) {
first = false;
new_start = walked - source.get_length(cur_handle);
}
}
}
new_end = walked;

// Extend path range to entirely include the first and the last node of the range.
// This is important for producing path names with the correct path ranges.
path_range.begin.offset = new_start;
path_range.end.offset = new_end;
}
if (!pangenomic_ranges.empty()) {
uint64_t pos = 0;
Expand Down

0 comments on commit de70fcd

Please sign in to comment.