Skip to content

Commit

Permalink
Randstrobe rescue
Browse files Browse the repository at this point in the history
  • Loading branch information
marcelm committed Feb 28, 2024
1 parent 7ca428d commit 1960bef
Show file tree
Hide file tree
Showing 3 changed files with 70 additions and 0 deletions.
11 changes: 11 additions & 0 deletions src/aln.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -972,6 +972,11 @@ void align_or_map_paired(
// Find NAMs
Timer nam_timer;
auto [nonrepetitive_fraction, nams] = find_nams(query_randstrobes, index);

if (nams.empty()) {
query_randstrobes = randstrobes_query_rescue(record.seq, index_parameters);
std::tie(nonrepetitive_fraction, nams) = find_nams(query_randstrobes, index);
}
statistics.tot_find_nams += nam_timer.duration();

if (map_param.rescue_level > 1) {
Expand Down Expand Up @@ -1092,6 +1097,12 @@ void align_or_map_single(
// Find NAMs
Timer nam_timer;
auto [nonrepetitive_fraction, nams] = find_nams(query_randstrobes, index);

if (nams.empty()) {
query_randstrobes = randstrobes_query_rescue(record.seq, index_parameters);
std::tie(nonrepetitive_fraction, nams) = find_nams(query_randstrobes, index);
}

statistics.tot_find_nams += nam_timer.duration();

if (map_param.rescue_level > 1) {
Expand Down
58 changes: 58 additions & 0 deletions src/randstrobes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -253,3 +253,61 @@ QueryRandstrobeVector randstrobes_query(const std::string_view seq, const IndexP
}
return randstrobes;
}

/*
* Generate all possible strobes using a pre-computed vector of syncmers
*/
void randstrobes_rescue(const std::vector<Syncmer>& syncmers, std::vector<QueryRandstrobe>& randstrobes, bool is_rc, RandstrobeParameters parameters, int k) {
const unsigned w_min = parameters.w_min;
const unsigned w_max = parameters.w_max;
const unsigned int max_dist = parameters.max_dist;
if (w_min > w_max) {
throw std::invalid_argument("w_min is greater than w_max");
}
unsigned int strobe1_index = 0;

while (strobe1_index + w_min < syncmers.size()) {
unsigned int w_end = std::min(static_cast<size_t>(strobe1_index + w_max), syncmers.size() - 1);
auto strobe1 = syncmers[strobe1_index];
auto max_position = strobe1.position + max_dist;
unsigned int w_start = strobe1_index + w_min;
Syncmer strobe2 = strobe1;

for (auto i = w_start; i <= w_end && syncmers[i].position <= max_position; i++) {
strobe2 = syncmers[i];
randstrobes.push_back(
QueryRandstrobe{
randstrobe_hash(strobe1.hash, strobe2.hash), static_cast<uint32_t>(strobe1.position), static_cast<uint32_t>(strobe2.position + k), is_rc
}
);
}
strobe1_index++;
}
}

/*
* Generate randstrobes for a query sequence and its reverse complement.
*/
QueryRandstrobeVector randstrobes_query_rescue(const std::string_view seq, const IndexParameters& parameters) {
QueryRandstrobeVector randstrobes;
if (seq.length() < parameters.randstrobe.w_max) {
return randstrobes;
}

// Generate syncmers for the forward sequence
auto syncmers = canonical_syncmers(seq, parameters.syncmer);
if (syncmers.empty()) {
return randstrobes;
}

randstrobes_rescue(syncmers, randstrobes, false, parameters.randstrobe, parameters.syncmer.k);

std::reverse(syncmers.begin(), syncmers.end());
for (size_t i = 0; i < syncmers.size(); i++) {
syncmers[i].position = seq.length() - syncmers[i].position - parameters.syncmer.k;
}

randstrobes_rescue(syncmers, randstrobes, true, parameters.randstrobe, parameters.syncmer.k);

return randstrobes;
}
1 change: 1 addition & 0 deletions src/randstrobes.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ std::ostream& operator<<(std::ostream& os, const QueryRandstrobe& randstrobe);
using QueryRandstrobeVector = std::vector<QueryRandstrobe>;

QueryRandstrobeVector randstrobes_query(const std::string_view seq, const IndexParameters& parameters);
QueryRandstrobeVector randstrobes_query_rescue(const std::string_view seq, const IndexParameters& parameters);

struct Randstrobe {
randstrobe_hash_t hash;
Expand Down

0 comments on commit 1960bef

Please sign in to comment.