Skip to content

Commit

Permalink
Merge pull request #4421 from vgteam/path-normalize
Browse files Browse the repository at this point in the history
revert from distance index to snarl manager in path normalizer
  • Loading branch information
glennhickey authored Nov 3, 2024
2 parents ee562cc + 4f55782 commit 0097fb1
Show file tree
Hide file tree
Showing 3 changed files with 59 additions and 28 deletions.
2 changes: 1 addition & 1 deletion src/subcommand/paths_main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -631,7 +631,7 @@ int main_paths(int argc, char** argv) {
for_each_selected_path([&](const path_handle_t& path_handle) {
selected_paths.insert(path_handle);
});
merge_equivalent_traversals_in_graph(mutable_graph, selected_paths);
merge_equivalent_traversals_in_graph(mutable_graph, selected_paths, true);
}
if (!to_destroy.empty()) {
mutable_graph->destroy_paths(to_destroy);
Expand Down
80 changes: 54 additions & 26 deletions src/traversal_clusters.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#include "traversal_finder.hpp"
#include "integrated_snarl_finder.hpp"
#include "snarl_distance_index.hpp"
#include "snarls.hpp"

//#define debug

Expand Down Expand Up @@ -379,37 +380,64 @@ static void merge_equivalent_traversals_in_snarl(MutablePathHandleGraph* graph,
}
}

void merge_equivalent_traversals_in_graph(MutablePathHandleGraph* graph, const unordered_set<path_handle_t>& selected_paths) {
// compute the snarls
SnarlDistanceIndex distance_index;
{
IntegratedSnarlFinder snarl_finder(*graph);
fill_in_distance_index(&distance_index, graph, &snarl_finder, 0);
}
void merge_equivalent_traversals_in_graph(MutablePathHandleGraph* graph, const unordered_set<path_handle_t>& selected_paths,
bool use_snarl_manager) {

// only consider embedded paths that span snarl
PathTraversalFinder path_trav_finder(*graph);

// do every snarl top-down. this is because it's possible (tho probably rare) for a child snarl to
// be redundant after normalizing its parent. don't think the opposite (normalizing child)
// causes redundant parent.. todo: can we guarantee?!
net_handle_t root = distance_index.get_root();
deque<net_handle_t> queue = {root};

while (!queue.empty()) {
net_handle_t net_handle = queue.front();
queue.pop_front();
if (distance_index.is_snarl(net_handle)) {
net_handle_t start_bound = distance_index.get_bound(net_handle, false, true);
net_handle_t end_bound = distance_index.get_bound(net_handle, true, false);
handle_t start_handle = distance_index.get_handle(start_bound, graph);
handle_t end_handle = distance_index.get_handle(end_bound, graph);
if (use_snarl_manager) {
// compute the snarls using the old snarl manager
IntegratedSnarlFinder finder(*graph);
SnarlManager snarl_manager(std::move(finder.find_snarls_parallel()));

deque<const Snarl*> queue;
snarl_manager.for_each_top_level_snarl([&](const Snarl* snarl) {
queue.push_back(snarl);
});

while (!queue.empty()) {
const Snarl* snarl = queue.front();
queue.pop_front();
handle_t start_handle = graph->get_handle(snarl->start().node_id(), snarl->start().backward());
handle_t end_handle = graph->get_handle(snarl->end().node_id(), snarl->end().backward());
merge_equivalent_traversals_in_snarl(graph, selected_paths, path_trav_finder, start_handle, end_handle);
}
if (net_handle == root || distance_index.is_snarl(net_handle) || distance_index.is_chain(net_handle)) {
distance_index.for_each_child(net_handle, [&](net_handle_t child_handle) {
queue.push_back(child_handle);
});
const vector<const Snarl*>& children = snarl_manager.children_of(snarl);
for (const Snarl* child : children) {
queue.push_back(child);
}
}
} else {
// compute the snarls using the distance index
// this is what we want to do going forward since it uses the new api, no protobuf etc,
// but unfortunately it seems way slower on some graphs, hence
SnarlDistanceIndex distance_index;
{
IntegratedSnarlFinder snarl_finder(*graph);
fill_in_distance_index(&distance_index, graph, &snarl_finder, 0);
}

// do every snarl top-down. this is because it's possible (tho probably rare) for a child snarl to
// be redundant after normalizing its parent. don't think the opposite (normalizing child)
// causes redundant parent.. todo: can we guarantee?!
net_handle_t root = distance_index.get_root();
deque<net_handle_t> queue = {root};

while (!queue.empty()) {
net_handle_t net_handle = queue.front();
queue.pop_front();
if (distance_index.is_snarl(net_handle)) {
net_handle_t start_bound = distance_index.get_bound(net_handle, false, true);
net_handle_t end_bound = distance_index.get_bound(net_handle, true, false);
handle_t start_handle = distance_index.get_handle(start_bound, graph);
handle_t end_handle = distance_index.get_handle(end_bound, graph);
merge_equivalent_traversals_in_snarl(graph, selected_paths, path_trav_finder, start_handle, end_handle);
}
if (net_handle == root || distance_index.is_snarl(net_handle) || distance_index.is_chain(net_handle)) {
distance_index.for_each_child(net_handle, [&](net_handle_t child_handle) {
queue.push_back(child_handle);
});
}
}
}
}
Expand Down
5 changes: 4 additions & 1 deletion src/traversal_clusters.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,9 @@ vector<vector<int>> assign_child_snarls_to_traversals(const PathHandleGraph* gra
/// Note: this doesn't modify the graph toplogy, so uncovered nodes and edges as a result of path editing
/// would usually need removale with vg clip afterwards
///
void merge_equivalent_traversals_in_graph(MutablePathHandleGraph* graph, const unordered_set<path_handle_t>& selected_paths);
/// the use_snarl_manager toggles between distnace index and snarl manager for computing snarls
/// (adding this option to (hopefully) temporarily revert to the snarl manager for performance reasons)
void merge_equivalent_traversals_in_graph(MutablePathHandleGraph* graph, const unordered_set<path_handle_t>& selected_paths,
bool use_snarl_manager=false);

}

1 comment on commit 0097fb1

@adamnovak
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

vg CI tests complete for merge to master. View the full report here.

16 tests passed, 0 tests failed and 0 tests skipped in 17488 seconds

Please sign in to comment.