From 2446a336d171f67534ae54cb7b96a27ac49363d3 Mon Sep 17 00:00:00 2001 From: Erik Garrison Date: Mon, 6 Feb 2023 16:06:02 -0600 Subject: [PATCH 01/12] prevent quadratic untangle cut point explosions --- src/algorithms/untangle.cpp | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/src/algorithms/untangle.cpp b/src/algorithms/untangle.cpp index 735f0b8c..066118a4 100644 --- a/src/algorithms/untangle.cpp +++ b/src/algorithms/untangle.cpp @@ -74,7 +74,10 @@ std::vector untangle_cuts( found_loop = true; } } - if (found_loop) { + if (found_loop + && !is_seen_fwd_step(step) && !is_seen_rev_step(step) + && !is_seen_fwd_step(other) && !is_seen_rev_step(other) + ) { // recurse this function into it, taking start as our current handle other side of the loop as our end // to cut_points we add the start position, the result from recursion, and our end position //std::cerr << "Found loop! " << step_index.get_position(step) << " " << step_index.get_position(other) << std::endl; @@ -120,7 +123,10 @@ std::vector untangle_cuts( found_loop = true; } } - if (found_loop) { + if (found_loop + && !is_seen_rev_step(other) && !is_seen_fwd_step(other) + && !is_seen_rev_step(step) && !is_seen_fwd_step(step) + ) { // recurse this function into it, taking start as our current handle other side of the loop as our end // to cut_points we add the start position, the result from recursion, and our end position todo.push_back(std::make_pair(other, step)); @@ -137,12 +143,13 @@ std::vector untangle_cuts( const step_handle_t& b) { return step_index.get_position(a, graph) < step_index.get_position(b, graph); }); - //auto prev_size = cut_points.size(); + auto prev_size = cut_points.size(); // then take unique positions cut_points.erase(std::unique(cut_points.begin(), cut_points.end()), cut_points.end()); - //std::cerr << "prev_size " << prev_size << " curr_size " << cut_points.size() << std::endl; + std::cerr << "[odgi untangle] cuts for " << path_name + << " prev_size " << prev_size << " curr_size " << cut_points.size() << std::endl; return cut_points; } @@ -835,7 +842,7 @@ void untangle( if (show_progress) { progress->finish(); } - + // todo: split up the graph space into regions of cut_every bp // write a map from node ids to segments // walk along every path @@ -869,7 +876,7 @@ void untangle( if (show_progress) { progress->finish(); } - + } } else { uint64_t num_cut_points_read = 0; From e08a9259bfe1ed79a0f7e0d817c8b999f4ab1e96 Mon Sep 17 00:00:00 2001 From: Erik Garrison Date: Tue, 7 Feb 2023 15:12:34 -0600 Subject: [PATCH 02/12] put the seen current step bailout in the right place --- src/algorithms/untangle.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/algorithms/untangle.cpp b/src/algorithms/untangle.cpp index 066118a4..f7cb8dfa 100644 --- a/src/algorithms/untangle.cpp +++ b/src/algorithms/untangle.cpp @@ -52,7 +52,7 @@ std::vector untangle_cuts( for (step_handle_t step = start; step != end; step = graph.get_next_step(step)) { // we take the first and shortest loop we find // TODO change this, it can be computed based on the node length - if (is_seen_fwd_step(step)) { + if (is_seen_fwd_step(step) || is_seen_rev_step(step)) { continue; } auto curr_pos = step_index.get_position(step, graph); @@ -75,7 +75,6 @@ std::vector untangle_cuts( } } if (found_loop - && !is_seen_fwd_step(step) && !is_seen_rev_step(step) && !is_seen_fwd_step(other) && !is_seen_rev_step(other) ) { // recurse this function into it, taking start as our current handle other side of the loop as our end @@ -98,7 +97,7 @@ std::vector untangle_cuts( for (step_handle_t step = end; step_index.get_position(step, graph) > start_pos; step = graph.get_previous_step(step)) { - if (is_seen_rev_step(step)) { + if (is_seen_rev_step(step) || is_seen_fwd_step(step)) { continue; } // we take the first and shortest loop we find @@ -125,7 +124,6 @@ std::vector untangle_cuts( } if (found_loop && !is_seen_rev_step(other) && !is_seen_fwd_step(other) - && !is_seen_rev_step(step) && !is_seen_fwd_step(step) ) { // recurse this function into it, taking start as our current handle other side of the loop as our end // to cut_points we add the start position, the result from recursion, and our end position From 547e4d76f340dc0da34bae1522308e9b97efd0c9 Mon Sep 17 00:00:00 2001 From: Erik Garrison Date: Tue, 7 Feb 2023 16:12:12 -0600 Subject: [PATCH 03/12] compute position based on length for a major speedup --- src/algorithms/untangle.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/algorithms/untangle.cpp b/src/algorithms/untangle.cpp index f7cb8dfa..7ff035c3 100644 --- a/src/algorithms/untangle.cpp +++ b/src/algorithms/untangle.cpp @@ -44,6 +44,7 @@ std::vector untangle_cuts( auto start = todo.front().first; auto end = todo.front().second; uint64_t start_pos = step_index.get_position(start, graph); + uint64_t curr_pos = start_pos; uint64_t end_pos = step_index.get_position(end, graph); //std::cerr << "todo: " << start_pos << " " << end_pos << std::endl; cut_points.push_back(start); @@ -51,12 +52,11 @@ std::vector untangle_cuts( // we go forward until we see a loop, where the other step has position < end_pos and > start_pos for (step_handle_t step = start; step != end; step = graph.get_next_step(step)) { // we take the first and shortest loop we find - // TODO change this, it can be computed based on the node length + handle_t handle = graph.get_handle_of_step(step); if (is_seen_fwd_step(step) || is_seen_rev_step(step)) { + curr_pos += graph.get_length(handle); continue; } - auto curr_pos = step_index.get_position(step, graph); - handle_t handle = graph.get_handle_of_step(step); if (is_cut(handle)) { cut_points.push_back(step); } @@ -84,6 +84,7 @@ std::vector untangle_cuts( // we then step forward to the loop end and continue iterating step = other; } + curr_pos += graph.get_length(handle); } // TODO this block is the same as the previous one, but in reverse // the differences in how positions are managed are subtle, making it hard to fold the @@ -97,13 +98,12 @@ std::vector untangle_cuts( for (step_handle_t step = end; step_index.get_position(step, graph) > start_pos; step = graph.get_previous_step(step)) { + handle_t handle = graph.get_handle_of_step(step); + curr_pos -= graph.get_length(handle); if (is_seen_rev_step(step) || is_seen_fwd_step(step)) { continue; } // we take the first and shortest loop we find - // TODO change this, it can be computed based on the node length - auto curr_pos = step_index.get_position(step, graph); - handle_t handle = graph.get_handle_of_step(step); if (is_cut(handle)) { cut_points.push_back(step); } @@ -146,8 +146,8 @@ std::vector untangle_cuts( cut_points.erase(std::unique(cut_points.begin(), cut_points.end()), cut_points.end()); - std::cerr << "[odgi untangle] cuts for " << path_name - << " prev_size " << prev_size << " curr_size " << cut_points.size() << std::endl; + //std::cerr << "[odgi untangle] cuts for " << path_name + // << " prev_size " << prev_size << " curr_size " << cut_points.size() << std::endl; return cut_points; } From 5f3bfd2a561adc6645c86820f1dba10acb782d1f Mon Sep 17 00:00:00 2001 From: AndreaGuarracino Date: Thu, 9 Feb 2023 21:18:36 -0600 Subject: [PATCH 04/12] this seems to improve a bit --- src/algorithms/untangle.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/algorithms/untangle.cpp b/src/algorithms/untangle.cpp index 7ff035c3..8f9535c2 100644 --- a/src/algorithms/untangle.cpp +++ b/src/algorithms/untangle.cpp @@ -99,8 +99,8 @@ std::vector untangle_cuts( step_index.get_position(step, graph) > start_pos; step = graph.get_previous_step(step)) { handle_t handle = graph.get_handle_of_step(step); - curr_pos -= graph.get_length(handle); if (is_seen_rev_step(step) || is_seen_fwd_step(step)) { + curr_pos -= graph.get_length(handle); continue; } // we take the first and shortest loop we find @@ -131,6 +131,7 @@ std::vector untangle_cuts( // we then step forward to the loop end and continue iterating step = other; } + curr_pos -= graph.get_length(handle); } cut_points.push_back(end); } From 2c78159a1b4bf122493075e436ea9c53033f430f Mon Sep 17 00:00:00 2001 From: Erik Garrison Date: Tue, 14 Feb 2023 14:55:18 -0600 Subject: [PATCH 05/12] resolve inconsistency with master --- src/algorithms/untangle.cpp | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/src/algorithms/untangle.cpp b/src/algorithms/untangle.cpp index 8f9535c2..c627dc33 100644 --- a/src/algorithms/untangle.cpp +++ b/src/algorithms/untangle.cpp @@ -53,7 +53,7 @@ std::vector untangle_cuts( for (step_handle_t step = start; step != end; step = graph.get_next_step(step)) { // we take the first and shortest loop we find handle_t handle = graph.get_handle_of_step(step); - if (is_seen_fwd_step(step) || is_seen_rev_step(step)) { + if (is_seen_fwd_step(step)) { curr_pos += graph.get_length(handle); continue; } @@ -74,9 +74,7 @@ std::vector untangle_cuts( found_loop = true; } } - if (found_loop - && !is_seen_fwd_step(other) && !is_seen_rev_step(other) - ) { + if (found_loop) { // recurse this function into it, taking start as our current handle other side of the loop as our end // to cut_points we add the start position, the result from recursion, and our end position //std::cerr << "Found loop! " << step_index.get_position(step) << " " << step_index.get_position(other) << std::endl; @@ -94,12 +92,14 @@ std::vector untangle_cuts( if (end == path_begin || !graph.has_previous_step(end)) { return cut_points; } + step_handle_t _step = graph.get_previous_step(end); + curr_pos = step_index.get_position(_step, graph); //std::cerr << "reversing" << std::endl; - for (step_handle_t step = end; - step_index.get_position(step, graph) > start_pos; + for (step_handle_t step = _step; + step != start; step = graph.get_previous_step(step)) { handle_t handle = graph.get_handle_of_step(step); - if (is_seen_rev_step(step) || is_seen_fwd_step(step)) { + if (is_seen_rev_step(step)) { curr_pos -= graph.get_length(handle); continue; } @@ -122,9 +122,7 @@ std::vector untangle_cuts( found_loop = true; } } - if (found_loop - && !is_seen_rev_step(other) && !is_seen_fwd_step(other) - ) { + if (found_loop) { // recurse this function into it, taking start as our current handle other side of the loop as our end // to cut_points we add the start position, the result from recursion, and our end position todo.push_back(std::make_pair(other, step)); From 2868c519414b5cae63903563b19ea3f3ef43b5c0 Mon Sep 17 00:00:00 2001 From: Erik Garrison Date: Wed, 15 Feb 2023 17:00:06 -0600 Subject: [PATCH 06/12] avoid position index but keep track of correct position --- src/algorithms/untangle.cpp | 81 +++++++++++++++++++++++-------------- 1 file changed, 50 insertions(+), 31 deletions(-) diff --git a/src/algorithms/untangle.cpp b/src/algorithms/untangle.cpp index c627dc33..0f18e93f 100644 --- a/src/algorithms/untangle.cpp +++ b/src/algorithms/untangle.cpp @@ -14,14 +14,12 @@ std::vector untangle_cuts( const std::function& is_cut) { auto path = graph.get_path_handle_of_step(_start); auto path_name = graph.get_path_name(path); - // this assumes that the end is not inclusive + /* std::cerr << "untangle_cuts(" << path_name << ", " << step_index.get_position(_start) << ", " << step_index.get_position(_end) << ")" << std::endl; */ - // TODO make a vector of handles we've seen as segment starts and of segment ends - // to avoid duplicating work std::vector seen_fwd_step(self_index.step_count); std::vector seen_rev_step(self_index.step_count); @@ -37,7 +35,24 @@ std::vector untangle_cuts( auto mark_seen_rev_step = [&seen_rev_step,&self_index](const step_handle_t& step) { seen_rev_step[self_index.get_step_idx(step)] = true; }; - std::vector cut_points; + + std::vector> cut_points; + auto clean_cut_points = [&cut_points,&step_index,&graph](void) { + std::sort(cut_points.begin(), + cut_points.end(), + [&](const std::pair& a, + const std::pair& b) { + return a.first < b.first; + }); + std::vector c; + for (auto& x : cut_points) c.push_back(x.second); + // then take unique positions + c.erase(std::unique(c.begin(), + c.end()), + c.end()); + return c; + }; + std::deque> todo; todo.push_back(std::make_pair(_start, _end)); while (!todo.empty()) { @@ -47,7 +62,7 @@ std::vector untangle_cuts( uint64_t curr_pos = start_pos; uint64_t end_pos = step_index.get_position(end, graph); //std::cerr << "todo: " << start_pos << " " << end_pos << std::endl; - cut_points.push_back(start); + cut_points.push_back(std::pair(curr_pos, start)); todo.pop_front(); // we go forward until we see a loop, where the other step has position < end_pos and > start_pos for (step_handle_t step = start; step != end; step = graph.get_next_step(step)) { @@ -58,14 +73,22 @@ std::vector untangle_cuts( continue; } if (is_cut(handle)) { - cut_points.push_back(step); + /* + if (curr_pos != step_index.get_position(step, graph)) { + std::cerr << "position error fwd " << curr_pos << " " << step_index.get_position(step, graph) << std::endl; + exit(1); + } + */ + cut_points.push_back(std::pair(curr_pos, step)); + //cut_points.push_back(std::make_pair(step_index.get_position(step, graph), step)); } mark_seen_fwd_step(step); bool found_loop = false; step_handle_t other; + uint64_t other_pos = 0; auto x = self_index.get_next_step_on_node(graph.get_id(handle), step); if (x.first) { - auto other_pos = step_index.get_position(x.second, graph); + other_pos = step_index.get_position(x.second, graph); if (other_pos > start_pos && other_pos < end_pos && other_pos > curr_pos @@ -80,9 +103,11 @@ std::vector untangle_cuts( //std::cerr << "Found loop! " << step_index.get_position(step) << " " << step_index.get_position(other) << std::endl; todo.push_back(std::make_pair(step, other)); // we then step forward to the loop end and continue iterating + curr_pos = other_pos + graph.get_length(graph.get_handle_of_step(other)); step = other; + } else { + curr_pos += graph.get_length(handle); } - curr_pos += graph.get_length(handle); } // TODO this block is the same as the previous one, but in reverse // the differences in how positions are managed are subtle, making it hard to fold the @@ -90,30 +115,38 @@ std::vector untangle_cuts( // now we reverse it step_handle_t path_begin = graph.path_begin(path); if (end == path_begin || !graph.has_previous_step(end)) { - return cut_points; + return clean_cut_points(); } - step_handle_t _step = graph.get_previous_step(end); - curr_pos = step_index.get_position(_step, graph); + //step_handle_t _step = graph.get_previous_step(end); + curr_pos = step_index.get_position(end, graph) + graph.get_length(graph.get_handle_of_step(end)); + //cut_points.push_back(std::make_pair(curr_pos, end)); //std::cerr << "reversing" << std::endl; - for (step_handle_t step = _step; + for (step_handle_t step = end; step != start; step = graph.get_previous_step(step)) { handle_t handle = graph.get_handle_of_step(step); + curr_pos -= graph.get_length(handle); if (is_seen_rev_step(step)) { - curr_pos -= graph.get_length(handle); continue; } // we take the first and shortest loop we find if (is_cut(handle)) { - cut_points.push_back(step); + /* + if (curr_pos != step_index.get_position(step, graph)) { + std::cerr << "position error rev " << curr_pos << " " << step_index.get_position(step, graph) << std::endl; + exit(1); + } + */ + cut_points.push_back(std::pair(curr_pos, step)); } mark_seen_rev_step(step); //std::cerr << "rev on step " << graph.get_id(handle) << " " << curr_pos << std::endl; bool found_loop = false; step_handle_t other; auto x = self_index.get_prev_step_on_node(graph.get_id(handle), step); + uint64_t other_pos = 0; if (x.first) { - auto other_pos = step_index.get_position(x.second, graph); + other_pos = step_index.get_position(x.second, graph); if (other_pos > start_pos && other_pos < end_pos && other_pos < curr_pos @@ -127,27 +160,13 @@ std::vector untangle_cuts( // to cut_points we add the start position, the result from recursion, and our end position todo.push_back(std::make_pair(other, step)); // we then step forward to the loop end and continue iterating + curr_pos = other_pos; step = other; } - curr_pos -= graph.get_length(handle); } - cut_points.push_back(end); } // and sort - std::sort(cut_points.begin(), - cut_points.end(), - [&](const step_handle_t& a, - const step_handle_t& b) { - return step_index.get_position(a, graph) < step_index.get_position(b, graph); - }); - auto prev_size = cut_points.size(); - // then take unique positions - cut_points.erase(std::unique(cut_points.begin(), - cut_points.end()), - cut_points.end()); - //std::cerr << "[odgi untangle] cuts for " << path_name - // << " prev_size " << prev_size << " curr_size " << cut_points.size() << std::endl; - return cut_points; + return clean_cut_points(); } void write_cuts( From 23226f9ff02a8a9cd575394c8e2ffd6674c451db Mon Sep 17 00:00:00 2001 From: Erik Garrison Date: Wed, 15 Feb 2023 17:16:42 -0600 Subject: [PATCH 07/12] allow adjustment of sampling rate for dynamically generated step indexes --- src/subcommand/untangle_main.cpp | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/subcommand/untangle_main.cpp b/src/subcommand/untangle_main.cpp index bcdcc179..a565a6d8 100644 --- a/src/subcommand/untangle_main.cpp +++ b/src/subcommand/untangle_main.cpp @@ -62,8 +62,10 @@ int main_untangle(int argc, char **argv) { args::Flag make_self_dotplot(debugging_opts, "DOTPLOT", "Render a table showing the positional dotplot of the query against itself.", {'S', "self-dotplot"}); args::Group step_index_opts(parser, "[ Step Index Options ]"); - args::ValueFlag _step_index(step_index_opts, "FILE", "Load the step index from this *FILE*. The file name usually ends with *.stpidx*. (default: build the step index from scratch with a sampling rate of 8).", + args::ValueFlag _step_index(step_index_opts, "FILE", "Load the step index from this *FILE*. The file name usually ends with *.stpidx*. (default: build the step index from scratch with sampling rate 8).", {'a', "step-index"}); + args::ValueFlag _step_sampling(step_index_opts, "N", "Sampling rate to use for step index construction. (default: 8).", + {'D', "step-sampling"}); args::Group threading(parser, "[ Threading ]"); args::ValueFlag nthreads( threading, "N", "Number of threads to use for parallel operations.", {'t', "threads"}); @@ -208,13 +210,14 @@ int main_untangle(int argc, char **argv) { } } else { if (!_step_index) { + auto step_sampling = _step_sampling ? args::get(_step_sampling) : 8; if (progress) { std::cerr - << "[odgi::untangle] warning: no step index specified. Building one with a sample rate of 8. This may take additional time. " - "A step index can be provided via -a, --step-index. A step index can be built using odgi stepindex." - << std::endl; + << "[odgi::untangle] warning: no step index specified. Building one with a sample rate of " << step_sampling << ". This may take additional time. " + "A step index can be provided via -a, --step-index. A step index can be built using odgi stepindex." + << std::endl; } - algorithms::step_index_t step_index(graph, paths, num_threads, progress, 8); + algorithms::step_index_t step_index(graph, paths, num_threads, progress, step_sampling); algorithms::untangle(graph, query_paths, target_paths, From fa95f780bbd2602f4b18a60d6b99f345ca6ec387 Mon Sep 17 00:00:00 2001 From: Erik Garrison Date: Wed, 15 Feb 2023 17:31:13 -0600 Subject: [PATCH 08/12] make the path selfindex also record positions to reduce nonlocality --- src/algorithms/stepindex.cpp | 12 ++++++------ src/algorithms/stepindex.hpp | 8 +++++--- src/algorithms/untangle.cpp | 12 ++++++------ 3 files changed, 17 insertions(+), 15 deletions(-) diff --git a/src/algorithms/stepindex.cpp b/src/algorithms/stepindex.cpp index cf9332c9..194559ae 100644 --- a/src/algorithms/stepindex.cpp +++ b/src/algorithms/stepindex.cpp @@ -275,14 +275,14 @@ path_step_index_t::path_step_index_t(const PathHandleGraph& graph, node_offset[0] = 0; // first offset is 0 by definition for (auto& node_step : steps_by_node) { auto& idx = std::get<0>(node_step); - //auto& offset = std::get<1>(node_step); // just used for sorting + auto& offset = std::get<1>(node_step); // just used for sorting auto& step = std::get<2>(node_step); //std::cerr << "idx = " << idx << " " << as_integers(step)[0] << ":" << as_integers(step)[1] << std::endl; if (idx != last_idx) { node_offset[idx] = node_steps.size(); } step_offset[step_mphf->lookup(step)] = node_steps.size(); - node_steps.push_back(step); + node_steps.push_back(std::make_pair(step, offset)); last_idx = idx; } if (last_idx != node_count-1) { @@ -313,7 +313,7 @@ uint64_t path_step_index_t::n_steps_on_node(const nid_t& id) const { return node_offset[idx+1] - node_offset[idx]; } -std::pair +std::pair> path_step_index_t::get_next_step_on_node(const nid_t& id, const step_handle_t& step) const { auto node_idx = get_node_idx(id); auto curr_steps = node_offset[node_idx]; @@ -323,12 +323,12 @@ path_step_index_t::get_next_step_on_node(const nid_t& id, const step_handle_t& s if (has_next) { return std::make_pair(true, node_steps[step_idx+1]); } else { - step_handle_t empty_step; + std::pair empty_step; return std::make_pair(false, empty_step); } } -std::pair +std::pair> path_step_index_t::get_prev_step_on_node(const nid_t& id, const step_handle_t& step) const { auto curr_steps = node_offset[get_node_idx(id)]; auto step_idx = step_offset[get_step_idx(step)]; @@ -336,7 +336,7 @@ path_step_index_t::get_prev_step_on_node(const nid_t& id, const step_handle_t& s if (has_prev) { return std::make_pair(true, node_steps[step_idx-1]); } else { - step_handle_t empty_step; + std::pair empty_step; return std::make_pair(false, empty_step); } } diff --git a/src/algorithms/stepindex.hpp b/src/algorithms/stepindex.hpp index 3db8d5de..edf504ab 100644 --- a/src/algorithms/stepindex.hpp +++ b/src/algorithms/stepindex.hpp @@ -99,7 +99,7 @@ struct path_step_index_t { // map to the beginning of a range in node_steps std::vector node_offset; // record the steps in positional order by node (index given in node_offset) - std::vector node_steps; + std::vector> node_steps; // map from step to an index in step_offset boophf_step_t* step_mphf = nullptr; // index in handle_steps for the given step @@ -114,9 +114,11 @@ struct path_step_index_t { uint64_t n_steps_on_node(const nid_t& id) const; // these functions require, but do not check, that our step is in the indexed path // next step on node (sorted by position in path), (false, _) if there is no next step - std::pair get_next_step_on_node(const nid_t& id, const step_handle_t& step) const; + std::pair> + get_next_step_on_node(const nid_t& id, const step_handle_t& step) const; // prev step on node (sorted by position in path), (false, _) if there is no next step - std::pair get_prev_step_on_node(const nid_t& id, const step_handle_t& step) const; + std::pair> + get_prev_step_on_node(const nid_t& id, const step_handle_t& step) const; }; } diff --git a/src/algorithms/untangle.cpp b/src/algorithms/untangle.cpp index 0f18e93f..c0b47eda 100644 --- a/src/algorithms/untangle.cpp +++ b/src/algorithms/untangle.cpp @@ -88,12 +88,12 @@ std::vector untangle_cuts( uint64_t other_pos = 0; auto x = self_index.get_next_step_on_node(graph.get_id(handle), step); if (x.first) { - other_pos = step_index.get_position(x.second, graph); + other_pos = x.second.second; if (other_pos > start_pos && other_pos < end_pos && other_pos > curr_pos - && !is_seen_fwd_step(x.second)) { - other = x.second; + && !is_seen_fwd_step(x.second.first)) { + other = x.second.first; found_loop = true; } } @@ -146,12 +146,12 @@ std::vector untangle_cuts( auto x = self_index.get_prev_step_on_node(graph.get_id(handle), step); uint64_t other_pos = 0; if (x.first) { - other_pos = step_index.get_position(x.second, graph); + other_pos = x.second.second; if (other_pos > start_pos && other_pos < end_pos && other_pos < curr_pos - && !is_seen_rev_step(x.second)) { - other = x.second; + && !is_seen_rev_step(x.second.first)) { + other = x.second.first; found_loop = true; } } From 066bef0f7b1b60f07d56d9e4604a035c416b3c1b Mon Sep 17 00:00:00 2001 From: Erik Garrison Date: Tue, 21 Feb 2023 15:16:43 -0600 Subject: [PATCH 09/12] minor optimization --- src/algorithms/untangle.cpp | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/src/algorithms/untangle.cpp b/src/algorithms/untangle.cpp index c0b47eda..88e3a53f 100644 --- a/src/algorithms/untangle.cpp +++ b/src/algorithms/untangle.cpp @@ -30,10 +30,16 @@ std::vector untangle_cuts( return seen_rev_step[self_index.get_step_idx(step)]; }; auto mark_seen_fwd_step = [&seen_fwd_step,&self_index](const step_handle_t& step) { - seen_fwd_step[self_index.get_step_idx(step)] = true; + auto idx = self_index.get_step_idx(step); + bool state = seen_fwd_step[idx]; + seen_fwd_step[idx] = true; + return state; }; auto mark_seen_rev_step = [&seen_rev_step,&self_index](const step_handle_t& step) { - seen_rev_step[self_index.get_step_idx(step)] = true; + auto idx = self_index.get_step_idx(step); + bool state = seen_rev_step[idx]; + seen_rev_step[idx] = true; + return state; }; std::vector> cut_points; @@ -65,10 +71,12 @@ std::vector untangle_cuts( cut_points.push_back(std::pair(curr_pos, start)); todo.pop_front(); // we go forward until we see a loop, where the other step has position < end_pos and > start_pos - for (step_handle_t step = start; step != end; step = graph.get_next_step(step)) { + for (step_handle_t step = start; + step != end; + step = graph.get_next_step(step)) { // we take the first and shortest loop we find handle_t handle = graph.get_handle_of_step(step); - if (is_seen_fwd_step(step)) { + if (mark_seen_fwd_step(step)) { curr_pos += graph.get_length(handle); continue; } @@ -82,7 +90,6 @@ std::vector untangle_cuts( cut_points.push_back(std::pair(curr_pos, step)); //cut_points.push_back(std::make_pair(step_index.get_position(step, graph), step)); } - mark_seen_fwd_step(step); bool found_loop = false; step_handle_t other; uint64_t other_pos = 0; @@ -126,7 +133,7 @@ std::vector untangle_cuts( step = graph.get_previous_step(step)) { handle_t handle = graph.get_handle_of_step(step); curr_pos -= graph.get_length(handle); - if (is_seen_rev_step(step)) { + if (mark_seen_rev_step(step)) { continue; } // we take the first and shortest loop we find @@ -139,7 +146,6 @@ std::vector untangle_cuts( */ cut_points.push_back(std::pair(curr_pos, step)); } - mark_seen_rev_step(step); //std::cerr << "rev on step " << graph.get_id(handle) << " " << curr_pos << std::endl; bool found_loop = false; step_handle_t other; From 861fcc33b211e4add6c07bdbb56cc563fe17df25 Mon Sep 17 00:00:00 2001 From: Erik Garrison Date: Tue, 21 Feb 2023 17:36:47 -0600 Subject: [PATCH 10/12] make it possible to skip the node spinlock with graph_t::set_static --- CMakeLists.txt | 1 + src/node.cpp | 11 ++++++++++- src/node.hpp | 15 +++++++++++---- src/odgi.cpp | 12 ++++++++++++ src/odgi.hpp | 6 ++++++ src/subcommand/untangle_main.cpp | 1 + 6 files changed, 41 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 28318875..0172ace5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -53,6 +53,7 @@ endif() # Set optimization through command line; see INSTALL.md if (${CMAKE_BUILD_TYPE} MATCHES Release) set(EXTRA_FLAGS "-Ofast -march=native -pipe -msse4.2 -funroll-all-loops") # -fprofile-generate=../pgo") + #set(EXTRA_FLAGS "-O0 -march=native -msse4.2 -pg") # for profiling with gprof set(CMAKE_CXX_FLAGS_RELEASE "-DNDEBUG") # reset CXX_FLAGS to be able to replace -O3 with -Ofast endif () diff --git a/src/node.cpp b/src/node.cpp index 04cfca13..51bb3c70 100644 --- a/src/node.cpp +++ b/src/node.cpp @@ -22,6 +22,14 @@ const std::string& node_t::get_sequence() const { return sequence; } +void node_t::set_volatile(void) { + dynamic = true; +} + +void node_t::set_static(void) { + dynamic = false; +} + // encode an internal representation of an external id (adding if none exists) uint64_t node_t::encode(const uint64_t& other_id) { uint64_t delta = to_delta(other_id); @@ -333,6 +341,7 @@ void node_t::clear_encoding() { void node_t::copy(const node_t& other) { clear(); id = other.id; + dynamic = other.dynamic; sequence = other.sequence; edges = other.edges; decoding = other.decoding; @@ -439,7 +448,7 @@ void node_t::load(std::istream& in) { in.read((char*)sequence.c_str(), len*sizeof(uint8_t)); in.read((char*)&id, sizeof(id)); edges.load(in); - decoding.load(in); + decoding.load(in); paths.load(in); //display(); } diff --git a/src/node.hpp b/src/node.hpp index 3c91c651..277e729c 100644 --- a/src/node.hpp +++ b/src/node.hpp @@ -26,6 +26,7 @@ const uint8_t PATH_RECORD_LENGTH = 6; class node_t { uint64_t id = 0; std::atomic_flag lock = ATOMIC_FLAG_INIT; + bool dynamic = true; std::string sequence; dyn::hacked_vector edges; dyn::hacked_vector decoding; @@ -82,16 +83,20 @@ class node_t { return type & (1 << 3); } }; - + public: node_t(void); // constructor // locking methods inline void get_lock(void) { - while (lock.test_and_set(std::memory_order_acquire)) // acquire lock - ; // spin + if (dynamic) { + while (lock.test_and_set(std::memory_order_acquire)) // acquire lock + ; // spin + } } inline void clear_lock(void) { - lock.clear(std::memory_order_release); + if (dynamic) { + lock.clear(std::memory_order_release); + } } inline const uint64_t edge_count(void) const { return edges.size()/EDGE_RECORD_LENGTH; } inline const uint64_t path_count(void) const { return paths.size()/PATH_RECORD_LENGTH; } @@ -114,6 +119,8 @@ class node_t { void set_sequence(const std::string& seq); const uint64_t& get_id(void) const; void set_id(const uint64_t& new_id); + void set_volatile(void); + void set_static(void); void for_each_edge(const std::functionset_static(); + } +} + +void graph_t::set_volatile(void) { + for (auto& node : node_v) { + node->set_volatile(); + } +} + } diff --git a/src/odgi.hpp b/src/odgi.hpp index 7f2d5eb1..495df37f 100644 --- a/src/odgi.hpp +++ b/src/odgi.hpp @@ -521,6 +521,12 @@ class graph_t : public MutablePathDeletableHandleGraph, public SerializableHandl /// get the backing node rank for a given node id uint64_t get_node_rank(const nid_t& node_id) const; + /// set the graph into static mode, which avoids spinlocks + void set_static(void); + + /// set the graph into volatile mode, which uses (spin)locks for dynamic hogwilding + void set_volatile(void); + }; //const static uint64_t path_begin_marker = std::numeric_limits::max(); diff --git a/src/subcommand/untangle_main.cpp b/src/subcommand/untangle_main.cpp index a565a6d8..25570e27 100644 --- a/src/subcommand/untangle_main.cpp +++ b/src/subcommand/untangle_main.cpp @@ -112,6 +112,7 @@ int main_untangle(int argc, char **argv) { graph); } } + graph.set_static(); } // path loading From 8a626e7cf47a9ed3f9ec8f4d6e9a6924ef227980 Mon Sep 17 00:00:00 2001 From: Erik Garrison Date: Thu, 9 Mar 2023 14:49:22 +0100 Subject: [PATCH 11/12] rewrite the self step index to store the path as a vector of steps and return iterators into it --- src/algorithms/stepindex.cpp | 43 ++++++++++------- src/algorithms/stepindex.hpp | 16 +++++-- src/algorithms/untangle.cpp | 93 +++++++++++++++++++----------------- src/algorithms/untangle.hpp | 6 ++- 4 files changed, 90 insertions(+), 68 deletions(-) diff --git a/src/algorithms/stepindex.cpp b/src/algorithms/stepindex.cpp index 194559ae..4d55f809 100644 --- a/src/algorithms/stepindex.cpp +++ b/src/algorithms/stepindex.cpp @@ -226,7 +226,6 @@ path_step_index_t::path_step_index_t(const PathHandleGraph& graph, const uint64_t& nthreads) { // iterate through the paths, recording steps in the structure we'll use to build the mphf { - std::vector steps; std::vector nodes; graph.for_each_step_in_path( path, [&](const step_handle_t& step) { @@ -254,15 +253,12 @@ path_step_index_t::path_step_index_t(const PathHandleGraph& graph, // here, we sort steps by handle and then position // and build our handle->step list and step->offset maps // these are steps sorted by the bbhash of their node id, and then their offset in the path - std::vector> steps_by_node; + std::vector> steps_by_node; uint64_t offset = 0; - graph.for_each_step_in_path( - path, [&](const step_handle_t& step) { - steps_by_node.push_back(std::make_tuple(node_mphf->lookup(graph.get_id(graph.get_handle_of_step(step))), - offset, - step)); - offset += graph.get_length(graph.get_handle_of_step(step)); - }); + for (step_it step = steps.begin(); step != steps.end(); ++step) { + steps_by_node.emplace_back(node_mphf->lookup(graph.get_id(graph.get_handle_of_step(*step))), offset, step); + offset += graph.get_length(graph.get_handle_of_step(*step)); + } if (offset == 0) { std::cerr << "[odgi::algorithms::stepindex] unable to index empty path " << graph.get_path_name(path) << std::endl; std::abort(); @@ -281,7 +277,7 @@ path_step_index_t::path_step_index_t(const PathHandleGraph& graph, if (idx != last_idx) { node_offset[idx] = node_steps.size(); } - step_offset[step_mphf->lookup(step)] = node_steps.size(); + step_offset[step_mphf->lookup(*step)] = node_steps.size(); node_steps.push_back(std::make_pair(step, offset)); last_idx = idx; } @@ -313,7 +309,7 @@ uint64_t path_step_index_t::n_steps_on_node(const nid_t& id) const { return node_offset[idx+1] - node_offset[idx]; } -std::pair> +std::pair> path_step_index_t::get_next_step_on_node(const nid_t& id, const step_handle_t& step) const { auto node_idx = get_node_idx(id); auto curr_steps = node_offset[node_idx]; @@ -321,25 +317,36 @@ path_step_index_t::get_next_step_on_node(const nid_t& id, const step_handle_t& s auto step_idx = step_offset[get_step_idx(step)]; bool has_next = step_idx + 1 < next_steps; if (has_next) { - return std::make_pair(true, node_steps[step_idx+1]); + return std::pair(true, node_steps[step_idx+1]); } else { - std::pair empty_step; - return std::make_pair(false, empty_step); + auto empty_step = steps.end(); + return std::pair(false, std::pair(empty_step, 0)); } } -std::pair> +std::pair> path_step_index_t::get_prev_step_on_node(const nid_t& id, const step_handle_t& step) const { auto curr_steps = node_offset[get_node_idx(id)]; auto step_idx = step_offset[get_step_idx(step)]; bool has_prev = step_idx > curr_steps; if (has_prev) { - return std::make_pair(true, node_steps[step_idx-1]); + return std::pair(true, node_steps[step_idx-1]); } else { - std::pair empty_step; - return std::make_pair(false, empty_step); + auto empty_step = steps.end(); + return std::pair(false, std::pair(empty_step, 0)); } } +// get the first step in the path +path_step_index_t::step_it path_step_index_t::path_begin(void) const { + return steps.begin(); +} + +// get the last step in the path +path_step_index_t::step_it path_step_index_t::path_back(void) const { + return steps.end()-1; +} + + } } diff --git a/src/algorithms/stepindex.hpp b/src/algorithms/stepindex.hpp index edf504ab..c91e7d22 100644 --- a/src/algorithms/stepindex.hpp +++ b/src/algorithms/stepindex.hpp @@ -96,10 +96,14 @@ struct path_step_index_t { ~path_step_index_t(void); // map from node id in the path to an index in node_offsets boophf_uint64_t* node_mphf = nullptr; + // steps in the path + std::vector steps; + // iterator typedef for convenience + typedef std::vector::const_iterator step_it; // map to the beginning of a range in node_steps std::vector node_offset; // record the steps in positional order by node (index given in node_offset) - std::vector> node_steps; + std::vector> node_steps; // map from step to an index in step_offset boophf_step_t* step_mphf = nullptr; // index in handle_steps for the given step @@ -114,11 +118,17 @@ struct path_step_index_t { uint64_t n_steps_on_node(const nid_t& id) const; // these functions require, but do not check, that our step is in the indexed path // next step on node (sorted by position in path), (false, _) if there is no next step - std::pair> + std::pair> get_next_step_on_node(const nid_t& id, const step_handle_t& step) const; // prev step on node (sorted by position in path), (false, _) if there is no next step - std::pair> + std::pair> get_prev_step_on_node(const nid_t& id, const step_handle_t& step) const; + + // get the first step in the path + step_it path_begin(void) const; + + // get the last step in the path + step_it path_back(void) const; }; } diff --git a/src/algorithms/untangle.cpp b/src/algorithms/untangle.cpp index 88e3a53f..5041b40e 100644 --- a/src/algorithms/untangle.cpp +++ b/src/algorithms/untangle.cpp @@ -7,13 +7,13 @@ using namespace handlegraph; std::vector untangle_cuts( const PathHandleGraph& graph, - const step_handle_t& _start, - const step_handle_t& _end, + const path_handle_t& path, + const std::string& path_name, + const path_step_index_t::step_it& _start, + const path_step_index_t::step_it& _end, const step_index_t& step_index, const path_step_index_t& self_index, const std::function& is_cut) { - auto path = graph.get_path_handle_of_step(_start); - auto path_name = graph.get_path_name(path); /* std::cerr << "untangle_cuts(" << path_name << ", " @@ -21,24 +21,22 @@ std::vector untangle_cuts( << step_index.get_position(_end) << ")" << std::endl; */ - std::vector seen_fwd_step(self_index.step_count); - std::vector seen_rev_step(self_index.step_count); - auto is_seen_fwd_step = [&seen_fwd_step,&self_index](const step_handle_t& step) { - return seen_fwd_step[self_index.get_step_idx(step)]; + ska::flat_hash_set seen_fwd_step; + ska::flat_hash_set seen_rev_step; + auto is_seen_fwd_step = [&seen_fwd_step,&self_index](const path_step_index_t::step_it& step) { + return seen_fwd_step.count(&*step); }; - auto is_seen_rev_step = [&seen_rev_step,&self_index](const step_handle_t& step) { - return seen_rev_step[self_index.get_step_idx(step)]; + auto is_seen_rev_step = [&seen_rev_step,&self_index](const path_step_index_t::step_it& step) { + return seen_rev_step.count(&*step); }; - auto mark_seen_fwd_step = [&seen_fwd_step,&self_index](const step_handle_t& step) { - auto idx = self_index.get_step_idx(step); - bool state = seen_fwd_step[idx]; - seen_fwd_step[idx] = true; + auto mark_seen_fwd_step = [&seen_fwd_step,&self_index](const path_step_index_t::step_it& step) { + bool state = seen_fwd_step.count(&*step); + seen_fwd_step.insert(&*step); return state; }; - auto mark_seen_rev_step = [&seen_rev_step,&self_index](const step_handle_t& step) { - auto idx = self_index.get_step_idx(step); - bool state = seen_rev_step[idx]; - seen_rev_step[idx] = true; + auto mark_seen_rev_step = [&seen_rev_step,&self_index](const path_step_index_t::step_it& step) { + bool state = seen_rev_step.count(&*step); + seen_rev_step.insert(&*step); return state; }; @@ -59,23 +57,21 @@ std::vector untangle_cuts( return c; }; - std::deque> todo; + std::deque> todo; todo.push_back(std::make_pair(_start, _end)); while (!todo.empty()) { auto start = todo.front().first; auto end = todo.front().second; - uint64_t start_pos = step_index.get_position(start, graph); + uint64_t start_pos = step_index.get_position(*start, graph); uint64_t curr_pos = start_pos; - uint64_t end_pos = step_index.get_position(end, graph); + uint64_t end_pos = step_index.get_position(*end, graph); //std::cerr << "todo: " << start_pos << " " << end_pos << std::endl; - cut_points.push_back(std::pair(curr_pos, start)); + cut_points.push_back(std::pair(curr_pos, *start)); todo.pop_front(); // we go forward until we see a loop, where the other step has position < end_pos and > start_pos - for (step_handle_t step = start; - step != end; - step = graph.get_next_step(step)) { + for (auto step = start; step != end; ++step) { // we take the first and shortest loop we find - handle_t handle = graph.get_handle_of_step(step); + handle_t handle = graph.get_handle_of_step(*step); if (mark_seen_fwd_step(step)) { curr_pos += graph.get_length(handle); continue; @@ -87,13 +83,13 @@ std::vector untangle_cuts( exit(1); } */ - cut_points.push_back(std::pair(curr_pos, step)); + cut_points.push_back(std::pair(curr_pos, *step)); //cut_points.push_back(std::make_pair(step_index.get_position(step, graph), step)); } bool found_loop = false; - step_handle_t other; + path_step_index_t::step_it other; uint64_t other_pos = 0; - auto x = self_index.get_next_step_on_node(graph.get_id(handle), step); + auto x = self_index.get_next_step_on_node(graph.get_id(handle), *step); if (x.first) { other_pos = x.second.second; if (other_pos > start_pos @@ -110,7 +106,7 @@ std::vector untangle_cuts( //std::cerr << "Found loop! " << step_index.get_position(step) << " " << step_index.get_position(other) << std::endl; todo.push_back(std::make_pair(step, other)); // we then step forward to the loop end and continue iterating - curr_pos = other_pos + graph.get_length(graph.get_handle_of_step(other)); + curr_pos = other_pos + graph.get_length(graph.get_handle_of_step(*other)); step = other; } else { curr_pos += graph.get_length(handle); @@ -121,17 +117,15 @@ std::vector untangle_cuts( // forward and reverse version together // now we reverse it step_handle_t path_begin = graph.path_begin(path); - if (end == path_begin || !graph.has_previous_step(end)) { + if (*end == path_begin || !graph.has_previous_step(*end)) { return clean_cut_points(); } //step_handle_t _step = graph.get_previous_step(end); - curr_pos = step_index.get_position(end, graph) + graph.get_length(graph.get_handle_of_step(end)); + curr_pos = step_index.get_position(*end, graph) + graph.get_length(graph.get_handle_of_step(*end)); //cut_points.push_back(std::make_pair(curr_pos, end)); //std::cerr << "reversing" << std::endl; - for (step_handle_t step = end; - step != start; - step = graph.get_previous_step(step)) { - handle_t handle = graph.get_handle_of_step(step); + for (auto step = end; step != start; --step) { + handle_t handle = graph.get_handle_of_step(*step); curr_pos -= graph.get_length(handle); if (mark_seen_rev_step(step)) { continue; @@ -144,12 +138,12 @@ std::vector untangle_cuts( exit(1); } */ - cut_points.push_back(std::pair(curr_pos, step)); + cut_points.push_back(std::pair(curr_pos, *step)); } //std::cerr << "rev on step " << graph.get_id(handle) << " " << curr_pos << std::endl; bool found_loop = false; - step_handle_t other; - auto x = self_index.get_prev_step_on_node(graph.get_id(handle), step); + path_step_index_t::step_it other; + auto x = self_index.get_prev_step_on_node(graph.get_id(handle), *step); uint64_t other_pos = 0; if (x.first) { other_pos = x.second.second; @@ -304,12 +298,15 @@ segment_map_t::segment_map_t( #pragma omp parallel for schedule(dynamic, 1) num_threads(num_threads) for (uint64_t i = 0; i < paths.size(); ++i) { //auto& path : paths) { auto& path = paths[i]; + auto path_name = graph.get_path_name(path); auto self_index = path_step_index_t(graph, path, 1); all_cuts[i] = merge_cuts( untangle_cuts(graph, - graph.path_begin(path), - graph.path_back(path), + path, + path_name, + self_index.path_begin(), + self_index.path_back(), step_index, self_index, is_cut @@ -787,11 +784,14 @@ void untangle( for (auto& path : paths) { // test path_step_index_t auto self_index = path_step_index_t(graph, path, threads_per); + auto path_name = graph.get_path_name(path); std::vector cuts = merge_cuts( untangle_cuts(graph, - graph.path_begin(path), - graph.path_back(path), + path, + path_name, + self_index.path_begin(), + self_index.path_back(), step_index, self_index, [](const handle_t& h) { return false; }), @@ -986,11 +986,14 @@ void untangle( #pragma omp parallel for schedule(dynamic, 1) num_threads(num_threads) for (auto& query : queries) { auto self_index = path_step_index_t(graph, query, threads_per); + auto query_name = graph.get_path_name(query); std::vector cuts = merge_cuts( untangle_cuts(graph, - graph.path_begin(query), - graph.path_back(query), + query, + query_name, + self_index.path_begin(), + self_index.path_back(), step_index, self_index, [&cut_nodes,&graph](const handle_t& h) { diff --git a/src/algorithms/untangle.hpp b/src/algorithms/untangle.hpp index 11aff850..3ce67351 100644 --- a/src/algorithms/untangle.hpp +++ b/src/algorithms/untangle.hpp @@ -66,8 +66,10 @@ class segment_map_t { std::vector untangle_cuts( const PathHandleGraph& graph, - const step_handle_t& start, - const step_handle_t& end, + const path_handle_t& path, + const std::string& path_name, + const path_step_index_t::step_it& start, + const path_step_index_t::step_it& end, const step_index_t& step_index, const path_step_index_t& self_index, const std::function& is_cut); From 3831ccaba815027ea695e85c9d6f80222bc7a96e Mon Sep 17 00:00:00 2001 From: Erik Garrison Date: Thu, 16 Mar 2023 16:00:40 +0100 Subject: [PATCH 12/12] broken af --- src/algorithms/stepindex.cpp | 13 +++++++------ src/algorithms/untangle.cpp | 1 + 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/algorithms/stepindex.cpp b/src/algorithms/stepindex.cpp index 4d55f809..785a05fe 100644 --- a/src/algorithms/stepindex.cpp +++ b/src/algorithms/stepindex.cpp @@ -232,7 +232,7 @@ path_step_index_t::path_step_index_t(const PathHandleGraph& graph, steps.push_back(step); nodes.push_back(graph.get_id(graph.get_handle_of_step(step))); }); - steps.push_back(graph.path_end(path)); + steps.push_back(graph.path_end(path)); // dangerous... // sort the steps, nb. they're unique ips4o::parallel::sort(steps.begin(), steps.end(), std::less<>(), nthreads); // build the hash function (quietly) @@ -253,10 +253,10 @@ path_step_index_t::path_step_index_t(const PathHandleGraph& graph, // here, we sort steps by handle and then position // and build our handle->step list and step->offset maps // these are steps sorted by the bbhash of their node id, and then their offset in the path - std::vector> steps_by_node; + std::vector> steps_by_node; uint64_t offset = 0; for (step_it step = steps.begin(); step != steps.end(); ++step) { - steps_by_node.emplace_back(node_mphf->lookup(graph.get_id(graph.get_handle_of_step(*step))), offset, step); + steps_by_node.emplace_back(node_mphf->lookup(graph.get_id(graph.get_handle_of_step(*step))), offset, *step, step); offset += graph.get_length(graph.get_handle_of_step(*step)); } if (offset == 0) { @@ -273,12 +273,13 @@ path_step_index_t::path_step_index_t(const PathHandleGraph& graph, auto& idx = std::get<0>(node_step); auto& offset = std::get<1>(node_step); // just used for sorting auto& step = std::get<2>(node_step); + auto& it_step = std::get<3>(node_step); //std::cerr << "idx = " << idx << " " << as_integers(step)[0] << ":" << as_integers(step)[1] << std::endl; if (idx != last_idx) { node_offset[idx] = node_steps.size(); } - step_offset[step_mphf->lookup(*step)] = node_steps.size(); - node_steps.push_back(std::make_pair(step, offset)); + step_offset[step_mphf->lookup(step)] = node_steps.size(); + node_steps.push_back(std::make_pair(it_step, offset)); last_idx = idx; } if (last_idx != node_count-1) { @@ -344,7 +345,7 @@ path_step_index_t::step_it path_step_index_t::path_begin(void) const { // get the last step in the path path_step_index_t::step_it path_step_index_t::path_back(void) const { - return steps.end()-1; + return std::prev(std::prev(steps.end())); } diff --git a/src/algorithms/untangle.cpp b/src/algorithms/untangle.cpp index 5041b40e..603ac1b3 100644 --- a/src/algorithms/untangle.cpp +++ b/src/algorithms/untangle.cpp @@ -299,6 +299,7 @@ segment_map_t::segment_map_t( for (uint64_t i = 0; i < paths.size(); ++i) { //auto& path : paths) { auto& path = paths[i]; auto path_name = graph.get_path_name(path); + //std::cerr << "path " << path_name << std::endl; auto self_index = path_step_index_t(graph, path, 1); all_cuts[i] = merge_cuts(