From 7c5f5e62f2bcbd59fab31d3cac8f73b4f64be0a2 Mon Sep 17 00:00:00 2001 From: Tim Ledbetter Date: Thu, 13 Jun 2024 19:43:08 +0100 Subject: [PATCH] LibWeb: Support finding text split across multiple text nodes Previously, the find-in-page function would fail to find text that was split across multiple text nodes. For example, given the following markup: `WH<span>F</span>` the query `WHF` would previously fail to be matched. This is fixed by traversing all of the document's text nodes, constructing a complete string to query against, and keeping track of the locations where that string is split across multiple nodes. (cherry picked from commit ec4d29849dc1d0357c73690722aea1a7802dd0bc) --- Userland/Libraries/LibWeb/DOM/Document.cpp | 70 ++++++++++++++++------ 1 file changed, 53 insertions(+), 17 deletions(-) diff --git a/Userland/Libraries/LibWeb/DOM/Document.cpp b/Userland/Libraries/LibWeb/DOM/Document.cpp index 24be2be5c53c7b..275f1be51c1262 100644 --- a/Userland/Libraries/LibWeb/DOM/Document.cpp +++ b/Userland/Libraries/LibWeb/DOM/Document.cpp @@ -5196,29 +5196,65 @@ Vector> Document::find_matching_text(String const& query, if (!document_element() || !document_element()->layout_node()) return {}; - Vector> matches; - document_element()->layout_node()->for_each_in_inclusive_subtree_of_type([&](auto const& text_node) { - auto const& text = text_node.text_for_rendering(); - size_t offset = 0; - while (true) { - auto match_index = case_sensitivity == CaseSensitivity::CaseInsensitive - ? 
text.find_byte_offset_ignoring_case(query, offset) - : text.find_byte_offset(query, offset); - if (!match_index.has_value()) - break; - - auto range = create_range(); - auto& dom_node = const_cast(text_node.dom_node()); - (void)range->set_start(dom_node, match_index.value()); - (void)range->set_end(dom_node, match_index.value() + query.code_points().length()); + struct TextPositionNode { + DOM::Text& dom_node; + size_t start_offset { 0 }; + }; - matches.append(range); - offset = match_index.value() + 1; + StringBuilder builder; + Vector text_positions; + size_t current_start_position = 0; + String current_node_text; + document_element()->layout_node()->for_each_in_inclusive_subtree_of_type([&](auto const& text_node) { + auto& dom_node = const_cast(text_node.dom_node()); + if (text_positions.is_empty()) { + text_positions.empend(dom_node); + } else { + current_start_position += current_node_text.bytes_as_string_view().length(); + text_positions.empend(dom_node, current_start_position); } + current_node_text = text_node.text_for_rendering(); + builder.append(current_node_text); return TraversalDecision::Continue; }); + if (text_positions.is_empty()) + return {}; + + size_t offset = 0; + auto* match_start_position = &text_positions[0]; + auto text = builder.to_string_without_validation(); + Vector> matches; + while (true) { + auto match_index = case_sensitivity == CaseSensitivity::CaseInsensitive + ? 
text.find_byte_offset_ignoring_case(query, offset) + : text.find_byte_offset(query, offset); + if (!match_index.has_value()) + break; + + size_t i = 0; + for (; i < text_positions.size() && match_index.value() > text_positions[i].start_offset; ++i) + match_start_position = &text_positions[i]; + + auto range = create_range(); + auto start_position = match_index.value() - match_start_position->start_offset; + auto& start_dom_node = match_start_position->dom_node; + (void)range->set_start(start_dom_node, start_position); + + auto* match_end_position = match_start_position; + for (; i < text_positions.size() && match_index.value() + query.bytes_as_string_view().length() > text_positions[i].start_offset; ++i) + match_end_position = &text_positions[i]; + + auto& end_dom_node = match_end_position->dom_node; + auto end_position = match_index.value() - match_end_position->start_offset + query.bytes_as_string_view().length(); + (void)range->set_end(end_dom_node, end_position); + + matches.append(range); + offset = match_index.value() + query.bytes_as_string_view().length() + 1; + match_start_position = match_end_position; + } + return matches; }