Skip to content

Commit

Permalink
LibWeb: Support finding text split across multiple text nodes
Browse files Browse the repository at this point in the history
Previously, the find-in-page function would fail to find text that was
split across multiple text nodes. For example, given the following
markup: `WH<span>F</span>`, the query `WHF` would fail to be
matched.

This is fixed by traversing all of the document's text nodes,
constructing a complete string to query against, and keeping track of
the locations where that string is split across multiple nodes.

(cherry picked from commit ec4d29849dc1d0357c73690722aea1a7802dd0bc)
  • Loading branch information
tcl3 authored and nico committed Nov 1, 2024
1 parent 934fe3c commit 7c5f5e6
Showing 1 changed file with 53 additions and 17 deletions.
70 changes: 53 additions & 17 deletions Userland/Libraries/LibWeb/DOM/Document.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5196,29 +5196,65 @@ Vector<JS::Handle<DOM::Range>> Document::find_matching_text(String const& query,
if (!document_element() || !document_element()->layout_node())
return {};

Vector<JS::Handle<DOM::Range>> matches;
document_element()->layout_node()->for_each_in_inclusive_subtree_of_type<Layout::TextNode>([&](auto const& text_node) {
auto const& text = text_node.text_for_rendering();
size_t offset = 0;
while (true) {
auto match_index = case_sensitivity == CaseSensitivity::CaseInsensitive
? text.find_byte_offset_ignoring_case(query, offset)
: text.find_byte_offset(query, offset);
if (!match_index.has_value())
break;

auto range = create_range();
auto& dom_node = const_cast<DOM::Text&>(text_node.dom_node());
(void)range->set_start(dom_node, match_index.value());
(void)range->set_end(dom_node, match_index.value() + query.code_points().length());
// Records where one DOM text node's rendered text begins within the
// concatenated string that the query is matched against.
struct TextPositionNode {
// The DOM text node whose rendered text was appended to the concatenated string.
DOM::Text& dom_node;
// Byte offset into the concatenated string at which this node's text starts.
// Left at the default of 0 for the first text node encountered.
size_t start_offset { 0 };
};

matches.append(range);
offset = match_index.value() + 1;
StringBuilder builder;
Vector<TextPositionNode> text_positions;
size_t current_start_position = 0;
String current_node_text;
document_element()->layout_node()->for_each_in_inclusive_subtree_of_type<Layout::TextNode>([&](auto const& text_node) {
auto& dom_node = const_cast<DOM::Text&>(text_node.dom_node());
if (text_positions.is_empty()) {
text_positions.empend(dom_node);
} else {
current_start_position += current_node_text.bytes_as_string_view().length();
text_positions.empend(dom_node, current_start_position);
}

current_node_text = text_node.text_for_rendering();
builder.append(current_node_text);
return TraversalDecision::Continue;
});

if (text_positions.is_empty())
return {};

size_t offset = 0;
auto* match_start_position = &text_positions[0];
auto text = builder.to_string_without_validation();
Vector<JS::Handle<DOM::Range>> matches;
while (true) {
auto match_index = case_sensitivity == CaseSensitivity::CaseInsensitive
? text.find_byte_offset_ignoring_case(query, offset)
: text.find_byte_offset(query, offset);
if (!match_index.has_value())
break;

size_t i = 0;
for (; i < text_positions.size() && match_index.value() > text_positions[i].start_offset; ++i)
match_start_position = &text_positions[i];

auto range = create_range();
auto start_position = match_index.value() - match_start_position->start_offset;
auto& start_dom_node = match_start_position->dom_node;
(void)range->set_start(start_dom_node, start_position);

auto* match_end_position = match_start_position;
for (; i < text_positions.size() && match_index.value() + query.bytes_as_string_view().length() > text_positions[i].start_offset; ++i)
match_end_position = &text_positions[i];

auto& end_dom_node = match_end_position->dom_node;
auto end_position = match_index.value() - match_end_position->start_offset + query.bytes_as_string_view().length();
(void)range->set_end(end_dom_node, end_position);

matches.append(range);
offset = match_index.value() + query.bytes_as_string_view().length() + 1;
match_start_position = match_end_position;
}

return matches;
}

Expand Down

0 comments on commit 7c5f5e6

Please sign in to comment.