Skip to content

Commit

Permalink
ICU-22584 Fix RBBI rule builder stack overflow.
Browse files Browse the repository at this point in the history
The problem was found by fuzz testing.

A rule consisting of a long literal string produces a large, unbalanced parse tree,
one node per string element. Deleting the tree was recursive, once per node, resulting
in deep recursion.

This PR changes node deletion to use an iterative (non-recursive) approach.

This change only affects rule building. There is no change to the RBBI run time
using pre-built rules.
  • Loading branch information
aheninger committed Dec 8, 2023
1 parent bcae6f2 commit 65a2a3c
Show file tree
Hide file tree
Showing 4 changed files with 66 additions and 4 deletions.
53 changes: 49 additions & 4 deletions icu4c/source/common/rbbinode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -123,19 +123,64 @@ RBBINode::~RBBINode() {
break;

default:
delete fLeftChild;
// Avoid using a recursive implementation because of stack overflow problems.
// See bug ICU-22584.
// delete fLeftChild;
NRDeleteNode(fLeftChild);
fLeftChild = nullptr;
delete fRightChild;
// delete fRightChild;
NRDeleteNode(fRightChild);
fRightChild = nullptr;
}


delete fFirstPosSet;
delete fLastPosSet;
delete fFollowPos;

}

/**
* Non-recursive delete of a node + its children. Used from the node destructor
* instead of the more obvious recursive implementation to avoid problems with
* stack overflow with some perverse test rule data (from fuzzing).
*/
void RBBINode::NRDeleteNode(RBBINode *node) {
if (node == nullptr) {
return;
}

RBBINode *stopNode = node->fParent;
RBBINode *nextNode = node;
while (nextNode != stopNode) {
RBBINode *currentNode = nextNode;

if ((currentNode->fLeftChild == nullptr && currentNode->fRightChild == nullptr) ||
currentNode->fType == varRef || // varRef and setRef nodes do not
currentNode->fType == setRef) { // own their children nodes.
// CurrentNode is effectively a leaf node; it's safe to go ahead and delete it.
nextNode = currentNode->fParent;
if (nextNode->fLeftChild == currentNode) {
nextNode->fLeftChild = nullptr;
} else if (nextNode->fRightChild == currentNode) {
nextNode->fRightChild = nullptr;
}
delete currentNode;
} else if (currentNode->fLeftChild) {
nextNode = currentNode->fLeftChild;
if (nextNode->fParent == nullptr) {
nextNode->fParent = currentNode;
// fParent isn't always set; do it now if not.
}
U_ASSERT(nextNode->fParent == currentNode);
} else if (currentNode->fRightChild) {
nextNode = currentNode->fRightChild;
if (nextNode->fParent == nullptr) {
nextNode->fParent = currentNode;
// fParent isn't always set; do it now if not.
}
U_ASSERT(nextNode->fParent == currentNode);
}
}
}

//-------------------------------------------------------------------------
//
Expand Down
1 change: 1 addition & 0 deletions icu4c/source/common/rbbinode.h
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ class RBBINode : public UMemory {
RBBINode(NodeType t);
RBBINode(const RBBINode &other);
~RBBINode();
static void NRDeleteNode(RBBINode *node);

RBBINode *cloneTree();
RBBINode *flattenVariables();
Expand Down
15 changes: 15 additions & 0 deletions icu4c/source/test/intltest/rbbitst.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,7 @@ void RBBITest::runIndexedTest( int32_t index, UBool exec, const char* &name, cha
TESTCASE_AUTO(TestRandomAccess);
TESTCASE_AUTO(TestExternalBreakEngineWithFakeTaiLe);
TESTCASE_AUTO(TestExternalBreakEngineWithFakeYue);
TESTCASE_AUTO(TestBug22584);

#if U_ENABLE_TRACING
TESTCASE_AUTO(TestTraceCreateCharacter);
Expand Down Expand Up @@ -5858,4 +5859,18 @@ void RBBITest::TestExternalBreakEngineWithFakeTaiLe() {
expected2 == actual2);
}

void RBBITest::TestBug22584() {
// Creating a break iterator from a rule consisting of a very long
// literal input string caused a stack overflow when deleting the
// parse tree for the input during the rule building process.

// Failure of this test showed as a crash during the break iterator construction.

UnicodeString ruleStr(100000, (UChar32)0, 100000);
UParseError pe {};
UErrorCode ec {U_ZERO_ERROR};

RuleBasedBreakIterator bi(ruleStr, pe, ec);
}

#endif // #if !UCONFIG_NO_BREAK_ITERATION
1 change: 1 addition & 0 deletions icu4c/source/test/intltest/rbbitst.h
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ class RBBITest: public IntlTest {
void TestRandomAccess();
void TestExternalBreakEngineWithFakeTaiLe();
void TestExternalBreakEngineWithFakeYue();
void TestBug22584();

#if U_ENABLE_TRACING
void TestTraceCreateCharacter();
Expand Down

0 comments on commit 65a2a3c

Please sign in to comment.