Skip to content

Commit

Permalink
ICU-22579 Fix Null deref while Unicode Set only has string
Browse files Browse the repository at this point in the history
  • Loading branch information
FrankYFTang committed Dec 12, 2023
1 parent 8b14c05 commit 4a7d61d
Show file tree
Hide file tree
Showing 5 changed files with 35 additions and 2 deletions.
7 changes: 6 additions & 1 deletion icu4c/source/common/rbbiscan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1224,6 +1224,7 @@ void RBBIRuleScanner::scanSet() {
UErrorCode localStatus = U_ZERO_ERROR;
LocalPointer<UnicodeSet> uset(new UnicodeSet(), localStatus);
if (U_FAILURE(localStatus)) {
error(localStatus);
return;
}
uset->applyPatternIgnoreSpace(fRB->fRules, pos, fSymbolTable, localStatus);
Expand All @@ -1240,7 +1241,11 @@ void RBBIRuleScanner::scanSet() {
// Verify that the set contains at least one code point.
//
U_ASSERT(uset.isValid());
if (uset->isEmpty()) {
UnicodeSet tempSet(*uset);
// Use tempSet to handle the case that the UnicodeSet contains
// only string element, such as [{ab}] and treat it as empty set.
tempSet.removeAllStrings();
if (tempSet.isEmpty()) {
// This set is empty.

This comment has been minimized.

Copy link
@JackOehling

JackOehling Aug 1, 2024

Hello, I noticed that this was the change here that cycled out the [{bof}] tag feature. I was wondering if there are any equivalents to something like this for the new versions (75.1)? I was able to use it with version 74.2 but the upgrade to 75.1 broke because of this.

This comment has been minimized.

Copy link
@FrankYFTang

FrankYFTang Aug 1, 2024

Author Contributor

couuld you file a bug and report what was working and what is not working now with code example? ThanksI was not sure of such regression.

// Make it an error, because it almost certainly is not what the user wanted.
// Also, avoids having to think about corner cases in the tree manipulation code
Expand Down
9 changes: 9 additions & 0 deletions icu4c/source/test/intltest/rbbitst.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,7 @@ void RBBITest::runIndexedTest( int32_t index, UBool exec, const char* &name, cha
TESTCASE_AUTO(TestRandomAccess);
TESTCASE_AUTO(TestExternalBreakEngineWithFakeTaiLe);
TESTCASE_AUTO(TestExternalBreakEngineWithFakeYue);
TESTCASE_AUTO(TestBug22579);
TESTCASE_AUTO(TestBug22581);
TESTCASE_AUTO(TestBug22584);
TESTCASE_AUTO(TestBug22585);
Expand Down Expand Up @@ -5895,6 +5896,14 @@ void RBBITest::TestBug22584() {
RuleBasedBreakIterator bi2(ruleStr, pe, ec);
}

void RBBITest::TestBug22579() {
// Test not causing null deref in cloneTree
UnicodeString ruleStr = u"[{ab}];";
UParseError pe {};
UErrorCode ec {U_ZERO_ERROR};

RuleBasedBreakIterator bi(ruleStr, pe, ec);
}
void RBBITest::TestBug22581() {
// Test duplicate variable setting will not leak the rule compilation
UnicodeString ruleStr = u"$foo=[abc]; $foo=[xyz]; $foo;";
Expand Down
1 change: 1 addition & 0 deletions icu4c/source/test/intltest/rbbitst.h
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ class RBBITest: public IntlTest {
void TestRandomAccess();
void TestExternalBreakEngineWithFakeTaiLe();
void TestExternalBreakEngineWithFakeYue();
void TestBug22579();
void TestBug22581();
void TestBug22584();
void TestBug22585();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1062,7 +1062,11 @@ void scanSet() {

// Verify that the set contains at least one code point.
//
if (uset.isEmpty()) {
// Use tempSet to handle the case that the UnicodeSet contains
// only string element, such as [{ab}] and treat it as empty set.
UnicodeSet tempSet = new UnicodeSet(uset);
tempSet.removeAllStrings();
if (tempSet.isEmpty()) {
// This set is empty.
// Make it an error, because it almost certainly is not what the user wanted.
// Also, avoids having to think about corner cases in the tree manipulation code
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -946,6 +946,20 @@ public void TestUnpairedSurrogate() {
assertEquals("Rules does not match", rules, bi.toString());
}

@Test
public void TestBug22579() {
try {
new RuleBasedBreakIterator("[{ab}];");
fail("TestBug22579: RuleBasedBreakIterator() failed to throw an exception with only string in an Unicode set.");
}
catch (IllegalArgumentException e) {
// expected exception with only string inside an Unicode set.
}
catch (Exception e) {
fail("TestBug22579: Unexpected exception while new RuleBasedBreakIterator() with only string in an Unicode Set: " + e);
}

}
@Test
public void TestBug22585() {
try {
Expand Down

0 comments on commit 4a7d61d

Please sign in to comment.