Skip to content

Commit

Permalink
ICU-22889 Implemented a recursion limit in the RBNF parsing code to m…
Browse files Browse the repository at this point in the history
…atch the one already present in the RBNF

formatting code.
  • Loading branch information
richgillam committed Sep 18, 2024
1 parent 303b7e8 commit e7f5d97
Show file tree
Hide file tree
Showing 14 changed files with 138 additions and 33 deletions.
12 changes: 9 additions & 3 deletions icu4c/source/i18n/nfrs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -689,14 +689,20 @@ static void dumpUS(FILE* f, const UnicodeString& us) {
#endif

UBool
NFRuleSet::parse(const UnicodeString& text, ParsePosition& pos, double upperBound, uint32_t nonNumericalExecutedRuleMask, Formattable& result) const
NFRuleSet::parse(const UnicodeString& text, ParsePosition& pos, double upperBound, uint32_t nonNumericalExecutedRuleMask, int32_t recursionCount, Formattable& result) const
{
// try matching each rule in the rule set against the text being
// parsed. Whichever one matches the most characters is the one
// that determines the value we return.

result.setLong(0);

// dump out if we've reached the recursion limit
if (recursionCount >= RECURSION_LIMIT) {
// stop recursion
return false;
}

// dump out if there's no text to parse
if (text.length() == 0) {
return 0;
Expand All @@ -720,7 +726,7 @@ NFRuleSet::parse(const UnicodeString& text, ParsePosition& pos, double upperBoun
nonNumericalExecutedRuleMask |= 1 << i;

Formattable tempResult;
UBool success = nonNumericalRules[i]->doParse(text, workingPos, 0, upperBound, nonNumericalExecutedRuleMask, tempResult);
UBool success = nonNumericalRules[i]->doParse(text, workingPos, 0, upperBound, nonNumericalExecutedRuleMask, recursionCount + 1, tempResult);
if (success && (workingPos.getIndex() > highWaterMark.getIndex())) {
result = tempResult;
highWaterMark = workingPos;
Expand Down Expand Up @@ -759,7 +765,7 @@ NFRuleSet::parse(const UnicodeString& text, ParsePosition& pos, double upperBoun
continue;
}
Formattable tempResult;
UBool success = rules[i]->doParse(text, workingPos, fIsFractionRuleSet, upperBound, nonNumericalExecutedRuleMask, tempResult);
UBool success = rules[i]->doParse(text, workingPos, fIsFractionRuleSet, upperBound, nonNumericalExecutedRuleMask, recursionCount + 1, tempResult);
if (success && workingPos.getIndex() > highWaterMark.getIndex()) {
result = tempResult;
highWaterMark = workingPos;
Expand Down
2 changes: 1 addition & 1 deletion icu4c/source/i18n/nfrs.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ class NFRuleSet : public UMemory {
void format(int64_t number, UnicodeString& toAppendTo, int32_t pos, int32_t recursionCount, UErrorCode& status) const;
void format(double number, UnicodeString& toAppendTo, int32_t pos, int32_t recursionCount, UErrorCode& status) const;

UBool parse(const UnicodeString& text, ParsePosition& pos, double upperBound, uint32_t nonNumericalExecutedRuleMask, Formattable& result) const;
UBool parse(const UnicodeString& text, ParsePosition& pos, double upperBound, uint32_t nonNumericalExecutedRuleMask, int32_t recursionCount, Formattable& result) const;

void appendRules(UnicodeString& result) const; // toString

Expand Down
6 changes: 6 additions & 0 deletions icu4c/source/i18n/nfrule.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -918,6 +918,7 @@ NFRule::doParse(const UnicodeString& text,
UBool isFractionRule,
double upperBound,
uint32_t nonNumericalExecutedRuleMask,
int32_t recursionCount,
Formattable& resVal) const
{
// internally we operate on a copy of the string being parsed
Expand Down Expand Up @@ -1021,6 +1022,7 @@ NFRule::doParse(const UnicodeString& text,
double partialResult = matchToDelimiter(workText, start, tempBaseValue,
temp, pp, sub1,
nonNumericalExecutedRuleMask,
recursionCount,
upperBound);

// if we got a successful match (or were trying to match a
Expand All @@ -1042,6 +1044,7 @@ NFRule::doParse(const UnicodeString& text,
partialResult = matchToDelimiter(workText2, 0, partialResult,
temp, pp2, sub2,
nonNumericalExecutedRuleMask,
recursionCount,
upperBound);

// if we got a successful match on this second
Expand Down Expand Up @@ -1179,6 +1182,7 @@ NFRule::matchToDelimiter(const UnicodeString& text,
ParsePosition& pp,
const NFSubstitution* sub,
uint32_t nonNumericalExecutedRuleMask,
int32_t recursionCount,
double upperBound) const
{
UErrorCode status = U_ZERO_ERROR;
Expand Down Expand Up @@ -1213,6 +1217,7 @@ NFRule::matchToDelimiter(const UnicodeString& text,
formatter->isLenient(),
#endif
nonNumericalExecutedRuleMask,
recursionCount,
result);

// if the substitution could match all the text up to
Expand Down Expand Up @@ -1267,6 +1272,7 @@ NFRule::matchToDelimiter(const UnicodeString& text,
formatter->isLenient(),
#endif
nonNumericalExecutedRuleMask,
recursionCount,
result);
if (success && (tempPP.getIndex() != 0)) {
// if there's a successful match (or it's a null
Expand Down
2 changes: 2 additions & 0 deletions icu4c/source/i18n/nfrule.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ class NFRule : public UMemory {
UBool isFractional,
double upperBound,
uint32_t nonNumericalExecutedRuleMask,
int32_t recursionCount,
Formattable& result) const;

UBool shouldRollBack(int64_t number) const;
Expand All @@ -98,6 +99,7 @@ class NFRule : public UMemory {
double matchToDelimiter(const UnicodeString& text, int32_t startPos, double baseValue,
const UnicodeString& delimiter, ParsePosition& pp, const NFSubstitution* sub,
uint32_t nonNumericalExecutedRuleMask,
int32_t recursionCount,
double upperBound) const;
void stripPrefix(UnicodeString& text, const UnicodeString& prefix, ParsePosition& pp) const;

Expand Down
21 changes: 14 additions & 7 deletions icu4c/source/i18n/nfsubs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,7 @@ class ModulusSubstitution : public NFSubstitution {
double upperBound,
UBool lenientParse,
uint32_t nonNumericalExecutedRuleMask,
int32_t recursionCount,
Formattable& result) const override;

virtual double composeRuleValue(double newRuleValue, double oldRuleValue) const override {
Expand Down Expand Up @@ -242,6 +243,7 @@ class FractionalPartSubstitution : public NFSubstitution {
double upperBound,
UBool lenientParse,
uint32_t nonNumericalExecutedRuleMask,
int32_t recursionCount,
Formattable& result) const override;

virtual double composeRuleValue(double newRuleValue, double oldRuleValue) const override { return newRuleValue + oldRuleValue; }
Expand Down Expand Up @@ -314,6 +316,7 @@ class NumeratorSubstitution : public NFSubstitution {
double upperBound,
UBool /*lenientParse*/,
uint32_t nonNumericalExecutedRuleMask,
int32_t recursionCount,
Formattable& result) const override;

virtual double composeRuleValue(double newRuleValue, double oldRuleValue) const override { return newRuleValue / oldRuleValue; }
Expand Down Expand Up @@ -706,6 +709,7 @@ NFSubstitution::doParse(const UnicodeString& text,
double upperBound,
UBool lenientParse,
uint32_t nonNumericalExecutedRuleMask,
int32_t recursionCount,
Formattable& result) const
{
#ifdef RBNF_DEBUG
Expand All @@ -726,7 +730,7 @@ NFSubstitution::doParse(const UnicodeString& text,
// on), then also try parsing the text using a default-
// constructed NumberFormat
if (ruleSet != nullptr) {
ruleSet->parse(text, parsePosition, upperBound, nonNumericalExecutedRuleMask, result);
ruleSet->parse(text, parsePosition, upperBound, nonNumericalExecutedRuleMask, recursionCount, result);
if (lenientParse && !ruleSet->isFractionRuleSet() && parsePosition.getIndex() == 0) {
UErrorCode status = U_ZERO_ERROR;
NumberFormat* fmt = NumberFormat::createInstance(status);
Expand Down Expand Up @@ -949,18 +953,19 @@ ModulusSubstitution::doParse(const UnicodeString& text,
double upperBound,
UBool lenientParse,
uint32_t nonNumericalExecutedRuleMask,
int32_t recursionCount,
Formattable& result) const
{
// if this isn't a >>> substitution, we can just use the
// inherited parse() routine to do the parsing
if (ruleToUse == nullptr) {
return NFSubstitution::doParse(text, parsePosition, baseValue, upperBound, lenientParse, nonNumericalExecutedRuleMask, result);
return NFSubstitution::doParse(text, parsePosition, baseValue, upperBound, lenientParse, nonNumericalExecutedRuleMask, recursionCount, result);

// but if it IS a >>> substitution, we have to do it here: we
// use the specific rule's doParse() method, and then we have to
// do some of the other work of NFRuleSet.parse()
} else {
ruleToUse->doParse(text, parsePosition, false, upperBound, nonNumericalExecutedRuleMask, result);
ruleToUse->doParse(text, parsePosition, false, upperBound, nonNumericalExecutedRuleMask, recursionCount, result);

if (parsePosition.getIndex() != 0) {
UErrorCode status = U_ZERO_ERROR;
Expand Down Expand Up @@ -1136,12 +1141,13 @@ FractionalPartSubstitution::doParse(const UnicodeString& text,
double /*upperBound*/,
UBool lenientParse,
uint32_t nonNumericalExecutedRuleMask,
int32_t recursionCount,
Formattable& resVal) const
{
// if we're not in byDigits mode, we can just use the inherited
// doParse()
if (!byDigits) {
return NFSubstitution::doParse(text, parsePosition, baseValue, 0, lenientParse, nonNumericalExecutedRuleMask, resVal);
return NFSubstitution::doParse(text, parsePosition, baseValue, 0, lenientParse, nonNumericalExecutedRuleMask, recursionCount, resVal);

// if we ARE in byDigits mode, parse the text one digit at a time
// using this substitution's owning rule set (we do this by setting
Expand All @@ -1160,7 +1166,7 @@ FractionalPartSubstitution::doParse(const UnicodeString& text,
while (workText.length() > 0 && workPos.getIndex() != 0) {
workPos.setIndex(0);
Formattable temp;
getRuleSet()->parse(workText, workPos, 10, nonNumericalExecutedRuleMask, temp);
getRuleSet()->parse(workText, workPos, 10, nonNumericalExecutedRuleMask, recursionCount, temp);
UErrorCode status = U_ZERO_ERROR;
digit = temp.getLong(status);
// digit = temp.getType() == Formattable::kLong ?
Expand Down Expand Up @@ -1271,6 +1277,7 @@ NumeratorSubstitution::doParse(const UnicodeString& text,
double upperBound,
UBool /*lenientParse*/,
uint32_t nonNumericalExecutedRuleMask,
int32_t recursionCount,
Formattable& result) const
{
// we don't have to do anything special to do the parsing here,
Expand All @@ -1289,7 +1296,7 @@ NumeratorSubstitution::doParse(const UnicodeString& text,

while (workText.length() > 0 && workPos.getIndex() != 0) {
workPos.setIndex(0);
getRuleSet()->parse(workText, workPos, 1, nonNumericalExecutedRuleMask, temp); // parse zero or nothing at all
getRuleSet()->parse(workText, workPos, 1, nonNumericalExecutedRuleMask, recursionCount, temp); // parse zero or nothing at all
if (workPos.getIndex() == 0) {
// we failed, either there were no more zeros, or the number was formatted with digits
// either way, we're done
Expand All @@ -1311,7 +1318,7 @@ NumeratorSubstitution::doParse(const UnicodeString& text,
}

// we've parsed off the zeros, now let's parse the rest from our current position
NFSubstitution::doParse(workText, parsePosition, withZeros ? 1 : baseValue, upperBound, false, nonNumericalExecutedRuleMask, result);
NFSubstitution::doParse(workText, parsePosition, withZeros ? 1 : baseValue, upperBound, false, nonNumericalExecutedRuleMask, recursionCount, result);

if (withZeros) {
// any base value will do in this case. is there a way to
Expand Down
1 change: 1 addition & 0 deletions icu4c/source/i18n/nfsubs.h
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,7 @@ class NFSubstitution : public UObject {
double upperBound,
UBool lenientParse,
uint32_t nonNumericalExecutedRuleMask,
int32_t recursionCount,
Formattable& result) const;

/**
Expand Down
2 changes: 1 addition & 1 deletion icu4c/source/i18n/rbnf.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1362,7 +1362,7 @@ RuleBasedNumberFormat::parse(const UnicodeString& text,
ParsePosition working_pp(0);
Formattable working_result;

rp->parse(workingText, working_pp, kMaxDouble, 0, working_result);
rp->parse(workingText, working_pp, kMaxDouble, 0, 0, working_result);
if (working_pp.getIndex() > high_pp.getIndex()) {
high_pp = working_pp;
high_result = working_result;
Expand Down
34 changes: 34 additions & 0 deletions icu4c/source/test/intltest/itrbnf.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ void IntlTestRBNF::runIndexedTest(int32_t index, UBool exec, const char* &name,
TESTCASE(29, TestNumberingSystem);
TESTCASE(30, TestDFRounding);
TESTCASE(31, TestMemoryLeak22899);
TESTCASE(32, TestInfiniteRecursion);
#else
TESTCASE(0, TestRBNFDisabled);
#endif
Expand Down Expand Up @@ -2614,6 +2615,39 @@ IntlTestRBNF::TestNumberingSystem() {
}
}

void
IntlTestRBNF::TestInfiniteRecursion() {
UnicodeString badRules[] = {
">>",
"<<",
"<<<",
">>>",
"%foo: x=%foo=",
"%one: x>%two>; %two: y>%one>;"
};

for (int32_t i = 0; i < UPRV_LENGTHOF(badRules); i++) {
UErrorCode err = U_ZERO_ERROR;
UParseError parseErr;
RuleBasedNumberFormat rbnf(badRules[i], parseErr, err);

if (U_SUCCESS(err)) {
UnicodeString result;
rbnf.format(5, result);
// we don't actually care about the result and the function doesn't return an error code;
// we just want to make sure the function returns

Formattable pResult;
rbnf.parse("foo", pResult, err);
assertTrue("rbnf.parse() didn't return U_INVALID_FORMAT_ERROR!", err == U_INVALID_FORMAT_ERROR);
} else {
// eventually it'd be nice to statically analyze the rules for (at least) the most common
// causes of infinite recursion, in which case we'd end up down here and need to check
// the error code. But for now, we probably won't end up here and don't care if we do
}
}
}

/* U_HAVE_RBNF */
#else

Expand Down
1 change: 1 addition & 0 deletions icu4c/source/test/intltest/itrbnf.h
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,7 @@ class IntlTestRBNF : public IntlTest {
void TestMinMaxIntegerDigitsIgnored();
void TestNumberingSystem();
void TestMemoryLeak22899();
void TestInfiniteRecursion();

protected:
virtual void doTest(RuleBasedNumberFormat* formatter, const char* const testData[][2], UBool testParsing);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1908,4 +1908,47 @@ public void TestNumberingSystem() {
rbnf.setDefaultRuleSet("%ethiopic");
assertEquals("Wrong result with Ethiopic rule set", "፻፳፫", rbnf.format(123));
}

@Test
public void TestInfiniteRecursion() {
final String[] badRules = {
">>",
"<<",
"<<<",
">>>",
"%foo: x=%foo=",
"%one: x>%two>; %two: y>%one>;"
};

for (String badRule : badRules) {
try {
RuleBasedNumberFormat rbnf = new RuleBasedNumberFormat(badRule);
try {
rbnf.format(5);
// we don't actually care about the result; we just want to make sure the function returns
} catch (IllegalStateException e) {
// we're supposed to get an IllegalStateException; swallow it and continue
}

try {
rbnf.parse("foo");
errln("Parse test didn't throw an exception!");
} catch (IllegalStateException e) {
// we're supposed to get an IllegalStateException; swallow it and continue
} catch (ParseException e) {
// if we don't hit the recursion limit, we'll still fail to parse the number,
// so also swallow this exception and continue
}
} catch (IllegalArgumentException e) {
// ">>>" generates a parse exception when you try to create the formatter (so we expect that rather
// than re-throwing and triggering a test failure)
// (eventually it'd be nice to statically analyze the rules for (at least) the most common
// causes of infinite recursion, in which case we'd end up down here and need to check
// the error code. But for now, we probably won't end up here and don't care if we do)
if (!badRule.equals("<<<")) {
throw e;
}
}
}
}
}
Loading

0 comments on commit e7f5d97

Please sign in to comment.