Skip to content

Commit

Permalink
ICU-22890 Add lookahead to parser
Browse files Browse the repository at this point in the history
This fixes tests that were broken by the original fix in this PR.
  • Loading branch information
catamorphism committed Sep 14, 2024
1 parent 2a9ea63 commit 8699988
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 22 deletions.
46 changes: 25 additions & 21 deletions icu4c/source/i18n/messageformat2_parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -507,6 +507,17 @@ void Parser::parseTokenWithWhitespace(UChar32 c, UErrorCode& errorCode) {
CHECK_BOUNDS(source, index, parseError, errorCode);
}

// Lookahead helper used to work around the ambiguity between unsupported
// keywords and supported keywords.
//
// Returns true if every code unit of `keyword` is found in `source`,
// in order, beginning at the current `index`; returns false as soon as a
// position is out of bounds or a code unit differs. An empty `keyword`
// yields true. The parser position is only read, never advanced.
bool Parser::nextIs(const std::u16string_view &keyword) const {
const int32_t keywordLength = static_cast<int32_t>(keyword.length());
int32_t offset = 0;
while (offset < keywordLength) {
// The bounds check must come first so `source` is never read past its end
const bool matchesHere = inBounds(source, index + offset)
&& source[index + offset] == keyword[offset];
if (!matchesHere) {
return false;
}
++offset;
}
return true;
}

/*
Consumes a non-empty sequence of `name-char`s, the first of which is
also a `name-start`.
Expand Down Expand Up @@ -1653,6 +1664,10 @@ void Parser::parseUnsupportedStatement(UErrorCode& status) {
UnicodeString keyword(PERIOD);
normalizedInput += UnicodeString(PERIOD);
index++;

// A message just consisting of '.' is a parse error
CHECK_BOUNDS(source, index, parseError, status);

keyword += parseName(status);
builder.setKeyword(keyword);

Expand Down Expand Up @@ -1718,15 +1733,6 @@ void Parser::parseUnsupportedStatement(UErrorCode& status) {
dataModel.addUnsupportedStatement(builder.build(status), status);
}

// Lookahead helper used to work around the ambiguity between `matcher` and
// `reserved-statement`.
//
// Returns true if the six code units of `source` beginning at the current
// `index` are all in bounds and equal to the first six entries of ID_MATCH;
// returns false otherwise. The parser position is only read, never advanced.
bool Parser::nextIsMatch() const {
// NOTE(review): 6 is presumably the number of characters in ID_MATCH
// (".match") -- confirm against its definition.
const int32_t matchLength = 6;
int32_t offset = 0;
while (offset < matchLength) {
// The bounds check must come first so `source` is never read past its end
const bool matchesHere = inBounds(source, index + offset)
&& source[index + offset] == ID_MATCH[offset];
if (!matchesHere) {
return false;
}
++offset;
}
return true;
}
/*
Consume a possibly-empty sequence of declarations separated by whitespace;
each declaration matches the `declaration` nonterminal in the grammar
Expand All @@ -1739,15 +1745,14 @@ void Parser::parseDeclarations(UErrorCode& status) {
CHECK_BOUNDS(source, index, parseError, status);

while (source[index] == PERIOD) {
CHECK_BOUNDS(source, index + 1, parseError, status);
if (source[index + 1] == ID_LOCAL[1]) {
parseLocalDeclaration(status);
} else if (source[index + 1] == ID_INPUT[1]) {
parseInputDeclaration(status);
} else {
// Unsupported statement
// Lookahead is needed to disambiguate this from a `match`
if (!nextIsMatch()) {
if (!nextIs(u".match")) {
parseUnsupportedStatement(status);
} else {
// Done parsing declarations
Expand Down Expand Up @@ -1786,8 +1791,10 @@ UnicodeString Parser::parseText(UErrorCode& status) {
}

if (!(isTextChar(source[index]) || source[index] == BACKSLASH)) {
// Error -- text is expected here
ERROR(parseError, status, index);
if (source[index] != RIGHT_CURLY_BRACE) {
// Error -- text is expected here
ERROR(parseError, status, index);
}
return str;
}

Expand Down Expand Up @@ -2088,8 +2095,6 @@ Pattern Parser::parseSimpleMessage(UErrorCode& status) {
void Parser::parseSelectors(UErrorCode& status) {
CHECK_ERROR(status);

U_ASSERT(inBounds(source, index));

parseToken(ID_MATCH, status);

bool empty = true;
Expand Down Expand Up @@ -2213,12 +2218,11 @@ void Parser::parseBody(UErrorCode& status) {
dataModel.setPattern(parseQuotedPattern(status));
break;
}
case ID_MATCH[0]: {
// Selectors
parseSelectors(status);
return;
}
default: {
if (nextIs(u".match")) {
parseSelectors(status);
return;
}
ERROR(parseError, status, index);
errorPattern(status);
return;
Expand All @@ -2240,7 +2244,7 @@ void Parser::parse(UParseError &parseErrorResult, UErrorCode& status) {
|| (index < static_cast<uint32_t>(source.length()) + 1
&& source[index] == LEFT_CURLY_BRACE
&& source[index + 1] == LEFT_CURLY_BRACE)) {
// A complex message begins with a '.' or '{'
// A complex message begins with a keyword or '{'
parseDeclarations(status);
parseBody(status);
simple = false;
Expand Down
2 changes: 1 addition & 1 deletion icu4c/source/i18n/messageformat2_parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ namespace message2 {
void parseToken(const UChar32 (&)[N], UErrorCode&);
template <int32_t N>
void parseTokenWithWhitespace(const UChar32 (&)[N], UErrorCode&);
bool nextIsMatch() const;
bool nextIs(const std::u16string_view&) const;
UnicodeString parseName(UErrorCode&);
UnicodeString parseIdentifier(UErrorCode&);
UnicodeString parseDigits(UErrorCode&);
Expand Down

0 comments on commit 8699988

Please sign in to comment.