diff --git a/icu4c/source/i18n/messageformat2_parser.cpp b/icu4c/source/i18n/messageformat2_parser.cpp index 3d5ea6e560d2..8eadd1de697b 100644 --- a/icu4c/source/i18n/messageformat2_parser.cpp +++ b/icu4c/source/i18n/messageformat2_parser.cpp @@ -507,6 +507,17 @@ void Parser::parseTokenWithWhitespace(UChar32 c, UErrorCode& errorCode) { CHECK_BOUNDS(source, index, parseError, errorCode); } +// Terrible hack to get around the ambiguity between unsupported keywords +// and supported keywords +bool Parser::nextIs(const std::u16string_view &keyword) const { + for(int32_t i = 0; i < (int32_t) keyword.length(); i++) { + if (!inBounds(source, index + i) || source[index + i] != keyword[i]) { + return false; + } + } + return true; +} + /* Consumes a non-empty sequence of `name-char`s, the first of which is also a `name-start`. @@ -1653,6 +1664,10 @@ void Parser::parseUnsupportedStatement(UErrorCode& status) { UnicodeString keyword(PERIOD); normalizedInput += UnicodeString(PERIOD); index++; + + // A message just consisting of '.' is a parse error + CHECK_BOUNDS(source, index, parseError, status); + keyword += parseName(status); builder.setKeyword(keyword); @@ -1718,15 +1733,6 @@ void Parser::parseUnsupportedStatement(UErrorCode& status) { dataModel.addUnsupportedStatement(builder.build(status), status); } -// Terrible hack to get around the ambiguity between `matcher` and `reserved-statement` -bool Parser::nextIsMatch() const { - for(int32_t i = 0; i < 6; i++) { - if (!inBounds(source, index + i) || source[index + i] != ID_MATCH[i]) { - return false; - } - } - return true; -} /* Consume a possibly-empty sequence of declarations separated by whitespace; each declaration matches the `declaration` nonterminal in the grammar @@ -1739,7 +1745,6 @@ void Parser::parseDeclarations(UErrorCode& status) { CHECK_BOUNDS(source, index, parseError, status); while (source[index] == PERIOD) { - CHECK_BOUNDS(source, index + 1, parseError, status); if (source[index + 1] == ID_LOCAL[1]) { parseLocalDeclaration(status); } else if (source[index + 1] == ID_INPUT[1]) { @@ -1747,7 +1752,7 @@ void Parser::parseDeclarations(UErrorCode& status) { } else { // Unsupported statement // Lookahead is needed to disambiguate this from a `match` - if (!nextIsMatch()) { + if (!nextIs(u".match")) { parseUnsupportedStatement(status); } else { // Done parsing declarations @@ -1786,8 +1791,10 @@ UnicodeString Parser::parseText(UErrorCode& status) { } if (!(isTextChar(source[index]) || source[index] == BACKSLASH)) { - // Error -- text is expected here - ERROR(parseError, status, index); + if (source[index] != RIGHT_CURLY_BRACE) { + // Error -- text is expected here + ERROR(parseError, status, index); + } return str; } @@ -2088,8 +2095,6 @@ Pattern Parser::parseSimpleMessage(UErrorCode& status) { void Parser::parseSelectors(UErrorCode& status) { CHECK_ERROR(status); - U_ASSERT(inBounds(source, index)); - parseToken(ID_MATCH, status); bool empty = true; @@ -2213,12 +2218,11 @@ void Parser::parseBody(UErrorCode& status) { dataModel.setPattern(parseQuotedPattern(status)); break; } - case ID_MATCH[0]: { - // Selectors - parseSelectors(status); - return; - } default: { + if (nextIs(u".match")) { + parseSelectors(status); + return; + } ERROR(parseError, status, index); errorPattern(status); return; @@ -2240,7 +2244,7 @@ void Parser::parse(UParseError &parseErrorResult, UErrorCode& status) { || (index < static_cast(source.length()) + 1 && source[index] == LEFT_CURLY_BRACE && source[index + 1] == LEFT_CURLY_BRACE)) { - // A complex message begins with a '.' or '{' + // A complex message begins with a keyword or '{' parseDeclarations(status); parseBody(status); simple = false; diff --git a/icu4c/source/i18n/messageformat2_parser.h b/icu4c/source/i18n/messageformat2_parser.h index 92c0475d67db..e5e6c3b1288c 100644 --- a/icu4c/source/i18n/messageformat2_parser.h +++ b/icu4c/source/i18n/messageformat2_parser.h @@ -115,7 +115,7 @@ namespace message2 { void parseToken(const UChar32 (&)[N], UErrorCode&); template void parseTokenWithWhitespace(const UChar32 (&)[N], UErrorCode&); - bool nextIsMatch() const; + bool nextIs(const std::u16string_view&) const; UnicodeString parseName(UErrorCode&); UnicodeString parseIdentifier(UErrorCode&); UnicodeString parseDigits(UErrorCode&);