diff --git a/icu4c/source/i18n/unicode/messageformat2.h b/icu4c/source/i18n/unicode/messageformat2.h index d23ac8409d81..e77c61d6d36e 100644 --- a/icu4c/source/i18n/unicode/messageformat2.h +++ b/icu4c/source/i18n/unicode/messageformat2.h @@ -378,7 +378,7 @@ namespace message2 { // Must be a raw pointer to avoid including the internal header file // defining StaticErrors // Owned by `this` - StaticErrors* errors; + StaticErrors* errors = nullptr; }; // class MessageFormatter diff --git a/icu4c/source/test/intltest/messageformat2test.cpp b/icu4c/source/test/intltest/messageformat2test.cpp index 524ee4ab79d9..5b0c04697b33 100644 --- a/icu4c/source/test/intltest/messageformat2test.cpp +++ b/icu4c/source/test/intltest/messageformat2test.cpp @@ -30,6 +30,8 @@ TestMessageFormat2::runIndexedTest(int32_t index, UBool exec, TESTCASE_AUTO(testAPI); TESTCASE_AUTO(testAPISimple); TESTCASE_AUTO(testDataModelAPI); + TESTCASE_AUTO(testHighLoneSurrogate); + TESTCASE_AUTO(testLowLoneSurrogate); TESTCASE_AUTO(dataDrivenTests); TESTCASE_AUTO_END; } @@ -263,6 +265,34 @@ void TestMessageFormat2::testAPICustomFunctions() { delete person; } +// ICU-22890 lone surrogate cause infinity loop +void TestMessageFormat2::testHighLoneSurrogate() { + IcuTestErrorCode errorCode(*this, "testHighLoneSurrogate"); + UParseError pe = { 0, 0, {0}, {0} }; + // Lone surrogate with only high surrogate + UnicodeString loneSurrogate({0xda02, 0}); + icu::message2::MessageFormatter msgfmt1 = + icu::message2::MessageFormatter::Builder(errorCode) + .setPattern(loneSurrogate, pe, errorCode) + .build(errorCode); + UnicodeString result = msgfmt1.formatToString({}, errorCode); + errorCode.expectErrorAndReset(U_MF_SYNTAX_ERROR, "testHighLoneSurrogate"); +} + +// ICU-22890 lone surrogate cause infinity loop +void TestMessageFormat2::testLowLoneSurrogate() { + IcuTestErrorCode errorCode(*this, "testLowLoneSurrogate"); + UParseError pe = { 0, 0, {0}, {0} }; + // Lone surrogate with only low surrogate + UnicodeString loneSurrogate({0xdc02, 0}); + icu::message2::MessageFormatter msgfmt2 = + icu::message2::MessageFormatter::Builder(errorCode) + .setPattern(loneSurrogate, pe, errorCode) + .build(errorCode); + UnicodeString result = msgfmt2.formatToString({}, errorCode); + errorCode.expectErrorAndReset(U_MF_SYNTAX_ERROR, "testLowLoneSurrogate"); +} + void TestMessageFormat2::dataDrivenTests() { IcuTestErrorCode errorCode(*this, "jsonTests"); diff --git a/icu4c/source/test/intltest/messageformat2test.h b/icu4c/source/test/intltest/messageformat2test.h index 7c7c2612f50d..b4b250cb5106 100644 --- a/icu4c/source/test/intltest/messageformat2test.h +++ b/icu4c/source/test/intltest/messageformat2test.h @@ -87,6 +87,8 @@ class TestMessageFormat2: public IntlTest { void testMessageFormatDateTimeSkeleton(message2::TestCase::Builder&, IcuTestErrorCode&); void testMf1Behavior(message2::TestCase::Builder&, IcuTestErrorCode&); + void testHighLoneSurrogate(void); + void testLowLoneSurrogate(void); }; // class TestMessageFormat2 U_NAMESPACE_BEGIN diff --git a/icu4j/main/core/src/main/java/com/ibm/icu/message2/InputSource.java b/icu4j/main/core/src/main/java/com/ibm/icu/message2/InputSource.java index 8c89c93bf51a..167ace313cde 100644 --- a/icu4j/main/core/src/main/java/com/ibm/icu/message2/InputSource.java +++ b/icu4j/main/core/src/main/java/com/ibm/icu/message2/InputSource.java @@ -29,6 +29,13 @@ int peekChar() { return buffer.charAt(cursor); } + // Used when checking for unpaired surrogates + int readChar() { + int ch = peekChar(); + cursor++; + return (char) ch; + } + int readCodePoint() { // TODO: remove this? // START Detect possible infinite loop diff --git a/icu4j/main/core/src/main/java/com/ibm/icu/message2/MFParser.java b/icu4j/main/core/src/main/java/com/ibm/icu/message2/MFParser.java index a5c826f87397..791b45e864ee 100644 --- a/icu4j/main/core/src/main/java/com/ibm/icu/message2/MFParser.java +++ b/icu4j/main/core/src/main/java/com/ibm/icu/message2/MFParser.java @@ -113,9 +113,17 @@ private MFDataModel.PatternPart getPatternPart() throws MFParseException { } } - private String getText() { + private String getText() throws MFParseException { StringBuilder result = new StringBuilder(); while (true) { + int ch = input.readChar(); + // Check for unmatched surrogates + checkCondition(!(Character.isHighSurrogate((char) ch) + && (input.atEnd() + || !Character.isLowSurrogate((char) input.peekChar()))), + "Unpaired high surrogate"); + checkCondition (!Character.isLowSurrogate((char) ch), "Unpaired low surrogate"); + input.backup(1); int cp = input.readCodePoint(); switch (cp) { case EOF: diff --git a/icu4j/main/core/src/test/java/com/ibm/icu/dev/test/message2/MessageFormat2Test.java b/icu4j/main/core/src/test/java/com/ibm/icu/dev/test/message2/MessageFormat2Test.java index 6ac37d6085fd..fece39bfa603 100644 --- a/icu4j/main/core/src/test/java/com/ibm/icu/dev/test/message2/MessageFormat2Test.java +++ b/icu4j/main/core/src/test/java/com/ibm/icu/dev/test/message2/MessageFormat2Test.java @@ -46,6 +46,36 @@ public void test() { mf2.formatToString(Args.NONE)); } + @Test + public void testHighLoneSurrogate() { + try { + MessageFormatter mf2 = MessageFormatter.builder() + .setPattern("\uda02").build(); + assertEquals("testHighLoneSurrogate: expected to throw, but didn't", + false, true); + } catch (IllegalArgumentException e) { + // Parse error was thrown, as expected + } catch (Exception e) { + assertEquals("testHighLoneSurrogate: expected IllegalArgumentException " + + "but threw " + e.toString(), false, true); + } + } + + @Test + public void testLowLoneSurrogate() { + try { + MessageFormatter mf2 = MessageFormatter.builder() + .setPattern("\udc02").build(); + assertEquals("testLowLoneSurrogate: expected to throw, but didn't", + false, true); + } catch (IllegalArgumentException e) { + // Parse error was thrown, as expected + } catch (Exception e) { + assertEquals("testLowLoneSurrogate: expected IllegalArgumentException " + + "but threw " + e.toString(), false, true); + } + } + @Test public void testDateFormat() { Date expiration = new Date(2022 - 1900, java.util.Calendar.OCTOBER, 27);