From 4bc0354962077be6280daa7e4e0dc682e4181059 Mon Sep 17 00:00:00 2001 From: Tim Chevalier Date: Mon, 29 Apr 2024 14:15:31 -0700 Subject: [PATCH 01/23] ICU-22763 MF2: Handle time zones correctly in :datetime Previously, the time zone components of date/time literals were ignored. In order to store the time zone properly for re-formatting, this change also changes the representation of `Formattable` date values to use a `GregorianCalendar` rather than a `UDate`. (This is a public API change.) --- .../i18n/messageformat2_formattable.cpp | 21 +- .../i18n/messageformat2_function_registry.cpp | 179 +++++++++++++++--- .../i18n/unicode/messageformat2_formattable.h | 36 ++-- .../test/intltest/messageformat2test.cpp | 11 +- .../intltest/messageformat2test_read_json.cpp | 2 +- .../test/intltest/messageformat2test_utils.h | 12 +- .../message2/icu4j/icu-test-functions.json | 6 +- .../testdata/message2/more-functions.json | 16 +- 8 files changed, 217 insertions(+), 66 deletions(-) diff --git a/icu4c/source/i18n/messageformat2_formattable.cpp b/icu4c/source/i18n/messageformat2_formattable.cpp index 3152ccb44fd8..0296a23684ad 100644 --- a/icu4c/source/i18n/messageformat2_formattable.cpp +++ b/icu4c/source/i18n/messageformat2_formattable.cpp @@ -35,7 +35,6 @@ namespace message2 { Formattable::Formattable(const Formattable& other) { contents = other.contents; - holdsDate = other.holdsDate; } Formattable Formattable::forDecimal(std::string_view number, UErrorCode &status) { @@ -53,7 +52,7 @@ namespace message2 { UFormattableType Formattable::getType() const { if (std::holds_alternative(contents)) { - return holdsDate ? UFMT_DATE : UFMT_DOUBLE; + return UFMT_DOUBLE; } if (std::holds_alternative(contents)) { return UFMT_INT64; @@ -74,6 +73,9 @@ namespace message2 { } } } + if (isDate()) { + return UFMT_DATE; + } if (std::holds_alternative(contents)) { return UFMT_OBJECT; } @@ -223,12 +225,19 @@ namespace message2 { return df.orphan(); } - void formatDateWithDefaults(const Locale& locale, UDate date, UnicodeString& result, UErrorCode& errorCode) { + void formatDateWithDefaults(const Locale& locale, const GregorianCalendar& cal, UnicodeString& result, UErrorCode& errorCode) { CHECK_ERROR(errorCode); LocalPointer df(defaultDateTimeInstance(locale, errorCode)); CHECK_ERROR(errorCode); - df->format(date, result, 0, errorCode); + // We have to copy the `const` reference that was passed in, + // because DateFormat::format() takes a non-const reference. + LocalPointer copied(new GregorianCalendar(cal)); + if (!copied.isValid()) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return; + } + df->format(*copied, result, nullptr, errorCode); } // Called when output is required and the contents are an unevaluated `Formattable`; @@ -259,9 +268,9 @@ namespace message2 { switch (type) { case UFMT_DATE: { UnicodeString result; - UDate d = toFormat.getDate(status); + const GregorianCalendar* cal = toFormat.getDate(status); U_ASSERT(U_SUCCESS(status)); - formatDateWithDefaults(locale, d, result, status); + formatDateWithDefaults(locale, *cal, result, status); return FormattedPlaceholder(input, FormattedValue(std::move(result))); } case UFMT_DOUBLE: { diff --git a/icu4c/source/i18n/messageformat2_function_registry.cpp b/icu4c/source/i18n/messageformat2_function_registry.cpp index 7bf5e2d82c36..68319274fed4 100644 --- a/icu4c/source/i18n/messageformat2_function_registry.cpp +++ b/icu4c/source/i18n/messageformat2_function_registry.cpp @@ -10,6 +10,7 @@ #include "unicode/dtptngen.h" #include "unicode/messageformat2_data_model_names.h" #include "unicode/messageformat2_function_registry.h" +#include "unicode/simpletz.h" #include "unicode/smpdtfmt.h" #include "charstr.h" #include "messageformat2_allocation.h" @@ -905,6 +906,135 @@ Formatter* StandardFunctions::DateTimeFactory::createFormatter(const Locale& loc return result; } +static UDate tryPattern(const char* pat, const UnicodeString& sourceStr, UErrorCode& errorCode) { + LocalPointer dateParser(new SimpleDateFormat(UnicodeString(pat), errorCode)); + if (U_FAILURE(errorCode)) { + return 0; + } + return dateParser->parse(sourceStr, errorCode); +} + +static UDate tryPatterns(const UnicodeString& sourceStr, UErrorCode& errorCode) { + if (U_FAILURE(errorCode)) { + return 0; + } + UDate d = tryPattern("YYYY-MM-dd'T'HH:mm:ss", sourceStr, errorCode); + if (U_FAILURE(errorCode)) { + errorCode = U_ZERO_ERROR; + d = tryPattern("YYYY-MM-dd", sourceStr, errorCode); + } + return d; +} + +static UDate tryTimeZonePatterns(const UnicodeString& sourceStr, UErrorCode& errorCode) { + if (U_FAILURE(errorCode)) { + return 0; + } + UDate d = tryPattern("YYYY-MM-dd'T'HH:mm:sszzzz", sourceStr, errorCode); + if (U_FAILURE(errorCode)) { + errorCode = U_ZERO_ERROR; + d = tryPattern("YYYY-MM-dd'T'HH:mm:ssZ", sourceStr, errorCode); + } + return d; +} + +SimpleTimeZone* createTimeZonePart(const UnicodeString& offsetStr, UErrorCode& errorCode) { + NULL_ON_ERROR(errorCode); + + int32_t len = offsetStr.length(); + + // `offsetStr` has a format like +03:30 or -03:30 + if (len <= 2) { + errorCode = U_ILLEGAL_ARGUMENT_ERROR; + return nullptr; + } + + int32_t sign = offsetStr.charAt(0) == '-' ? -1 : 1; + int32_t indexOfColon = offsetStr.indexOf(':'); + if (indexOfColon <= 2) { + errorCode = U_ILLEGAL_ARGUMENT_ERROR; + return nullptr; + } + UnicodeString tzPart1 = offsetStr.tempSubString(1, indexOfColon); + UnicodeString tzPart2 = offsetStr.tempSubString(indexOfColon + 1, len); + double tzHour, tzMin; + strToDouble(tzPart1, tzHour, errorCode); + strToDouble(tzPart2, tzMin, errorCode); + if (U_FAILURE(errorCode)) { + errorCode = U_ILLEGAL_ARGUMENT_ERROR; + return nullptr; + } + int32_t offset = sign * (static_cast(tzHour) * 60 + static_cast(tzMin)) * 60 * 1000; + LocalPointer tz(new SimpleTimeZone(offset, UnicodeString("offset"))); + if (!tz.isValid()) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + } + return tz.orphan(); +} + +static bool hasTzOffset(const UnicodeString& sourceStr) { + int32_t len = sourceStr.length(); + + if (len <= 6) { + return false; + } + return ((sourceStr[len - 6] == PLUS || sourceStr[len - 6] == HYPHEN) + && sourceStr[len - 3] == COLON); +} + +static GregorianCalendar* createCalendarFromDateString(const UnicodeString& sourceStr, UErrorCode& errorCode) { + NULL_ON_ERROR(errorCode); + + UDate absoluteDate; + + // Try time zone part + int32_t indexOfZ = sourceStr.indexOf('Z'); + int32_t indexOfPlus = sourceStr.lastIndexOf('+'); + int32_t indexOfMinus = sourceStr.lastIndexOf('-'); + int32_t indexOfSign = indexOfPlus > -1 ? indexOfPlus : indexOfMinus; + bool isTzOffset = hasTzOffset(sourceStr); + bool isGMT = indexOfZ > 0; + UnicodeString offsetPart; + bool hasTimeZone = isTzOffset || isGMT; + if (!hasTimeZone) { + // No time zone + absoluteDate = tryPatterns(sourceStr, errorCode); + } else { + // Try to split into time zone and non-time-zone parts + UnicodeString noTimeZone; + if (isGMT) { + noTimeZone = sourceStr.tempSubString(0, indexOfZ); + } else { + noTimeZone = sourceStr.tempSubString(0, indexOfSign); + } + tryPatterns(noTimeZone, errorCode); + // Failure -- can't parse this string + NULL_ON_ERROR(errorCode); + // Success -- now handle the time zone part + if (isGMT) { + noTimeZone += UnicodeString("GMT"); + absoluteDate = tryTimeZonePatterns(noTimeZone, errorCode); + } else { + // Finally, try [+-]nn:nn + absoluteDate = tryTimeZonePatterns(sourceStr, errorCode); + offsetPart = sourceStr.tempSubString(indexOfSign, sourceStr.length()); + } + } + LocalPointer cal(new GregorianCalendar(errorCode)); + NULL_ON_ERROR(errorCode); + cal->setTime(absoluteDate, errorCode); + if (hasTimeZone) { + if (isGMT) { + cal->setTimeZone(*TimeZone::getGMT()); + } else { + LocalPointer tz(createTimeZonePart(offsetPart, errorCode)); + NULL_ON_ERROR(errorCode); + cal->adoptTimeZone(tz.orphan()); + } + } + return cal.orphan(); +} + FormattedPlaceholder StandardFunctions::DateTime::format(FormattedPlaceholder&& toFormat, FunctionOptions&& opts, UErrorCode& errorCode) const { @@ -1093,39 +1223,32 @@ FormattedPlaceholder StandardFunctions::DateTime::format(FormattedPlaceholder&& case UFMT_STRING: { const UnicodeString& sourceStr = source.getString(errorCode); U_ASSERT(U_SUCCESS(errorCode)); - // Pattern for ISO 8601 format - datetime - UnicodeString pattern("YYYY-MM-dd'T'HH:mm:ss"); - LocalPointer dateParser(new SimpleDateFormat(pattern, errorCode)); + + LocalPointer cal(createCalendarFromDateString(sourceStr, errorCode)); + if (U_FAILURE(errorCode)) { - errorCode = U_MF_FORMATTING_ERROR; - } else { - // Parse the date - UDate d = dateParser->parse(sourceStr, errorCode); - if (U_FAILURE(errorCode)) { - // Pattern for ISO 8601 format - date - UnicodeString pattern("YYYY-MM-dd"); - errorCode = U_ZERO_ERROR; - dateParser.adoptInstead(new SimpleDateFormat(pattern, errorCode)); - if (U_FAILURE(errorCode)) { - errorCode = U_MF_FORMATTING_ERROR; - } else { - d = dateParser->parse(sourceStr, errorCode); - if (U_FAILURE(errorCode)) { - errorCode = U_MF_OPERAND_MISMATCH_ERROR; - } - } - } - // Use the parsed date as the source value - // in the returned FormattedPlaceholder; this is necessary - // so the date can be re-formatted - toFormat = FormattedPlaceholder(message2::Formattable::forDate(d), - toFormat.getFallback()); - df->format(d, result, 0, errorCode); + errorCode = U_MF_OPERAND_MISMATCH_ERROR; + return {}; } + + // Use the parsed date as the source value + // in the returned FormattedPlaceholder; this is necessary + // so the date can be re-formatted + df->format(*cal, result, 0, errorCode); + toFormat = FormattedPlaceholder(message2::Formattable(std::move(*cal)), + toFormat.getFallback()); break; } case UFMT_DATE: { - df->format(source.asICUFormattable(errorCode), result, 0, errorCode); + const GregorianCalendar* cal = source.getDate(errorCode); + U_ASSERT(U_SUCCESS(errorCode)); + U_ASSERT(cal != nullptr); + LocalPointer copied(new GregorianCalendar(*cal)); + if (!copied.isValid()) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return {}; + } + df->format(*copied, result, 0, errorCode); if (U_FAILURE(errorCode)) { if (errorCode == U_ILLEGAL_ARGUMENT_ERROR) { errorCode = U_MF_OPERAND_MISMATCH_ERROR; diff --git a/icu4c/source/i18n/unicode/messageformat2_formattable.h b/icu4c/source/i18n/unicode/messageformat2_formattable.h index 8a779adb9ab3..b5cfd4c1c969 100644 --- a/icu4c/source/i18n/unicode/messageformat2_formattable.h +++ b/icu4c/source/i18n/unicode/messageformat2_formattable.h @@ -13,8 +13,10 @@ #if !UCONFIG_NO_MF2 #include "unicode/chariter.h" +#include "unicode/gregocal.h" #include "unicode/numberformatter.h" #include "unicode/messageformat2_data_model_names.h" +#include "unicode/smpdtfmt.h" #ifndef U_HIDE_DEPRECATED_API @@ -82,6 +84,7 @@ template class U_I18N_API std::_Variant_storage_>; #endif @@ -90,6 +93,7 @@ template class U_I18N_API std::variant; #endif @@ -226,22 +230,24 @@ namespace message2 { } /** - * Gets the Date value of this object. If this object is not of type - * kDate then the result is undefined and the error code is set. + * Gets the calendar representing the date value of this object. + * If this object is not of type kDate then the result is + * undefined and the error code is set. * * @param status Input/output error code. - * @return the Date value of this object. + * @return A non-owned pointer to a GregorianCalendar object + * representing the underlying date of this object. * @internal ICU 75 technology preview * @deprecated This API is for technology preview only. */ - UDate getDate(UErrorCode& status) const { + const GregorianCalendar* getDate(UErrorCode& status) const { if (U_SUCCESS(status)) { if (isDate()) { - return *std::get_if(&contents); + return std::get_if(&contents); } status = U_ILLEGAL_ARGUMENT_ERROR; } - return 0; + return nullptr; } /** @@ -299,7 +305,6 @@ namespace message2 { using std::swap; swap(f1.contents, f2.contents); - swap(f1.holdsDate, f2.holdsDate); } /** * Copy constructor. @@ -351,18 +356,15 @@ namespace message2 { */ Formattable(int64_t i) : contents(i) {} /** - * Date factory method. + * Date constructor. * - * @param d A UDate value to wrap as a Formattable. + * @param c A GregorianCalendar value representing a date, + * to wrap as a Formattable. + * Passed by move * @internal ICU 75 technology preview * @deprecated This API is for technology preview only. */ - static Formattable forDate(UDate d) { - Formattable f; - f.contents = d; - f.holdsDate = true; - return f; - } + Formattable(GregorianCalendar&& c) : contents(std::move(c)) {} /** * Creates a Formattable object of an appropriate numeric type from a * a decimal number in string form. The Formattable will retain the @@ -422,16 +424,16 @@ namespace message2 { int64_t, UnicodeString, icu::Formattable, // represents a Decimal + GregorianCalendar, const FormattableObject*, std::pair> contents; - bool holdsDate = false; // otherwise, we get type errors about UDate being a duplicate type UnicodeString bogusString; // :(((( UBool isDecimal() const { return std::holds_alternative(contents); } UBool isDate() const { - return std::holds_alternative(contents) && holdsDate; + return std::holds_alternative(contents); } }; // class Formattable diff --git a/icu4c/source/test/intltest/messageformat2test.cpp b/icu4c/source/test/intltest/messageformat2test.cpp index a7137d8493a0..a41b8fbb659c 100644 --- a/icu4c/source/test/intltest/messageformat2test.cpp +++ b/icu4c/source/test/intltest/messageformat2test.cpp @@ -91,13 +91,12 @@ void TestMessageFormat2::testAPISimple() { .setLocale(locale) .build(errorCode); - Calendar* cal(Calendar::createInstance(errorCode)); + GregorianCalendar cal(errorCode); // Sunday, October 28, 2136 8:39:12 AM PST - cal->set(2136, Calendar::OCTOBER, 28, 8, 39, 12); - UDate date = cal->getTime(errorCode); + cal.set(2136, Calendar::OCTOBER, 28, 8, 39, 12); argsBuilder.clear(); - argsBuilder["today"] = message2::Formattable::forDate(date); + argsBuilder["today"] = message2::Formattable(std::move(cal)); args = MessageArguments(argsBuilder, errorCode); result = mf.formatToString(args, errorCode); assertEquals("testAPI", "Today is Sunday, October 28, 2136.", result); @@ -119,8 +118,6 @@ void TestMessageFormat2::testAPISimple() { .build(errorCode); result = mf.formatToString(args, errorCode); assertEquals("testAPI", "Maria added 12 photos to her album.", result); - - delete cal; } // Design doc example, with more details @@ -145,7 +142,7 @@ void TestMessageFormat2::testAPI() { test = testBuilder.setName("testAPI") .setPattern("Today is {$today :date style=full}.") - .setDateArgument("today", date) + .setDateArgument("today", date, errorCode) .setExpected("Today is Sunday, October 28, 2136.") .setLocale("en_US") .build(); diff --git a/icu4c/source/test/intltest/messageformat2test_read_json.cpp b/icu4c/source/test/intltest/messageformat2test_read_json.cpp index b52b3c48d4be..25b7bb68a416 100644 --- a/icu4c/source/test/intltest/messageformat2test_read_json.cpp +++ b/icu4c/source/test/intltest/messageformat2test_read_json.cpp @@ -100,7 +100,7 @@ static bool setArguments(TestCase::Builder& test, const json::object_t& params, // from number values auto obj = argsIter->second.template get(); if (obj["date"].is_number()) { - test.setDateArgument(argName, obj["date"]); + test.setDateArgument(argName, obj["date"], errorCode); } else if (obj["decimal"].is_string()) { // Decimal strings: represent in tests as { "decimal" : string }, // to distinguish from string values diff --git a/icu4c/source/test/intltest/messageformat2test_utils.h b/icu4c/source/test/intltest/messageformat2test_utils.h index 08cdd57bfa72..7a6248c7a5dc 100644 --- a/icu4c/source/test/intltest/messageformat2test_utils.h +++ b/icu4c/source/test/intltest/messageformat2test_utils.h @@ -106,8 +106,16 @@ class TestCase : public UMemory { arguments[k] = Formattable(val); return *this; } - Builder& setDateArgument(const UnicodeString& k, UDate date) { - arguments[k] = Formattable::forDate(date); + Builder& setDateArgument(const UnicodeString& k, UDate date, UErrorCode& errorCode) { + // This ignores time zones; the data-driven tests represent date/time values + // as a datestamp, so this code suffices to handle those. + // Date/time literal strings would be handled using `setArgument()` with a string + // argument. + GregorianCalendar cal(errorCode); + THIS_ON_ERROR(errorCode); + cal.setTime(date, errorCode); + THIS_ON_ERROR(errorCode); + arguments[k] = Formattable(std::move(cal)); return *this; } Builder& setDecimalArgument(const UnicodeString& k, std::string_view decimal, UErrorCode& errorCode) { diff --git a/icu4c/source/test/testdata/message2/icu4j/icu-test-functions.json b/icu4c/source/test/testdata/message2/icu4j/icu-test-functions.json index 6d78ffe4f04d..21f917da6e52 100644 --- a/icu4c/source/test/testdata/message2/icu4j/icu-test-functions.json +++ b/icu4c/source/test/testdata/message2/icu4j/icu-test-functions.json @@ -108,13 +108,11 @@ }, { "src": "Expires at {|2024-07-02T19:23:45Z| :datetime timeStyle=long}", - "exp": "Expires at 7:23:45 PM GMT", - "ignoreTest": "ICU-22754 Time zones not working yet (bug)" + "exp": "Expires at 7:23:45 PM GMT" }, { "src": "Expires at {|2024-07-02T19:23:45+03:30| :datetime timeStyle=full}", - "exp": "Expires at 7:23:45 PM GMT+03:30", - "ignoreTest": "ICU-22754 Time zones not working yet (bug)" + "exp": "Expires at 7:23:45 PM GMT+03:30" } ], "Chaining" : [ diff --git a/icu4c/source/test/testdata/message2/more-functions.json b/icu4c/source/test/testdata/message2/more-functions.json index 161379a63c55..bfffcd343359 100644 --- a/icu4c/source/test/testdata/message2/more-functions.json +++ b/icu4c/source/test/testdata/message2/more-functions.json @@ -1,5 +1,5 @@ { - "number": [ + "number_extra": [ { "src": "Format {123456789.9876 :number} number", "locale": "en-IN", @@ -107,5 +107,19 @@ "locale": "ro", "params": {"val": {"decimal": "1234567890123456789.987654321"}} } + ], + "datetime_extra": [ + { + "srcs": [".local $d = {|2024-07-02T19:23:45+03:30| :datetime timeStyle=long}\n", + "{{Expires at {$d} or {$d :datetime timeStyle=full}}}"], + "exp": "Expires at 7:23:45\u202FPM GMT+3:30 or 7:23:45\u202FPM GMT+03:30", + "comment": "This checks that the return value of :datetime preserves the time zone if specified" + }, + { + "src": "{$d :datetime timeStyle=full}", + "exp": "7:23:45\u202FPM GMT+03:30", + "params": {"d": "2024-07-02T19:23:45+03:30"}, + "comment": "This checks that an argument can be a date literal string." + } ] } From fa6278ee28362e1f74b7561e78949cdca1a931a0 Mon Sep 17 00:00:00 2001 From: Tim Chevalier Date: Tue, 18 Jun 2024 11:31:57 -0700 Subject: [PATCH 02/23] Tests pass with new DateInfo struct --- .../i18n/messageformat2_formattable.cpp | 50 +++++++++++++++---- .../i18n/messageformat2_function_registry.cpp | 34 +++++++++---- .../i18n/unicode/messageformat2_formattable.h | 34 +++++++++---- .../test/intltest/messageformat2test.cpp | 7 ++- .../intltest/messageformat2test_read_json.cpp | 2 +- .../test/intltest/messageformat2test_utils.h | 9 ++-- 6 files changed, 99 insertions(+), 37 deletions(-) diff --git a/icu4c/source/i18n/messageformat2_formattable.cpp b/icu4c/source/i18n/messageformat2_formattable.cpp index 0296a23684ad..bca67093db76 100644 --- a/icu4c/source/i18n/messageformat2_formattable.cpp +++ b/icu4c/source/i18n/messageformat2_formattable.cpp @@ -9,6 +9,7 @@ #include "unicode/messageformat2_formattable.h" #include "unicode/smpdtfmt.h" +#include "messageformat2_allocation.h" #include "messageformat2_macros.h" #include "limits.h" @@ -225,19 +226,50 @@ namespace message2 { return df.orphan(); } - void formatDateWithDefaults(const Locale& locale, const GregorianCalendar& cal, UnicodeString& result, UErrorCode& errorCode) { + GregorianCalendar* DateInfo::createGregorianCalendar(UErrorCode& errorCode) const { + NULL_ON_ERROR(errorCode); + + LocalPointer cal(new GregorianCalendar(errorCode)); + if (!cal.isValid()) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return nullptr; + } + // Copy info from this + // Non-Gregorian calendars not implemented yet + U_ASSERT(calendarName.length() == 0); + cal->setTime(date, errorCode); + // If time zone is present... + if (zoneName.length() > 0) { + if (zoneName == UnicodeString("UTC")) { + cal->setTimeZone(*TimeZone::getGMT()); + } else { + LocalPointer tz(TimeZone::createTimeZone(zoneName)); + if (!tz.isValid()) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return nullptr; + } + cal->setTimeZone(*tz); + } + } + return cal.orphan(); + } + + void formatDateWithDefaults(const Locale& locale, + const DateInfo& dateInfo, + UnicodeString& result, + UErrorCode& errorCode) { CHECK_ERROR(errorCode); LocalPointer df(defaultDateTimeInstance(locale, errorCode)); CHECK_ERROR(errorCode); // We have to copy the `const` reference that was passed in, // because DateFormat::format() takes a non-const reference. - LocalPointer copied(new GregorianCalendar(cal)); - if (!copied.isValid()) { - errorCode = U_MEMORY_ALLOCATION_ERROR; - return; - } - df->format(*copied, result, nullptr, errorCode); + + // Non-Gregorian calendars not supported yet + U_ASSERT(dateInfo.calendarName.length() == 0); + LocalPointer cal(dateInfo.createGregorianCalendar(errorCode)); + CHECK_ERROR(errorCode); + df->format(*cal, result, nullptr, errorCode); } // Called when output is required and the contents are an unevaluated `Formattable`; @@ -268,9 +300,9 @@ namespace message2 { switch (type) { case UFMT_DATE: { UnicodeString result; - const GregorianCalendar* cal = toFormat.getDate(status); + const DateInfo* dateInfo = toFormat.getDate(status); U_ASSERT(U_SUCCESS(status)); - formatDateWithDefaults(locale, *cal, result, status); + formatDateWithDefaults(locale, *dateInfo, result, status); return FormattedPlaceholder(input, FormattedValue(std::move(result))); } case UFMT_DOUBLE: { diff --git a/icu4c/source/i18n/messageformat2_function_registry.cpp b/icu4c/source/i18n/messageformat2_function_registry.cpp index 68319274fed4..48ddfab8c3a5 100644 --- a/icu4c/source/i18n/messageformat2_function_registry.cpp +++ b/icu4c/source/i18n/messageformat2_function_registry.cpp @@ -965,7 +965,10 @@ SimpleTimeZone* createTimeZonePart(const UnicodeString& offsetStr, UErrorCode& e return nullptr; } int32_t offset = sign * (static_cast(tzHour) * 60 + static_cast(tzMin)) * 60 * 1000; - LocalPointer tz(new SimpleTimeZone(offset, UnicodeString("offset"))); + LocalPointer tz(new SimpleTimeZone(offset, UnicodeString("GMT") + offsetStr)); +/* +Use tz.getIanaID() to get the time zone name +*/ if (!tz.isValid()) { errorCode = U_MEMORY_ALLOCATION_ERROR; } @@ -982,6 +985,8 @@ static bool hasTzOffset(const UnicodeString& sourceStr) { && sourceStr[len - 3] == COLON); } +// Note: `calendar` option to :datetime not implemented yet; +// Gregorian calendar is assumed static GregorianCalendar* createCalendarFromDateString(const UnicodeString& sourceStr, UErrorCode& errorCode) { NULL_ON_ERROR(errorCode); @@ -1225,7 +1230,6 @@ FormattedPlaceholder StandardFunctions::DateTime::format(FormattedPlaceholder&& U_ASSERT(U_SUCCESS(errorCode)); LocalPointer cal(createCalendarFromDateString(sourceStr, errorCode)); - if (U_FAILURE(errorCode)) { errorCode = U_MF_OPERAND_MISMATCH_ERROR; return {}; @@ -1235,20 +1239,32 @@ FormattedPlaceholder StandardFunctions::DateTime::format(FormattedPlaceholder&& // in the returned FormattedPlaceholder; this is necessary // so the date can be re-formatted df->format(*cal, result, 0, errorCode); - toFormat = FormattedPlaceholder(message2::Formattable(std::move(*cal)), + + // Construct DateInfo from Date + UDate absoluteDate = cal->getTime(errorCode); + const TimeZone& tz = cal->getTimeZone(); + UnicodeString timeZoneId; + tz.getID(timeZoneId); + UnicodeString timeZoneName; + // This doesn't work for offsets -- TODO + // tz.getIanaID(timeZoneId, timeZoneName, errorCode); + // Empty string for Gregorian calendar (default); + // `:datetime` `calendar` option not implemented yet, + // so other calendars aren't implemented + DateInfo dateInfo = {absoluteDate, timeZoneId, {}}; // Should be timeZoneName, but getIanaID() doesn't work for strings like GMT+03:30 + toFormat = FormattedPlaceholder(message2::Formattable(std::move(dateInfo)), toFormat.getFallback()); break; } case UFMT_DATE: { - const GregorianCalendar* cal = source.getDate(errorCode); + const DateInfo* dateInfo = source.getDate(errorCode); U_ASSERT(U_SUCCESS(errorCode)); - U_ASSERT(cal != nullptr); - LocalPointer copied(new GregorianCalendar(*cal)); - if (!copied.isValid()) { - errorCode = U_MEMORY_ALLOCATION_ERROR; + + LocalPointer cal(dateInfo->createGregorianCalendar(errorCode)); + if (U_FAILURE(errorCode)) { return {}; } - df->format(*copied, result, 0, errorCode); + df->format(*cal, result, 0, errorCode); if (U_FAILURE(errorCode)) { if (errorCode == U_ILLEGAL_ARGUMENT_ERROR) { errorCode = U_MF_OPERAND_MISMATCH_ERROR; diff --git a/icu4c/source/i18n/unicode/messageformat2_formattable.h b/icu4c/source/i18n/unicode/messageformat2_formattable.h index b5cfd4c1c969..dda003374b31 100644 --- a/icu4c/source/i18n/unicode/messageformat2_formattable.h +++ b/icu4c/source/i18n/unicode/messageformat2_formattable.h @@ -66,6 +66,7 @@ namespace message2 { virtual ~FormattableObject(); }; // class FormattableObject + struct DateInfo; class Formattable; } // namespace message2 @@ -84,7 +85,7 @@ template class U_I18N_API std::_Variant_storage_>; #endif @@ -93,7 +94,7 @@ template class U_I18N_API std::variant; #endif @@ -102,6 +103,19 @@ template class U_I18N_API std::variant(&contents); + return std::get_if(&contents); } status = U_ILLEGAL_ARGUMENT_ERROR; } @@ -358,13 +372,13 @@ namespace message2 { /** * Date constructor. * - * @param c A GregorianCalendar value representing a date, + * @param d A DateInfo struct representing a date, * to wrap as a Formattable. * Passed by move * @internal ICU 75 technology preview * @deprecated This API is for technology preview only. */ - Formattable(GregorianCalendar&& c) : contents(std::move(c)) {} + Formattable(DateInfo&& d) : contents(std::move(d)) {} /** * Creates a Formattable object of an appropriate numeric type from a * a decimal number in string form. The Formattable will retain the @@ -424,7 +438,7 @@ namespace message2 { int64_t, UnicodeString, icu::Formattable, // represents a Decimal - GregorianCalendar, + DateInfo, const FormattableObject*, std::pair> contents; UnicodeString bogusString; // :(((( @@ -433,7 +447,7 @@ namespace message2 { return std::holds_alternative(contents); } UBool isDate() const { - return std::holds_alternative(contents); + return std::holds_alternative(contents); } }; // class Formattable diff --git a/icu4c/source/test/intltest/messageformat2test.cpp b/icu4c/source/test/intltest/messageformat2test.cpp index a41b8fbb659c..57fbdf6d70d2 100644 --- a/icu4c/source/test/intltest/messageformat2test.cpp +++ b/icu4c/source/test/intltest/messageformat2test.cpp @@ -96,7 +96,10 @@ void TestMessageFormat2::testAPISimple() { cal.set(2136, Calendar::OCTOBER, 28, 8, 39, 12); argsBuilder.clear(); - argsBuilder["today"] = message2::Formattable(std::move(cal)); + DateInfo dateInfo = { cal.getTime(errorCode), + "Pacific Standard Time", + "" }; + argsBuilder["today"] = message2::Formattable(std::move(dateInfo)); args = MessageArguments(argsBuilder, errorCode); result = mf.formatToString(args, errorCode); assertEquals("testAPI", "Today is Sunday, October 28, 2136.", result); @@ -142,7 +145,7 @@ void TestMessageFormat2::testAPI() { test = testBuilder.setName("testAPI") .setPattern("Today is {$today :date style=full}.") - .setDateArgument("today", date, errorCode) + .setDateArgument("today", date) .setExpected("Today is Sunday, October 28, 2136.") .setLocale("en_US") .build(); diff --git a/icu4c/source/test/intltest/messageformat2test_read_json.cpp b/icu4c/source/test/intltest/messageformat2test_read_json.cpp index 25b7bb68a416..b52b3c48d4be 100644 --- a/icu4c/source/test/intltest/messageformat2test_read_json.cpp +++ b/icu4c/source/test/intltest/messageformat2test_read_json.cpp @@ -100,7 +100,7 @@ static bool setArguments(TestCase::Builder& test, const json::object_t& params, // from number values auto obj = argsIter->second.template get(); if (obj["date"].is_number()) { - test.setDateArgument(argName, obj["date"], errorCode); + test.setDateArgument(argName, obj["date"]); } else if (obj["decimal"].is_string()) { // Decimal strings: represent in tests as { "decimal" : string }, // to distinguish from string values diff --git a/icu4c/source/test/intltest/messageformat2test_utils.h b/icu4c/source/test/intltest/messageformat2test_utils.h index 7a6248c7a5dc..cda9c57449be 100644 --- a/icu4c/source/test/intltest/messageformat2test_utils.h +++ b/icu4c/source/test/intltest/messageformat2test_utils.h @@ -106,16 +106,13 @@ class TestCase : public UMemory { arguments[k] = Formattable(val); return *this; } - Builder& setDateArgument(const UnicodeString& k, UDate date, UErrorCode& errorCode) { + Builder& setDateArgument(const UnicodeString& k, UDate date) { // This ignores time zones; the data-driven tests represent date/time values // as a datestamp, so this code suffices to handle those. // Date/time literal strings would be handled using `setArgument()` with a string // argument. - GregorianCalendar cal(errorCode); - THIS_ON_ERROR(errorCode); - cal.setTime(date, errorCode); - THIS_ON_ERROR(errorCode); - arguments[k] = Formattable(std::move(cal)); + DateInfo dateInfo = { date, {}, {} }; // No time zone or calendar name + arguments[k] = Formattable(std::move(dateInfo)); return *this; } Builder& setDecimalArgument(const UnicodeString& k, std::string_view decimal, UErrorCode& errorCode) { From 8451a3016336320d02633ffe17469fc8330d2d8d Mon Sep 17 00:00:00 2001 From: Tim Chevalier Date: Tue, 18 Jun 2024 12:36:45 -0700 Subject: [PATCH 03/23] Eliminate GregorianCalendar usage --- .../i18n/messageformat2_formattable.cpp | 46 +++++--------- .../i18n/messageformat2_function_registry.cpp | 63 ++++++++++--------- .../i18n/unicode/messageformat2_formattable.h | 3 +- .../test/intltest/messageformat2test.cpp | 2 +- 4 files changed, 51 insertions(+), 63 deletions(-) diff --git a/icu4c/source/i18n/messageformat2_formattable.cpp b/icu4c/source/i18n/messageformat2_formattable.cpp index bca67093db76..bf31a3add50f 100644 --- a/icu4c/source/i18n/messageformat2_formattable.cpp +++ b/icu4c/source/i18n/messageformat2_formattable.cpp @@ -216,42 +216,30 @@ namespace message2 { return number::NumberFormatter::withLocale(locale).formatDecimal(toFormat, errorCode); } - DateFormat* defaultDateTimeInstance(const Locale& locale, UErrorCode& errorCode) { + TimeZone* DateInfo::createTimeZone(UErrorCode& errorCode) const { NULL_ON_ERROR(errorCode); - LocalPointer df(DateFormat::createDateTimeInstance(DateFormat::SHORT, DateFormat::SHORT, locale)); - if (!df.isValid()) { + + TimeZone* tz; + if (zoneName.length() == 0) { + tz = TimeZone::createDefault(); + } else { + tz = TimeZone::createTimeZone(zoneName); + } + if (tz == nullptr) { errorCode = U_MEMORY_ALLOCATION_ERROR; - return nullptr; } - return df.orphan(); + return tz; } - GregorianCalendar* DateInfo::createGregorianCalendar(UErrorCode& errorCode) const { - NULL_ON_ERROR(errorCode); - LocalPointer cal(new GregorianCalendar(errorCode)); - if (!cal.isValid()) { + DateFormat* defaultDateTimeInstance(const Locale& locale, UErrorCode& errorCode) { + NULL_ON_ERROR(errorCode); + LocalPointer df(DateFormat::createDateTimeInstance(DateFormat::SHORT, DateFormat::SHORT, locale)); + if (!df.isValid()) { errorCode = U_MEMORY_ALLOCATION_ERROR; return nullptr; } - // Copy info from this - // Non-Gregorian calendars not implemented yet - U_ASSERT(calendarName.length() == 0); - cal->setTime(date, errorCode); - // If time zone is present... - if (zoneName.length() > 0) { - if (zoneName == UnicodeString("UTC")) { - cal->setTimeZone(*TimeZone::getGMT()); - } else { - LocalPointer tz(TimeZone::createTimeZone(zoneName)); - if (!tz.isValid()) { - errorCode = U_MEMORY_ALLOCATION_ERROR; - return nullptr; - } - cal->setTimeZone(*tz); - } - } - return cal.orphan(); + return df.orphan(); } void formatDateWithDefaults(const Locale& locale, @@ -267,9 +255,9 @@ namespace message2 { // Non-Gregorian calendars not supported yet U_ASSERT(dateInfo.calendarName.length() == 0); - LocalPointer cal(dateInfo.createGregorianCalendar(errorCode)); + df->adoptTimeZone(dateInfo.createTimeZone(errorCode)); CHECK_ERROR(errorCode); - df->format(*cal, result, nullptr, errorCode); + df->format(dateInfo.date, result, nullptr, errorCode); } // Called when output is required and the contents are an unevaluated `Formattable`; diff --git a/icu4c/source/i18n/messageformat2_function_registry.cpp b/icu4c/source/i18n/messageformat2_function_registry.cpp index 48ddfab8c3a5..a9d80af5ecdd 100644 --- a/icu4c/source/i18n/messageformat2_function_registry.cpp +++ b/icu4c/source/i18n/messageformat2_function_registry.cpp @@ -987,8 +987,10 @@ static bool hasTzOffset(const UnicodeString& sourceStr) { // Note: `calendar` option to :datetime not implemented yet; // Gregorian calendar is assumed -static GregorianCalendar* createCalendarFromDateString(const UnicodeString& sourceStr, UErrorCode& errorCode) { - NULL_ON_ERROR(errorCode); +static DateInfo createDateInfoFromString(const UnicodeString& sourceStr, UErrorCode& errorCode) { + if (U_FAILURE(errorCode)) { + return {}; + } UDate absoluteDate; @@ -1014,7 +1016,9 @@ static GregorianCalendar* createCalendarFromDateString(const UnicodeString& sour } tryPatterns(noTimeZone, errorCode); // Failure -- can't parse this string - NULL_ON_ERROR(errorCode); + if (U_FAILURE(errorCode)) { + return {}; + } // Success -- now handle the time zone part if (isGMT) { noTimeZone += UnicodeString("GMT"); @@ -1025,19 +1029,31 @@ static GregorianCalendar* createCalendarFromDateString(const UnicodeString& sour offsetPart = sourceStr.tempSubString(indexOfSign, sourceStr.length()); } } - LocalPointer cal(new GregorianCalendar(errorCode)); - NULL_ON_ERROR(errorCode); - cal->setTime(absoluteDate, errorCode); + const TimeZone* tz; if (hasTimeZone) { if (isGMT) { - cal->setTimeZone(*TimeZone::getGMT()); + tz = TimeZone::getGMT(); } else { - LocalPointer tz(createTimeZonePart(offsetPart, errorCode)); - NULL_ON_ERROR(errorCode); - cal->adoptTimeZone(tz.orphan()); + tz = createTimeZonePart(offsetPart, errorCode); } + } else { + tz = TimeZone::createDefault(); } - return cal.orphan(); + if (tz == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + } + if (U_FAILURE(errorCode)) { + return {}; + } + UnicodeString tzID; + // This doesn't work for offsets -- TODO + // tz.getIanaID(timeZoneId, timeZoneName, errorCode); + tz->getID(tzID); + // Empty string for Gregorian calendar (default); + // `:datetime` `calendar` option not implemented yet, + // so other calendars aren't implemented + + return { absoluteDate, tzID, {} }; } FormattedPlaceholder StandardFunctions::DateTime::format(FormattedPlaceholder&& toFormat, @@ -1229,29 +1245,17 @@ FormattedPlaceholder StandardFunctions::DateTime::format(FormattedPlaceholder&& const UnicodeString& sourceStr = source.getString(errorCode); U_ASSERT(U_SUCCESS(errorCode)); - LocalPointer cal(createCalendarFromDateString(sourceStr, errorCode)); + DateInfo dateInfo = createDateInfoFromString(sourceStr, errorCode); if (U_FAILURE(errorCode)) { errorCode = U_MF_OPERAND_MISMATCH_ERROR; return {}; } + df->adoptTimeZone(dateInfo.createTimeZone(errorCode)); // Use the parsed date as the source value // in the returned FormattedPlaceholder; this is necessary // so the date can be re-formatted - df->format(*cal, result, 0, errorCode); - - // Construct DateInfo from Date - UDate absoluteDate = cal->getTime(errorCode); - const TimeZone& tz = cal->getTimeZone(); - UnicodeString timeZoneId; - tz.getID(timeZoneId); - UnicodeString timeZoneName; - // This doesn't work for offsets -- TODO - // tz.getIanaID(timeZoneId, timeZoneName, errorCode); - // Empty string for Gregorian calendar (default); - // `:datetime` `calendar` option not implemented yet, - // so other calendars aren't implemented - DateInfo dateInfo = {absoluteDate, timeZoneId, {}}; // Should be timeZoneName, but getIanaID() doesn't work for strings like GMT+03:30 + df->format(dateInfo.date, result, 0, errorCode); toFormat = FormattedPlaceholder(message2::Formattable(std::move(dateInfo)), toFormat.getFallback()); break; @@ -1260,11 +1264,8 @@ FormattedPlaceholder StandardFunctions::DateTime::format(FormattedPlaceholder&& const DateInfo* dateInfo = source.getDate(errorCode); U_ASSERT(U_SUCCESS(errorCode)); - LocalPointer cal(dateInfo->createGregorianCalendar(errorCode)); - if (U_FAILURE(errorCode)) { - return {}; - } - df->format(*cal, result, 0, errorCode); + df->adoptTimeZone(dateInfo->createTimeZone(errorCode)); + df->format(dateInfo->date, result, 0, errorCode); if (U_FAILURE(errorCode)) { if (errorCode == U_ILLEGAL_ARGUMENT_ERROR) { errorCode = U_MF_OPERAND_MISMATCH_ERROR; diff --git a/icu4c/source/i18n/unicode/messageformat2_formattable.h b/icu4c/source/i18n/unicode/messageformat2_formattable.h index dda003374b31..c68d95896b78 100644 --- a/icu4c/source/i18n/unicode/messageformat2_formattable.h +++ b/icu4c/source/i18n/unicode/messageformat2_formattable.h @@ -13,7 +13,6 @@ #if !UCONFIG_NO_MF2 #include "unicode/chariter.h" -#include "unicode/gregocal.h" #include "unicode/numberformatter.h" #include "unicode/messageformat2_data_model_names.h" #include "unicode/smpdtfmt.h" @@ -113,7 +112,7 @@ namespace message2 { // Empty if not specified; proleptic Gregorian (8601) calendar is default UnicodeString calendarName; - GregorianCalendar* createGregorianCalendar(UErrorCode&) const; + TimeZone* createTimeZone(UErrorCode&) const; }; /** diff --git a/icu4c/source/test/intltest/messageformat2test.cpp b/icu4c/source/test/intltest/messageformat2test.cpp index 57fbdf6d70d2..88d0376a2bec 100644 --- a/icu4c/source/test/intltest/messageformat2test.cpp +++ b/icu4c/source/test/intltest/messageformat2test.cpp @@ -6,7 +6,7 @@ #if !UCONFIG_NO_MF2 -#include "unicode/calendar.h" +#include "unicode/gregocal.h" #include "messageformat2test.h" using namespace icu::message2; From b4cc1191b1c9cab3281241e150f6593e37ba6598 Mon Sep 17 00:00:00 2001 From: Tim Chevalier Date: Tue, 18 Jun 2024 12:42:11 -0700 Subject: [PATCH 04/23] Docs --- .../i18n/unicode/messageformat2_formattable.h | 42 ++++++++++++++++--- 1 file changed, 37 insertions(+), 5 deletions(-) diff --git a/icu4c/source/i18n/unicode/messageformat2_formattable.h b/icu4c/source/i18n/unicode/messageformat2_formattable.h index c68d95896b78..56eed505461d 100644 --- a/icu4c/source/i18n/unicode/messageformat2_formattable.h +++ b/icu4c/source/i18n/unicode/messageformat2_formattable.h @@ -103,15 +103,47 @@ U_NAMESPACE_BEGIN namespace message2 { -// TODO: doc comments + /** + * The `DateInfo` struct represents all the information needed to + * format a date with a time zone. It includes an absolute date and a time zone name, + * as well as a calendar name. The calendar name is not currently used. + * + * @internal ICU 76 technology preview + * @deprecated This API is for technology preview only. + */ struct U_I18N_API DateInfo { - // Milliseconds since Unix epoch + /** + * Milliseconds since Unix epoch + * + * @internal ICU 76 technology preview + * @deprecated This API is for technology preview only. + */ UDate date; - // IANA time zone name; "UTC" if UTC; empty string if value is floating + /** + * IANA time zone name; "UTC" if UTC; empty string if value is floating + * + * @internal ICU 76 technology preview + * @deprecated This API is for technology preview only. + */ UnicodeString zoneName; - // Empty if not specified; proleptic Gregorian (8601) calendar is default + /** + * Calendar name. Note: non-Gregorian calendars not yet implemented. + * String is empty if calendar not specified. Proleptic Gregorian calendar + * is default. + * + * @internal ICU 76 technology preview + * @deprecated This API is for technology preview only. + */ UnicodeString calendarName; - + /** + * Creates a `TimeZone` from this `DateInfo`'s time zone name. + * + * @param status Input/output error code. + * @return A TimeZone object, which the caller adopts. + * + * @internal ICU 76 technology preview + * @deprecated This API is for technology preview only. + */ TimeZone* createTimeZone(UErrorCode&) const; }; From eeb534b5e8c882af4a3988e7641a24fdb1e25c2b Mon Sep 17 00:00:00 2001 From: Tim Chevalier Date: Tue, 18 Jun 2024 12:43:26 -0700 Subject: [PATCH 05/23] Remove obsolete comment --- icu4c/source/i18n/messageformat2_formattable.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/icu4c/source/i18n/messageformat2_formattable.cpp b/icu4c/source/i18n/messageformat2_formattable.cpp index bf31a3add50f..7af83b3f658c 100644 --- a/icu4c/source/i18n/messageformat2_formattable.cpp +++ b/icu4c/source/i18n/messageformat2_formattable.cpp @@ -221,6 +221,7 @@ namespace message2 { TimeZone* tz; if (zoneName.length() == 0) { + // Floating time value -- use default time zone tz = TimeZone::createDefault(); } else { tz = TimeZone::createTimeZone(zoneName); @@ -250,8 +251,6 @@ namespace message2 { LocalPointer df(defaultDateTimeInstance(locale, errorCode)); CHECK_ERROR(errorCode); - // We have to copy the `const` reference that was passed in, - // because DateFormat::format() takes a non-const reference. // Non-Gregorian calendars not supported yet U_ASSERT(dateInfo.calendarName.length() == 0); From 8a1638b2142091ae959bbbc91fe445c6aead363b Mon Sep 17 00:00:00 2001 From: Tim Chevalier Date: Tue, 18 Jun 2024 12:54:40 -0700 Subject: [PATCH 06/23] Fix comment --- .../source/i18n/messageformat2_function_registry.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/icu4c/source/i18n/messageformat2_function_registry.cpp b/icu4c/source/i18n/messageformat2_function_registry.cpp index a9d80af5ecdd..d887fe327a2a 100644 --- a/icu4c/source/i18n/messageformat2_function_registry.cpp +++ b/icu4c/source/i18n/messageformat2_function_registry.cpp @@ -966,9 +966,6 @@ SimpleTimeZone* createTimeZonePart(const UnicodeString& offsetStr, UErrorCode& e } int32_t offset = sign * (static_cast(tzHour) * 60 + static_cast(tzMin)) * 60 * 1000; LocalPointer tz(new SimpleTimeZone(offset, UnicodeString("GMT") + offsetStr)); -/* -Use tz.getIanaID() to get the time zone name -*/ if (!tz.isValid()) { errorCode = U_MEMORY_ALLOCATION_ERROR; } @@ -1029,6 +1026,7 @@ static DateInfo createDateInfoFromString(const UnicodeString& sourceStr, UErrorC offsetPart = sourceStr.tempSubString(indexOfSign, sourceStr.length()); } } + const TimeZone* tz; if (hasTimeZone) { if (isGMT) { @@ -1046,13 +1044,16 @@ static DateInfo createDateInfoFromString(const UnicodeString& sourceStr, UErrorC return {}; } UnicodeString tzID; - // This doesn't work for offsets -- TODO + + // TODO: + // Here, tz.getIanaID() should be called to normalize the time zone name. + // But it doesn't seem to work for strings with offsets, like GMT+03:30 // tz.getIanaID(timeZoneId, timeZoneName, errorCode); tz->getID(tzID); + // Empty string for Gregorian calendar (default); // `:datetime` `calendar` option not implemented yet, // so other calendars aren't implemented - return { absoluteDate, tzID, {} }; } From 60a66d919572d5f74e3ad10b80813e71563d2d12 Mon Sep 17 00:00:00 2001 From: Tim Chevalier Date: Tue, 18 Jun 2024 12:59:41 -0700 Subject: [PATCH 07/23] Add comments --- .../i18n/messageformat2_function_registry.cpp | 27 ++++++++++++------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/icu4c/source/i18n/messageformat2_function_registry.cpp b/icu4c/source/i18n/messageformat2_function_registry.cpp index d887fe327a2a..3dd5a4dc6a98 100644 --- a/icu4c/source/i18n/messageformat2_function_registry.cpp +++ b/icu4c/source/i18n/messageformat2_function_registry.cpp @@ -938,6 +938,7 @@ static UDate tryTimeZonePatterns(const UnicodeString& sourceStr, UErrorCode& err return d; } +// Parse a string like [+-]nn:nn to a time zone SimpleTimeZone* createTimeZonePart(const UnicodeString& offsetStr, UErrorCode& errorCode) { NULL_ON_ERROR(errorCode); @@ -955,6 +956,7 @@ SimpleTimeZone* createTimeZonePart(const UnicodeString& offsetStr, UErrorCode& e errorCode = U_ILLEGAL_ARGUMENT_ERROR; return nullptr; } + // Split string into hour and minute parts and parse each part as a double UnicodeString tzPart1 = offsetStr.tempSubString(1, indexOfColon); UnicodeString tzPart2 = offsetStr.tempSubString(indexOfColon + 1, len); double tzHour, tzMin; @@ -964,6 +966,7 @@ SimpleTimeZone* createTimeZonePart(const UnicodeString& offsetStr, UErrorCode& e errorCode = U_ILLEGAL_ARGUMENT_ERROR; return nullptr; } + // Create integer offset from parsed parts int32_t offset = sign * (static_cast(tzHour) * 60 + static_cast(tzMin)) * 60 * 1000; LocalPointer tz(new SimpleTimeZone(offset, UnicodeString("GMT") + offsetStr)); if (!tz.isValid()) { @@ -991,7 +994,7 @@ static DateInfo createDateInfoFromString(const UnicodeString& sourceStr, UErrorC UDate absoluteDate; - // Try time zone part + // Check if the string has a time zone part int32_t indexOfZ = sourceStr.indexOf('Z'); int32_t indexOfPlus = sourceStr.lastIndexOf('+'); int32_t indexOfMinus = sourceStr.lastIndexOf('-'); @@ -1000,33 +1003,37 @@ static DateInfo createDateInfoFromString(const UnicodeString& sourceStr, UErrorC bool isGMT = indexOfZ > 0; UnicodeString offsetPart; bool hasTimeZone = isTzOffset || isGMT; + if (!hasTimeZone) { - // No time zone + // No time zone; parse the date and time absoluteDate = tryPatterns(sourceStr, errorCode); } else { // Try to split into time zone and non-time-zone parts - UnicodeString noTimeZone; + UnicodeString dateTimePart; if (isGMT) { - noTimeZone = sourceStr.tempSubString(0, indexOfZ); + dateTimePart = sourceStr.tempSubString(0, indexOfZ); } else { - noTimeZone = sourceStr.tempSubString(0, indexOfSign); + dateTimePart = sourceStr.tempSubString(0, indexOfSign); } - tryPatterns(noTimeZone, errorCode); + // Parse the date from the date/time part + tryPatterns(dateTimePart, errorCode); // Failure -- can't parse this string if (U_FAILURE(errorCode)) { return {}; } - // Success -- now handle the time zone part + // Success -- now parse the time zone part if (isGMT) { - noTimeZone += UnicodeString("GMT"); - absoluteDate = tryTimeZonePatterns(noTimeZone, errorCode); + dateTimePart += UnicodeString("GMT"); + absoluteDate = tryTimeZonePatterns(dateTimePart, errorCode); } else { - // Finally, try [+-]nn:nn + // Try to parse time zone in offset format: [+-]nn:nn absoluteDate = tryTimeZonePatterns(sourceStr, errorCode); offsetPart = sourceStr.tempSubString(indexOfSign, sourceStr.length()); } } + // Create a TimeZone from the parsed time zone, in order to get a canonical + // ID for the time zone const TimeZone* tz; if (hasTimeZone) { if (isGMT) { From a65362fd710126b07b7fca1df5737e2c18042d2d Mon Sep 17 00:00:00 2001 From: Tim Chevalier Date: Tue, 18 Jun 2024 13:44:28 -0700 Subject: [PATCH 08/23] Avoid creating time zone where not necessary --- .../i18n/messageformat2_function_registry.cpp | 31 +++++-------------- 1 file changed, 8 insertions(+), 23 deletions(-) diff --git a/icu4c/source/i18n/messageformat2_function_registry.cpp b/icu4c/source/i18n/messageformat2_function_registry.cpp index 3dd5a4dc6a98..4b406fb9dc43 100644 --- a/icu4c/source/i18n/messageformat2_function_registry.cpp +++ b/icu4c/source/i18n/messageformat2_function_registry.cpp @@ -1032,36 +1032,21 @@ static DateInfo createDateInfoFromString(const UnicodeString& sourceStr, UErrorC } } - // Create a TimeZone from the parsed time zone, in order to get a canonical - // ID for the time zone - const TimeZone* tz; + // If the time zone was provided, get its canonical ID, + // in order to return it in the DateInfo + UnicodeString canonicalID; if (hasTimeZone) { - if (isGMT) { - tz = TimeZone::getGMT(); - } else { - tz = createTimeZonePart(offsetPart, errorCode); + UnicodeString tzID("GMT"); + if (!isGMT) { + tzID += offsetPart; } - } else { - tz = TimeZone::createDefault(); - } - if (tz == nullptr) { - errorCode = U_MEMORY_ALLOCATION_ERROR; + TimeZone::getCanonicalID(tzID, canonicalID, errorCode); } - if (U_FAILURE(errorCode)) { - return {}; - } - UnicodeString tzID; - - // TODO: - // Here, tz.getIanaID() should be called to normalize the time zone name. - // But it doesn't seem to work for strings with offsets, like GMT+03:30 - // tz.getIanaID(timeZoneId, timeZoneName, errorCode); - tz->getID(tzID); // Empty string for Gregorian calendar (default); // `:datetime` `calendar` option not implemented yet, // so other calendars aren't implemented - return { absoluteDate, tzID, {} }; + return { absoluteDate, canonicalID, {} }; } FormattedPlaceholder StandardFunctions::DateTime::format(FormattedPlaceholder&& toFormat, From 3d7803379eaa81b263caeda41cbab6dfaf8a6e57 Mon Sep 17 00:00:00 2001 From: Tim Chevalier Date: Tue, 18 Jun 2024 13:45:57 -0700 Subject: [PATCH 09/23] Fix doc comment --- icu4c/source/i18n/unicode/messageformat2_formattable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/icu4c/source/i18n/unicode/messageformat2_formattable.h b/icu4c/source/i18n/unicode/messageformat2_formattable.h index 56eed505461d..026531a3a483 100644 --- a/icu4c/source/i18n/unicode/messageformat2_formattable.h +++ b/icu4c/source/i18n/unicode/messageformat2_formattable.h @@ -144,7 +144,7 @@ namespace message2 { * @internal ICU 76 technology preview * @deprecated This API is for technology preview only. */ - TimeZone* createTimeZone(UErrorCode&) const; + TimeZone* createTimeZone(UErrorCode& status) const; }; /** From 804ab9e4151432a961836f9bd5b8f3f4b280fee5 Mon Sep 17 00:00:00 2001 From: Tim Chevalier Date: Tue, 18 Jun 2024 14:08:39 -0700 Subject: [PATCH 10/23] Fix MSVC compilation errors --- .../i18n/messageformat2_formattable.cpp | 1 - .../i18n/unicode/messageformat2_formattable.h | 75 +++++++++---------- 2 files changed, 37 insertions(+), 39 deletions(-) diff --git a/icu4c/source/i18n/messageformat2_formattable.cpp b/icu4c/source/i18n/messageformat2_formattable.cpp index 7af83b3f658c..99ad3dafdee3 100644 --- a/icu4c/source/i18n/messageformat2_formattable.cpp +++ b/icu4c/source/i18n/messageformat2_formattable.cpp @@ -232,7 +232,6 @@ namespace message2 { return tz; } - DateFormat* defaultDateTimeInstance(const Locale& locale, UErrorCode& errorCode) { NULL_ON_ERROR(errorCode); LocalPointer df(DateFormat::createDateTimeInstance(DateFormat::SHORT, DateFormat::SHORT, locale)); diff --git a/icu4c/source/i18n/unicode/messageformat2_formattable.h b/icu4c/source/i18n/unicode/messageformat2_formattable.h index 026531a3a483..edd7ca167bb3 100644 --- a/icu4c/source/i18n/unicode/messageformat2_formattable.h +++ b/icu4c/source/i18n/unicode/messageformat2_formattable.h @@ -65,44 +65,6 @@ namespace message2 { virtual ~FormattableObject(); }; // class FormattableObject - struct DateInfo; - class Formattable; -} // namespace message2 - -U_NAMESPACE_END - -/// @cond DOXYGEN_IGNORE -// Export an explicit template instantiation of the std::variant that is used -// to represent the message2::Formattable class. -// (When building DLLs for Windows this is required.) -// (See measunit_impl.h, datefmt.h, collationiterator.h, erarules.h and others -// for similar examples.) -#if U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN -#if defined(U_REAL_MSVC) && defined(_MSVC_STL_VERSION) -template class U_I18N_API std::_Variant_storage_>; -#endif -typedef std::pair P; -template class U_I18N_API std::variant; -#endif -/// @endcond - -U_NAMESPACE_BEGIN - -namespace message2 { - /** * The `DateInfo` struct represents all the information needed to * format a date with a time zone. It includes an absolute date and a time zone name, @@ -147,6 +109,43 @@ namespace message2 { TimeZone* createTimeZone(UErrorCode& status) const; }; + class Formattable; +} // namespace message2 + +U_NAMESPACE_END + +/// @cond DOXYGEN_IGNORE +// Export an explicit template instantiation of the std::variant that is used +// to represent the message2::Formattable class. +// (When building DLLs for Windows this is required.) +// (See measunit_impl.h, datefmt.h, collationiterator.h, erarules.h and others +// for similar examples.) +#if U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN +#if defined(U_REAL_MSVC) && defined(_MSVC_STL_VERSION) +template class U_I18N_API std::_Variant_storage_>; +#endif +typedef std::pair P; +template class U_I18N_API std::variant; +#endif +/// @endcond + +U_NAMESPACE_BEGIN + +namespace message2 { + /** * The `Formattable` class represents a typed value that can be formatted, * originating either from a message argument or a literal in the code. From 1c68d511902932ccccaef4f5eb75019e9f6d7d80 Mon Sep 17 00:00:00 2001 From: Tim Chevalier Date: Mon, 24 Jun 2024 12:29:01 -0700 Subject: [PATCH 11/23] Make createTimeZone() a static method instead of a method on DateInfo --- .../i18n/messageformat2_formattable.cpp | 33 +---------------- .../i18n/messageformat2_function_registry.cpp | 36 +++++++++++++++++-- ...essageformat2_function_registry_internal.h | 2 +- .../i18n/unicode/messageformat2_formattable.h | 10 ------ 4 files changed, 36 insertions(+), 45 deletions(-) diff --git a/icu4c/source/i18n/messageformat2_formattable.cpp b/icu4c/source/i18n/messageformat2_formattable.cpp index 99ad3dafdee3..fec8272b1da6 100644 --- a/icu4c/source/i18n/messageformat2_formattable.cpp +++ b/icu4c/source/i18n/messageformat2_formattable.cpp @@ -10,6 +10,7 @@ #include "unicode/messageformat2_formattable.h" #include "unicode/smpdtfmt.h" #include "messageformat2_allocation.h" +#include "messageformat2_function_registry_internal.h" #include "messageformat2_macros.h" #include "limits.h" @@ -216,22 +217,6 @@ namespace message2 { return number::NumberFormatter::withLocale(locale).formatDecimal(toFormat, errorCode); } - TimeZone* DateInfo::createTimeZone(UErrorCode& errorCode) const { - NULL_ON_ERROR(errorCode); - - TimeZone* tz; - if (zoneName.length() == 0) { - // Floating time value -- use default time zone - tz = TimeZone::createDefault(); - } else { - tz = TimeZone::createTimeZone(zoneName); - } - if (tz == nullptr) { - errorCode = U_MEMORY_ALLOCATION_ERROR; - } - return tz; - } - DateFormat* defaultDateTimeInstance(const Locale& locale, UErrorCode& errorCode) { NULL_ON_ERROR(errorCode); LocalPointer df(DateFormat::createDateTimeInstance(DateFormat::SHORT, DateFormat::SHORT, locale)); @@ -242,22 +227,6 @@ namespace message2 { return df.orphan(); } - void formatDateWithDefaults(const Locale& locale, - const DateInfo& dateInfo, - UnicodeString& result, - UErrorCode& errorCode) { - CHECK_ERROR(errorCode); - - LocalPointer df(defaultDateTimeInstance(locale, errorCode)); - CHECK_ERROR(errorCode); - - // Non-Gregorian calendars not supported yet - U_ASSERT(dateInfo.calendarName.length() == 0); - df->adoptTimeZone(dateInfo.createTimeZone(errorCode)); - CHECK_ERROR(errorCode); - df->format(dateInfo.date, result, nullptr, errorCode); - } - // Called when output is required and the contents are an unevaluated `Formattable`; // formats the source `Formattable` to a string with defaults, if it can be // formatted with a default formatter diff --git a/icu4c/source/i18n/messageformat2_function_registry.cpp b/icu4c/source/i18n/messageformat2_function_registry.cpp index 4b406fb9dc43..cd4d1528618f 100644 --- a/icu4c/source/i18n/messageformat2_function_registry.cpp +++ b/icu4c/source/i18n/messageformat2_function_registry.cpp @@ -938,6 +938,22 @@ static UDate tryTimeZonePatterns(const UnicodeString& sourceStr, UErrorCode& err return d; } +static TimeZone* createTimeZone(const DateInfo& dateInfo, UErrorCode& errorCode) { + NULL_ON_ERROR(errorCode); + + TimeZone* tz; + if (dateInfo.zoneName.length() == 0) { + // Floating time value -- use default time zone + tz = TimeZone::createDefault(); + } else { + tz = TimeZone::createTimeZone(dateInfo.zoneName); + } + if (tz == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + } + return tz; +} + // Parse a string like [+-]nn:nn to a time zone SimpleTimeZone* createTimeZonePart(const UnicodeString& offsetStr, UErrorCode& errorCode) { NULL_ON_ERROR(errorCode); @@ -1049,6 +1065,22 @@ static DateInfo createDateInfoFromString(const UnicodeString& sourceStr, UErrorC return { absoluteDate, canonicalID, {} }; } +void formatDateWithDefaults(const Locale& locale, + const DateInfo& dateInfo, + UnicodeString& result, + UErrorCode& errorCode) { + CHECK_ERROR(errorCode); + + LocalPointer df(defaultDateTimeInstance(locale, errorCode)); + CHECK_ERROR(errorCode); + + // Non-Gregorian calendars not supported yet + U_ASSERT(dateInfo.calendarName.length() == 0); + df->adoptTimeZone(createTimeZone(dateInfo, errorCode)); + CHECK_ERROR(errorCode); + df->format(dateInfo.date, result, nullptr, errorCode); +} + FormattedPlaceholder StandardFunctions::DateTime::format(FormattedPlaceholder&& toFormat, FunctionOptions&& opts, UErrorCode& errorCode) const { @@ -1243,7 +1275,7 @@ FormattedPlaceholder StandardFunctions::DateTime::format(FormattedPlaceholder&& errorCode = U_MF_OPERAND_MISMATCH_ERROR; return {}; } - df->adoptTimeZone(dateInfo.createTimeZone(errorCode)); + df->adoptTimeZone(createTimeZone(dateInfo, errorCode)); // Use the parsed date as the source value // in the returned FormattedPlaceholder; this is necessary @@ -1257,7 +1289,7 @@ FormattedPlaceholder StandardFunctions::DateTime::format(FormattedPlaceholder&& const DateInfo* dateInfo = source.getDate(errorCode); U_ASSERT(U_SUCCESS(errorCode)); - df->adoptTimeZone(dateInfo->createTimeZone(errorCode)); + df->adoptTimeZone(createTimeZone(*dateInfo, errorCode)); df->format(dateInfo->date, result, 0, errorCode); if (U_FAILURE(errorCode)) { if (errorCode == U_ILLEGAL_ARGUMENT_ERROR) { diff --git a/icu4c/source/i18n/messageformat2_function_registry_internal.h b/icu4c/source/i18n/messageformat2_function_registry_internal.h index 733fc5e945d5..8122eba7a5db 100644 --- a/icu4c/source/i18n/messageformat2_function_registry_internal.h +++ b/icu4c/source/i18n/messageformat2_function_registry_internal.h @@ -211,7 +211,7 @@ namespace message2 { }; }; - extern void formatDateWithDefaults(const Locale& locale, UDate date, UnicodeString&, UErrorCode& errorCode); + extern void formatDateWithDefaults(const Locale& locale, const DateInfo& date, UnicodeString&, UErrorCode& errorCode); extern number::FormattedNumber formatNumberWithDefaults(const Locale& locale, double toFormat, UErrorCode& errorCode); extern number::FormattedNumber formatNumberWithDefaults(const Locale& locale, int32_t toFormat, UErrorCode& errorCode); extern number::FormattedNumber formatNumberWithDefaults(const Locale& locale, int64_t toFormat, UErrorCode& errorCode); diff --git a/icu4c/source/i18n/unicode/messageformat2_formattable.h b/icu4c/source/i18n/unicode/messageformat2_formattable.h index edd7ca167bb3..c31953c5de3b 100644 --- a/icu4c/source/i18n/unicode/messageformat2_formattable.h +++ b/icu4c/source/i18n/unicode/messageformat2_formattable.h @@ -97,16 +97,6 @@ namespace message2 { * @deprecated This API is for technology preview only. */ UnicodeString calendarName; - /** - * Creates a `TimeZone` from this `DateInfo`'s time zone name. - * - * @param status Input/output error code. - * @return A TimeZone object, which the caller adopts. - * - * @internal ICU 76 technology preview - * @deprecated This API is for technology preview only. - */ - TimeZone* createTimeZone(UErrorCode& status) const; }; class Formattable; From 5d15835086908f8c31f1e737015d4b2f0e32940a Mon Sep 17 00:00:00 2001 From: Tim Chevalier Date: Wed, 17 Jul 2024 13:35:12 -0700 Subject: [PATCH 12/23] Update icu4c/source/test/testdata/message2/more-functions.json Co-authored-by: Steven R. Loomis --- icu4c/source/test/testdata/message2/more-functions.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/icu4c/source/test/testdata/message2/more-functions.json b/icu4c/source/test/testdata/message2/more-functions.json index bfffcd343359..7178479f3652 100644 --- a/icu4c/source/test/testdata/message2/more-functions.json +++ b/icu4c/source/test/testdata/message2/more-functions.json @@ -113,7 +113,7 @@ "srcs": [".local $d = {|2024-07-02T19:23:45+03:30| :datetime timeStyle=long}\n", "{{Expires at {$d} or {$d :datetime timeStyle=full}}}"], "exp": "Expires at 7:23:45\u202FPM GMT+3:30 or 7:23:45\u202FPM GMT+03:30", - "comment": "This checks that the return value of :datetime preserves the time zone if specified" + "comment": "This checks that the return value of :datetime preserves the time zone offset if specified" }, { "src": "{$d :datetime timeStyle=full}", From 82f9ddc591347bc3805230dbca8c9cea25bd4191 Mon Sep 17 00:00:00 2001 From: Tim Chevalier Date: Wed, 17 Jul 2024 13:38:51 -0700 Subject: [PATCH 13/23] Add comment about meaning of calendarName --- icu4c/source/i18n/unicode/messageformat2_formattable.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/icu4c/source/i18n/unicode/messageformat2_formattable.h b/icu4c/source/i18n/unicode/messageformat2_formattable.h index c31953c5de3b..e85d12c7a4d4 100644 --- a/icu4c/source/i18n/unicode/messageformat2_formattable.h +++ b/icu4c/source/i18n/unicode/messageformat2_formattable.h @@ -92,6 +92,10 @@ namespace message2 { * Calendar name. Note: non-Gregorian calendars not yet implemented. * String is empty if calendar not specified. Proleptic Gregorian calendar * is default. + * In the future, the calendar name will map to one of the options defined + * in https://cldr-smoke.unicode.org/spec/main/ldml/tr35.html#UnicodeCalendarIdentifier + * or will be consistent with the calendar names supported by JavaScript Temporal + * ( https://tc39.es/proposal-temporal/docs/calendar.html ) * * @internal ICU 76 technology preview * @deprecated This API is for technology preview only. From 34bcb51500ab8271f81c1157ba752001d2ab58d5 Mon Sep 17 00:00:00 2001 From: Tim Chevalier Date: Thu, 8 Aug 2024 15:08:27 -0700 Subject: [PATCH 14/23] Address review feedback --- .../i18n/messageformat2_function_registry.cpp | 82 ++++++------------- 1 file changed, 26 insertions(+), 56 deletions(-) diff --git a/icu4c/source/i18n/messageformat2_function_registry.cpp b/icu4c/source/i18n/messageformat2_function_registry.cpp index cd4d1528618f..6d5c16bd2720 100644 --- a/icu4c/source/i18n/messageformat2_function_registry.cpp +++ b/icu4c/source/i18n/messageformat2_function_registry.cpp @@ -914,35 +914,40 @@ static UDate tryPattern(const char* pat, const UnicodeString& sourceStr, UErrorC return dateParser->parse(sourceStr, errorCode); } +// From https://github.com/unicode-org/message-format-wg/blob/main/spec/registry.md#date-and-time-operands : +// "A date/time literal value is a non-empty string consisting of an ISO 8601 date, or +// an ISO 8601 datetime optionally followed by a timezone offset." static UDate tryPatterns(const UnicodeString& sourceStr, UErrorCode& errorCode) { if (U_FAILURE(errorCode)) { return 0; } - UDate d = tryPattern("YYYY-MM-dd'T'HH:mm:ss", sourceStr, errorCode); - if (U_FAILURE(errorCode)) { - errorCode = U_ZERO_ERROR; - d = tryPattern("YYYY-MM-dd", sourceStr, errorCode); + // Handle ISO 8601 datetime (tryTimeZonePatterns() handles the case + // where a timezone offset follows) + if (sourceStr.length() > 10) { + return tryPattern("YYYY-MM-dd'T'HH:mm:ss", sourceStr, errorCode); } - return d; + // Handle ISO 8601 date + return tryPattern("YYYY-MM-dd", sourceStr, errorCode); } +// See comment on tryPatterns() for spec reference static UDate tryTimeZonePatterns(const UnicodeString& sourceStr, UErrorCode& errorCode) { if (U_FAILURE(errorCode)) { return 0; } - UDate d = tryPattern("YYYY-MM-dd'T'HH:mm:sszzzz", sourceStr, errorCode); - if (U_FAILURE(errorCode)) { - errorCode = U_ZERO_ERROR; - d = tryPattern("YYYY-MM-dd'T'HH:mm:ssZ", sourceStr, errorCode); + // 'zzzz' to handle the case where the timezone offset follows the ISO 8601 datetime + if (sourceStr.length() > 25) { + return tryPattern("YYYY-MM-dd'T'HH:mm:sszzzz", sourceStr, errorCode); } - return d; + // 'Z' to handle the literal 'Z' + return tryPattern("YYYY-MM-dd'T'HH:mm:ssZ", sourceStr, errorCode); } static TimeZone* createTimeZone(const DateInfo& dateInfo, UErrorCode& errorCode) { NULL_ON_ERROR(errorCode); TimeZone* tz; - if (dateInfo.zoneName.length() == 0) { + if (dateInfo.zoneName.isEmpty()) { // Floating time value -- use default time zone tz = TimeZone::createDefault(); } else { @@ -954,43 +959,6 @@ static TimeZone* createTimeZone(const DateInfo& dateInfo, UErrorCode& errorCode) return tz; } -// Parse a string like [+-]nn:nn to a time zone -SimpleTimeZone* createTimeZonePart(const UnicodeString& offsetStr, UErrorCode& errorCode) { - NULL_ON_ERROR(errorCode); - - int32_t len = offsetStr.length(); - - // `offsetStr` has a format like +03:30 or -03:30 - if (len <= 2) { - errorCode = U_ILLEGAL_ARGUMENT_ERROR; - return nullptr; - } - - int32_t sign = offsetStr.charAt(0) == '-' ? -1 : 1; - int32_t indexOfColon = offsetStr.indexOf(':'); - if (indexOfColon <= 2) { - errorCode = U_ILLEGAL_ARGUMENT_ERROR; - return nullptr; - } - // Split string into hour and minute parts and parse each part as a double - UnicodeString tzPart1 = offsetStr.tempSubString(1, indexOfColon); - UnicodeString tzPart2 = offsetStr.tempSubString(indexOfColon + 1, len); - double tzHour, tzMin; - strToDouble(tzPart1, tzHour, errorCode); - strToDouble(tzPart2, tzMin, errorCode); - if (U_FAILURE(errorCode)) { - errorCode = U_ILLEGAL_ARGUMENT_ERROR; - return nullptr; - } - // Create integer offset from parsed parts - int32_t offset = sign * (static_cast(tzHour) * 60 + static_cast(tzMin)) * 60 * 1000; - LocalPointer tz(new SimpleTimeZone(offset, UnicodeString("GMT") + offsetStr)); - if (!tz.isValid()) { - errorCode = U_MEMORY_ALLOCATION_ERROR; - } - return tz.orphan(); -} - static bool hasTzOffset(const UnicodeString& sourceStr) { int32_t len = sourceStr.length(); @@ -1075,7 +1043,7 @@ void formatDateWithDefaults(const Locale& locale, CHECK_ERROR(errorCode); // Non-Gregorian calendars not supported yet - U_ASSERT(dateInfo.calendarName.length() == 0); + U_ASSERT(dateInfo.calendarName.isEmpty()); df->adoptTimeZone(createTimeZone(dateInfo, errorCode)); CHECK_ERROR(errorCode); df->format(dateInfo.date, result, nullptr, errorCode); @@ -1287,13 +1255,15 @@ FormattedPlaceholder StandardFunctions::DateTime::format(FormattedPlaceholder&& } case UFMT_DATE: { const DateInfo* dateInfo = source.getDate(errorCode); - U_ASSERT(U_SUCCESS(errorCode)); - - df->adoptTimeZone(createTimeZone(*dateInfo, errorCode)); - df->format(dateInfo->date, result, 0, errorCode); - if (U_FAILURE(errorCode)) { - if (errorCode == U_ILLEGAL_ARGUMENT_ERROR) { - errorCode = U_MF_OPERAND_MISMATCH_ERROR; + if (U_SUCCESS(errorCode)) { + // If U_SUCCESS(errorCode), then source.getDate() returned + // a non-null pointer + df->adoptTimeZone(createTimeZone(*dateInfo, errorCode)); + df->format(dateInfo->date, result, 0, errorCode); + if (U_FAILURE(errorCode)) { + if (errorCode == U_ILLEGAL_ARGUMENT_ERROR) { + errorCode = U_MF_OPERAND_MISMATCH_ERROR; + } } } break; From 197caefa5926efa5cae1df5b082fc9b92c49be0e Mon Sep 17 00:00:00 2001 From: Tim Chevalier Date: Thu, 8 Aug 2024 15:40:11 -0700 Subject: [PATCH 15/23] Add clarifying comments to DateInfo struct; rename zoneName field to zoneId --- icu4c/source/i18n/messageformat2_function_registry.cpp | 4 ++-- icu4c/source/i18n/unicode/messageformat2_formattable.h | 7 +++++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/icu4c/source/i18n/messageformat2_function_registry.cpp b/icu4c/source/i18n/messageformat2_function_registry.cpp index 6d5c16bd2720..e1b3a3c8ff53 100644 --- a/icu4c/source/i18n/messageformat2_function_registry.cpp +++ b/icu4c/source/i18n/messageformat2_function_registry.cpp @@ -947,11 +947,11 @@ static TimeZone* createTimeZone(const DateInfo& dateInfo, UErrorCode& errorCode) NULL_ON_ERROR(errorCode); TimeZone* tz; - if (dateInfo.zoneName.isEmpty()) { + if (dateInfo.zoneId.isEmpty()) { // Floating time value -- use default time zone tz = TimeZone::createDefault(); } else { - tz = TimeZone::createTimeZone(dateInfo.zoneName); + tz = TimeZone::createTimeZone(dateInfo.zoneId); } if (tz == nullptr) { errorCode = U_MEMORY_ALLOCATION_ERROR; diff --git a/icu4c/source/i18n/unicode/messageformat2_formattable.h b/icu4c/source/i18n/unicode/messageformat2_formattable.h index e85d12c7a4d4..75dac8754068 100644 --- a/icu4c/source/i18n/unicode/messageformat2_formattable.h +++ b/icu4c/source/i18n/unicode/messageformat2_formattable.h @@ -75,7 +75,7 @@ namespace message2 { */ struct U_I18N_API DateInfo { /** - * Milliseconds since Unix epoch + * Date in UTC * * @internal ICU 76 technology preview * @deprecated This API is for technology preview only. @@ -83,11 +83,14 @@ namespace message2 { UDate date; /** * IANA time zone name; "UTC" if UTC; empty string if value is floating + * The time zone is required in order to format the date/time value + * (its offset is added to/subtracted from the datestamp in order to + * produce the formatted date). * * @internal ICU 76 technology preview * @deprecated This API is for technology preview only. */ - UnicodeString zoneName; + UnicodeString zoneId; /** * Calendar name. Note: non-Gregorian calendars not yet implemented. * String is empty if calendar not specified. Proleptic Gregorian calendar From 9800253a9d21071b1abf70d7256c82e36430af1a Mon Sep 17 00:00:00 2001 From: Tim Chevalier Date: Thu, 8 Aug 2024 15:43:04 -0700 Subject: [PATCH 16/23] Removed calendarName field from DateInfo --- .../i18n/messageformat2_function_registry.cpp | 7 +------ .../i18n/unicode/messageformat2_formattable.h | 13 ------------- 2 files changed, 1 insertion(+), 19 deletions(-) diff --git a/icu4c/source/i18n/messageformat2_function_registry.cpp b/icu4c/source/i18n/messageformat2_function_registry.cpp index e1b3a3c8ff53..a95077dbf772 100644 --- a/icu4c/source/i18n/messageformat2_function_registry.cpp +++ b/icu4c/source/i18n/messageformat2_function_registry.cpp @@ -1027,10 +1027,7 @@ static DateInfo createDateInfoFromString(const UnicodeString& sourceStr, UErrorC TimeZone::getCanonicalID(tzID, canonicalID, errorCode); } - // Empty string for Gregorian calendar (default); - // `:datetime` `calendar` option not implemented yet, - // so other calendars aren't implemented - return { absoluteDate, canonicalID, {} }; + return { absoluteDate, canonicalID }; } void formatDateWithDefaults(const Locale& locale, @@ -1042,8 +1039,6 @@ void formatDateWithDefaults(const Locale& locale, LocalPointer df(defaultDateTimeInstance(locale, errorCode)); CHECK_ERROR(errorCode); - // Non-Gregorian calendars not supported yet - U_ASSERT(dateInfo.calendarName.isEmpty()); df->adoptTimeZone(createTimeZone(dateInfo, errorCode)); CHECK_ERROR(errorCode); df->format(dateInfo.date, result, nullptr, errorCode); diff --git a/icu4c/source/i18n/unicode/messageformat2_formattable.h b/icu4c/source/i18n/unicode/messageformat2_formattable.h index 75dac8754068..cf6c7cc0d7c9 100644 --- a/icu4c/source/i18n/unicode/messageformat2_formattable.h +++ b/icu4c/source/i18n/unicode/messageformat2_formattable.h @@ -91,19 +91,6 @@ namespace message2 { * @deprecated This API is for technology preview only. */ UnicodeString zoneId; - /** - * Calendar name. Note: non-Gregorian calendars not yet implemented. - * String is empty if calendar not specified. Proleptic Gregorian calendar - * is default. - * In the future, the calendar name will map to one of the options defined - * in https://cldr-smoke.unicode.org/spec/main/ldml/tr35.html#UnicodeCalendarIdentifier - * or will be consistent with the calendar names supported by JavaScript Temporal - * ( https://tc39.es/proposal-temporal/docs/calendar.html ) - * - * @internal ICU 76 technology preview - * @deprecated This API is for technology preview only. - */ - UnicodeString calendarName; }; class Formattable; From 2ecf406b989ad8b5aa3dd70e45307f13f1389cb2 Mon Sep 17 00:00:00 2001 From: Tim Chevalier Date: Thu, 8 Aug 2024 15:56:30 -0700 Subject: [PATCH 17/23] Remove calendar name harder --- icu4c/source/test/intltest/messageformat2test.cpp | 3 +-- icu4c/source/test/intltest/messageformat2test_utils.h | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/icu4c/source/test/intltest/messageformat2test.cpp b/icu4c/source/test/intltest/messageformat2test.cpp index 88d0376a2bec..afdb0a40cff6 100644 --- a/icu4c/source/test/intltest/messageformat2test.cpp +++ b/icu4c/source/test/intltest/messageformat2test.cpp @@ -97,8 +97,7 @@ void TestMessageFormat2::testAPISimple() { argsBuilder.clear(); DateInfo dateInfo = { cal.getTime(errorCode), - "Pacific Standard Time", - "" }; + "Pacific Standard Time" }; argsBuilder["today"] = message2::Formattable(std::move(dateInfo)); args = MessageArguments(argsBuilder, errorCode); result = mf.formatToString(args, errorCode); diff --git a/icu4c/source/test/intltest/messageformat2test_utils.h b/icu4c/source/test/intltest/messageformat2test_utils.h index cda9c57449be..6a487cd8acda 100644 --- a/icu4c/source/test/intltest/messageformat2test_utils.h +++ b/icu4c/source/test/intltest/messageformat2test_utils.h @@ -111,7 +111,7 @@ class TestCase : public UMemory { // as a datestamp, so this code suffices to handle those. // Date/time literal strings would be handled using `setArgument()` with a string // argument. - DateInfo dateInfo = { date, {}, {} }; // No time zone or calendar name + DateInfo dateInfo = { date, {} }; // No time zone or calendar name arguments[k] = Formattable(std::move(dateInfo)); return *this; } From c63c6b52832c7251b93636ff33ca54d8161b271c Mon Sep 17 00:00:00 2001 From: Tim Chevalier Date: Thu, 8 Aug 2024 15:57:40 -0700 Subject: [PATCH 18/23] Detect 'Z' literal correctly; add test case --- .../source/i18n/messageformat2_function_registry.cpp | 11 ++++++----- .../source/test/testdata/message2/more-functions.json | 8 +++++++- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/icu4c/source/i18n/messageformat2_function_registry.cpp b/icu4c/source/i18n/messageformat2_function_registry.cpp index a95077dbf772..7a69ae46849e 100644 --- a/icu4c/source/i18n/messageformat2_function_registry.cpp +++ b/icu4c/source/i18n/messageformat2_function_registry.cpp @@ -935,12 +935,13 @@ static UDate tryTimeZonePatterns(const UnicodeString& sourceStr, UErrorCode& err if (U_FAILURE(errorCode)) { return 0; } - // 'zzzz' to handle the case where the timezone offset follows the ISO 8601 datetime - if (sourceStr.length() > 25) { - return tryPattern("YYYY-MM-dd'T'HH:mm:sszzzz", sourceStr, errorCode); + int32_t len = sourceStr.length(); + if (len > 0 && sourceStr[len] == 'Z') { + // 'Z' to handle the literal 'Z' + return tryPattern("YYYY-MM-dd'T'HH:mm:ssZ", sourceStr, errorCode); } - // 'Z' to handle the literal 'Z' - return tryPattern("YYYY-MM-dd'T'HH:mm:ssZ", sourceStr, errorCode); + // 'zzzz' to handle the case where the timezone offset follows the ISO 8601 datetime + return tryPattern("YYYY-MM-dd'T'HH:mm:sszzzz", sourceStr, errorCode); } static TimeZone* createTimeZone(const DateInfo& dateInfo, UErrorCode& errorCode) { diff --git a/icu4c/source/test/testdata/message2/more-functions.json b/icu4c/source/test/testdata/message2/more-functions.json index 7178479f3652..6256e22deec3 100644 --- a/icu4c/source/test/testdata/message2/more-functions.json +++ b/icu4c/source/test/testdata/message2/more-functions.json @@ -108,7 +108,13 @@ "params": {"val": {"decimal": "1234567890123456789.987654321"}} } ], - "datetime_extra": [ + "datetime_extra": [ + { + "srcs": [".local $d = {|2024-07-02T19:23:45Z| :datetime timeStyle=long}\n", + "{{Expires at {$d} or {$d :datetime timeStyle=full}}}"], + "exp": "Expires at 7:23:45\u202FPM GMT or 7:23:45\u202FPM Greenwich Mean Time", + "comment": "This checks that the return value of :datetime preserves the time zone offset if specified" + }, { "srcs": [".local $d = {|2024-07-02T19:23:45+03:30| :datetime timeStyle=long}\n", "{{Expires at {$d} or {$d :datetime timeStyle=full}}}"], From 8d4da423992d90520b3e52043f8f317dbb2c3993 Mon Sep 17 00:00:00 2001 From: Tim Chevalier Date: Tue, 13 Aug 2024 13:24:27 -0700 Subject: [PATCH 19/23] Cache DateFormat objects in the DateTimeFactory object and lazily initialize them --- .../i18n/messageformat2_function_registry.cpp | 53 +++++++++++++------ ...essageformat2_function_registry_internal.h | 20 ++++++- .../testdata/message2/more-functions.json | 4 ++ 3 files changed, 61 insertions(+), 16 deletions(-) diff --git a/icu4c/source/i18n/messageformat2_function_registry.cpp b/icu4c/source/i18n/messageformat2_function_registry.cpp index 7a69ae46849e..e3f5cd7106b6 100644 --- a/icu4c/source/i18n/messageformat2_function_registry.cpp +++ b/icu4c/source/i18n/messageformat2_function_registry.cpp @@ -17,6 +17,7 @@ #include "messageformat2_function_registry_internal.h" #include "messageformat2_macros.h" #include "hash.h" +#include "mutex.h" #include "number_types.h" #include "uvector.h" // U_ASSERT @@ -899,49 +900,64 @@ static DateFormat::EStyle stringToStyle(UnicodeString option, UErrorCode& errorC Formatter* StandardFunctions::DateTimeFactory::createFormatter(const Locale& locale, UErrorCode& errorCode) { NULL_ON_ERROR(errorCode); - Formatter* result = new StandardFunctions::DateTime(locale, type); + Formatter* result = new StandardFunctions::DateTime(locale, *this, type); if (result == nullptr) { errorCode = U_MEMORY_ALLOCATION_ERROR; } return result; } -static UDate tryPattern(const char* pat, const UnicodeString& sourceStr, UErrorCode& errorCode) { - LocalPointer dateParser(new SimpleDateFormat(UnicodeString(pat), errorCode)); - if (U_FAILURE(errorCode)) { - return 0; +static UMutex gDateTimeFactoryMutex; + +// Lazily initialize DateFormat objects used for parsing date literals +void StandardFunctions::DateTimeFactory::initDateParsers(UErrorCode& errorCode) { + CHECK_ERROR(errorCode); + + Mutex lock(&gDateTimeFactoryMutex); + + if (dateParser.isValid()) { + // Already initialized + return; } - return dateParser->parse(sourceStr, errorCode); + + // Handles ISO 8601 date + dateParser.adoptInstead(new SimpleDateFormat(UnicodeString("YYYY-MM-dd"), errorCode)); + // Handles ISO 8601 datetime without time zone + dateTimeParser.adoptInstead(new SimpleDateFormat(UnicodeString("YYYY-MM-dd'T'HH:mm:ss"), errorCode)); + // Handles ISO 8601 datetime with 'Z' to denote UTC + dateTimeUTCParser.adoptInstead(new SimpleDateFormat(UnicodeString("YYYY-MM-dd'T'HH:mm:ssZ"), errorCode)); + // Handles ISO 8601 datetime with timezone offset; 'zzzz' denotes timezone offset + dateTimeZoneParser.adoptInstead(new SimpleDateFormat(UnicodeString("YYYY-MM-dd'T'HH:mm:sszzzz"), errorCode)); } // From https://github.com/unicode-org/message-format-wg/blob/main/spec/registry.md#date-and-time-operands : // "A date/time literal value is a non-empty string consisting of an ISO 8601 date, or // an ISO 8601 datetime optionally followed by a timezone offset." -static UDate tryPatterns(const UnicodeString& sourceStr, UErrorCode& errorCode) { +UDate StandardFunctions::DateTime::tryPatterns(const UnicodeString& sourceStr, + UErrorCode& errorCode) const { if (U_FAILURE(errorCode)) { return 0; } // Handle ISO 8601 datetime (tryTimeZonePatterns() handles the case // where a timezone offset follows) if (sourceStr.length() > 10) { - return tryPattern("YYYY-MM-dd'T'HH:mm:ss", sourceStr, errorCode); + return parent.dateTimeParser->parse(sourceStr, errorCode); } // Handle ISO 8601 date - return tryPattern("YYYY-MM-dd", sourceStr, errorCode); + return parent.dateParser->parse(sourceStr, errorCode); } // See comment on tryPatterns() for spec reference -static UDate tryTimeZonePatterns(const UnicodeString& sourceStr, UErrorCode& errorCode) { +UDate StandardFunctions::DateTime::tryTimeZonePatterns(const UnicodeString& sourceStr, + UErrorCode& errorCode) const { if (U_FAILURE(errorCode)) { return 0; } int32_t len = sourceStr.length(); if (len > 0 && sourceStr[len] == 'Z') { - // 'Z' to handle the literal 'Z' - return tryPattern("YYYY-MM-dd'T'HH:mm:ssZ", sourceStr, errorCode); + return parent.dateTimeUTCParser->parse(sourceStr, errorCode); } - // 'zzzz' to handle the case where the timezone offset follows the ISO 8601 datetime - return tryPattern("YYYY-MM-dd'T'HH:mm:sszzzz", sourceStr, errorCode); + return parent.dateTimeZoneParser->parse(sourceStr, errorCode); } static TimeZone* createTimeZone(const DateInfo& dateInfo, UErrorCode& errorCode) { @@ -972,7 +988,8 @@ static bool hasTzOffset(const UnicodeString& sourceStr) { // Note: `calendar` option to :datetime not implemented yet; // Gregorian calendar is assumed -static DateInfo createDateInfoFromString(const UnicodeString& sourceStr, UErrorCode& errorCode) { +DateInfo StandardFunctions::DateTime::createDateInfoFromString(const UnicodeString& sourceStr, + UErrorCode& errorCode) const { if (U_FAILURE(errorCode)) { return {}; } @@ -1231,6 +1248,12 @@ FormattedPlaceholder StandardFunctions::DateTime::format(FormattedPlaceholder&& const Formattable& source = toFormat.asFormattable(); switch (source.getType()) { case UFMT_STRING: { + // Lazily initialize date parsers used for parsing date literals + parent.initDateParsers(errorCode); + if (U_FAILURE(errorCode)) { + return {}; + } + const UnicodeString& sourceStr = source.getString(errorCode); U_ASSERT(U_SUCCESS(errorCode)); diff --git a/icu4c/source/i18n/messageformat2_function_registry_internal.h b/icu4c/source/i18n/messageformat2_function_registry_internal.h index 8122eba7a5db..c117329b8c7e 100644 --- a/icu4c/source/i18n/messageformat2_function_registry_internal.h +++ b/icu4c/source/i18n/messageformat2_function_registry_internal.h @@ -57,6 +57,15 @@ namespace message2 { DateTimeType type; DateTimeFactory(DateTimeType t) : type(t) {} + + // Lazily initialize cached date parsers + void initDateParsers(UErrorCode&); + + // Cached parsers; lazily initialized + LocalPointer dateParser; + LocalPointer dateTimeParser; + LocalPointer dateTimeUTCParser; + LocalPointer dateTimeZoneParser; }; class DateTime : public Formatter { @@ -68,9 +77,18 @@ namespace message2 { const Locale& locale; const DateTimeFactory::DateTimeType type; friend class DateTimeFactory; - DateTime(const Locale& l, DateTimeFactory::DateTimeType t) : locale(l), type(t) {} + DateTime(const Locale& l, DateTimeFactory& p, DateTimeFactory::DateTimeType t) + : locale(l), type(t), parent(p) {} const LocalPointer icuFormatter; + // Stores the cached DateFormat objects for parsing date literals + DateTimeFactory& parent; + + // Methods for parsing date literals + UDate tryPatterns(const UnicodeString&, UErrorCode&) const; + UDate tryTimeZonePatterns(const UnicodeString&, UErrorCode&) const; + DateInfo createDateInfoFromString(const UnicodeString&, UErrorCode&) const; + /* Looks up an option by name, first checking `opts`, then the cached options in `toFormat` if applicable, and finally using a default diff --git a/icu4c/source/test/testdata/message2/more-functions.json b/icu4c/source/test/testdata/message2/more-functions.json index 6256e22deec3..f034d04d51cc 100644 --- a/icu4c/source/test/testdata/message2/more-functions.json +++ b/icu4c/source/test/testdata/message2/more-functions.json @@ -126,6 +126,10 @@ "exp": "7:23:45\u202FPM GMT+03:30", "params": {"d": "2024-07-02T19:23:45+03:30"}, "comment": "This checks that an argument can be a date literal string." + }, + { + "src": "{|2006-01-02T15:04:06| :datetime dateStyle=long} / {|2006-01-03| :datetime dateStyle=medium}", + "exp": "January 2, 2006 / Jan 3, 2006" } ] } From 4a7da29c58c703ab5fe6a08c3357f3da98559dd4 Mon Sep 17 00:00:00 2001 From: Tim Chevalier Date: Tue, 13 Aug 2024 13:24:36 -0700 Subject: [PATCH 20/23] Add comment on hasTzOffset() --- icu4c/source/i18n/messageformat2_function_registry.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/icu4c/source/i18n/messageformat2_function_registry.cpp b/icu4c/source/i18n/messageformat2_function_registry.cpp index e3f5cd7106b6..ea4f33e74555 100644 --- a/icu4c/source/i18n/messageformat2_function_registry.cpp +++ b/icu4c/source/i18n/messageformat2_function_registry.cpp @@ -976,6 +976,10 @@ static TimeZone* createTimeZone(const DateInfo& dateInfo, UErrorCode& errorCode) return tz; } +// Returns true iff `sourceStr` ends in an offset like +03:30 or -06:00 +// (This function is just used to determine whether to call tryPatterns() +// or tryTimeZonePatterns(); tryTimeZonePatterns() checks fully that the +// string matches the expected format) static bool hasTzOffset(const UnicodeString& sourceStr) { int32_t len = sourceStr.length(); From da9307cc1b85da5ce0a39e98f7f5ea468f624216 Mon Sep 17 00:00:00 2001 From: Tim Chevalier Date: Tue, 10 Sep 2024 19:04:01 -0700 Subject: [PATCH 21/23] Share DateFormat objects across threads --- .../i18n/messageformat2_function_registry.cpp | 71 +++++++++++++------ ...essageformat2_function_registry_internal.h | 16 +---- icu4c/source/i18n/ucln_in.h | 1 + 3 files changed, 54 insertions(+), 34 deletions(-) diff --git a/icu4c/source/i18n/messageformat2_function_registry.cpp b/icu4c/source/i18n/messageformat2_function_registry.cpp index ea4f33e74555..f2545c1a0456 100644 --- a/icu4c/source/i18n/messageformat2_function_registry.cpp +++ b/icu4c/source/i18n/messageformat2_function_registry.cpp @@ -19,6 +19,7 @@ #include "hash.h" #include "mutex.h" #include "number_types.h" +#include "ucln_in.h" #include "uvector.h" // U_ASSERT #include @@ -900,34 +901,64 @@ static DateFormat::EStyle stringToStyle(UnicodeString option, UErrorCode& errorC Formatter* StandardFunctions::DateTimeFactory::createFormatter(const Locale& locale, UErrorCode& errorCode) { NULL_ON_ERROR(errorCode); - Formatter* result = new StandardFunctions::DateTime(locale, *this, type); + Formatter* result = new StandardFunctions::DateTime(locale, type); if (result == nullptr) { errorCode = U_MEMORY_ALLOCATION_ERROR; } return result; } -static UMutex gDateTimeFactoryMutex; +// DateFormat parsers that are shared across threads +static DateFormat* dateParser = nullptr; +static DateFormat* dateTimeParser = nullptr; +static DateFormat* dateTimeUTCParser = nullptr; +static DateFormat* dateTimeZoneParser = nullptr; +static icu::UInitOnce gMF2DateParsersInitOnce {}; -// Lazily initialize DateFormat objects used for parsing date literals -void StandardFunctions::DateTimeFactory::initDateParsers(UErrorCode& errorCode) { - CHECK_ERROR(errorCode); - - Mutex lock(&gDateTimeFactoryMutex); - - if (dateParser.isValid()) { - // Already initialized - return; +// Clean up shared DateFormat objects +static UBool mf2_date_parsers_cleanup() { + if (dateParser != nullptr) { + delete dateParser; + dateParser = nullptr; + } + if (dateTimeParser != nullptr) { + delete dateTimeParser; + dateTimeParser = nullptr; + } + if (dateTimeUTCParser != nullptr) { + delete dateTimeUTCParser; + dateTimeUTCParser = nullptr; + } + if (dateTimeZoneParser != nullptr) { + delete dateTimeZoneParser; + dateTimeZoneParser = nullptr; } + return true; +} + +// Initialize DateFormat objects used for parsing date literals +static void initDateParsersOnce(UErrorCode& errorCode) { + U_ASSERT(dateParser == nullptr); + U_ASSERT(dateTimeParser == nullptr); + U_ASSERT(dateTimeUTCParser == nullptr); + U_ASSERT(dateTimeZoneParser == nullptr); // Handles ISO 8601 date - dateParser.adoptInstead(new SimpleDateFormat(UnicodeString("YYYY-MM-dd"), errorCode)); + dateParser = new SimpleDateFormat(UnicodeString("YYYY-MM-dd"), errorCode); // Handles ISO 8601 datetime without time zone - dateTimeParser.adoptInstead(new SimpleDateFormat(UnicodeString("YYYY-MM-dd'T'HH:mm:ss"), errorCode)); + dateTimeParser = new SimpleDateFormat(UnicodeString("YYYY-MM-dd'T'HH:mm:ss"), errorCode); // Handles ISO 8601 datetime with 'Z' to denote UTC - dateTimeUTCParser.adoptInstead(new SimpleDateFormat(UnicodeString("YYYY-MM-dd'T'HH:mm:ssZ"), errorCode)); + dateTimeUTCParser = new SimpleDateFormat(UnicodeString("YYYY-MM-dd'T'HH:mm:ssZ"), errorCode); // Handles ISO 8601 datetime with timezone offset; 'zzzz' denotes timezone offset - dateTimeZoneParser.adoptInstead(new SimpleDateFormat(UnicodeString("YYYY-MM-dd'T'HH:mm:sszzzz"), errorCode)); + dateTimeZoneParser = new SimpleDateFormat(UnicodeString("YYYY-MM-dd'T'HH:mm:sszzzz"), errorCode); + ucln_i18n_registerCleanup(UCLN_I18N_MF2_DATE_PARSERS, mf2_date_parsers_cleanup); +} + +// Lazily initialize DateFormat objects used for parsing date literals +static void initDateParsers(UErrorCode& errorCode) { + CHECK_ERROR(errorCode); + + umtx_initOnce(gMF2DateParsersInitOnce, &initDateParsersOnce, errorCode); } // From https://github.com/unicode-org/message-format-wg/blob/main/spec/registry.md#date-and-time-operands : @@ -941,10 +972,10 @@ UDate StandardFunctions::DateTime::tryPatterns(const UnicodeString& sourceStr, // Handle ISO 8601 datetime (tryTimeZonePatterns() handles the case // where a timezone offset follows) if (sourceStr.length() > 10) { - return parent.dateTimeParser->parse(sourceStr, errorCode); + return dateTimeParser->parse(sourceStr, errorCode); } // Handle ISO 8601 date - return parent.dateParser->parse(sourceStr, errorCode); + return dateParser->parse(sourceStr, errorCode); } // See comment on tryPatterns() for spec reference @@ -955,9 +986,9 @@ UDate StandardFunctions::DateTime::tryTimeZonePatterns(const UnicodeString& sour } int32_t len = sourceStr.length(); if (len > 0 && sourceStr[len] == 'Z') { - return parent.dateTimeUTCParser->parse(sourceStr, errorCode); + return dateTimeUTCParser->parse(sourceStr, errorCode); } - return parent.dateTimeZoneParser->parse(sourceStr, errorCode); + return dateTimeZoneParser->parse(sourceStr, errorCode); } static TimeZone* createTimeZone(const DateInfo& dateInfo, UErrorCode& errorCode) { @@ -1253,7 +1284,7 @@ FormattedPlaceholder StandardFunctions::DateTime::format(FormattedPlaceholder&& switch (source.getType()) { case UFMT_STRING: { // Lazily initialize date parsers used for parsing date literals - parent.initDateParsers(errorCode); + initDateParsers(errorCode); if (U_FAILURE(errorCode)) { return {}; } diff --git a/icu4c/source/i18n/messageformat2_function_registry_internal.h b/icu4c/source/i18n/messageformat2_function_registry_internal.h index c117329b8c7e..00229d0e4ef1 100644 --- a/icu4c/source/i18n/messageformat2_function_registry_internal.h +++ b/icu4c/source/i18n/messageformat2_function_registry_internal.h @@ -57,15 +57,6 @@ namespace message2 { DateTimeType type; DateTimeFactory(DateTimeType t) : type(t) {} - - // Lazily initialize cached date parsers - void initDateParsers(UErrorCode&); - - // Cached parsers; lazily initialized - LocalPointer dateParser; - LocalPointer dateTimeParser; - LocalPointer dateTimeUTCParser; - LocalPointer dateTimeZoneParser; }; class DateTime : public Formatter { @@ -77,13 +68,10 @@ namespace message2 { const Locale& locale; const DateTimeFactory::DateTimeType type; friend class DateTimeFactory; - DateTime(const Locale& l, DateTimeFactory& p, DateTimeFactory::DateTimeType t) - : locale(l), type(t), parent(p) {} + DateTime(const Locale& l, DateTimeFactory::DateTimeType t) + : locale(l), type(t) {} const LocalPointer icuFormatter; - // Stores the cached DateFormat objects for parsing date literals - DateTimeFactory& parent; - // Methods for parsing date literals UDate tryPatterns(const UnicodeString&, UErrorCode&) const; UDate tryTimeZonePatterns(const UnicodeString&, UErrorCode&) const; diff --git a/icu4c/source/i18n/ucln_in.h b/icu4c/source/i18n/ucln_in.h index 765cdd559fb4..c6bd47e09773 100644 --- a/icu4c/source/i18n/ucln_in.h +++ b/icu4c/source/i18n/ucln_in.h @@ -62,6 +62,7 @@ typedef enum ECleanupI18NType { UCLN_I18N_REGION, UCLN_I18N_LIST_FORMATTER, UCLN_I18N_NUMSYS, + UCLN_I18N_MF2_DATE_PARSERS, UCLN_I18N_COUNT /* This must be last */ } ECleanupI18NType; From 1b8e43f91e681f1dabbbdb5a2ca1e294959523df Mon Sep 17 00:00:00 2001 From: Tim Chevalier Date: Wed, 11 Sep 2024 12:41:52 -0700 Subject: [PATCH 22/23] Fix use-after-free in TimeZoneDelegate destructor --- icu4c/source/i18n/tznames.cpp | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/icu4c/source/i18n/tznames.cpp b/icu4c/source/i18n/tznames.cpp index 24ca161e88c8..3e6e64a5477c 100644 --- a/icu4c/source/i18n/tznames.cpp +++ b/icu4c/source/i18n/tznames.cpp @@ -72,8 +72,12 @@ static UBool U_CALLCONV timeZoneNames_cleanup() static void U_CALLCONV deleteTimeZoneNamesCacheEntry(void *obj) { icu::TimeZoneNamesCacheEntry *entry = (icu::TimeZoneNamesCacheEntry*)obj; - delete (icu::TimeZoneNamesImpl*) entry->names; - uprv_free(entry); + if (entry->refCount <= 1) { + delete (icu::TimeZoneNamesImpl*) entry->names; + uprv_free(entry); + } else { + entry->refCount--; + } } U_CDECL_END @@ -175,7 +179,9 @@ TimeZoneNamesDelegate::TimeZoneNamesDelegate(const Locale& locale, UErrorCode& s status = U_MEMORY_ALLOCATION_ERROR; } else { cacheEntry->names = tznames; - cacheEntry->refCount = 1; + // The initial refCount is 2 because the entry is referenced both + // by this TimeZoneDelegate and by the gTimeZoneNamesCache + cacheEntry->refCount = 2; cacheEntry->lastAccess = (double)uprv_getUTCtime(); uhash_put(gTimeZoneNamesCache, newKey, cacheEntry, &status); @@ -209,9 +215,13 @@ TimeZoneNamesDelegate::~TimeZoneNamesDelegate() { umtx_lock(&gTimeZoneNamesLock); { if (fTZnamesCacheEntry) { - U_ASSERT(fTZnamesCacheEntry->refCount > 0); - // Just decrement the reference count - fTZnamesCacheEntry->refCount--; + if (fTZnamesCacheEntry->refCount <= 1) { + delete fTZnamesCacheEntry->names; + uprv_free(fTZnamesCacheEntry); + } else { + // Just decrement the reference count + fTZnamesCacheEntry->refCount--; + } } } umtx_unlock(&gTimeZoneNamesLock); From 087982055246942fccab09adbe8a15e3676ad51a Mon Sep 17 00:00:00 2001 From: Tim Chevalier Date: Fri, 6 Dec 2024 18:37:46 -0800 Subject: [PATCH 23/23] Add null checks --- icu4c/source/i18n/messageformat2_function_registry.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/icu4c/source/i18n/messageformat2_function_registry.cpp b/icu4c/source/i18n/messageformat2_function_registry.cpp index f2545c1a0456..ff37abe588fe 100644 --- a/icu4c/source/i18n/messageformat2_function_registry.cpp +++ b/icu4c/source/i18n/messageformat2_function_registry.cpp @@ -951,6 +951,11 @@ static void initDateParsersOnce(UErrorCode& errorCode) { dateTimeUTCParser = new SimpleDateFormat(UnicodeString("YYYY-MM-dd'T'HH:mm:ssZ"), errorCode); // Handles ISO 8601 datetime with timezone offset; 'zzzz' denotes timezone offset dateTimeZoneParser = new SimpleDateFormat(UnicodeString("YYYY-MM-dd'T'HH:mm:sszzzz"), errorCode); + + if (!dateParser || !dateTimeParser || !dateTimeUTCParser || !dateTimeZoneParser) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return; + } ucln_i18n_registerCleanup(UCLN_I18N_MF2_DATE_PARSERS, mf2_date_parsers_cleanup); }