From ca6d29d5727a50422c249f1ed1995c33c7c40d0f Mon Sep 17 00:00:00 2001 From: Tim Chevalier Date: Mon, 29 Apr 2024 14:15:31 -0700 Subject: [PATCH] ICU-22763 MF2: Handle time zones correctly in :datetime Previously, the time zone components of date/time literals were ignored. In order to store the time zone properly for re-formatting, this change also changes the representation of `Formattable` date values to use a `GregorianCalendar` rather than a `UDate`. (This is a public API change.) --- .../i18n/messageformat2_formattable.cpp | 21 +- .../i18n/messageformat2_function_registry.cpp | 179 +++++++++++++++--- .../i18n/unicode/messageformat2_formattable.h | 34 ++-- .../test/intltest/messageformat2test.cpp | 11 +- .../intltest/messageformat2test_read_json.cpp | 2 +- .../test/intltest/messageformat2test_utils.h | 12 +- .../message2/icu4j/icu-test-functions.json | 6 +- .../testdata/message2/more-functions.json | 16 +- 8 files changed, 215 insertions(+), 66 deletions(-) diff --git a/icu4c/source/i18n/messageformat2_formattable.cpp b/icu4c/source/i18n/messageformat2_formattable.cpp index 3152ccb44fd8..0296a23684ad 100644 --- a/icu4c/source/i18n/messageformat2_formattable.cpp +++ b/icu4c/source/i18n/messageformat2_formattable.cpp @@ -35,7 +35,6 @@ namespace message2 { Formattable::Formattable(const Formattable& other) { contents = other.contents; - holdsDate = other.holdsDate; } Formattable Formattable::forDecimal(std::string_view number, UErrorCode &status) { @@ -53,7 +52,7 @@ namespace message2 { UFormattableType Formattable::getType() const { if (std::holds_alternative(contents)) { - return holdsDate ? UFMT_DATE : UFMT_DOUBLE; + return UFMT_DOUBLE; } if (std::holds_alternative(contents)) { return UFMT_INT64; @@ -74,6 +73,9 @@ namespace message2 { } } } + if (isDate()) { + return UFMT_DATE; + } if (std::holds_alternative(contents)) { return UFMT_OBJECT; } @@ -223,12 +225,19 @@ namespace message2 { return df.orphan(); } - void formatDateWithDefaults(const Locale& locale, UDate date, UnicodeString& result, UErrorCode& errorCode) { + void formatDateWithDefaults(const Locale& locale, const GregorianCalendar& cal, UnicodeString& result, UErrorCode& errorCode) { CHECK_ERROR(errorCode); LocalPointer df(defaultDateTimeInstance(locale, errorCode)); CHECK_ERROR(errorCode); - df->format(date, result, 0, errorCode); + // We have to copy the `const` reference that was passed in, + // because DateFormat::format() takes a non-const reference. + LocalPointer copied(new GregorianCalendar(cal)); + if (!copied.isValid()) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return; + } + df->format(*copied, result, nullptr, errorCode); } // Called when output is required and the contents are an unevaluated `Formattable`; @@ -259,9 +268,9 @@ namespace message2 { switch (type) { case UFMT_DATE: { UnicodeString result; - UDate d = toFormat.getDate(status); + const GregorianCalendar* cal = toFormat.getDate(status); U_ASSERT(U_SUCCESS(status)); - formatDateWithDefaults(locale, d, result, status); + formatDateWithDefaults(locale, *cal, result, status); return FormattedPlaceholder(input, FormattedValue(std::move(result))); } case UFMT_DOUBLE: { diff --git a/icu4c/source/i18n/messageformat2_function_registry.cpp b/icu4c/source/i18n/messageformat2_function_registry.cpp index 7bf5e2d82c36..68319274fed4 100644 --- a/icu4c/source/i18n/messageformat2_function_registry.cpp +++ b/icu4c/source/i18n/messageformat2_function_registry.cpp @@ -10,6 +10,7 @@ #include "unicode/dtptngen.h" #include "unicode/messageformat2_data_model_names.h" #include "unicode/messageformat2_function_registry.h" +#include "unicode/simpletz.h" #include "unicode/smpdtfmt.h" #include "charstr.h" #include "messageformat2_allocation.h" @@ -905,6 +906,135 @@ Formatter* StandardFunctions::DateTimeFactory::createFormatter(const Locale& loc return result; } +static UDate tryPattern(const char* pat, const UnicodeString& sourceStr, UErrorCode& errorCode) { + LocalPointer dateParser(new SimpleDateFormat(UnicodeString(pat), errorCode)); + if (U_FAILURE(errorCode)) { + return 0; + } + return dateParser->parse(sourceStr, errorCode); +} + +static UDate tryPatterns(const UnicodeString& sourceStr, UErrorCode& errorCode) { + if (U_FAILURE(errorCode)) { + return 0; + } + UDate d = tryPattern("YYYY-MM-dd'T'HH:mm:ss", sourceStr, errorCode); + if (U_FAILURE(errorCode)) { + errorCode = U_ZERO_ERROR; + d = tryPattern("YYYY-MM-dd", sourceStr, errorCode); + } + return d; +} + +static UDate tryTimeZonePatterns(const UnicodeString& sourceStr, UErrorCode& errorCode) { + if (U_FAILURE(errorCode)) { + return 0; + } + UDate d = tryPattern("YYYY-MM-dd'T'HH:mm:sszzzz", sourceStr, errorCode); + if (U_FAILURE(errorCode)) { + errorCode = U_ZERO_ERROR; + d = tryPattern("YYYY-MM-dd'T'HH:mm:ssZ", sourceStr, errorCode); + } + return d; +} + +SimpleTimeZone* createTimeZonePart(const UnicodeString& offsetStr, UErrorCode& errorCode) { + NULL_ON_ERROR(errorCode); + + int32_t len = offsetStr.length(); + + // `offsetStr` has a format like +03:30 or -03:30 + if (len <= 2) { + errorCode = U_ILLEGAL_ARGUMENT_ERROR; + return nullptr; + } + + int32_t sign = offsetStr.charAt(0) == '-' ? -1 : 1; + int32_t indexOfColon = offsetStr.indexOf(':'); + if (indexOfColon <= 2) { + errorCode = U_ILLEGAL_ARGUMENT_ERROR; + return nullptr; + } + UnicodeString tzPart1 = offsetStr.tempSubString(1, indexOfColon); + UnicodeString tzPart2 = offsetStr.tempSubString(indexOfColon + 1, len); + double tzHour, tzMin; + strToDouble(tzPart1, tzHour, errorCode); + strToDouble(tzPart2, tzMin, errorCode); + if (U_FAILURE(errorCode)) { + errorCode = U_ILLEGAL_ARGUMENT_ERROR; + return nullptr; + } + int32_t offset = sign * (static_cast(tzHour) * 60 + static_cast(tzMin)) * 60 * 1000; + LocalPointer tz(new SimpleTimeZone(offset, UnicodeString("offset"))); + if (!tz.isValid()) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + } + return tz.orphan(); +} + +static bool hasTzOffset(const UnicodeString& sourceStr) { + int32_t len = sourceStr.length(); + + if (len <= 6) { + return false; + } + return ((sourceStr[len - 6] == PLUS || sourceStr[len - 6] == HYPHEN) + && sourceStr[len - 3] == COLON); +} + +static GregorianCalendar* createCalendarFromDateString(const UnicodeString& sourceStr, UErrorCode& errorCode) { + NULL_ON_ERROR(errorCode); + + UDate absoluteDate; + + // Try time zone part + int32_t indexOfZ = sourceStr.indexOf('Z'); + int32_t indexOfPlus = sourceStr.lastIndexOf('+'); + int32_t indexOfMinus = sourceStr.lastIndexOf('-'); + int32_t indexOfSign = indexOfPlus > -1 ? indexOfPlus : indexOfMinus; + bool isTzOffset = hasTzOffset(sourceStr); + bool isGMT = indexOfZ > 0; + UnicodeString offsetPart; + bool hasTimeZone = isTzOffset || isGMT; + if (!hasTimeZone) { + // No time zone + absoluteDate = tryPatterns(sourceStr, errorCode); + } else { + // Try to split into time zone and non-time-zone parts + UnicodeString noTimeZone; + if (isGMT) { + noTimeZone = sourceStr.tempSubString(0, indexOfZ); + } else { + noTimeZone = sourceStr.tempSubString(0, indexOfSign); + } + tryPatterns(noTimeZone, errorCode); + // Failure -- can't parse this string + NULL_ON_ERROR(errorCode); + // Success -- now handle the time zone part + if (isGMT) { + noTimeZone += UnicodeString("GMT"); + absoluteDate = tryTimeZonePatterns(noTimeZone, errorCode); + } else { + // Finally, try [+-]nn:nn + absoluteDate = tryTimeZonePatterns(sourceStr, errorCode); + offsetPart = sourceStr.tempSubString(indexOfSign, sourceStr.length()); + } + } + LocalPointer cal(new GregorianCalendar(errorCode)); + NULL_ON_ERROR(errorCode); + cal->setTime(absoluteDate, errorCode); + if (hasTimeZone) { + if (isGMT) { + cal->setTimeZone(*TimeZone::getGMT()); + } else { + LocalPointer tz(createTimeZonePart(offsetPart, errorCode)); + NULL_ON_ERROR(errorCode); + cal->adoptTimeZone(tz.orphan()); + } + } + return cal.orphan(); +} + FormattedPlaceholder StandardFunctions::DateTime::format(FormattedPlaceholder&& toFormat, FunctionOptions&& opts, UErrorCode& errorCode) const { @@ -1093,39 +1223,32 @@ FormattedPlaceholder StandardFunctions::DateTime::format(FormattedPlaceholder&& case UFMT_STRING: { const UnicodeString& sourceStr = source.getString(errorCode); U_ASSERT(U_SUCCESS(errorCode)); - // Pattern for ISO 8601 format - datetime - UnicodeString pattern("YYYY-MM-dd'T'HH:mm:ss"); - LocalPointer dateParser(new SimpleDateFormat(pattern, errorCode)); + + LocalPointer cal(createCalendarFromDateString(sourceStr, errorCode)); + if (U_FAILURE(errorCode)) { - errorCode = U_MF_FORMATTING_ERROR; - } else { - // Parse the date - UDate d = dateParser->parse(sourceStr, errorCode); - if (U_FAILURE(errorCode)) { - // Pattern for ISO 8601 format - date - UnicodeString pattern("YYYY-MM-dd"); - errorCode = U_ZERO_ERROR; - dateParser.adoptInstead(new SimpleDateFormat(pattern, errorCode)); - if (U_FAILURE(errorCode)) { - errorCode = U_MF_FORMATTING_ERROR; - } else { - d = dateParser->parse(sourceStr, errorCode); - if (U_FAILURE(errorCode)) { - errorCode = U_MF_OPERAND_MISMATCH_ERROR; - } - } - } - // Use the parsed date as the source value - // in the returned FormattedPlaceholder; this is necessary - // so the date can be re-formatted - toFormat = FormattedPlaceholder(message2::Formattable::forDate(d), - toFormat.getFallback()); - df->format(d, result, 0, errorCode); + errorCode = U_MF_OPERAND_MISMATCH_ERROR; + return {}; } + + // Use the parsed date as the source value + // in the returned FormattedPlaceholder; this is necessary + // so the date can be re-formatted + df->format(*cal, result, 0, errorCode); + toFormat = FormattedPlaceholder(message2::Formattable(std::move(*cal)), + toFormat.getFallback()); break; } case UFMT_DATE: { - df->format(source.asICUFormattable(errorCode), result, 0, errorCode); + const GregorianCalendar* cal = source.getDate(errorCode); + U_ASSERT(U_SUCCESS(errorCode)); + U_ASSERT(cal != nullptr); + LocalPointer copied(new GregorianCalendar(*cal)); + if (!copied.isValid()) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return {}; + } + df->format(*copied, result, 0, errorCode); if (U_FAILURE(errorCode)) { if (errorCode == U_ILLEGAL_ARGUMENT_ERROR) { errorCode = U_MF_OPERAND_MISMATCH_ERROR; diff --git a/icu4c/source/i18n/unicode/messageformat2_formattable.h b/icu4c/source/i18n/unicode/messageformat2_formattable.h index 8a779adb9ab3..b329ed9a0984 100644 --- a/icu4c/source/i18n/unicode/messageformat2_formattable.h +++ b/icu4c/source/i18n/unicode/messageformat2_formattable.h @@ -13,8 +13,10 @@ #if !UCONFIG_NO_MF2 #include "unicode/chariter.h" +#include "unicode/gregocal.h" #include "unicode/numberformatter.h" #include "unicode/messageformat2_data_model_names.h" +#include "unicode/smpdtfmt.h" #ifndef U_HIDE_DEPRECATED_API @@ -226,22 +228,24 @@ namespace message2 { } /** - * Gets the Date value of this object. If this object is not of type - * kDate then the result is undefined and the error code is set. + * Gets the calendar representing the date value of this object. + * If this object is not of type kDate then the result is + * undefined and the error code is set. * * @param status Input/output error code. - * @return the Date value of this object. + * @return A non-owned pointer to a GregorianCalendar object + * representing the underlying date of this object. * @internal ICU 75 technology preview * @deprecated This API is for technology preview only. */ - UDate getDate(UErrorCode& status) const { + const GregorianCalendar* getDate(UErrorCode& status) const { if (U_SUCCESS(status)) { if (isDate()) { - return *std::get_if(&contents); + return std::get_if(&contents); } status = U_ILLEGAL_ARGUMENT_ERROR; } - return 0; + return nullptr; } /** @@ -299,7 +303,6 @@ namespace message2 { using std::swap; swap(f1.contents, f2.contents); - swap(f1.holdsDate, f2.holdsDate); } /** * Copy constructor. @@ -351,18 +354,15 @@ namespace message2 { */ Formattable(int64_t i) : contents(i) {} /** - * Date factory method. + * Date constructor. * - * @param d A UDate value to wrap as a Formattable. + * @param c A GregorianCalendar value representing a date, + * to wrap as a Formattable. + * Passed by move * @internal ICU 75 technology preview * @deprecated This API is for technology preview only. */ - static Formattable forDate(UDate d) { - Formattable f; - f.contents = d; - f.holdsDate = true; - return f; - } + Formattable(GregorianCalendar&& c) : contents(std::move(c)) {} /** * Creates a Formattable object of an appropriate numeric type from a * a decimal number in string form. The Formattable will retain the @@ -422,16 +422,16 @@ namespace message2 { int64_t, UnicodeString, icu::Formattable, // represents a Decimal + GregorianCalendar, const FormattableObject*, std::pair> contents; - bool holdsDate = false; // otherwise, we get type errors about UDate being a duplicate type UnicodeString bogusString; // :(((( UBool isDecimal() const { return std::holds_alternative(contents); } UBool isDate() const { - return std::holds_alternative(contents) && holdsDate; + return std::holds_alternative(contents); } }; // class Formattable diff --git a/icu4c/source/test/intltest/messageformat2test.cpp b/icu4c/source/test/intltest/messageformat2test.cpp index a7137d8493a0..a41b8fbb659c 100644 --- a/icu4c/source/test/intltest/messageformat2test.cpp +++ b/icu4c/source/test/intltest/messageformat2test.cpp @@ -91,13 +91,12 @@ void TestMessageFormat2::testAPISimple() { .setLocale(locale) .build(errorCode); - Calendar* cal(Calendar::createInstance(errorCode)); + GregorianCalendar cal(errorCode); // Sunday, October 28, 2136 8:39:12 AM PST - cal->set(2136, Calendar::OCTOBER, 28, 8, 39, 12); - UDate date = cal->getTime(errorCode); + cal.set(2136, Calendar::OCTOBER, 28, 8, 39, 12); argsBuilder.clear(); - argsBuilder["today"] = message2::Formattable::forDate(date); + argsBuilder["today"] = message2::Formattable(std::move(cal)); args = MessageArguments(argsBuilder, errorCode); result = mf.formatToString(args, errorCode); assertEquals("testAPI", "Today is Sunday, October 28, 2136.", result); @@ -119,8 +118,6 @@ void TestMessageFormat2::testAPISimple() { .build(errorCode); result = mf.formatToString(args, errorCode); assertEquals("testAPI", "Maria added 12 photos to her album.", result); - - delete cal; } // Design doc example, with more details @@ -145,7 +142,7 @@ void TestMessageFormat2::testAPI() { test = testBuilder.setName("testAPI") .setPattern("Today is {$today :date style=full}.") - .setDateArgument("today", date) + .setDateArgument("today", date, errorCode) .setExpected("Today is Sunday, October 28, 2136.") .setLocale("en_US") .build(); diff --git a/icu4c/source/test/intltest/messageformat2test_read_json.cpp b/icu4c/source/test/intltest/messageformat2test_read_json.cpp index b52b3c48d4be..25b7bb68a416 100644 --- a/icu4c/source/test/intltest/messageformat2test_read_json.cpp +++ b/icu4c/source/test/intltest/messageformat2test_read_json.cpp @@ -100,7 +100,7 @@ static bool setArguments(TestCase::Builder& test, const json::object_t& params, // from number values auto obj = argsIter->second.template get(); if (obj["date"].is_number()) { - test.setDateArgument(argName, obj["date"]); + test.setDateArgument(argName, obj["date"], errorCode); } else if (obj["decimal"].is_string()) { // Decimal strings: represent in tests as { "decimal" : string }, // to distinguish from string values diff --git a/icu4c/source/test/intltest/messageformat2test_utils.h b/icu4c/source/test/intltest/messageformat2test_utils.h index 08cdd57bfa72..7a6248c7a5dc 100644 --- a/icu4c/source/test/intltest/messageformat2test_utils.h +++ b/icu4c/source/test/intltest/messageformat2test_utils.h @@ -106,8 +106,16 @@ class TestCase : public UMemory { arguments[k] = Formattable(val); return *this; } - Builder& setDateArgument(const UnicodeString& k, UDate date) { - arguments[k] = Formattable::forDate(date); + Builder& setDateArgument(const UnicodeString& k, UDate date, UErrorCode& errorCode) { + // This ignores time zones; the data-driven tests represent date/time values + // as a datestamp, so this code suffices to handle those. + // Date/time literal strings would be handled using `setArgument()` with a string + // argument. + GregorianCalendar cal(errorCode); + THIS_ON_ERROR(errorCode); + cal.setTime(date, errorCode); + THIS_ON_ERROR(errorCode); + arguments[k] = Formattable(std::move(cal)); return *this; } Builder& setDecimalArgument(const UnicodeString& k, std::string_view decimal, UErrorCode& errorCode) { diff --git a/icu4c/source/test/testdata/message2/icu4j/icu-test-functions.json b/icu4c/source/test/testdata/message2/icu4j/icu-test-functions.json index 6d78ffe4f04d..21f917da6e52 100644 --- a/icu4c/source/test/testdata/message2/icu4j/icu-test-functions.json +++ b/icu4c/source/test/testdata/message2/icu4j/icu-test-functions.json @@ -108,13 +108,11 @@ }, { "src": "Expires at {|2024-07-02T19:23:45Z| :datetime timeStyle=long}", - "exp": "Expires at 7:23:45 PM GMT", - "ignoreTest": "ICU-22754 Time zones not working yet (bug)" + "exp": "Expires at 7:23:45 PM GMT" }, { "src": "Expires at {|2024-07-02T19:23:45+03:30| :datetime timeStyle=full}", - "exp": "Expires at 7:23:45 PM GMT+03:30", - "ignoreTest": "ICU-22754 Time zones not working yet (bug)" + "exp": "Expires at 7:23:45 PM GMT+03:30" } ], "Chaining" : [ diff --git a/icu4c/source/test/testdata/message2/more-functions.json b/icu4c/source/test/testdata/message2/more-functions.json index 161379a63c55..bfffcd343359 100644 --- a/icu4c/source/test/testdata/message2/more-functions.json +++ b/icu4c/source/test/testdata/message2/more-functions.json @@ -1,5 +1,5 @@ { - "number": [ + "number_extra": [ { "src": "Format {123456789.9876 :number} number", "locale": "en-IN", @@ -107,5 +107,19 @@ "locale": "ro", "params": {"val": {"decimal": "1234567890123456789.987654321"}} } + ], + "datetime_extra": [ + { + "srcs": [".local $d = {|2024-07-02T19:23:45+03:30| :datetime timeStyle=long}\n", + "{{Expires at {$d} or {$d :datetime timeStyle=full}}}"], + "exp": "Expires at 7:23:45\u202FPM GMT+3:30 or 7:23:45\u202FPM GMT+03:30", + "comment": "This checks that the return value of :datetime preserves the time zone if specified" + }, + { + "src": "{$d :datetime timeStyle=full}", + "exp": "7:23:45\u202FPM GMT+03:30", + "params": {"d": "2024-07-02T19:23:45+03:30"}, + "comment": "This checks that an argument can be a date literal string." + } ] }