Skip to content

Commit

Permalink
ICU-22763 MF2: Handle time zones correctly in :datetime
Browse files Browse the repository at this point in the history
Previously, the time zone components of date/time literals were ignored.
In order to store the time zone properly for re-formatting,
this change also changes the representation of `Formattable` date
values to use a `GregorianCalendar` rather than a `UDate`.
(This is a public API change.)
  • Loading branch information
catamorphism committed May 16, 2024
1 parent 6ac6fdd commit ca6d29d
Show file tree
Hide file tree
Showing 8 changed files with 215 additions and 66 deletions.
21 changes: 15 additions & 6 deletions icu4c/source/i18n/messageformat2_formattable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ namespace message2 {

Formattable::Formattable(const Formattable& other) {
contents = other.contents;
holdsDate = other.holdsDate;
}

Formattable Formattable::forDecimal(std::string_view number, UErrorCode &status) {
Expand All @@ -53,7 +52,7 @@ namespace message2 {

UFormattableType Formattable::getType() const {
if (std::holds_alternative<double>(contents)) {
return holdsDate ? UFMT_DATE : UFMT_DOUBLE;
return UFMT_DOUBLE;
}
if (std::holds_alternative<int64_t>(contents)) {
return UFMT_INT64;
Expand All @@ -74,6 +73,9 @@ namespace message2 {
}
}
}
if (isDate()) {
return UFMT_DATE;
}
if (std::holds_alternative<const FormattableObject*>(contents)) {
return UFMT_OBJECT;
}
Expand Down Expand Up @@ -223,12 +225,19 @@ namespace message2 {
return df.orphan();
}

void formatDateWithDefaults(const Locale& locale, UDate date, UnicodeString& result, UErrorCode& errorCode) {
void formatDateWithDefaults(const Locale& locale, const GregorianCalendar& cal, UnicodeString& result, UErrorCode& errorCode) {
CHECK_ERROR(errorCode);

LocalPointer<DateFormat> df(defaultDateTimeInstance(locale, errorCode));
CHECK_ERROR(errorCode);
df->format(date, result, 0, errorCode);
// We have to copy the `const` reference that was passed in,
// because DateFormat::format() takes a non-const reference.
LocalPointer<GregorianCalendar> copied(new GregorianCalendar(cal));
if (!copied.isValid()) {
errorCode = U_MEMORY_ALLOCATION_ERROR;
return;
}
df->format(*copied, result, nullptr, errorCode);
}

// Called when output is required and the contents are an unevaluated `Formattable`;
Expand Down Expand Up @@ -259,9 +268,9 @@ namespace message2 {
switch (type) {
case UFMT_DATE: {
UnicodeString result;
UDate d = toFormat.getDate(status);
const GregorianCalendar* cal = toFormat.getDate(status);
U_ASSERT(U_SUCCESS(status));
formatDateWithDefaults(locale, d, result, status);
formatDateWithDefaults(locale, *cal, result, status);
return FormattedPlaceholder(input, FormattedValue(std::move(result)));
}
case UFMT_DOUBLE: {
Expand Down
179 changes: 151 additions & 28 deletions icu4c/source/i18n/messageformat2_function_registry.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include "unicode/dtptngen.h"
#include "unicode/messageformat2_data_model_names.h"
#include "unicode/messageformat2_function_registry.h"
#include "unicode/simpletz.h"
#include "unicode/smpdtfmt.h"
#include "charstr.h"
#include "messageformat2_allocation.h"
Expand Down Expand Up @@ -905,6 +906,135 @@ Formatter* StandardFunctions::DateTimeFactory::createFormatter(const Locale& loc
return result;
}

static UDate tryPattern(const char* pat, const UnicodeString& sourceStr, UErrorCode& errorCode) {
LocalPointer<DateFormat> dateParser(new SimpleDateFormat(UnicodeString(pat), errorCode));
if (U_FAILURE(errorCode)) {
return 0;
}
return dateParser->parse(sourceStr, errorCode);
}

static UDate tryPatterns(const UnicodeString& sourceStr, UErrorCode& errorCode) {
if (U_FAILURE(errorCode)) {
return 0;
}
UDate d = tryPattern("YYYY-MM-dd'T'HH:mm:ss", sourceStr, errorCode);
if (U_FAILURE(errorCode)) {
errorCode = U_ZERO_ERROR;
d = tryPattern("YYYY-MM-dd", sourceStr, errorCode);
}
return d;
}

static UDate tryTimeZonePatterns(const UnicodeString& sourceStr, UErrorCode& errorCode) {
if (U_FAILURE(errorCode)) {
return 0;
}
UDate d = tryPattern("YYYY-MM-dd'T'HH:mm:sszzzz", sourceStr, errorCode);
if (U_FAILURE(errorCode)) {
errorCode = U_ZERO_ERROR;
d = tryPattern("YYYY-MM-dd'T'HH:mm:ssZ", sourceStr, errorCode);
}
return d;
}

SimpleTimeZone* createTimeZonePart(const UnicodeString& offsetStr, UErrorCode& errorCode) {
NULL_ON_ERROR(errorCode);

int32_t len = offsetStr.length();

// `offsetStr` has a format like +03:30 or -03:30
if (len <= 2) {
errorCode = U_ILLEGAL_ARGUMENT_ERROR;
return nullptr;
}

int32_t sign = offsetStr.charAt(0) == '-' ? -1 : 1;
int32_t indexOfColon = offsetStr.indexOf(':');
if (indexOfColon <= 2) {
errorCode = U_ILLEGAL_ARGUMENT_ERROR;
return nullptr;
}
UnicodeString tzPart1 = offsetStr.tempSubString(1, indexOfColon);
UnicodeString tzPart2 = offsetStr.tempSubString(indexOfColon + 1, len);
double tzHour, tzMin;
strToDouble(tzPart1, tzHour, errorCode);
strToDouble(tzPart2, tzMin, errorCode);
if (U_FAILURE(errorCode)) {
errorCode = U_ILLEGAL_ARGUMENT_ERROR;
return nullptr;
}
int32_t offset = sign * (static_cast<int32_t>(tzHour) * 60 + static_cast<int32_t>(tzMin)) * 60 * 1000;
LocalPointer<SimpleTimeZone> tz(new SimpleTimeZone(offset, UnicodeString("offset")));
if (!tz.isValid()) {
errorCode = U_MEMORY_ALLOCATION_ERROR;
}
return tz.orphan();
}

static bool hasTzOffset(const UnicodeString& sourceStr) {
int32_t len = sourceStr.length();

if (len <= 6) {
return false;
}
return ((sourceStr[len - 6] == PLUS || sourceStr[len - 6] == HYPHEN)
&& sourceStr[len - 3] == COLON);
}

static GregorianCalendar* createCalendarFromDateString(const UnicodeString& sourceStr, UErrorCode& errorCode) {
NULL_ON_ERROR(errorCode);

UDate absoluteDate;

// Try time zone part
int32_t indexOfZ = sourceStr.indexOf('Z');
int32_t indexOfPlus = sourceStr.lastIndexOf('+');
int32_t indexOfMinus = sourceStr.lastIndexOf('-');
int32_t indexOfSign = indexOfPlus > -1 ? indexOfPlus : indexOfMinus;
bool isTzOffset = hasTzOffset(sourceStr);
bool isGMT = indexOfZ > 0;
UnicodeString offsetPart;
bool hasTimeZone = isTzOffset || isGMT;
if (!hasTimeZone) {
// No time zone
absoluteDate = tryPatterns(sourceStr, errorCode);
} else {
// Try to split into time zone and non-time-zone parts
UnicodeString noTimeZone;
if (isGMT) {
noTimeZone = sourceStr.tempSubString(0, indexOfZ);
} else {
noTimeZone = sourceStr.tempSubString(0, indexOfSign);
}
tryPatterns(noTimeZone, errorCode);
// Failure -- can't parse this string
NULL_ON_ERROR(errorCode);
// Success -- now handle the time zone part
if (isGMT) {
noTimeZone += UnicodeString("GMT");
absoluteDate = tryTimeZonePatterns(noTimeZone, errorCode);
} else {
// Finally, try [+-]nn:nn
absoluteDate = tryTimeZonePatterns(sourceStr, errorCode);
offsetPart = sourceStr.tempSubString(indexOfSign, sourceStr.length());
}
}
LocalPointer<GregorianCalendar> cal(new GregorianCalendar(errorCode));
NULL_ON_ERROR(errorCode);
cal->setTime(absoluteDate, errorCode);
if (hasTimeZone) {
if (isGMT) {
cal->setTimeZone(*TimeZone::getGMT());
} else {
LocalPointer<SimpleTimeZone> tz(createTimeZonePart(offsetPart, errorCode));
NULL_ON_ERROR(errorCode);
cal->adoptTimeZone(tz.orphan());
}
}
return cal.orphan();
}

FormattedPlaceholder StandardFunctions::DateTime::format(FormattedPlaceholder&& toFormat,
FunctionOptions&& opts,
UErrorCode& errorCode) const {
Expand Down Expand Up @@ -1093,39 +1223,32 @@ FormattedPlaceholder StandardFunctions::DateTime::format(FormattedPlaceholder&&
case UFMT_STRING: {
const UnicodeString& sourceStr = source.getString(errorCode);
U_ASSERT(U_SUCCESS(errorCode));
// Pattern for ISO 8601 format - datetime
UnicodeString pattern("YYYY-MM-dd'T'HH:mm:ss");
LocalPointer<DateFormat> dateParser(new SimpleDateFormat(pattern, errorCode));

LocalPointer<GregorianCalendar> cal(createCalendarFromDateString(sourceStr, errorCode));

if (U_FAILURE(errorCode)) {
errorCode = U_MF_FORMATTING_ERROR;
} else {
// Parse the date
UDate d = dateParser->parse(sourceStr, errorCode);
if (U_FAILURE(errorCode)) {
// Pattern for ISO 8601 format - date
UnicodeString pattern("YYYY-MM-dd");
errorCode = U_ZERO_ERROR;
dateParser.adoptInstead(new SimpleDateFormat(pattern, errorCode));
if (U_FAILURE(errorCode)) {
errorCode = U_MF_FORMATTING_ERROR;
} else {
d = dateParser->parse(sourceStr, errorCode);
if (U_FAILURE(errorCode)) {
errorCode = U_MF_OPERAND_MISMATCH_ERROR;
}
}
}
// Use the parsed date as the source value
// in the returned FormattedPlaceholder; this is necessary
// so the date can be re-formatted
toFormat = FormattedPlaceholder(message2::Formattable::forDate(d),
toFormat.getFallback());
df->format(d, result, 0, errorCode);
errorCode = U_MF_OPERAND_MISMATCH_ERROR;
return {};
}

// Use the parsed date as the source value
// in the returned FormattedPlaceholder; this is necessary
// so the date can be re-formatted
df->format(*cal, result, 0, errorCode);
toFormat = FormattedPlaceholder(message2::Formattable(std::move(*cal)),
toFormat.getFallback());
break;
}
case UFMT_DATE: {
df->format(source.asICUFormattable(errorCode), result, 0, errorCode);
const GregorianCalendar* cal = source.getDate(errorCode);
U_ASSERT(U_SUCCESS(errorCode));
U_ASSERT(cal != nullptr);
LocalPointer<GregorianCalendar> copied(new GregorianCalendar(*cal));
if (!copied.isValid()) {
errorCode = U_MEMORY_ALLOCATION_ERROR;
return {};
}
df->format(*copied, result, 0, errorCode);
if (U_FAILURE(errorCode)) {
if (errorCode == U_ILLEGAL_ARGUMENT_ERROR) {
errorCode = U_MF_OPERAND_MISMATCH_ERROR;
Expand Down
34 changes: 17 additions & 17 deletions icu4c/source/i18n/unicode/messageformat2_formattable.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,10 @@
#if !UCONFIG_NO_MF2

#include "unicode/chariter.h"
#include "unicode/gregocal.h"
#include "unicode/numberformatter.h"
#include "unicode/messageformat2_data_model_names.h"
#include "unicode/smpdtfmt.h"

#ifndef U_HIDE_DEPRECATED_API

Expand Down Expand Up @@ -226,22 +228,24 @@ namespace message2 {
}

/**
* Gets the Date value of this object. If this object is not of type
* kDate then the result is undefined and the error code is set.
* Gets the calendar representing the date value of this object.
* If this object is not of type kDate then the result is
* undefined and the error code is set.
*
* @param status Input/output error code.
* @return the Date value of this object.
* @return A non-owned pointer to a GregorianCalendar object
* representing the underlying date of this object.
* @internal ICU 75 technology preview
* @deprecated This API is for technology preview only.
*/
UDate getDate(UErrorCode& status) const {
const GregorianCalendar* getDate(UErrorCode& status) const {
if (U_SUCCESS(status)) {
if (isDate()) {
return *std::get_if<double>(&contents);
return std::get_if<GregorianCalendar>(&contents);
}
status = U_ILLEGAL_ARGUMENT_ERROR;
}
return 0;
return nullptr;
}

/**
Expand Down Expand Up @@ -299,7 +303,6 @@ namespace message2 {
using std::swap;

swap(f1.contents, f2.contents);
swap(f1.holdsDate, f2.holdsDate);
}
/**
* Copy constructor.
Expand Down Expand Up @@ -351,18 +354,15 @@ namespace message2 {
*/
Formattable(int64_t i) : contents(i) {}
/**
* Date factory method.
* Date constructor.
*
* @param d A UDate value to wrap as a Formattable.
* @param c A GregorianCalendar value representing a date,
* to wrap as a Formattable.
* Passed by move
* @internal ICU 75 technology preview
* @deprecated This API is for technology preview only.
*/
static Formattable forDate(UDate d) {
Formattable f;
f.contents = d;
f.holdsDate = true;
return f;
}
Formattable(GregorianCalendar&& c) : contents(std::move(c)) {}
/**
* Creates a Formattable object of an appropriate numeric type from a
* a decimal number in string form. The Formattable will retain the
Expand Down Expand Up @@ -422,16 +422,16 @@ namespace message2 {
int64_t,
UnicodeString,
icu::Formattable, // represents a Decimal
GregorianCalendar,
const FormattableObject*,
std::pair<const Formattable*, int32_t>> contents;
bool holdsDate = false; // otherwise, we get type errors about UDate being a duplicate type
UnicodeString bogusString; // :((((

UBool isDecimal() const {
return std::holds_alternative<icu::Formattable>(contents);
}
UBool isDate() const {
return std::holds_alternative<double>(contents) && holdsDate;
return std::holds_alternative<GregorianCalendar>(contents);
}
}; // class Formattable

Expand Down
11 changes: 4 additions & 7 deletions icu4c/source/test/intltest/messageformat2test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -91,13 +91,12 @@ void TestMessageFormat2::testAPISimple() {
.setLocale(locale)
.build(errorCode);

Calendar* cal(Calendar::createInstance(errorCode));
GregorianCalendar cal(errorCode);
// Sunday, October 28, 2136 8:39:12 AM PST
cal->set(2136, Calendar::OCTOBER, 28, 8, 39, 12);
UDate date = cal->getTime(errorCode);
cal.set(2136, Calendar::OCTOBER, 28, 8, 39, 12);

argsBuilder.clear();
argsBuilder["today"] = message2::Formattable::forDate(date);
argsBuilder["today"] = message2::Formattable(std::move(cal));
args = MessageArguments(argsBuilder, errorCode);
result = mf.formatToString(args, errorCode);
assertEquals("testAPI", "Today is Sunday, October 28, 2136.", result);
Expand All @@ -119,8 +118,6 @@ void TestMessageFormat2::testAPISimple() {
.build(errorCode);
result = mf.formatToString(args, errorCode);
assertEquals("testAPI", "Maria added 12 photos to her album.", result);

delete cal;
}

// Design doc example, with more details
Expand All @@ -145,7 +142,7 @@ void TestMessageFormat2::testAPI() {

test = testBuilder.setName("testAPI")
.setPattern("Today is {$today :date style=full}.")
.setDateArgument("today", date)
.setDateArgument("today", date, errorCode)
.setExpected("Today is Sunday, October 28, 2136.")
.setLocale("en_US")
.build();
Expand Down
Loading

0 comments on commit ca6d29d

Please sign in to comment.