Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ICU-22763 MF2: Handle time zones correctly in :datetime #3012

Open
wants to merge 23 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 10 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
4bc0354
ICU-22763 MF2: Handle time zones correctly in :datetime
catamorphism Apr 29, 2024
fa6278e
Tests pass with new DateInfo struct
catamorphism Jun 18, 2024
8451a30
Eliminate GregorianCalendar usage
catamorphism Jun 18, 2024
b4cc119
Docs
catamorphism Jun 18, 2024
eeb534b
Remove obsolete comment
catamorphism Jun 18, 2024
8a1638b
Fix comment
catamorphism Jun 18, 2024
60a66d9
Add comments
catamorphism Jun 18, 2024
a65362f
Avoid creating time zone where not necessary
catamorphism Jun 18, 2024
3d78033
Fix doc comment
catamorphism Jun 18, 2024
804ab9e
Fix MSVC compilation errors
catamorphism Jun 18, 2024
1c68d51
Make createTimeZone() a static method instead of a method on DateInfo
catamorphism Jun 24, 2024
5d15835
Update icu4c/source/test/testdata/message2/more-functions.json
catamorphism Jul 17, 2024
82f9ddc
Add comment about meaning of calendarName
catamorphism Jul 17, 2024
34bcb51
Address review feedback
catamorphism Aug 8, 2024
197caef
Add clarifying comments to DateInfo struct; rename zoneName field to …
catamorphism Aug 8, 2024
9800253
Removed calendarName field from DateInfo
catamorphism Aug 8, 2024
2ecf406
Remove calendar name harder
catamorphism Aug 8, 2024
c63c6b5
Detect 'Z' literal correctly; add test case
catamorphism Aug 8, 2024
8d4da42
Cache DateFormat objects in the DateTimeFactory object and lazily ini…
catamorphism Aug 13, 2024
4a7da29
Add comment on hasTzOffset()
catamorphism Aug 13, 2024
da9307c
Share DateFormat objects across threads
catamorphism Sep 11, 2024
1b8e43f
Fix use-after-free in TimeZoneDelegate destructor
catamorphism Sep 11, 2024
0879820
Add null checks
catamorphism Dec 7, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 33 additions & 6 deletions icu4c/source/i18n/messageformat2_formattable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

#include "unicode/messageformat2_formattable.h"
#include "unicode/smpdtfmt.h"
#include "messageformat2_allocation.h"
#include "messageformat2_macros.h"

#include "limits.h"
Expand All @@ -35,7 +36,6 @@ namespace message2 {

Formattable::Formattable(const Formattable& other) {
contents = other.contents;
holdsDate = other.holdsDate;
}

Formattable Formattable::forDecimal(std::string_view number, UErrorCode &status) {
Expand All @@ -53,7 +53,7 @@ namespace message2 {

UFormattableType Formattable::getType() const {
if (std::holds_alternative<double>(contents)) {
return holdsDate ? UFMT_DATE : UFMT_DOUBLE;
return UFMT_DOUBLE;
}
if (std::holds_alternative<int64_t>(contents)) {
return UFMT_INT64;
Expand All @@ -74,6 +74,9 @@ namespace message2 {
}
}
}
if (isDate()) {
return UFMT_DATE;
}
if (std::holds_alternative<const FormattableObject*>(contents)) {
return UFMT_OBJECT;
}
Expand Down Expand Up @@ -213,6 +216,22 @@ namespace message2 {
return number::NumberFormatter::withLocale(locale).formatDecimal(toFormat, errorCode);
}

TimeZone* DateInfo::createTimeZone(UErrorCode& errorCode) const {
NULL_ON_ERROR(errorCode);

TimeZone* tz;
if (zoneName.length() == 0) {
// Floating time value -- use default time zone
tz = TimeZone::createDefault();
} else {
tz = TimeZone::createTimeZone(zoneName);
}
if (tz == nullptr) {
errorCode = U_MEMORY_ALLOCATION_ERROR;
}
return tz;
}

DateFormat* defaultDateTimeInstance(const Locale& locale, UErrorCode& errorCode) {
NULL_ON_ERROR(errorCode);
LocalPointer<DateFormat> df(DateFormat::createDateTimeInstance(DateFormat::SHORT, DateFormat::SHORT, locale));
Expand All @@ -223,12 +242,20 @@ namespace message2 {
return df.orphan();
}

void formatDateWithDefaults(const Locale& locale, UDate date, UnicodeString& result, UErrorCode& errorCode) {
void formatDateWithDefaults(const Locale& locale,
const DateInfo& dateInfo,
UnicodeString& result,
UErrorCode& errorCode) {
CHECK_ERROR(errorCode);

LocalPointer<DateFormat> df(defaultDateTimeInstance(locale, errorCode));
CHECK_ERROR(errorCode);
df->format(date, result, 0, errorCode);

// Non-Gregorian calendars not supported yet
U_ASSERT(dateInfo.calendarName.length() == 0);
df->adoptTimeZone(dateInfo.createTimeZone(errorCode));
CHECK_ERROR(errorCode);
df->format(dateInfo.date, result, nullptr, errorCode);
}

// Called when output is required and the contents are an unevaluated `Formattable`;
Expand Down Expand Up @@ -259,9 +286,9 @@ namespace message2 {
switch (type) {
case UFMT_DATE: {
UnicodeString result;
UDate d = toFormat.getDate(status);
const DateInfo* dateInfo = toFormat.getDate(status);
U_ASSERT(U_SUCCESS(status));
formatDateWithDefaults(locale, d, result, status);
formatDateWithDefaults(locale, *dateInfo, result, status);
catamorphism marked this conversation as resolved.
Show resolved Hide resolved
return FormattedPlaceholder(input, FormattedValue(std::move(result)));
}
case UFMT_DOUBLE: {
Expand Down
189 changes: 161 additions & 28 deletions icu4c/source/i18n/messageformat2_function_registry.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include "unicode/dtptngen.h"
#include "unicode/messageformat2_data_model_names.h"
#include "unicode/messageformat2_function_registry.h"
#include "unicode/simpletz.h"
#include "unicode/smpdtfmt.h"
#include "charstr.h"
#include "messageformat2_allocation.h"
Expand Down Expand Up @@ -905,6 +906,149 @@ Formatter* StandardFunctions::DateTimeFactory::createFormatter(const Locale& loc
return result;
}

static UDate tryPattern(const char* pat, const UnicodeString& sourceStr, UErrorCode& errorCode) {
LocalPointer<DateFormat> dateParser(new SimpleDateFormat(UnicodeString(pat), errorCode));
if (U_FAILURE(errorCode)) {
return 0;
}
return dateParser->parse(sourceStr, errorCode);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  1. dateParser->parse() is a const method, which mean it could be shared across thread, right?
  2. there are only total 4 calls into tryPattern() in this method

tryPattern("YYYY-MM-dd'T'HH:mm:ss"
tryPattern("YYYY-MM-dd"
tryPattern("YYYY-MM-dd'T'HH:mm:ssZ"
tryPattern("YYYY-MM-dd'T'HH:mm:sszzzz"

So there are total 4 possible pat , right?
Currently, the creation of the DateFormat is inside the tryPattern()
Everytime the tryPattern() got called, the code need to create a SimpleDateFormat and later destroy it
but there are only total 4 possible SimpleDateFormat needed to be created and the usage are thread safe (see my point 1 above)
So... should we have an lazy initialization routine which create these four SimpleDateFormat and cache it globally, and reuse them later. The creation need to be protected to be init once and therefore mutex to prevent different threads create them concurrently, but then we can reuse them across thread and destroy upon lib clean up.

In this way, we do not need to repeatly create and destroy the SimpleDateFormat

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, that seems reasonable. I'll try it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, done in 8d4da42. I'm not sure of how to unit-test this (i.e. testing that the DateFormats really are initialized only once), but I'll look for examples in the test suite.

}

static UDate tryPatterns(const UnicodeString& sourceStr, UErrorCode& errorCode) {
if (U_FAILURE(errorCode)) {
return 0;
}
UDate d = tryPattern("YYYY-MM-dd'T'HH:mm:ss", sourceStr, errorCode);
catamorphism marked this conversation as resolved.
Show resolved Hide resolved
if (U_FAILURE(errorCode)) {
errorCode = U_ZERO_ERROR;
d = tryPattern("YYYY-MM-dd", sourceStr, errorCode);
}
return d;
}

static UDate tryTimeZonePatterns(const UnicodeString& sourceStr, UErrorCode& errorCode) {
if (U_FAILURE(errorCode)) {
return 0;
}
UDate d = tryPattern("YYYY-MM-dd'T'HH:mm:sszzzz", sourceStr, errorCode);
catamorphism marked this conversation as resolved.
Show resolved Hide resolved
if (U_FAILURE(errorCode)) {
errorCode = U_ZERO_ERROR;
d = tryPattern("YYYY-MM-dd'T'HH:mm:ssZ", sourceStr, errorCode);
}
return d;
}

// Parse a string like [+-]nn:nn to a time zone
SimpleTimeZone* createTimeZonePart(const UnicodeString& offsetStr, UErrorCode& errorCode) {
catamorphism marked this conversation as resolved.
Show resolved Hide resolved
NULL_ON_ERROR(errorCode);

int32_t len = offsetStr.length();

// `offsetStr` has a format like +03:30 or -03:30
if (len <= 2) {
errorCode = U_ILLEGAL_ARGUMENT_ERROR;
return nullptr;
}

int32_t sign = offsetStr.charAt(0) == '-' ? -1 : 1;
int32_t indexOfColon = offsetStr.indexOf(':');
if (indexOfColon <= 2) {
errorCode = U_ILLEGAL_ARGUMENT_ERROR;
return nullptr;
}
// Split string into hour and minute parts and parse each part as a double
UnicodeString tzPart1 = offsetStr.tempSubString(1, indexOfColon);
UnicodeString tzPart2 = offsetStr.tempSubString(indexOfColon + 1, len);
double tzHour, tzMin;
strToDouble(tzPart1, tzHour, errorCode);
strToDouble(tzPart2, tzMin, errorCode);
if (U_FAILURE(errorCode)) {
errorCode = U_ILLEGAL_ARGUMENT_ERROR;
return nullptr;
}
// Create integer offset from parsed parts
int32_t offset = sign * (static_cast<int32_t>(tzHour) * 60 + static_cast<int32_t>(tzMin)) * 60 * 1000;
LocalPointer<SimpleTimeZone> tz(new SimpleTimeZone(offset, UnicodeString("GMT") + offsetStr));
if (!tz.isValid()) {
errorCode = U_MEMORY_ALLOCATION_ERROR;
}
return tz.orphan();
}

static bool hasTzOffset(const UnicodeString& sourceStr) {
int32_t len = sourceStr.length();

if (len <= 6) {
return false;
}
return ((sourceStr[len - 6] == PLUS || sourceStr[len - 6] == HYPHEN)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add a comment in the beginning of this function to explain what is the expected input value and the output

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done in 4a7da29

&& sourceStr[len - 3] == COLON);
}

// Note: `calendar` option to :datetime not implemented yet;
// Gregorian calendar is assumed
static DateInfo createDateInfoFromString(const UnicodeString& sourceStr, UErrorCode& errorCode) {
if (U_FAILURE(errorCode)) {
return {};
}

UDate absoluteDate;

// Check if the string has a time zone part
int32_t indexOfZ = sourceStr.indexOf('Z');
int32_t indexOfPlus = sourceStr.lastIndexOf('+');
int32_t indexOfMinus = sourceStr.lastIndexOf('-');
int32_t indexOfSign = indexOfPlus > -1 ? indexOfPlus : indexOfMinus;
bool isTzOffset = hasTzOffset(sourceStr);
bool isGMT = indexOfZ > 0;
UnicodeString offsetPart;
bool hasTimeZone = isTzOffset || isGMT;

if (!hasTimeZone) {
// No time zone; parse the date and time
absoluteDate = tryPatterns(sourceStr, errorCode);
} else {
// Try to split into time zone and non-time-zone parts
UnicodeString dateTimePart;
if (isGMT) {
dateTimePart = sourceStr.tempSubString(0, indexOfZ);
} else {
dateTimePart = sourceStr.tempSubString(0, indexOfSign);
}
// Parse the date from the date/time part
tryPatterns(dateTimePart, errorCode);
// Failure -- can't parse this string
if (U_FAILURE(errorCode)) {
return {};
}
// Success -- now parse the time zone part
if (isGMT) {
dateTimePart += UnicodeString("GMT");
absoluteDate = tryTimeZonePatterns(dateTimePart, errorCode);
} else {
// Try to parse time zone in offset format: [+-]nn:nn
absoluteDate = tryTimeZonePatterns(sourceStr, errorCode);
offsetPart = sourceStr.tempSubString(indexOfSign, sourceStr.length());
}
}

// If the time zone was provided, get its canonical ID,
// in order to return it in the DateInfo
UnicodeString canonicalID;
if (hasTimeZone) {
UnicodeString tzID("GMT");
if (!isGMT) {
tzID += offsetPart;
}
TimeZone::getCanonicalID(tzID, canonicalID, errorCode);
}

// Empty string for Gregorian calendar (default);
// `:datetime` `calendar` option not implemented yet,
// so other calendars aren't implemented
return { absoluteDate, canonicalID, {} };
}

FormattedPlaceholder StandardFunctions::DateTime::format(FormattedPlaceholder&& toFormat,
FunctionOptions&& opts,
UErrorCode& errorCode) const {
Expand Down Expand Up @@ -1093,39 +1237,28 @@ FormattedPlaceholder StandardFunctions::DateTime::format(FormattedPlaceholder&&
case UFMT_STRING: {
const UnicodeString& sourceStr = source.getString(errorCode);
U_ASSERT(U_SUCCESS(errorCode));
// Pattern for ISO 8601 format - datetime
UnicodeString pattern("YYYY-MM-dd'T'HH:mm:ss");
LocalPointer<DateFormat> dateParser(new SimpleDateFormat(pattern, errorCode));

DateInfo dateInfo = createDateInfoFromString(sourceStr, errorCode);
if (U_FAILURE(errorCode)) {
errorCode = U_MF_FORMATTING_ERROR;
} else {
// Parse the date
UDate d = dateParser->parse(sourceStr, errorCode);
if (U_FAILURE(errorCode)) {
// Pattern for ISO 8601 format - date
UnicodeString pattern("YYYY-MM-dd");
errorCode = U_ZERO_ERROR;
dateParser.adoptInstead(new SimpleDateFormat(pattern, errorCode));
if (U_FAILURE(errorCode)) {
errorCode = U_MF_FORMATTING_ERROR;
} else {
d = dateParser->parse(sourceStr, errorCode);
if (U_FAILURE(errorCode)) {
errorCode = U_MF_OPERAND_MISMATCH_ERROR;
}
}
}
// Use the parsed date as the source value
// in the returned FormattedPlaceholder; this is necessary
// so the date can be re-formatted
toFormat = FormattedPlaceholder(message2::Formattable::forDate(d),
toFormat.getFallback());
df->format(d, result, 0, errorCode);
errorCode = U_MF_OPERAND_MISMATCH_ERROR;
return {};
}
df->adoptTimeZone(dateInfo.createTimeZone(errorCode));

// Use the parsed date as the source value
// in the returned FormattedPlaceholder; this is necessary
// so the date can be re-formatted
df->format(dateInfo.date, result, 0, errorCode);
toFormat = FormattedPlaceholder(message2::Formattable(std::move(dateInfo)),
toFormat.getFallback());
break;
}
case UFMT_DATE: {
df->format(source.asICUFormattable(errorCode), result, 0, errorCode);
const DateInfo* dateInfo = source.getDate(errorCode);
U_ASSERT(U_SUCCESS(errorCode));

df->adoptTimeZone(dateInfo->createTimeZone(errorCode));
df->format(dateInfo->date, result, 0, errorCode);
if (U_FAILURE(errorCode)) {
if (errorCode == U_ILLEGAL_ARGUMENT_ERROR) {
errorCode = U_MF_OPERAND_MISMATCH_ERROR;
Expand Down
Loading