From 8fd8ec11e3969b8a6c90e8cfba28421d7174ad7c Mon Sep 17 00:00:00 2001 From: NEUpanning Date: Mon, 30 Sep 2024 14:51:15 +0800 Subject: [PATCH 01/14] Invalid format return null --- .../sparksql/tests/DateTimeFunctionsTest.cpp | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/velox/functions/sparksql/tests/DateTimeFunctionsTest.cpp b/velox/functions/sparksql/tests/DateTimeFunctionsTest.cpp index c462758694f5..b4fe9bc7d164 100644 --- a/velox/functions/sparksql/tests/DateTimeFunctionsTest.cpp +++ b/velox/functions/sparksql/tests/DateTimeFunctionsTest.cpp @@ -51,6 +51,12 @@ class DateTimeFunctionsTest : public SparkFunctionBaseTest { }); } + void enableLegacyFormatter() { + queryCtx_->testingOverrideConfigUnsafe({ + {core::QueryConfig::kSparkLegacyDateFormatter, "true"}, + }); + } + template std::optional evaluateDateFuncOnce( const std::string& expr, @@ -262,6 +268,9 @@ TEST_F(DateTimeFunctionsTest, unixTimestamp) { EXPECT_EQ(std::nullopt, unixTimestamp("00:00:00")); EXPECT_EQ(std::nullopt, unixTimestamp("")); EXPECT_EQ(std::nullopt, unixTimestamp("malformed input")); + enableLegacyFormatter(); + EXPECT_EQ(std::nullopt, unixTimestamp("")); + EXPECT_EQ(std::nullopt, unixTimestamp("malformed input")); } TEST_F(DateTimeFunctionsTest, unixTimestampCurrent) { @@ -803,6 +812,13 @@ TEST_F(DateTimeFunctionsTest, getTimestamp) { VELOX_ASSERT_THROW( getTimestamp("2023-07-13 21:34", "yyyy-MM-dd HH:II"), "Specifier I is not supported"); + + // Invalid format returns null for legacy date formatter. + enableLegacyFormatter(); + // Empty format. + EXPECT_EQ(getTimestamp("0", ""), std::nullopt); + // Unsupported specifier. + EXPECT_EQ(getTimestamp("0", "l"), std::nullopt); } TEST_F(DateTimeFunctionsTest, hour) { @@ -926,6 +942,13 @@ TEST_F(DateTimeFunctionsTest, fromUnixtime) { fromUnixTime(0, "FF/MM/dd"), "Specifier F is not supported"); VELOX_ASSERT_THROW( fromUnixTime(0, "yyyy-MM-dd HH:II"), "Specifier I is not supported"); + + // Invalid format returns null for legacy date formatter. + enableLegacyFormatter(); + // Empty format. + EXPECT_EQ(fromUnixTime(0, ""), std::nullopt); + // Unsupported specifier. + EXPECT_EQ(fromUnixTime(0, "l"), std::nullopt); } TEST_F(DateTimeFunctionsTest, makeYMInterval) { From 82110aa36e04f78b7038d4d5cfb3e4790c565988 Mon Sep 17 00:00:00 2001 From: NEUpanning Date: Tue, 15 Oct 2024 11:40:13 +0800 Subject: [PATCH 02/14] update --- velox/functions/sparksql/DateTimeFunctions.h | 84 ++++++++++++++------ 1 file changed, 58 insertions(+), 26 deletions(-) diff --git a/velox/functions/sparksql/DateTimeFunctions.h b/velox/functions/sparksql/DateTimeFunctions.h index e0d44130b8e1..4fddc26b24fa 100644 --- a/velox/functions/sparksql/DateTimeFunctions.h +++ b/velox/functions/sparksql/DateTimeFunctions.h @@ -245,17 +245,24 @@ struct FromUnixtimeFunction { legacyFormatter_ = config.sparkLegacyDateFormatter(); sessionTimeZone_ = getTimeZoneFromConfig(config); if (format != nullptr) { - setFormatter(*format); + if (!setFormatter(*format)) { + invalidFormat_ = true; + } isConstantTimeFormat_ = true; } } - FOLLY_ALWAYS_INLINE void call( + FOLLY_ALWAYS_INLINE bool call( out_type& result, const arg_type& second, const arg_type& format) { + if (invalidFormat_) { + return false; + } if (!isConstantTimeFormat_) { - setFormatter(format); + if (!setFormatter(format)) { + return false; + } } const Timestamp timestamp{second, 0}; result.reserve(maxResultSize_); @@ -263,18 +270,26 @@ struct FromUnixtimeFunction { resultSize = formatter_->format( timestamp, sessionTimeZone_, maxResultSize_, result.data(), true); result.resize(resultSize); + return true; } private: - FOLLY_ALWAYS_INLINE void setFormatter(const arg_type& format) { - formatter_ = detail::getDateTimeFormatter( - std::string_view(format.data(), format.size()), - legacyFormatter_ ? DateTimeFormatterType::STRICT_SIMPLE - : DateTimeFormatterType::JODA) - .thenOrThrow(folly::identity, [&](const Status& status) { - VELOX_USER_FAIL("{}", status.message()); - }); + FOLLY_ALWAYS_INLINE bool setFormatter(const arg_type& format) { + auto formatter = detail::getDateTimeFormatter( + std::string_view(format.data(), format.size()), + legacyFormatter_ ? DateTimeFormatterType::STRICT_SIMPLE + : DateTimeFormatterType::JODA); + // Legacy formatter returns null for invalid format but Joda formatter + // throws user error. + if (!legacyFormatter_ && formatter.hasError()) { + VELOX_USER_FAIL(formatter.error().message()); + } + if (formatter.hasError()) { + return false; + } + formatter_ = formatter.value(); maxResultSize_ = formatter_->maxResultSize(sessionTimeZone_); + return true; } const tz::TimeZone* sessionTimeZone_{nullptr}; @@ -282,6 +297,7 @@ struct FromUnixtimeFunction { uint32_t maxResultSize_; bool isConstantTimeFormat_{false}; bool legacyFormatter_{false}; + bool invalidFormat_{false}; }; template @@ -361,14 +377,20 @@ struct GetTimestampFunction { sessionTimeZone_ = tz::locateZone(sessionTimezoneName); } if (format != nullptr) { - formatter_ = detail::getDateTimeFormatter( - std::string_view(*format), - legacyFormatter_ ? DateTimeFormatterType::STRICT_SIMPLE - : DateTimeFormatterType::JODA) - .thenOrThrow(folly::identity, [&](const Status& status) { - VELOX_USER_FAIL("{}", status.message()); - }); - isConstantTimeFormat_ = true; + auto formatter = detail::getDateTimeFormatter( + std::string_view(*format), + legacyFormatter_ ? DateTimeFormatterType::STRICT_SIMPLE + : DateTimeFormatterType::JODA); + // Legacy formatter returns null for invalid format but Joda formatter + // throws user error. + if (!legacyFormatter_ && formatter.hasError()) { + VELOX_USER_FAIL(formatter.error().message()); + } + if (formatter.hasError()) { + invalidFormat_ = true; + } else { + formatter_ = formatter.value(); + } } } @@ -376,14 +398,23 @@ struct GetTimestampFunction { out_type& result, const arg_type& input, const arg_type& format) { + if (invalidFormat_) { + return false; + } if (!isConstantTimeFormat_) { - formatter_ = detail::getDateTimeFormatter( - std::string_view(format), - legacyFormatter_ ? DateTimeFormatterType::STRICT_SIMPLE - : DateTimeFormatterType::JODA) - .thenOrThrow(folly::identity, [&](const Status& status) { - VELOX_USER_FAIL("{}", status.message()); - }); + auto formatter = detail::getDateTimeFormatter( + std::string_view(format), + legacyFormatter_ ? DateTimeFormatterType::STRICT_SIMPLE + : DateTimeFormatterType::JODA); + // Legacy formatter returns null for invalid format but Joda formatter + // throws user error. + if (!legacyFormatter_ && formatter.hasError()) { + VELOX_USER_FAIL(formatter.error().message()); + } + if (formatter.hasError()) { + return false; + } + formatter_ = formatter.value(); } auto dateTimeResult = formatter_->parse(std::string_view(input)); // Null as result for parsing error. @@ -406,6 +437,7 @@ struct GetTimestampFunction { bool isConstantTimeFormat_{false}; const tz::TimeZone* sessionTimeZone_{tz::locateZone(0)}; // default to GMT. bool legacyFormatter_{false}; + bool invalidFormat_{false}; }; template From 3fbd98deb80231ecfe1879f62f227bb9a1baa887 Mon Sep 17 00:00:00 2001 From: NEUpanning Date: Wed, 23 Oct 2024 15:55:08 +0800 Subject: [PATCH 03/14] update --- velox/functions/sparksql/DateTimeFunctions.h | 25 ++++++++++---------- 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/velox/functions/sparksql/DateTimeFunctions.h b/velox/functions/sparksql/DateTimeFunctions.h index 4fddc26b24fa..d3b84d7d203d 100644 --- a/velox/functions/sparksql/DateTimeFunctions.h +++ b/velox/functions/sparksql/DateTimeFunctions.h @@ -281,11 +281,11 @@ struct FromUnixtimeFunction { : DateTimeFormatterType::JODA); // Legacy formatter returns null for invalid format but Joda formatter // throws user error. - if (!legacyFormatter_ && formatter.hasError()) { - VELOX_USER_FAIL(formatter.error().message()); - } if (formatter.hasError()) { - return false; + if (legacyFormatter_) { + return false; + } + VELOX_USER_FAIL(formatter.error().message()); } formatter_ = formatter.value(); maxResultSize_ = formatter_->maxResultSize(sessionTimeZone_); @@ -383,11 +383,12 @@ struct GetTimestampFunction { : DateTimeFormatterType::JODA); // Legacy formatter returns null for invalid format but Joda formatter // throws user error. - if (!legacyFormatter_ && formatter.hasError()) { - VELOX_USER_FAIL(formatter.error().message()); - } if (formatter.hasError()) { - invalidFormat_ = true; + if (legacyFormatter_) { + invalidFormat_ = true; + } else { + VELOX_USER_FAIL(formatter.error().message()); + } } else { formatter_ = formatter.value(); } @@ -408,11 +409,11 @@ struct GetTimestampFunction { : DateTimeFormatterType::JODA); // Legacy formatter returns null for invalid format but Joda formatter // throws user error. - if (!legacyFormatter_ && formatter.hasError()) { - VELOX_USER_FAIL(formatter.error().message()); - } if (formatter.hasError()) { - return false; + if (legacyFormatter_) { + return false; + } + VELOX_USER_FAIL(formatter.error().message()); } formatter_ = formatter.value(); } From 2c9aeb6a244cb7906f7d67f0516b812d7d98beeb Mon Sep 17 00:00:00 2001 From: NEUpanning Date: Mon, 28 Oct 2024 11:43:55 +0800 Subject: [PATCH 04/14] update --- velox/functions/sparksql/DateTimeFunctions.h | 55 ++++++++++---------- 1 file changed, 28 insertions(+), 27 deletions(-) diff --git a/velox/functions/sparksql/DateTimeFunctions.h b/velox/functions/sparksql/DateTimeFunctions.h index d3b84d7d203d..01453fdcd385 100644 --- a/velox/functions/sparksql/DateTimeFunctions.h +++ b/velox/functions/sparksql/DateTimeFunctions.h @@ -274,6 +274,14 @@ struct FromUnixtimeFunction { } private: + /// Initialize `formatter_` based on the provided format string. It determines + /// formatter type based on the `legacyFormatter_` flag. The legacy formatter + /// returns false for an invalid format, while the Joda formatter throws a + /// velox user error. + /// + /// @param format The format string to be used for initializing the formatter. + /// @return True if the formatter was successfully set; false if using the + /// legacy formatter and an invalid format was provided. FOLLY_ALWAYS_INLINE bool setFormatter(const arg_type& format) { auto formatter = detail::getDateTimeFormatter( std::string_view(format.data(), format.size()), @@ -377,21 +385,7 @@ struct GetTimestampFunction { sessionTimeZone_ = tz::locateZone(sessionTimezoneName); } if (format != nullptr) { - auto formatter = detail::getDateTimeFormatter( - std::string_view(*format), - legacyFormatter_ ? DateTimeFormatterType::STRICT_SIMPLE - : DateTimeFormatterType::JODA); - // Legacy formatter returns null for invalid format but Joda formatter - // throws user error. - if (formatter.hasError()) { - if (legacyFormatter_) { - invalidFormat_ = true; - } else { - VELOX_USER_FAIL(formatter.error().message()); - } - } else { - formatter_ = formatter.value(); - } + invalidFormat_ = !initializeFormatter(std::string_view(*format)); } } @@ -403,19 +397,9 @@ struct GetTimestampFunction { return false; } if (!isConstantTimeFormat_) { - auto formatter = detail::getDateTimeFormatter( - std::string_view(format), - legacyFormatter_ ? DateTimeFormatterType::STRICT_SIMPLE - : DateTimeFormatterType::JODA); - // Legacy formatter returns null for invalid format but Joda formatter - // throws user error. - if (formatter.hasError()) { - if (legacyFormatter_) { - return false; - } - VELOX_USER_FAIL(formatter.error().message()); + if (!initializeFormatter(std::string_view(format))) { + return false; } - formatter_ = formatter.value(); } auto dateTimeResult = formatter_->parse(std::string_view(input)); // Null as result for parsing error. @@ -434,6 +418,23 @@ struct GetTimestampFunction { return result.timezone ? result.timezone : sessionTimeZone_; } + bool initializeFormatter(const std::string_view& format) { + auto formatter = detail::getDateTimeFormatter( + std::string_view(format), + legacyFormatter_ ? DateTimeFormatterType::STRICT_SIMPLE + : DateTimeFormatterType::JODA); + // Legacy formatter returns null for invalid format but Joda formatter + // throws user error. + if (formatter.hasError()) { + if (legacyFormatter_) { + return false; + } + VELOX_USER_FAIL(formatter.error().message()); + } + formatter_ = formatter.value(); + return true; + } + std::shared_ptr formatter_{nullptr}; bool isConstantTimeFormat_{false}; const tz::TimeZone* sessionTimeZone_{tz::locateZone(0)}; // default to GMT. From 17e252115e35cac1ab5856bec5fec0a7b6938ef4 Mon Sep 17 00:00:00 2001 From: NEUpanning Date: Tue, 29 Oct 2024 11:11:59 +0800 Subject: [PATCH 05/14] update --- velox/functions/sparksql/DateTimeFunctions.h | 98 ++++++++++---------- 1 file changed, 51 insertions(+), 47 deletions(-) diff --git a/velox/functions/sparksql/DateTimeFunctions.h b/velox/functions/sparksql/DateTimeFunctions.h index 01453fdcd385..dd8fc43b596b 100644 --- a/velox/functions/sparksql/DateTimeFunctions.h +++ b/velox/functions/sparksql/DateTimeFunctions.h @@ -39,6 +39,33 @@ Expected> getDateTimeFormatter( std::string_view(format.data(), format.size())); } } + +// Initialize `formatter_` based on the provided format string. It determines +// formatter type based on the `legacyFormatter` flag. The legacy formatter +// returns false for an invalid format, while the Joda formatter throws a +// Velox user error. +// +// @param format The format string to be used for initializing the formatter. +// @param legacyFormatter Determines formatter type. +// @return True if the formatter was successfully set; false if using the +// legacy formatter and an invalid format was provided. +std::optional> initializeFormatter( + const std::string_view& format, + bool legacyFormatter) { + auto formatter = detail::getDateTimeFormatter( + std::string_view(format), + legacyFormatter ? DateTimeFormatterType::STRICT_SIMPLE + : DateTimeFormatterType::JODA); + // Legacy formatter returns null for invalid format but Joda formatter + // throws user error. + if (formatter.hasError()) { + if (legacyFormatter) { + return std::nullopt; + } + VELOX_USER_FAIL(formatter.error().message()); + } + return formatter.value(); +} } // namespace detail template @@ -245,7 +272,12 @@ struct FromUnixtimeFunction { legacyFormatter_ = config.sparkLegacyDateFormatter(); sessionTimeZone_ = getTimeZoneFromConfig(config); if (format != nullptr) { - if (!setFormatter(*format)) { + auto formatter = detail::initializeFormatter( + std::string_view(*format), legacyFormatter_); + if (formatter.has_value()) { + formatter_ = formatter.value(); + maxResultSize_ = formatter_->maxResultSize(sessionTimeZone_); + } else { invalidFormat_ = true; } isConstantTimeFormat_ = true; @@ -260,7 +292,12 @@ struct FromUnixtimeFunction { return false; } if (!isConstantTimeFormat_) { - if (!setFormatter(format)) { + auto formatter = detail::initializeFormatter( + std::string_view(format), legacyFormatter_); + if (formatter.has_value()) { + formatter_ = formatter.value(); + maxResultSize_ = formatter_->maxResultSize(sessionTimeZone_); + } else { return false; } } @@ -274,32 +311,6 @@ struct FromUnixtimeFunction { } private: - /// Initialize `formatter_` based on the provided format string. It determines - /// formatter type based on the `legacyFormatter_` flag. The legacy formatter - /// returns false for an invalid format, while the Joda formatter throws a - /// velox user error. - /// - /// @param format The format string to be used for initializing the formatter. - /// @return True if the formatter was successfully set; false if using the - /// legacy formatter and an invalid format was provided. - FOLLY_ALWAYS_INLINE bool setFormatter(const arg_type& format) { - auto formatter = detail::getDateTimeFormatter( - std::string_view(format.data(), format.size()), - legacyFormatter_ ? DateTimeFormatterType::STRICT_SIMPLE - : DateTimeFormatterType::JODA); - // Legacy formatter returns null for invalid format but Joda formatter - // throws user error. - if (formatter.hasError()) { - if (legacyFormatter_) { - return false; - } - VELOX_USER_FAIL(formatter.error().message()); - } - formatter_ = formatter.value(); - maxResultSize_ = formatter_->maxResultSize(sessionTimeZone_); - return true; - } - const tz::TimeZone* sessionTimeZone_{nullptr}; std::shared_ptr formatter_; uint32_t maxResultSize_; @@ -385,7 +396,13 @@ struct GetTimestampFunction { sessionTimeZone_ = tz::locateZone(sessionTimezoneName); } if (format != nullptr) { - invalidFormat_ = !initializeFormatter(std::string_view(*format)); + auto formatter = detail::initializeFormatter( + std::string_view(*format), legacyFormatter_); + if (formatter.has_value()) { + formatter_ = formatter.value(); + } else { + invalidFormat_ = true; + } } } @@ -397,7 +414,11 @@ struct GetTimestampFunction { return false; } if (!isConstantTimeFormat_) { - if (!initializeFormatter(std::string_view(format))) { + auto formatter = detail::initializeFormatter( + std::string_view(format), legacyFormatter_); + if (formatter.has_value()) { + formatter_ = formatter.value(); + } else { return false; } } @@ -418,23 +439,6 @@ struct GetTimestampFunction { return result.timezone ? result.timezone : sessionTimeZone_; } - bool initializeFormatter(const std::string_view& format) { - auto formatter = detail::getDateTimeFormatter( - std::string_view(format), - legacyFormatter_ ? DateTimeFormatterType::STRICT_SIMPLE - : DateTimeFormatterType::JODA); - // Legacy formatter returns null for invalid format but Joda formatter - // throws user error. - if (formatter.hasError()) { - if (legacyFormatter_) { - return false; - } - VELOX_USER_FAIL(formatter.error().message()); - } - formatter_ = formatter.value(); - return true; - } - std::shared_ptr formatter_{nullptr}; bool isConstantTimeFormat_{false}; const tz::TimeZone* sessionTimeZone_{tz::locateZone(0)}; // default to GMT. From ebb0c8ffb6ae4e44059aa7dc5df119328fd7e249 Mon Sep 17 00:00:00 2001 From: NEUpanning Date: Tue, 29 Oct 2024 11:16:51 +0800 Subject: [PATCH 06/14] update --- velox/functions/sparksql/DateTimeFunctions.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/velox/functions/sparksql/DateTimeFunctions.h b/velox/functions/sparksql/DateTimeFunctions.h index dd8fc43b596b..15393bc381d9 100644 --- a/velox/functions/sparksql/DateTimeFunctions.h +++ b/velox/functions/sparksql/DateTimeFunctions.h @@ -47,8 +47,9 @@ Expected> getDateTimeFormatter( // // @param format The format string to be used for initializing the formatter. // @param legacyFormatter Determines formatter type. -// @return True if the formatter was successfully set; false if using the -// legacy formatter and an invalid format was provided. +// @return An optional shared pointer to a DateTimeFormatter. If the formatter +// initialization fails and the legacy formatter is used, std::nullopt is +// returned. std::optional> initializeFormatter( const std::string_view& format, bool legacyFormatter) { From 08ae77f77e6ff7c256b765318d63c476f58f65a8 Mon Sep 17 00:00:00 2001 From: NEUpanning Date: Tue, 29 Oct 2024 15:29:01 +0800 Subject: [PATCH 07/14] update --- velox/functions/sparksql/tests/DateTimeFunctionsTest.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/velox/functions/sparksql/tests/DateTimeFunctionsTest.cpp b/velox/functions/sparksql/tests/DateTimeFunctionsTest.cpp index b4fe9bc7d164..acdb7dad4018 100644 --- a/velox/functions/sparksql/tests/DateTimeFunctionsTest.cpp +++ b/velox/functions/sparksql/tests/DateTimeFunctionsTest.cpp @@ -813,7 +813,8 @@ TEST_F(DateTimeFunctionsTest, getTimestamp) { getTimestamp("2023-07-13 21:34", "yyyy-MM-dd HH:II"), "Specifier I is not supported"); - // Invalid format returns null for legacy date formatter. + // Returns null for invalid datetime format when legacy date formatter is + // used. enableLegacyFormatter(); // Empty format. EXPECT_EQ(getTimestamp("0", ""), std::nullopt); @@ -943,7 +944,8 @@ TEST_F(DateTimeFunctionsTest, fromUnixtime) { VELOX_ASSERT_THROW( fromUnixTime(0, "yyyy-MM-dd HH:II"), "Specifier I is not supported"); - // Invalid format returns null for legacy date formatter. + // Returns null for invalid datetime format when legacy date formatter is + // used. enableLegacyFormatter(); // Empty format. EXPECT_EQ(fromUnixTime(0, ""), std::nullopt); From 28a8e105f00b0d5509c97524b67dab17792a7e6c Mon Sep 17 00:00:00 2001 From: NEUpanning Date: Tue, 29 Oct 2024 16:51:03 +0800 Subject: [PATCH 08/14] update --- velox/functions/sparksql/DateTimeFunctions.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/velox/functions/sparksql/DateTimeFunctions.h b/velox/functions/sparksql/DateTimeFunctions.h index 15393bc381d9..b539a2db18f5 100644 --- a/velox/functions/sparksql/DateTimeFunctions.h +++ b/velox/functions/sparksql/DateTimeFunctions.h @@ -50,7 +50,7 @@ Expected> getDateTimeFormatter( // @return An optional shared pointer to a DateTimeFormatter. If the formatter // initialization fails and the legacy formatter is used, std::nullopt is // returned. -std::optional> initializeFormatter( +std::shared_ptr initializeFormatter( const std::string_view& format, bool legacyFormatter) { auto formatter = detail::getDateTimeFormatter( @@ -61,7 +61,7 @@ std::optional> initializeFormatter( // throws user error. if (formatter.hasError()) { if (legacyFormatter) { - return std::nullopt; + return nullptr; } VELOX_USER_FAIL(formatter.error().message()); } @@ -275,8 +275,8 @@ struct FromUnixtimeFunction { if (format != nullptr) { auto formatter = detail::initializeFormatter( std::string_view(*format), legacyFormatter_); - if (formatter.has_value()) { - formatter_ = formatter.value(); + if (formatter) { + formatter_ = formatter; maxResultSize_ = formatter_->maxResultSize(sessionTimeZone_); } else { invalidFormat_ = true; @@ -295,8 +295,8 @@ struct FromUnixtimeFunction { if (!isConstantTimeFormat_) { auto formatter = detail::initializeFormatter( std::string_view(format), legacyFormatter_); - if (formatter.has_value()) { - formatter_ = formatter.value(); + if (formatter) { + formatter_ = formatter; maxResultSize_ = formatter_->maxResultSize(sessionTimeZone_); } else { return false; @@ -399,8 +399,8 @@ struct GetTimestampFunction { if (format != nullptr) { auto formatter = detail::initializeFormatter( std::string_view(*format), legacyFormatter_); - if (formatter.has_value()) { - formatter_ = formatter.value(); + if (formatter) { + formatter_ = formatter; } else { invalidFormat_ = true; } @@ -417,8 +417,8 @@ struct GetTimestampFunction { if (!isConstantTimeFormat_) { auto formatter = detail::initializeFormatter( std::string_view(format), legacyFormatter_); - if (formatter.has_value()) { - formatter_ = formatter.value(); + if (formatter) { + formatter_ = formatter; } else { return false; } From a89c2d49f04f797001215df21293f8dbdee28e84 Mon Sep 17 00:00:00 2001 From: NEUpanning Date: Wed, 30 Oct 2024 12:03:45 +0800 Subject: [PATCH 09/14] update cmt --- velox/functions/sparksql/DateTimeFunctions.h | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/velox/functions/sparksql/DateTimeFunctions.h b/velox/functions/sparksql/DateTimeFunctions.h index b539a2db18f5..11aa52fc708e 100644 --- a/velox/functions/sparksql/DateTimeFunctions.h +++ b/velox/functions/sparksql/DateTimeFunctions.h @@ -40,16 +40,14 @@ Expected> getDateTimeFormatter( } } -// Initialize `formatter_` based on the provided format string. It determines -// formatter type based on the `legacyFormatter` flag. The legacy formatter -// returns false for an invalid format, while the Joda formatter throws a -// Velox user error. +// Creates datetime formatter based on the provided format string. When legacy +// formatter is used, returns nullptr for invalid format; otherwise, throws a +// user error. // // @param format The format string to be used for initializing the formatter. -// @param legacyFormatter Determines formatter type. -// @return An optional shared pointer to a DateTimeFormatter. If the formatter -// initialization fails and the legacy formatter is used, std::nullopt is -// returned. +// @param legacyFormatter Whether legacy formatter is used. +// @return A shared pointer to a DateTimeFormatter. If the formatter +// initialization fails and the legacy formatter is used, nullptr is returned. std::shared_ptr initializeFormatter( const std::string_view& format, bool legacyFormatter) { @@ -57,8 +55,8 @@ std::shared_ptr initializeFormatter( std::string_view(format), legacyFormatter ? DateTimeFormatterType::STRICT_SIMPLE : DateTimeFormatterType::JODA); - // Legacy formatter returns null for invalid format but Joda formatter - // throws user error. + // When legacy formatter is used, returns nullptr for invalid format; + // otherwise, throws a user error. if (formatter.hasError()) { if (legacyFormatter) { return nullptr; From 1431976c0dd9fced66d1b9fce2c98c8d2cbb7039 Mon Sep 17 00:00:00 2001 From: NEUpanning Date: Tue, 5 Nov 2024 11:20:12 +0800 Subject: [PATCH 10/14] add doc --- velox/docs/functions/spark/datetime.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/velox/docs/functions/spark/datetime.rst b/velox/docs/functions/spark/datetime.rst index 4b3305891dae..5423ba0306fd 100644 --- a/velox/docs/functions/spark/datetime.rst +++ b/velox/docs/functions/spark/datetime.rst @@ -86,10 +86,10 @@ These functions support TIMESTAMP and DATE input types. date formatter in lenient mode that is align with Spark legacy date parser behavior or `Joda ` date formatter depends on ``spark.legacy_date_formatter`` configuration. `Valid patterns for date format - `_. Throws exception for - invalid ``format``. This function will convert input to milliseconds, and integer overflow is - allowed in the conversion, which aligns with Spark. See the below third example where INT64_MAX - is used, -1000 milliseconds are produced by INT64_MAX * 1000 due to integer overflow. :: + `_. When `Simple` Date Formatter is used, + it returns null for invalid ``format``; if not, it throws exception. This function will convert input to + milliseconds, and integer overflow is allowed in the conversion, which aligns with Spark. See the below third + example where INT64_MAX is used, -1000 milliseconds are produced by INT64_MAX * 1000 due to integer overflow. :: SELECT from_unixtime(100, 'yyyy-MM-dd HH:mm:ss'); -- '1970-01-01 00:01:40' SELECT from_unixtime(3600, 'yyyy'); -- '1970' From 114ca06fb30ddf90cf803e4e69547dee335bfd1f Mon Sep 17 00:00:00 2001 From: NEUpanning Date: Tue, 5 Nov 2024 11:48:59 +0800 Subject: [PATCH 11/14] add doc --- velox/docs/functions/spark/datetime.rst | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/velox/docs/functions/spark/datetime.rst b/velox/docs/functions/spark/datetime.rst index 5423ba0306fd..44f32c5d39df 100644 --- a/velox/docs/functions/spark/datetime.rst +++ b/velox/docs/functions/spark/datetime.rst @@ -113,7 +113,11 @@ These functions support TIMESTAMP and DATE input types. The format follows Spark's `Datetime patterns `_. - Returns NULL for parsing error or NULL input. Throws exception for invalid format. :: + Using `Simple ` + date formatter in lenient mode that is align with Spark legacy date parser behavior or + `Joda ` date formatter depends on ``spark.legacy_date_formatter`` configuration. + Returns NULL for parsing error or NULL input. When `Simple` Date Formatter is used, + it returns null for invalid ``dateFormat``; if not, it throws exception. :: SELECT get_timestamp('1970-01-01', 'yyyy-MM-dd); -- timestamp `1970-01-01` SELECT get_timestamp('1970-01-01', 'yyyy-MM'); -- NULL (parsing error) From fc5d2dae93f07acaa3ab94812a32ef8ffac3e87a Mon Sep 17 00:00:00 2001 From: NEUpanning Date: Tue, 5 Nov 2024 16:18:31 +0800 Subject: [PATCH 12/14] update --- velox/docs/functions/spark/datetime.rst | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/velox/docs/functions/spark/datetime.rst b/velox/docs/functions/spark/datetime.rst index 44f32c5d39df..17d016b86cec 100644 --- a/velox/docs/functions/spark/datetime.rst +++ b/velox/docs/functions/spark/datetime.rst @@ -82,12 +82,12 @@ These functions support TIMESTAMP and DATE input types. Adjusts ``unixTime`` (elapsed seconds since UNIX epoch) to configured session timezone, then converts it to a formatted time string according to ``format``. Only supports BIGINT type for - ``unixTime``. Using `Simple ` + ``unixTime``. Using `Simple `_ date formatter in lenient mode that is align with Spark legacy date parser behavior or - `Joda ` date formatter depends on ``spark.legacy_date_formatter`` configuration. + `Joda `_ date formatter depends on ``spark.legacy_date_formatter`` configuration. `Valid patterns for date format - `_. When `Simple` Date Formatter is used, - it returns null for invalid ``format``; if not, it throws exception. This function will convert input to + `_. When `Simple` date formatter is used, + null is returned for invalid ``format``; otherwise, exception is thrown. This function will convert input to milliseconds, and integer overflow is allowed in the conversion, which aligns with Spark. See the below third example where INT64_MAX is used, -1000 milliseconds are produced by INT64_MAX * 1000 due to integer overflow. :: @@ -113,11 +113,11 @@ These functions support TIMESTAMP and DATE input types. The format follows Spark's `Datetime patterns `_. - Using `Simple ` + Using `Simple `_ date formatter in lenient mode that is align with Spark legacy date parser behavior or - `Joda ` date formatter depends on ``spark.legacy_date_formatter`` configuration. - Returns NULL for parsing error or NULL input. When `Simple` Date Formatter is used, - it returns null for invalid ``dateFormat``; if not, it throws exception. :: + `Joda `_ date formatter depends on ``spark.legacy_date_formatter`` configuration. + Returns NULL for parsing error or NULL input. When `Simple` date formatter is used, null is returned for invalid + ``dateFormat``; otherwise, exception is thrown. :: SELECT get_timestamp('1970-01-01', 'yyyy-MM-dd); -- timestamp `1970-01-01` SELECT get_timestamp('1970-01-01', 'yyyy-MM'); -- NULL (parsing error) @@ -292,8 +292,8 @@ These functions support TIMESTAMP and DATE input types. .. spark:function:: unix_timestamp() -> integer Returns the current UNIX timestamp in seconds. Using - `Simple ` date formatter in lenient mode - that is align with Spark legacy date parser behavior or `Joda ` date formatter + `Simple `_ date formatter in lenient mode + that is align with Spark legacy date parser behavior or `Joda `_ date formatter depends on the ``spark.legacy_date_formatter`` configuration. .. spark:function:: unix_timestamp(string) -> integer From 1119cbb91a5303fa5b38ba91081e7a4eafbe94f8 Mon Sep 17 00:00:00 2001 From: NEUpanning Date: Wed, 6 Nov 2024 10:21:38 +0800 Subject: [PATCH 13/14] update --- velox/functions/sparksql/DateTimeFunctions.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/velox/functions/sparksql/DateTimeFunctions.h b/velox/functions/sparksql/DateTimeFunctions.h index 11aa52fc708e..3cd2b7afb7ce 100644 --- a/velox/functions/sparksql/DateTimeFunctions.h +++ b/velox/functions/sparksql/DateTimeFunctions.h @@ -49,7 +49,7 @@ Expected> getDateTimeFormatter( // @return A shared pointer to a DateTimeFormatter. If the formatter // initialization fails and the legacy formatter is used, nullptr is returned. std::shared_ptr initializeFormatter( - const std::string_view& format, + const std::string_view format, bool legacyFormatter) { auto formatter = detail::getDateTimeFormatter( std::string_view(format), From 3ec615141cf8edb0c17f9d6fbec9fc6dde1579ba Mon Sep 17 00:00:00 2001 From: Jaime Pan <33685703+NEUpanning@users.noreply.github.com> Date: Thu, 7 Nov 2024 13:02:13 +0800 Subject: [PATCH 14/14] Update DateTimeFunctions.h --- velox/functions/sparksql/DateTimeFunctions.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/velox/functions/sparksql/DateTimeFunctions.h b/velox/functions/sparksql/DateTimeFunctions.h index 3cd2b7afb7ce..4fa631701d5c 100644 --- a/velox/functions/sparksql/DateTimeFunctions.h +++ b/velox/functions/sparksql/DateTimeFunctions.h @@ -48,7 +48,7 @@ Expected> getDateTimeFormatter( // @param legacyFormatter Whether legacy formatter is used. // @return A shared pointer to a DateTimeFormatter. If the formatter // initialization fails and the legacy formatter is used, nullptr is returned. -std::shared_ptr initializeFormatter( +inline std::shared_ptr initializeFormatter( const std::string_view format, bool legacyFormatter) { auto formatter = detail::getDateTimeFormatter(