From 6d4cea57ede2786baabc16ffe5a0711f4058e657 Mon Sep 17 00:00:00 2001 From: Frank Tang Date: Thu, 11 Apr 2024 11:07:06 -0700 Subject: [PATCH] ICU-22727 fix addLikelySubtags for 5-8 chars lang codes Fix C++ code, add tests for C, C++ and Java API --- icu4c/source/common/loclikely.cpp | 7 +++-- icu4c/source/test/cintltst/cloctst.c | 31 +++++++++++++++++++ icu4c/source/test/intltest/loctest.cpp | 25 +++++++++++++++ .../ibm/icu/dev/test/util/ULocaleTest.java | 20 ++++++++++++ 4 files changed, 81 insertions(+), 2 deletions(-) diff --git a/icu4c/source/common/loclikely.cpp b/icu4c/source/common/loclikely.cpp index e8fd91ca79ee..5aa929dc4b15 100644 --- a/icu4c/source/common/loclikely.cpp +++ b/icu4c/source/common/loclikely.cpp @@ -157,14 +157,17 @@ _uloc_addLikelySubtags(const char* localeID, return; } - if (lang.length() > 3) { - if (lang.length() == 4 && script.isEmpty()) { + if (lang.length() == 4) { + if (script.isEmpty()) { script = std::move(lang); lang.clear(); } else { err = U_ILLEGAL_ARGUMENT_ERROR; return; } + } else if (lang.length() > 8) { + err = U_ILLEGAL_ARGUMENT_ERROR; + return; } int32_t trailingLength = (int32_t)uprv_strlen(trailing); diff --git a/icu4c/source/test/cintltst/cloctst.c b/icu4c/source/test/cintltst/cloctst.c index 99187b70877a..77278619fee6 100644 --- a/icu4c/source/test/cintltst/cloctst.c +++ b/icu4c/source/test/cintltst/cloctst.c @@ -3867,6 +3867,27 @@ const char* const basic_maximize_data[][2] = { // so "aaaa" is a well-formed unicode_language_id "aaaa", "aaaa", + }, { + // ICU-22727 + // unicode_language_subtag = alpha{2,3} | alpha{5,8}; + // so "bbbbb", "cccccc", "ddddddd", "eeeeeeee" are + // well-formed unicode_language_subtag and therefore + // well-formed unicode_language_id + // but "fffffffff" is not. + "bbbbb", + "bbbbb", + }, { + // ICU-22727 + "cccccc", + "cccccc", + }, { + // ICU-22727 + "ddddddd", + "ddddddd", + }, { + // ICU-22727 + "eeeeeeee", + "eeeeeeee", }, { // ICU-22546 "und-Zzzz", @@ -6048,6 +6069,16 @@ const errorData maximizeErrors[] = { "en_Latn_US_POSIX@currency=EURO", U_STRING_NOT_TERMINATED_WARNING, 30 + }, + { + // ICU-22727 + // unicode_language_subtag = alpha{2,3} | alpha{5,8}; + // so "bbbbb", "cccccc", "ddddddd", "eeeeeeee" are + // well-formed unicode_language_id but "fffffffff" is not. + "fffffffff", + NULL, + U_ILLEGAL_ARGUMENT_ERROR, + 0 } }; diff --git a/icu4c/source/test/intltest/loctest.cpp b/icu4c/source/test/intltest/loctest.cpp index 50f3d7075a51..8ba259c32bf8 100644 --- a/icu4c/source/test/intltest/loctest.cpp +++ b/icu4c/source/test/intltest/loctest.cpp @@ -3915,6 +3915,31 @@ LocaleTest::TestAddLikelyAndMinimizeSubtags() { "aaaa", "aaaa", "aaaa", + }, { + // ICU-22727 + // unicode_language_subtag = alpha{2,3} | alpha{5,8}; + // so "bbbbb", "cccccc", "ddddddd", "eeeeeeee" are + // well-formed unicode_language_subtag and therefore + // well-formed unicode_language_id + // but "fffffffff" is not. + "bbbbb", + "bbbbb", + "bbbbb", + }, { + // ICU-22727 + "cccccc", + "cccccc", + "cccccc", + }, { + // ICU-22727 + "ddddddd", + "ddddddd", + "ddddddd", + }, { + // ICU-22727 + "eeeeeeee", + "eeeeeeee", + "eeeeeeee", }, { // ICU-22546 "und-Zzzz", diff --git a/icu4j/main/common_tests/src/test/java/com/ibm/icu/dev/test/util/ULocaleTest.java b/icu4j/main/common_tests/src/test/java/com/ibm/icu/dev/test/util/ULocaleTest.java index e4376270d9b1..c7a8c1c889e0 100644 --- a/icu4j/main/common_tests/src/test/java/com/ibm/icu/dev/test/util/ULocaleTest.java +++ b/icu4j/main/common_tests/src/test/java/com/ibm/icu/dev/test/util/ULocaleTest.java @@ -1925,6 +1925,26 @@ public void TestAddLikelySubtags() { // so "aaaa" is a well-formed unicode_language_id "aaaa", "aaaa", + }, { + // ICU-22727 + // unicode_language_subtag = alpha{2,3} | alpha{5,8}; + // so "bbbbb", "cccccc", "ddddddd", "eeeeeeee" are + // well-formed unicode_language_subtag and therefore + // well-formed unicode_language_id + "bbbbb", + "bbbbb", + }, { + // ICU-22727 + "cccccc", + "cccccc", + }, { + // ICU-22727 + "ddddddd", + "ddddddd", + }, { + // ICU-22727 + "eeeeeeee", + "eeeeeeee", }, { // ICU-22546 "und-Zzzz",