Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ICU-22696 Update ulocimp_to*{Key,Type}() to use std::string_view #3073

Merged: 1 commit merged on Aug 7, 2024
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 11 additions & 6 deletions icu4c/source/common/localebuilder.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
// © 2019 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html

#include <optional>
#include <string_view>
#include <utility>

#include "bytesinkutil.h" // StringByteSink<CharString>
@@ -162,12 +164,15 @@ _isKeywordValue(const char* key, const char* value, int32_t value_len)
// otherwise: unicode extension value
// We need to convert from legacy key/value to unicode
// key/value
const char* unicode_locale_key = uloc_toUnicodeLocaleKey(key);
const char* unicode_locale_type = uloc_toUnicodeLocaleType(key, value);

return unicode_locale_key && unicode_locale_type &&
ultag_isUnicodeLocaleKey(unicode_locale_key, -1) &&
ultag_isUnicodeLocaleType(unicode_locale_type, -1);
std::optional<std::string_view> unicode_locale_key = ulocimp_toBcpKeyWithFallback(key);
std::optional<std::string_view> unicode_locale_type = ulocimp_toBcpTypeWithFallback(key, value);

return unicode_locale_key.has_value() &&
unicode_locale_type.has_value() &&
ultag_isUnicodeLocaleKey(unicode_locale_key->data(),
static_cast<int32_t>(unicode_locale_key->size())) &&
ultag_isUnicodeLocaleType(unicode_locale_type->data(),
static_cast<int32_t>(unicode_locale_type->size()));
}

void
52 changes: 20 additions & 32 deletions icu4c/source/common/locid.cpp
Original file line number Diff line number Diff line change
@@ -31,6 +31,8 @@
******************************************************************************
*/

#include <optional>
#include <string_view>
#include <utility>

#include "unicode/bytestream.h"
@@ -1570,8 +1572,8 @@ AliasReplacer::replaceTransformedExtensions(
// Split the "tkey-tvalue" pair string so that we can canonicalize the tvalue.
*const_cast<char*>(tvalue++) = '\0'; // NUL terminate tkey
output.append(tfield, status).append('-', status);
const char* bcpTValue = ulocimp_toBcpType(tfield, tvalue);
output.append((bcpTValue == nullptr) ? tvalue : bcpTValue, status);
std::optional<std::string_view> bcpTValue = ulocimp_toBcpType(tfield, tvalue);
output.append(bcpTValue.has_value() ? *bcpTValue : tvalue, status);
}
}
if (U_FAILURE(status)) {
@@ -2608,33 +2610,26 @@ Locale::getUnicodeKeywordValue(StringPiece keywordName,
return;
}

// TODO: Remove the need for a const char* to a NUL terminated buffer.
const CharString keywordName_nul(keywordName, status);
if (U_FAILURE(status)) {
return;
}

const char* legacy_key = uloc_toLegacyKey(keywordName_nul.data());
if (legacy_key == nullptr) {
std::optional<std::string_view> legacy_key = ulocimp_toLegacyKeyWithFallback(keywordName);
if (!legacy_key.has_value()) {
status = U_ILLEGAL_ARGUMENT_ERROR;
return;
}

auto legacy_value = getKeywordValue<CharString>(legacy_key, status);
auto legacy_value = getKeywordValue<CharString>(*legacy_key, status);

if (U_FAILURE(status)) {
return;
}

const char* unicode_value = uloc_toUnicodeLocaleType(
keywordName_nul.data(), legacy_value.data());

if (unicode_value == nullptr) {
std::optional<std::string_view> unicode_value =
ulocimp_toBcpTypeWithFallback(keywordName, legacy_value.toStringPiece());
if (!unicode_value.has_value()) {
status = U_ILLEGAL_ARGUMENT_ERROR;
return;
}

sink.Append(unicode_value, static_cast<int32_t>(uprv_strlen(unicode_value)));
sink.Append(unicode_value->data(), static_cast<int32_t>(unicode_value->size()));
}

void
@@ -2699,32 +2694,25 @@ Locale::setUnicodeKeywordValue(StringPiece keywordName,
return;
}

// TODO: Remove the need for a const char* to a NUL terminated buffer.
const CharString keywordName_nul(keywordName, status);
const CharString keywordValue_nul(keywordValue, status);
if (U_FAILURE(status)) {
return;
}

const char* legacy_key = uloc_toLegacyKey(keywordName_nul.data());
if (legacy_key == nullptr) {
std::optional<std::string_view> legacy_key = ulocimp_toLegacyKeyWithFallback(keywordName);
if (!legacy_key.has_value()) {
status = U_ILLEGAL_ARGUMENT_ERROR;
return;
}

const char* legacy_value = nullptr;

if (!keywordValue_nul.isEmpty()) {
legacy_value =
uloc_toLegacyType(keywordName_nul.data(), keywordValue_nul.data());
std::string_view value;

if (legacy_value == nullptr) {
if (!keywordValue.empty()) {
std::optional<std::string_view> legacy_value =
ulocimp_toLegacyTypeWithFallback(keywordName, keywordValue);
if (!legacy_value.has_value()) {
status = U_ILLEGAL_ARGUMENT_ERROR;
return;
}
value = *legacy_value;
}

setKeywordValue(legacy_key, legacy_value, status);
setKeywordValue(*legacy_key, value, status);
}

const char *
84 changes: 54 additions & 30 deletions icu4c/source/common/uloc.cpp
Original file line number Diff line number Diff line change
@@ -30,6 +30,7 @@
l = lang, C = ctry, M = charmap, V = variant
*/

#include <algorithm>
#include <optional>
#include <string_view>

@@ -2291,8 +2292,17 @@ uloc_getISOCountries()
U_CAPI const char* U_EXPORT2
uloc_toUnicodeLocaleKey(const char* keyword)
{
const char* bcpKey = ulocimp_toBcpKey(keyword);
if (bcpKey == nullptr && ultag_isUnicodeLocaleKey(keyword, -1)) {
if (keyword == nullptr || *keyword == '\0') { return nullptr; }
std::optional<std::string_view> result = ulocimp_toBcpKeyWithFallback(keyword);
return result.has_value() ? result->data() : nullptr; // Known to be NUL terminated.
}

U_EXPORT std::optional<std::string_view>
ulocimp_toBcpKeyWithFallback(std::string_view keyword)
{
std::optional<std::string_view> bcpKey = ulocimp_toBcpKey(keyword);
if (!bcpKey.has_value() &&
ultag_isUnicodeLocaleKey(keyword.data(), static_cast<int32_t>(keyword.size()))) {
// unknown keyword, but syntax is fine.
return keyword;
}
@@ -2302,8 +2312,18 @@ uloc_toUnicodeLocaleKey(const char* keyword)
U_CAPI const char* U_EXPORT2
uloc_toUnicodeLocaleType(const char* keyword, const char* value)
{
const char* bcpType = ulocimp_toBcpType(keyword, value);
if (bcpType == nullptr && ultag_isUnicodeLocaleType(value, -1)) {
if (keyword == nullptr || *keyword == '\0' ||
value == nullptr || *value == '\0') { return nullptr; }
std::optional<std::string_view> result = ulocimp_toBcpTypeWithFallback(keyword, value);
return result.has_value() ? result->data() : nullptr; // Known to be NUL terminated.
}

U_EXPORT std::optional<std::string_view>
ulocimp_toBcpTypeWithFallback(std::string_view keyword, std::string_view value)
{
std::optional<std::string_view> bcpType = ulocimp_toBcpType(keyword, value);
if (!bcpType.has_value() &&
ultag_isUnicodeLocaleType(value.data(), static_cast<int32_t>(value.size()))) {
// no known mapping for this keyword/value, but the value's syntax is fine.
return value;
}
@@ -2313,65 +2333,71 @@ uloc_toUnicodeLocaleType(const char* keyword, const char* value)
namespace {

bool
isWellFormedLegacyKey(const char* legacyKey)
isWellFormedLegacyKey(std::string_view key)
{
const char* p = legacyKey;
while (*p) {
if (!UPRV_ISALPHANUM(*p)) {
return false;
}
p++;
}
return true;
return std::all_of(key.begin(), key.end(), UPRV_ISALPHANUM);
}

bool
isWellFormedLegacyType(const char* legacyType)
isWellFormedLegacyType(std::string_view legacyType)
{
const char* p = legacyType;
int32_t alphaNumLen = 0;
while (*p) {
if (*p == '_' || *p == '/' || *p == '-') {
for (char c : legacyType) {
if (c == '_' || c == '/' || c == '-') {
if (alphaNumLen == 0) {
return false;
}
alphaNumLen = 0;
} else if (UPRV_ISALPHANUM(*p)) {
} else if (UPRV_ISALPHANUM(c)) {
alphaNumLen++;
} else {
return false;
}
p++;
}
return (alphaNumLen != 0);
return alphaNumLen != 0;
}

} // namespace

U_CAPI const char* U_EXPORT2
uloc_toLegacyKey(const char* keyword)
{
const char* legacyKey = ulocimp_toLegacyKey(keyword);
if (legacyKey == nullptr) {
if (keyword == nullptr || *keyword == '\0') { return nullptr; }
std::optional<std::string_view> result = ulocimp_toLegacyKeyWithFallback(keyword);
return result.has_value() ? result->data() : nullptr; // Known to be NUL terminated.
}

U_EXPORT std::optional<std::string_view>
ulocimp_toLegacyKeyWithFallback(std::string_view keyword)
{
std::optional<std::string_view> legacyKey = ulocimp_toLegacyKey(keyword);
if (!legacyKey.has_value() && isWellFormedLegacyKey(keyword)) {
// Checks if the specified locale key is well-formed with the legacy locale syntax.
//
// Note:
// LDML/CLDR provides some definition of keyword syntax in
// * http://www.unicode.org/reports/tr35/#Unicode_locale_identifier and
// * http://www.unicode.org/reports/tr35/#Old_Locale_Extension_Syntax
// Keys can only consist of [0-9a-zA-Z].
if (isWellFormedLegacyKey(keyword)) {
return keyword;
}
return keyword;
}
return legacyKey;
}

U_CAPI const char* U_EXPORT2
uloc_toLegacyType(const char* keyword, const char* value)
{
const char* legacyType = ulocimp_toLegacyType(keyword, value);
if (legacyType == nullptr) {
if (keyword == nullptr || *keyword == '\0' ||
value == nullptr || *value == '\0') { return nullptr; }
std::optional<std::string_view> result = ulocimp_toLegacyTypeWithFallback(keyword, value);
return result.has_value() ? result->data() : nullptr; // Known to be NUL terminated.
}

U_EXPORT std::optional<std::string_view>
ulocimp_toLegacyTypeWithFallback(std::string_view keyword, std::string_view value)
{
std::optional<std::string_view> legacyType = ulocimp_toLegacyType(keyword, value);
if (!legacyType.has_value() && isWellFormedLegacyType(value)) {
// Checks if the specified locale type is well-formed with the legacy locale syntax.
//
// Note:
@@ -2380,9 +2406,7 @@ uloc_toLegacyType(const char* keyword, const char* value)
// * http://www.unicode.org/reports/tr35/#Old_Locale_Extension_Syntax
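// Values (types) can only consist of [0-9a-zA-Z], plus for legacy values
// we allow [/_-] in the middle (e.g. "Asia/Tel_Aviv"). Note that '+' is not
// accepted by isWellFormedLegacyType(), so a value such as "Etc/GMT+1" does
// not pass this fallback check.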
if (isWellFormedLegacyType(value)) {
return value;
}
return value;
}
return legacyType;
}
Loading