Skip to content

Commit

Permalink
ICU-22696 Update ulocimp_to*{Key,Type}() to use std::string_view.
Browse files (browse the repository at this point in the history)
  • Loading branch information
roubert committed Jul 31, 2024
1 parent dd65ee3 commit e1544fc
Show file tree
Hide file tree
Showing 6 changed files with 234 additions and 225 deletions.
17 changes: 11 additions & 6 deletions icu4c/source/common/localebuilder.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
// © 2019 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html

#include <optional>
#include <string_view>
#include <utility>

#include "bytesinkutil.h" // StringByteSink<CharString>
Expand Down Expand Up @@ -162,12 +164,15 @@ _isKeywordValue(const char* key, const char* value, int32_t value_len)
// otherwise: unicode extension value
// We need to convert from legacy key/value to unicode
// key/value
const char* unicode_locale_key = uloc_toUnicodeLocaleKey(key);
const char* unicode_locale_type = uloc_toUnicodeLocaleType(key, value);

return unicode_locale_key && unicode_locale_type &&
ultag_isUnicodeLocaleKey(unicode_locale_key, -1) &&
ultag_isUnicodeLocaleType(unicode_locale_type, -1);
std::optional<std::string_view> unicode_locale_key = ulocimp_toBcpKey(key);
std::optional<std::string_view> unicode_locale_type = ulocimp_toBcpType(key, value);

return unicode_locale_key.has_value() &&
unicode_locale_type.has_value() &&
ultag_isUnicodeLocaleKey(unicode_locale_key->data(),
static_cast<int32_t>(unicode_locale_key->size())) &&
ultag_isUnicodeLocaleType(unicode_locale_type->data(),
static_cast<int32_t>(unicode_locale_type->size()));
}

void
Expand Down
64 changes: 26 additions & 38 deletions icu4c/source/common/locid.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@
******************************************************************************
*/

#include <optional>
#include <string_view>
#include <utility>

#include "unicode/bytestream.h"
Expand Down Expand Up @@ -1570,8 +1572,8 @@ AliasReplacer::replaceTransformedExtensions(
// Split the "tkey-tvalue" pair string so that we can canonicalize the tvalue.
*const_cast<char*>(tvalue++) = '\0'; // NUL terminate tkey
output.append(tfield, status).append('-', status);
const char* bcpTValue = ulocimp_toBcpType(tfield, tvalue);
output.append((bcpTValue == nullptr) ? tvalue : bcpTValue, status);
std::optional<std::string_view> bcpTValue = ulocimp_toBcpType(tfield, tvalue);
output.append(bcpTValue.has_value() ? *bcpTValue : tvalue, status);
}
}
if (U_FAILURE(status)) {
Expand Down Expand Up @@ -2486,7 +2488,7 @@ const char KeywordEnumeration::fgClassID = '\0';
// Out-of-line virtual destructor to serve as the "key function".
KeywordEnumeration::~KeywordEnumeration() = default;

// A wrapper around KeywordEnumeration that calls uloc_toUnicodeLocaleKey() in
// A wrapper around KeywordEnumeration that calls ulocimp_toBcpKey() in
// the next() method for each keyword before returning it.
class UnicodeKeywordEnumeration : public KeywordEnumeration {
public:
Expand All @@ -2496,12 +2498,12 @@ class UnicodeKeywordEnumeration : public KeywordEnumeration {
virtual const char* next(int32_t* resultLength, UErrorCode& status) override {
const char* legacy_key = KeywordEnumeration::next(nullptr, status);
while (U_SUCCESS(status) && legacy_key != nullptr) {
const char* key = uloc_toUnicodeLocaleKey(legacy_key);
if (key != nullptr) {
std::optional<std::string_view> key = ulocimp_toBcpKey(legacy_key);
if (key.has_value()) {
if (resultLength != nullptr) {
*resultLength = static_cast<int32_t>(uprv_strlen(key));
*resultLength = static_cast<int32_t>(key->size());
}
return key;
return key->data(); // This is known to be NUL terminated.
}
// Not a Unicode keyword, could be a t, x or other, continue to look at the next one.
legacy_key = KeywordEnumeration::next(nullptr, status);
Expand All @@ -2514,7 +2516,7 @@ class UnicodeKeywordEnumeration : public KeywordEnumeration {
const char *kw = keywords.data();
int32_t result = 0;
while(*kw) {
if (uloc_toUnicodeLocaleKey(kw) != nullptr) {
if (ulocimp_toBcpKey(kw).has_value()) {
result++;
}
kw += uprv_strlen(kw)+1;
Expand Down Expand Up @@ -2608,33 +2610,26 @@ Locale::getUnicodeKeywordValue(StringPiece keywordName,
return;
}

// TODO: Remove the need for a const char* to a NUL terminated buffer.
const CharString keywordName_nul(keywordName, status);
if (U_FAILURE(status)) {
return;
}

const char* legacy_key = uloc_toLegacyKey(keywordName_nul.data());
if (legacy_key == nullptr) {
std::optional<std::string_view> legacy_key = ulocimp_toLegacyKey(keywordName);
if (!legacy_key.has_value()) {
status = U_ILLEGAL_ARGUMENT_ERROR;
return;
}

auto legacy_value = getKeywordValue<CharString>(legacy_key, status);
auto legacy_value = getKeywordValue<CharString>(*legacy_key, status);

if (U_FAILURE(status)) {
return;
}

const char* unicode_value = uloc_toUnicodeLocaleType(
keywordName_nul.data(), legacy_value.data());

if (unicode_value == nullptr) {
std::optional<std::string_view> unicode_value =
ulocimp_toBcpType(keywordName, legacy_value.toStringPiece());
if (!unicode_value.has_value()) {
status = U_ILLEGAL_ARGUMENT_ERROR;
return;
}

sink.Append(unicode_value, static_cast<int32_t>(uprv_strlen(unicode_value)));
sink.Append(unicode_value->data(), static_cast<int32_t>(unicode_value->size()));
}

void
Expand Down Expand Up @@ -2699,32 +2694,25 @@ Locale::setUnicodeKeywordValue(StringPiece keywordName,
return;
}

// TODO: Remove the need for a const char* to a NUL terminated buffer.
const CharString keywordName_nul(keywordName, status);
const CharString keywordValue_nul(keywordValue, status);
if (U_FAILURE(status)) {
return;
}

const char* legacy_key = uloc_toLegacyKey(keywordName_nul.data());
if (legacy_key == nullptr) {
std::optional<std::string_view> legacy_key = ulocimp_toLegacyKey(keywordName);
if (!legacy_key.has_value()) {
status = U_ILLEGAL_ARGUMENT_ERROR;
return;
}

const char* legacy_value = nullptr;

if (!keywordValue_nul.isEmpty()) {
legacy_value =
uloc_toLegacyType(keywordName_nul.data(), keywordValue_nul.data());
std::string_view value;

if (legacy_value == nullptr) {
if (!keywordValue.empty()) {
std::optional<std::string_view> legacy_value =
ulocimp_toLegacyType(keywordName, keywordValue);
if (!legacy_value.has_value()) {
status = U_ILLEGAL_ARGUMENT_ERROR;
return;
}
value = *legacy_value;
}

setKeywordValue(legacy_key, legacy_value, status);
setKeywordValue(*legacy_key, value, status);
}

const char *
Expand Down
93 changes: 14 additions & 79 deletions icu4c/source/common/uloc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2291,100 +2291,35 @@ uloc_getISOCountries()
U_CAPI const char* U_EXPORT2
uloc_toUnicodeLocaleKey(const char* keyword)
{
const char* bcpKey = ulocimp_toBcpKey(keyword);
if (bcpKey == nullptr && ultag_isUnicodeLocaleKey(keyword, -1)) {
// unknown keyword, but syntax is fine..
return keyword;
}
return bcpKey;
if (keyword == nullptr || *keyword == '\0') { return nullptr; }
std::optional<std::string_view> bcpKey = ulocimp_toBcpKey(keyword);
return bcpKey.has_value() ? bcpKey->data() : nullptr;
}

U_CAPI const char* U_EXPORT2
uloc_toUnicodeLocaleType(const char* keyword, const char* value)
{
const char* bcpType = ulocimp_toBcpType(keyword, value);
if (bcpType == nullptr && ultag_isUnicodeLocaleType(value, -1)) {
// unknown keyword, but syntax is fine..
return value;
}
return bcpType;
}

namespace {

bool
isWellFormedLegacyKey(const char* legacyKey)
{
const char* p = legacyKey;
while (*p) {
if (!UPRV_ISALPHANUM(*p)) {
return false;
}
p++;
}
return true;
}

bool
isWellFormedLegacyType(const char* legacyType)
{
const char* p = legacyType;
int32_t alphaNumLen = 0;
while (*p) {
if (*p == '_' || *p == '/' || *p == '-') {
if (alphaNumLen == 0) {
return false;
}
alphaNumLen = 0;
} else if (UPRV_ISALPHANUM(*p)) {
alphaNumLen++;
} else {
return false;
}
p++;
}
return (alphaNumLen != 0);
if (keyword == nullptr || *keyword == '\0' ||
value == nullptr || *value == '\0') { return nullptr; }
std::optional<std::string_view> bcpType = ulocimp_toBcpType(keyword, value);
return bcpType.has_value() ? bcpType->data() : nullptr;
}

} // namespace

U_CAPI const char* U_EXPORT2
uloc_toLegacyKey(const char* keyword)
{
const char* legacyKey = ulocimp_toLegacyKey(keyword);
if (legacyKey == nullptr) {
// Checks if the specified locale key is well-formed with the legacy locale syntax.
//
// Note:
// LDML/CLDR provides some definition of keyword syntax in
// * http://www.unicode.org/reports/tr35/#Unicode_locale_identifier and
// * http://www.unicode.org/reports/tr35/#Old_Locale_Extension_Syntax
// Keys can only consist of [0-9a-zA-Z].
if (isWellFormedLegacyKey(keyword)) {
return keyword;
}
}
return legacyKey;
if (keyword == nullptr || *keyword == '\0') { return nullptr; }
std::optional<std::string_view> legacyKey = ulocimp_toLegacyKey(keyword);
return legacyKey.has_value() ? legacyKey->data() : nullptr;
}

U_CAPI const char* U_EXPORT2
uloc_toLegacyType(const char* keyword, const char* value)
{
const char* legacyType = ulocimp_toLegacyType(keyword, value);
if (legacyType == nullptr) {
// Checks if the specified locale type is well-formed with the legacy locale syntax.
//
// Note:
// LDML/CLDR provides some definition of keyword syntax in
// * http://www.unicode.org/reports/tr35/#Unicode_locale_identifier and
// * http://www.unicode.org/reports/tr35/#Old_Locale_Extension_Syntax
// Values (types) can only consist of [0-9a-zA-Z], plus for legacy values
// we allow [/_-+] in the middle (e.g. "Etc/GMT+1", "Asia/Tel_Aviv")
if (isWellFormedLegacyType(value)) {
return value;
}
}
return legacyType;
if (keyword == nullptr || *keyword == '\0' ||
value == nullptr || *value == '\0') { return nullptr; }
std::optional<std::string_view> legacyType = ulocimp_toLegacyType(keyword, value);
return legacyType.has_value() ? legacyType->data() : nullptr;
}

/*eof*/
Loading

0 comments on commit e1544fc

Please sign in to comment.