Skip to content

Commit

Permalink
ICU-22559 Hardcode the macroregions and add a test
Browse files Browse the repository at this point in the history
  • Loading branch information
sffc committed Oct 27, 2023
1 parent 3d1dee6 commit 96bf4bf
Showing 1 changed file with 74 additions and 16 deletions.
90 changes: 74 additions & 16 deletions icu4c/source/common/loclikelysubtags.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -352,7 +352,52 @@ UBool U_CALLCONV cleanup() {
return true;
}

static const char16_t* MACROREGION_HARDCODE[] = {
u"001~3",
u"005",
u"009",
u"011",
u"013~5",
u"017~9",
u"021",
u"029",
u"030",
u"034~5",
u"039",
u"053~4",
u"057",
u"061",
u"142~3",
u"145",
u"150~1",
u"154~5",
u"202",
u"419",
u"EU",
u"EZ",
u"QO",
u"UN",
};

static const char16_t RANGE_MARKER = 0x7E; /* '~' */
static void processMacroregionRange(const UnicodeString& regionName, UVector* newMacroRegions, UErrorCode& status) {
int32_t rangeMarkerLocation = regionName.indexOf(RANGE_MARKER);
char16_t buf[6];
regionName.extract(buf,6,status);
if ( rangeMarkerLocation > 0 ) {
char16_t endRange = regionName.charAt(rangeMarkerLocation+1);
buf[rangeMarkerLocation] = 0;
while ( buf[rangeMarkerLocation-1] <= endRange && U_SUCCESS(status)) {
LocalPointer<UnicodeString> newRegion(new UnicodeString(buf), status);
newMacroRegions->adoptElement(newRegion.orphan(),status);
buf[rangeMarkerLocation-1]++;
}
} else {
LocalPointer<UnicodeString> newRegion(new UnicodeString(regionName), status);
newMacroRegions->adoptElement(newRegion.orphan(),status);
}
}

UVector* loadMacroregions(UErrorCode &status) {
LocalPointer<UVector> newMacroRegions(new UVector(uprv_deleteUObject, uhash_compareUnicodeString, status), status);

Expand All @@ -365,24 +410,32 @@ UVector* loadMacroregions(UErrorCode &status) {
return nullptr;
}

while (U_SUCCESS(status) && ures_hasNext(regionMacro.getAlias())) {
while (ures_hasNext(regionMacro.getAlias())) {
UnicodeString regionName = ures_getNextUnicodeString(regionMacro.getAlias(),nullptr,&status);
int32_t rangeMarkerLocation = regionName.indexOf(RANGE_MARKER);
char16_t buf[6];
regionName.extract(buf,6,status);
if ( rangeMarkerLocation > 0 ) {
char16_t endRange = regionName.charAt(rangeMarkerLocation+1);
buf[rangeMarkerLocation] = 0;
while ( buf[rangeMarkerLocation-1] <= endRange && U_SUCCESS(status)) {
LocalPointer<UnicodeString> newRegion(new UnicodeString(buf), status);
newMacroRegions->adoptElement(newRegion.orphan(),status);
buf[rangeMarkerLocation-1]++;
}
} else {
LocalPointer<UnicodeString> newRegion(new UnicodeString(regionName), status);
newMacroRegions->adoptElement(newRegion.orphan(),status);
processMacroregionRange(regionName, newMacroRegions.getAlias(), status);
if (U_FAILURE(status)) {
return nullptr;
}
}

return newMacroRegions.orphan();
}

UVector* getStaticMacroregions(UErrorCode &status) {
LocalPointer<UVector> newMacroRegions(new UVector(uprv_deleteUObject, uhash_compareUnicodeString, status), status);

if (U_FAILURE(status)) {
return nullptr;
}

for (size_t i=0; i<UPRV_LENGTHOF(MACROREGION_HARDCODE); i++) {
UnicodeString regionName(MACROREGION_HARDCODE[i]);
processMacroregionRange(regionName, newMacroRegions.getAlias(), status);
if (U_FAILURE(status)) {
return nullptr;
}
}

return newMacroRegions.orphan();
}

Expand All @@ -395,7 +448,12 @@ void U_CALLCONV XLikelySubtags::initLikelySubtags(UErrorCode &errorCode) {
data.load(errorCode);
if (U_FAILURE(errorCode)) { return; }
gLikelySubtags = new XLikelySubtags(data);
gMacroregions = loadMacroregions(errorCode);
gMacroregions = getStaticMacroregions(errorCode);
#if U_DEBUG
auto macroregionsFromData = loadMacroregions(errorCode);
U_ASSERT((*gMacroregions) == (*macroregionsFromData));
delete macroregionsFromData;
#endif
if (U_FAILURE(errorCode) || gLikelySubtags == nullptr || gMacroregions == nullptr) {
delete gLikelySubtags;
delete gMacroregions;
Expand Down

0 comments on commit 96bf4bf

Please sign in to comment.