diff --git a/icu4c/source/i18n/measunit.cpp b/icu4c/source/i18n/measunit.cpp index 2741b84aabf0..75fb64711bb2 100644 --- a/icu4c/source/i18n/measunit.cpp +++ b/icu4c/source/i18n/measunit.cpp @@ -2400,6 +2400,7 @@ MeasureUnitImpl MeasureUnitImpl::copy(UErrorCode &status) const { MeasureUnitImpl result; result.complexity = complexity; result.identifier.append(identifier, status); + result.constantDenominator = constantDenominator; for (int32_t i = 0; i < singleUnits.length(); i++) { SingleUnitImpl *item = result.singleUnits.emplaceBack(*singleUnits[i]); if (!item) { diff --git a/icu4c/source/i18n/measunit_extra.cpp b/icu4c/source/i18n/measunit_extra.cpp index a6348422738b..52b7d277bafc 100644 --- a/icu4c/source/i18n/measunit_extra.cpp +++ b/icu4c/source/i18n/measunit_extra.cpp @@ -467,37 +467,55 @@ void U_CALLCONV initUnitExtras(UErrorCode& status) { class Token { public: - Token(int32_t match) : fMatch(match) {} - - enum Type { - TYPE_UNDEFINED, - TYPE_PREFIX, - // Token type for "-per-", "-", and "-and-". - TYPE_COMPOUND_PART, - // Token type for "per-". - TYPE_INITIAL_COMPOUND_PART, - TYPE_POWER_PART, - TYPE_SIMPLE_UNIT, - }; - - // Calling getType() is invalid, resulting in an assertion failure, if Token - // value isn't positive. 
- Type getType() const { - U_ASSERT(fMatch > 0); - if (fMatch < kCompoundPartOffset) { - return TYPE_PREFIX; - } - if (fMatch < kInitialCompoundPartOffset) { - return TYPE_COMPOUND_PART; - } - if (fMatch < kPowerPartOffset) { - return TYPE_INITIAL_COMPOUND_PART; - } - if (fMatch < kSimpleUnitOffset) { - return TYPE_POWER_PART; - } - return TYPE_SIMPLE_UNIT; - } + Token(int64_t match) : fMatch(match) { + if (fMatch < kCompoundPartOffset) { + this->fType = TYPE_PREFIX; + } else if (fMatch < kInitialCompoundPartOffset) { + this->fType = TYPE_COMPOUND_PART; + } else if (fMatch < kPowerPartOffset) { + this->fType = TYPE_INITIAL_COMPOUND_PART; + } else if (fMatch < kSimpleUnitOffset) { + this->fType = TYPE_POWER_PART; + } else { + this->fType = TYPE_SIMPLE_UNIT; + } + } + + static Token constantToken(StringPiece str, UErrorCode &status) { + Token result; + auto value = result.parseStrigToLong(str, status); + if (U_FAILURE(status)) { + return result; + } + result.fMatch = value; + result.fType = TYPE_CONSTANT_DENOMINATOR; + return result; + } + + enum Type { + TYPE_UNDEFINED, + TYPE_PREFIX, + // Token type for "-per-", "-", and "-and-". + TYPE_COMPOUND_PART, + // Token type for "per-". + TYPE_INITIAL_COMPOUND_PART, + TYPE_POWER_PART, + TYPE_SIMPLE_UNIT, + TYPE_CONSTANT_DENOMINATOR, + }; + + // Calling getType() is invalid, resulting in an assertion failure, if Token + // value is negative. + Type getType() const { + U_ASSERT(fMatch >= 0); + return this->fType; + } + + // Retrieve the value of the constant denominator if the token is of type TYPE_CONSTANT_DENOMINATOR. + uint64_t getConstantDenominator() const { + U_ASSERT(getType() == TYPE_CONSTANT_DENOMINATOR); + return static_cast<uint64_t>(fMatch); + } UMeasurePrefix getUnitPrefix() const { U_ASSERT(getType() == TYPE_PREFIX); @@ -530,8 +548,116 @@ class Token { return fMatch - kSimpleUnitOffset; } + // TODO: Consider split function as a utility function. + // Parse the given string to an unsigned 64-bit value; an optional leading '+' and an optional non-negative decimal exponent (e.g. "1e3") are accepted. 
+ // If the value is not a positive integer in the range [1, int64_t::MAX], status is set to `kUnitIdentifierSyntaxError`. + uint64_t parseStrigToLong(const StringPiece str, UErrorCode &status) { + uint64_t result = 0; + uint64_t max_int64 = ~((int64_t)1 << 63); // the maximum value of int64_t + + // Check for empty string + if (str.empty()) { + status = kUnitIdentifierSyntaxError; + return result; + } + + int32_t exponent = 0; + int32_t exponentIndex = str.length(); + + // Iterate through the string + for (int32_t i = 0; i < str.length(); ++i) { + char c = str.data()[i]; + + // Handle sign + if (i == 0 && c == '+') { + continue; // Skip leading plus sign + } + if (i == 0 && c == '-') { + status = kUnitIdentifierSyntaxError; + return result; + } + + // Handle digits + if (c >= '0' && c <= '9') { + uint16_t digit = c - '0'; + + // Reject a digit that would push the value past int64_t::MAX before accumulating it, so the uint64_t arithmetic can never wrap around. + if (result > (max_int64 - digit) / 10) { + status = kUnitIdentifierSyntaxError; + return result; + } + result = result * 10 + digit; + continue; + } + + // Handle 'e' or 'E' + if (c == 'e' || c == 'E') { + exponentIndex = i + 1; + break; + } + + // Invalid character + status = kUnitIdentifierSyntaxError; + return result; + } + + // Handle exponent + for (int i = exponentIndex; i < str.length(); ++i) { + char c = str.data()[i]; + + // handle sign + if (i == exponentIndex && c == '+') { + continue; // Skip leading plus sign + } + + if (i == exponentIndex && c == '-') { + // Negative sign is not allowed for the exponent. 
+ status = kUnitIdentifierSyntaxError; + return result; + } + + // Handle digits + if (c >= '0' && c <= '9') { + uint16_t digit = c - '0'; + exponent = exponent * 10 + digit; + + // Check if the exponent is within the valid range (0 to 18; 10^19 already exceeds int64_t::MAX) + if (exponent > 18) { + status = kUnitIdentifierSyntaxError; + return result; + } + + continue; + } + + // Invalid character + status = kUnitIdentifierSyntaxError; + return result; + } + + // Apply the exponent + for (int32_t i = 0; i < exponent; ++i) { + // Check before multiplying so the uint64_t product can never wrap around and slip past the range check. + if (result > max_int64 / 10) { + status = kUnitIdentifierSyntaxError; + return result; + } + result *= 10; + } + + // Check if the result is within the valid range (1 to max_int64) + if ( result < 1 || result > max_int64) { + status = kUnitIdentifierSyntaxError; + return result; // add it for code consistency. + } + + return result; + } + private: - int32_t fMatch; + Token() = default; + int64_t fMatch = 0; + Type fType = TYPE_UNDEFINED; }; class Parser { @@ -555,6 +681,50 @@ class Parser { return {source}; } + /** + * A single unit or a constant denominator. 
+ */ + struct SingleUnitOrConstant { + enum ValueType { + kSingleUnit, + kConstantDenominator, + }; + + ValueType type = kSingleUnit; + SingleUnitImpl singleUnit; + uint64_t constantDenominator; + + static SingleUnitOrConstant singleUnitValue(SingleUnitImpl singleUnit) { + SingleUnitOrConstant result; + result.type = kSingleUnit; + result.singleUnit = singleUnit; + result.constantDenominator = 0; + return result; + } + + static SingleUnitOrConstant constantDenominatorValue(uint64_t constant) { + SingleUnitOrConstant result; + result.type = kConstantDenominator; + result.singleUnit = {}; + result.constantDenominator = constant; + return result; + } + + uint64_t getConstantDenominator() const { + U_ASSERT(type == kConstantDenominator); + return constantDenominator; + } + + SingleUnitImpl getSingleUnit() const { + U_ASSERT(type == kSingleUnit); + return singleUnit; + } + + bool isSingleUnit() const { return type == kSingleUnit; } + + bool isConstantDenominator() const { return type == kConstantDenominator; } + }; + MeasureUnitImpl parse(UErrorCode& status) { MeasureUnitImpl result; @@ -569,12 +739,19 @@ class Parser { while (hasNext()) { bool sawAnd = false; - SingleUnitImpl singleUnit = nextSingleUnit(sawAnd, status); + auto singleUnitOrConstant = nextSingleUnitOrConstant(sawAnd, status); if (U_FAILURE(status)) { return result; } - bool added = result.appendSingleUnit(singleUnit, status); + if (singleUnitOrConstant.isConstantDenominator()) { + result.constantDenominator = singleUnitOrConstant.getConstantDenominator(); + result.complexity = UMEASURE_UNIT_COMPOUND; + continue; + } + + U_ASSERT(singleUnitOrConstant.isSingleUnit()); + bool added = result.appendSingleUnit(singleUnitOrConstant.getSingleUnit(), status); if (U_FAILURE(status)) { return result; } @@ -604,6 +781,12 @@ class Parser { } } + if (result.singleUnits.length() == 0) { + // The identifier was empty or only had a constant denominator. 
+ status = kUnitIdentifierSyntaxError; + return result; // add it for code consistency. + } + return result; } @@ -622,6 +805,10 @@ class Parser { // identifier is invalid pending TODO(CLDR-13701). bool fAfterPer = false; + // Set to true when we've just seen a "per-". This is used to determine if + // the next token can be a constant denominator token. + bool fJustSawPer = false; + Parser() : fSource(""), fTrie(u"") {} Parser(StringPiece source) @@ -640,6 +827,10 @@ class Parser { // Saves the position in the fSource string for the end of the most // recent matching token. int32_t previ = -1; + + // Saves the position in the fSource string for later use in case of unit constant found. + int32_t currentFIndex = fIndex; + // Find the longest token that matches a value in the trie: while (fIndex < fSource.length()) { auto result = fTrie.next(fSource.data()[fIndex++]); @@ -658,12 +849,33 @@ class Parser { // continue; } - if (match < 0) { - status = kUnitIdentifierSyntaxError; - } else { + if (match >= 0) { fIndex = previ; + return {match}; } - return {match}; + + // the index of the character after the last character of the constant denominator. + int32_t endOfConstantIndex = -1; + // If no match was found, we check if the token is a constant denominator. + // 1. find the first `-` from the `currentFIndex` to the end. + for (int32_t i = currentFIndex; i < fSource.length(); ++i) { + if (fSource.data()[i] == '-') { + endOfConstantIndex = i; + break; + } + } + if (endOfConstantIndex == -1) { + endOfConstantIndex = fSource.length(); + } + if (endOfConstantIndex <= currentFIndex) { + status = kUnitIdentifierSyntaxError; + return {match}; + } + + StringPiece constantDenominatorStr = + fSource.substr(currentFIndex, endOfConstantIndex - currentFIndex); + fIndex = endOfConstantIndex; + return Token::constantToken(constantDenominatorStr, status); } /** @@ -680,10 +892,10 @@ class Parser { * unit", sawAnd is set to true. If not, it is left as is. * @param status ICU error code. 
*/ - SingleUnitImpl nextSingleUnit(bool &sawAnd, UErrorCode &status) { - SingleUnitImpl result; + SingleUnitOrConstant nextSingleUnitOrConstant(bool &sawAnd, UErrorCode &status) { + SingleUnitImpl singleUnitResult; if (U_FAILURE(status)) { - return result; + return SingleUnitOrConstant::singleUnitValue(singleUnitResult); } // state: @@ -695,19 +907,22 @@ class Parser { bool atStart = fIndex == 0; Token token = nextToken(status); if (U_FAILURE(status)) { - return result; + return SingleUnitOrConstant::singleUnitValue(singleUnitResult); } + fJustSawPer = false; + if (atStart) { // Identifiers optionally start with "per-". if (token.getType() == Token::TYPE_INITIAL_COMPOUND_PART) { U_ASSERT(token.getInitialCompoundPart() == INITIAL_COMPOUND_PART_PER); fAfterPer = true; - result.dimensionality = -1; + fJustSawPer = true; + singleUnitResult.dimensionality = -1; token = nextToken(status); if (U_FAILURE(status)) { - return result; + return SingleUnitOrConstant::singleUnitValue(singleUnitResult); } } } else { @@ -715,7 +930,7 @@ class Parser { // via a compound part: if (token.getType() != Token::TYPE_COMPOUND_PART) { status = kUnitIdentifierSyntaxError; - return result; + return SingleUnitOrConstant::singleUnitValue(singleUnitResult); } switch (token.getMatch()) { @@ -724,15 +939,16 @@ class Parser { // Mixed compound units not yet supported, // TODO(CLDR-13701). status = kUnitIdentifierSyntaxError; - return result; + return SingleUnitOrConstant::singleUnitValue(singleUnitResult); } fAfterPer = true; - result.dimensionality = -1; + fJustSawPer = true; + singleUnitResult.dimensionality = -1; break; case COMPOUND_PART_TIMES: if (fAfterPer) { - result.dimensionality = -1; + singleUnitResult.dimensionality = -1; } break; @@ -741,7 +957,7 @@ class Parser { // Can't start with "-and-", and mixed compound units // not yet supported, TODO(CLDR-13701). 
status = kUnitIdentifierSyntaxError; - return result; + return SingleUnitOrConstant::singleUnitValue(singleUnitResult); } sawAnd = true; break; @@ -749,52 +965,61 @@ class Parser { token = nextToken(status); if (U_FAILURE(status)) { - return result; + return SingleUnitOrConstant::singleUnitValue(singleUnitResult); + } + } + + if (token.getType() == Token::TYPE_CONSTANT_DENOMINATOR) { + if (!fJustSawPer) { + status = kUnitIdentifierSyntaxError; + return SingleUnitOrConstant::singleUnitValue(singleUnitResult); } + + return SingleUnitOrConstant::constantDenominatorValue(token.getConstantDenominator()); } // Read tokens until we have a complete SingleUnit or we reach the end. while (true) { switch (token.getType()) { - case Token::TYPE_POWER_PART: - if (state > 0) { - status = kUnitIdentifierSyntaxError; - return result; - } - result.dimensionality *= token.getPower(); - state = 1; - break; - - case Token::TYPE_PREFIX: - if (state > 1) { - status = kUnitIdentifierSyntaxError; - return result; - } - result.unitPrefix = token.getUnitPrefix(); - state = 2; - break; - - case Token::TYPE_SIMPLE_UNIT: - result.index = token.getSimpleUnitIndex(); - return result; + case Token::TYPE_POWER_PART: + if (state > 0) { + status = kUnitIdentifierSyntaxError; + return SingleUnitOrConstant::singleUnitValue(singleUnitResult); + } + singleUnitResult.dimensionality *= token.getPower(); + state = 1; + break; - default: + case Token::TYPE_PREFIX: + if (state > 1) { status = kUnitIdentifierSyntaxError; - return result; + return SingleUnitOrConstant::singleUnitValue(singleUnitResult); + } + singleUnitResult.unitPrefix = token.getUnitPrefix(); + state = 2; + break; + + case Token::TYPE_SIMPLE_UNIT: + singleUnitResult.index = token.getSimpleUnitIndex(); + return SingleUnitOrConstant::singleUnitValue(singleUnitResult); + + default: + status = kUnitIdentifierSyntaxError; + return SingleUnitOrConstant::singleUnitValue(singleUnitResult); } if (!hasNext()) { // We ran out of tokens before finding a 
complete single unit. status = kUnitIdentifierSyntaxError; - return result; + return SingleUnitOrConstant::singleUnitValue(singleUnitResult); } token = nextToken(status); if (U_FAILURE(status)) { - return result; + return SingleUnitOrConstant::singleUnitValue(singleUnitResult); } } - return result; + return SingleUnitOrConstant::singleUnitValue(singleUnitResult); } }; @@ -1145,6 +1370,7 @@ void MeasureUnitImpl::serialize(UErrorCode &status) { CharString result; bool beforePer = true; bool firstTimeNegativeDimension = false; + bool constantDenominatorAppended = false; for (int32_t i = 0; i < this->singleUnits.length(); i++) { if (beforePer && (*this->singleUnits[i]).dimensionality < 0) { beforePer = false; @@ -1168,43 +1394,95 @@ void MeasureUnitImpl::serialize(UErrorCode &status) { } else { result.append(StringPiece("-per-"), status); } - } else { - if (result.length() != 0) { + + if (this->constantDenominator != 0) { + result.appendNumber(this->constantDenominator, status); result.append(StringPiece("-"), status); + constantDenominatorAppended = true; } + + } else if (result.length() != 0) { + result.append(StringPiece("-"), status); } } this->singleUnits[i]->appendNeutralIdentifier(result, status); } + if (!constantDenominatorAppended && this->constantDenominator != 0) { + result.append(StringPiece("-per-"), status); + result.appendNumber(this->constantDenominator, status); + } + + if (U_FAILURE(status)) { + return; + } this->identifier = CharString(result, status); } -MeasureUnit MeasureUnitImpl::build(UErrorCode& status) && { +MeasureUnit MeasureUnitImpl::build(UErrorCode &status) && { this->serialize(status); return MeasureUnit(std::move(*this)); } -MeasureUnit MeasureUnit::forIdentifier(StringPiece identifier, UErrorCode& status) { +MeasureUnit MeasureUnit::forIdentifier(StringPiece identifier, UErrorCode &status) { return Parser::from(identifier, status).parse(status).build(status); } -UMeasureUnitComplexity MeasureUnit::getComplexity(UErrorCode& status) 
const { +UMeasureUnitComplexity MeasureUnit::getComplexity(UErrorCode &status) const { MeasureUnitImpl temp; return MeasureUnitImpl::forMeasureUnit(*this, temp, status).complexity; } -UMeasurePrefix MeasureUnit::getPrefix(UErrorCode& status) const { +UMeasurePrefix MeasureUnit::getPrefix(UErrorCode &status) const { return SingleUnitImpl::forMeasureUnit(*this, status).unitPrefix; } -MeasureUnit MeasureUnit::withPrefix(UMeasurePrefix prefix, UErrorCode& status) const UPRV_NO_SANITIZE_UNDEFINED { +MeasureUnit MeasureUnit::withPrefix(UMeasurePrefix prefix, + UErrorCode &status) const UPRV_NO_SANITIZE_UNDEFINED { SingleUnitImpl singleUnit = SingleUnitImpl::forMeasureUnit(*this, status); singleUnit.unitPrefix = prefix; return singleUnit.build(status); } +int64_t MeasureUnit::getConstantDenominator(UErrorCode &status) const { + auto complexity = this->getComplexity(status); + if (U_FAILURE(status)) { + return 0; + } + + if (complexity != UMEASURE_UNIT_SINGLE && complexity != UMEASURE_UNIT_COMPOUND) { + status = U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + if (this->fImpl == nullptr) { + return 0; + } + + return this->fImpl->constantDenominator; +} + +MeasureUnit MeasureUnit::withConstantDenominator(int64_t denominator, UErrorCode &status) const { + auto complexity = this->getComplexity(status); + if (U_FAILURE(status)) { + return {}; + } + // MIXED units cannot carry a constant, and a negative value cannot be stored in the unsigned constantDenominator field. 
+ if (denominator < 0 || (complexity != UMEASURE_UNIT_SINGLE && complexity != UMEASURE_UNIT_COMPOUND)) { + status = U_ILLEGAL_ARGUMENT_ERROR; + return {}; + } + MeasureUnitImpl impl = MeasureUnitImpl::forMeasureUnitMaybeCopy(*this, status); + if (U_FAILURE(status)) { + return {}; + } + // A denominator of 0 removes the constant; a unit left with at most one single unit is then SINGLE again. + impl.constantDenominator = denominator; + impl.complexity = (denominator == 0 && impl.singleUnits.length() < 2) ? UMEASURE_UNIT_SINGLE : UMEASURE_UNIT_COMPOUND; + return std::move(impl).build(status); +} + int32_t MeasureUnit::getDimensionality(UErrorCode& status) const { SingleUnitImpl singleUnit = SingleUnitImpl::forMeasureUnit(*this, status); if (U_FAILURE(status)) { return 0; } @@ -1222,6 +1500,11 @@ MeasureUnit 
MeasureUnit::withDimensionality(int32_t dimensionality, UErrorCode& MeasureUnit MeasureUnit::reciprocal(UErrorCode& status) const { MeasureUnitImpl impl = MeasureUnitImpl::forMeasureUnitMaybeCopy(*this, status); + // The reciprocal of a unit that has a constant denominator is not allowed. + if (impl.constantDenominator != 0) { + status = U_ILLEGAL_ARGUMENT_ERROR; + return {}; + } impl.takeReciprocal(status); return std::move(impl).build(status); } diff --git a/icu4c/source/i18n/measunit_impl.h b/icu4c/source/i18n/measunit_impl.h index f6a8f90dc94f..db31435944c2 100644 --- a/icu4c/source/i18n/measunit_impl.h +++ b/icu4c/source/i18n/measunit_impl.h @@ -328,6 +328,14 @@ class U_I18N_API MeasureUnitImpl : public UMemory { */ CharString identifier; + /** + * Represents the unit constant denominator. + * + * NOTE: + * if set to 0, it means that the constant is not set. + */ + uint64_t constantDenominator = 0; + // For calling serialize // TODO(icu-units#147): revisit serialization friend class number::impl::LongNameHandler; diff --git a/icu4c/source/i18n/number_fluent.cpp b/icu4c/source/i18n/number_fluent.cpp index 0ce01c854cae..5f2f740df71b 100644 --- a/icu4c/source/i18n/number_fluent.cpp +++ b/icu4c/source/i18n/number_fluent.cpp @@ -654,9 +654,6 @@ void LocalizedNumberFormatter::formatImpl(impl::UFormattedNumberData* results, U } else { NumberFormatterImpl::formatStatic(fMacros, results, status); } - if (U_FAILURE(status)) { - return; - } results->getStringRef().writeTerminator(status); } diff --git a/icu4c/source/i18n/unicode/measunit.h b/icu4c/source/i18n/unicode/measunit.h index b23897192eb4..917e57a38d6f 100644 --- a/icu4c/source/i18n/unicode/measunit.h +++ b/icu4c/source/i18n/unicode/measunit.h @@ -552,6 +552,40 @@ class U_I18N_API MeasureUnit: public UObject { */ UMeasurePrefix getPrefix(UErrorCode& status) const; + /** + * Creates a new MeasureUnit with a specified constant denominator. + * + * This method is applicable only to COMPOUND and SINGLE units. 
If invoked on a + * MIXED unit, an error will be set in the status. + * + * NOTE: If the constant denominator is set to 0, it means that you are removing + * the constant denominator. + * + * @param denominator The constant denominator to set. + * @param status Set if this is not a COMPOUND or SINGLE unit or if another error occurs. + * @return A new MeasureUnit with the specified constant denominator. + * @draft ICU 77 + */ + MeasureUnit withConstantDenominator(int64_t denominator, UErrorCode &status) const; + + /** + * Retrieves the constant denominator for this COMPOUND unit. + * + * Examples: + * - For the unit "liter-per-1000-kiloliter", the constant denominator is 1000. + * - For the unit "liter-per-kilometer", the constant denominator is zero. + * + * This method is applicable only to COMPOUND and SINGLE units. If invoked on + * a MIXED unit, an error will be set in the status. + * + * NOTE: If no constant denominator exists, the method returns 0. + * + * @param status Set if this is not a COMPOUND or SINGLE unit or if another error occurs. + * @return The value of the constant denominator. + * @draft ICU 77 + */ + int64_t getConstantDenominator(UErrorCode &status) const; + /** * Creates a MeasureUnit which is this SINGLE unit augmented with the specified dimensionality * (power). For example, if dimensionality is 2, the unit will be squared. @@ -591,7 +625,9 @@ class U_I18N_API MeasureUnit: public UObject { * NOTE: Only works on SINGLE and COMPOUND units. If this is a MIXED unit, an error will * occur. For more information, see UMeasureUnitComplexity. * - * @param status Set if this is a MIXED unit or if another error occurs. + * NOTE: An Error will be returned for units that have a constant denominator. + * + * @param status Set if this is a MIXED unit, has a constant denominator or if another error occurs. * @return The reciprocal of the target unit. 
* @stable ICU 67 */ @@ -627,6 +663,10 @@ class U_I18N_API MeasureUnit: public UObject { * * If this is a SINGLE unit, an array of length 1 will be returned. * + * NOTE: For units with a constant denominator, the returned single units will + * not include the constant denominator. To obtain the constant denominator, + * retrieve it from the original unit. + * * @param status Set if an error occurs. * @return A pair with the list of units as a LocalArray and the number of units in the list. * @stable ICU 68 diff --git a/icu4c/source/test/intltest/measfmttest.cpp b/icu4c/source/test/intltest/measfmttest.cpp index fbe71bdf8873..e302f75eaba6 100644 --- a/icu4c/source/test/intltest/measfmttest.cpp +++ b/icu4c/source/test/intltest/measfmttest.cpp @@ -5450,7 +5450,7 @@ void MeasureFormatTest::TestUnitPerUnitResolution() { actual, pos, status); - assertEquals("", "50 psi", actual); + assertEquals("TestUnitPerUnitResolution", "50 psi", actual); } void MeasureFormatTest::TestIndividualPluralFallback() { @@ -5708,6 +5708,19 @@ void MeasureFormatTest::TestInvalidIdentifiers() { // Compound units not supported in mixed units yet. TODO(CLDR-13701). "kilonewton-meter-and-newton-meter", + + // Invalid identifiers with constants. 
+ "meter-per--20--second", + "meter-per-1000-1e9-second", + "meter-per-1e20-second", + "per-1000", + "meter-per-1000-1000", + "meter-per-1000-second-1000-kilometer", + "1000-meter", + "meter-1000", + "meter-per-1000-1000", + "meter-per-1000-second-1000-kilometer", + "per-1000-and-per-1000", }; for (const auto& input : inputs) { diff --git a/icu4c/source/test/intltest/units_test.cpp b/icu4c/source/test/intltest/units_test.cpp index add612c27678..be3184cdd2a5 100644 --- a/icu4c/source/test/intltest/units_test.cpp +++ b/icu4c/source/test/intltest/units_test.cpp @@ -50,6 +50,7 @@ class UnitsTest : public IntlTest { void testComplexUnitsConverter(); void testComplexUnitsConverterSorting(); void testUnitPreferencesWithCLDRTests(); + void testUnitsConstantsDenomenator(); void testConverter(); }; @@ -67,6 +68,7 @@ void UnitsTest::runIndexedTest(int32_t index, UBool exec, const char *&name, cha TESTCASE_AUTO(testComplexUnitsConverter); TESTCASE_AUTO(testComplexUnitsConverterSorting); TESTCASE_AUTO(testUnitPreferencesWithCLDRTests); + TESTCASE_AUTO(testUnitsConstantsDenomenator); TESTCASE_AUTO(testConverter); TESTCASE_AUTO_END; } @@ -1157,4 +1159,113 @@ void UnitsTest::testUnitPreferencesWithCLDRTests() { } } +void UnitsTest::testUnitsConstantsDenomenator() { + IcuTestErrorCode status(*this, "UnitTests::testUnitsConstantsDenomenator"); + + // Test Cases + struct TestCase { + const char *source; + const uint64_t expectedConstant; + } testCases[]{ + {"meter-per-1000", 1000}, + {"liter-per-1000-kiloliter", 1000}, + {"liter-per-kilometer", 0}, + {"second-per-1000-minute", 1000}, + {"gram-per-1000-kilogram", 1000}, + {"meter-per-100", 100}, + {"portion-per-1", 1}, + {"portion-per-2", 2}, + {"portion-per-3", 3}, + {"portion-per-4", 4}, + {"portion-per-5", 5}, + {"portion-per-6", 6}, + {"portion-per-7", 7}, + {"portion-per-8", 8}, + {"portion-per-9", 9}, + // Test for constant denominators that are powers of 10 + {"portion-per-10", 10}, + {"portion-per-100", 100}, + 
{"portion-per-1000", 1000}, + {"portion-per-10000", 10000}, + {"portion-per-100000", 100000}, + {"portion-per-1000000", 1000000}, + {"portion-per-10000000", 10000000}, + {"portion-per-100000000", 100000000}, + {"portion-per-1000000000", 1000000000}, + {"portion-per-10000000000", 10000000000}, + {"portion-per-100000000000", 100000000000}, + {"portion-per-1000000000000", 1000000000000}, + {"portion-per-10000000000000", 10000000000000}, + {"portion-per-100000000000000", 100000000000000}, + {"portion-per-1000000000000000", 1000000000000000}, + {"portion-per-10000000000000000", 10000000000000000}, + {"portion-per-100000000000000000", 100000000000000000}, + {"portion-per-1000000000000000000", 1000000000000000000}, + // Test for constant denominators that are represented as scientific notation + // numbers. + {"portion-per-1e1", 10}, + {"portion-per-1E1", 10}, + {"portion-per-1e2", 100}, + {"portion-per-1E2", 100}, + {"portion-per-1e3", 1000}, + {"portion-per-1E3", 1000}, + {"portion-per-1e4", 10000}, + {"portion-per-1E4", 10000}, + {"portion-per-1e5", 100000}, + {"portion-per-1E5", 100000}, + {"portion-per-1e6", 1000000}, + {"portion-per-1E6", 1000000}, + {"portion-per-1e10", 10000000000}, + {"portion-per-1E10", 10000000000}, + {"portion-per-1e18", 1000000000000000000}, + {"portion-per-1E18", 1000000000000000000}, + // Test for constant denominators that are randomly selected. + {"liter-per-12345-kilometer", 12345}, + {"per-1000-kilometer", 1000}, + {"liter-per-1000-kiloliter", 1000}, + // Test for constant denominators that give 0. + {"meter", 0}, + {"meter-per-second", 0}, + {"meter-per-square-second", 0}, + // NOTE: The following constant denominator should be 100. However, since + // `100-kilometer` is treated as a unit in CLDR, + // the unit does not have a constant denominator. + // This issue should be addressed in CLDR. 
+ {"meter-per-100-kilometer", 0}, + // NOTE: the following CLDR identifier should be invalid, but because + // `100-kilometer` is considered a unit in CLDR, + // one `100` will be considered as a unit constant denominator and the other + // `100` will be considered part of the unit. + // This issue should be addressed in CLDR. + {"meter-per-100-100-kilometer", 100}, + }; + + for (const auto &testCase : testCases) { + MeasureUnit unit = MeasureUnit::forIdentifier(testCase.source, status); + if (status.errIfFailureAndReset("forIdentifier(\"%s\")", testCase.source)) { + continue; + } + + uint64_t constant = unit.getConstantDenominator(status); + if (status.errIfFailureAndReset("getConstantDenominator(\"%s\")", testCase.source)) { + continue; + } + + auto complexity = unit.getComplexity(status); + if (status.errIfFailureAndReset("getComplexity(\"%s\")", testCase.source)) { + continue; + } + + if (constant != testCase.expectedConstant) { + status.set(U_PARSE_ERROR); + if (status.errIfFailureAndReset("getConstantDenominator(\"%s\")", testCase.source)) { + continue; + } + } + if (constant != 0) { + assertEquals("getComplexity(\"%s\")", UMEASURE_UNIT_COMPOUND, complexity); + } + } +} + #endif /* #if !UCONFIG_NO_FORMATTING */