Skip to content

Commit

Permalink
Use existing number parsing code
Browse files Browse the repository at this point in the history
  • Loading branch information
catamorphism committed Sep 17, 2024
1 parent 1a8fdb5 commit 176a232
Showing 1 changed file with 22 additions and 142 deletions.
164 changes: 22 additions & 142 deletions icu4c/source/i18n/messageformat2_function_registry.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include "unicode/messageformat2_function_registry.h"
#include "unicode/smpdtfmt.h"
#include "charstr.h"
#include "double-conversion.h"
#include "messageformat2_allocation.h"
#include "messageformat2_function_registry_internal.h"
#include "messageformat2_macros.h"
Expand Down Expand Up @@ -423,74 +424,6 @@ static FormattedPlaceholder notANumber(const FormattedPlaceholder& input) {
return FormattedPlaceholder(input, FormattedValue(UnicodeString("NaN")));
}

// Tests whether the code point `c` lies in the closed interval
// [`first`, `last`]. The bounds must satisfy `first < last`, which is
// checked with U_ASSERT.
static bool inRange(UChar32 c, UChar32 first, UChar32 last) {
    U_ASSERT(first < last);
    if (c < first) {
        return false;
    }
    return c <= last;
}

// Returns true if `c` is one of the code points U+0030 ('0') through
// U+0039 ('9'), inclusive.
static bool isDigit(UChar32 c) {
    constexpr UChar32 kDigitZero = 0x0030;
    constexpr UChar32 kDigitNine = 0x0039;
    return inRange(c, kDigitZero, kDigitNine);
}

// Maps a digit code point ('0' through '9') to its numeric value 0..9.
// Any other code point yields 0.
static int32_t parseDigit(UChar32 c) {
    if (c >= u'0' && c <= u'9') {
        // The digits '0'..'9' are contiguous, so the offset from '0'
        // is the digit's value.
        return static_cast<int32_t>(c - u'0');
    }
    // Should be unreachable
    return 0;
}

// Parses the run of decimal digits in `s` that starts at index `i` and
// returns its value as a base-10 integer.
//
// `i` is an in/out parameter: on return it is the index of the first
// character that is not a digit. If `s[i]` is not a digit on entry, the
// function returns 0 and leaves `i` unchanged.
//
// A digit run whose value does not fit in int32_t saturates at INT32_MAX
// instead of overflowing (signed overflow is undefined behavior); the
// remaining digits are still consumed so that `i` ends up past the run.
static int32_t parseDigits(const UnicodeString& s, int32_t& i) {
    int32_t result = 0;
    while (isDigit(s[i])) {
        const int32_t digit = parseDigit(s[i]);
        // result * 10 + digit would exceed INT32_MAX exactly when
        // result > (INT32_MAX - digit) / 10.
        if (result > (INT32_MAX - digit) / 10) {
            result = INT32_MAX;
        } else {
            result = result * 10 + digit;
        }
        i++;
    }
    return result;
}

static double parseDecimalPart(const UnicodeString& s, int32_t& i) {
int32_t firstDigit = i;
double result = (double) parseDigits(s, i);
int32_t denominator = 1;
for (int32_t j = i; j > firstDigit; j--) {
denominator *= 10;
}
result /= denominator;
return result;
}

static double parseNumberLiteral(const FormattedPlaceholder& input, UErrorCode& errorCode) {
if (U_FAILURE(errorCode)) {
return {};
Expand All @@ -504,84 +437,31 @@ static double parseNumberLiteral(const FormattedPlaceholder& input, UErrorCode&
return {};
}

int32_t index = 0;

#define ERROR() errorCode = U_MF_OPERAND_MISMATCH_ERROR; return 0;
#define IN_BOUNDS(inputStr, index) (index < inputStr.length())
#define CHECK_BOUNDS(inputStr, index) if (!IN_BOUNDS(inputStr, index)) { ERROR(); }

CHECK_BOUNDS(inputStr, index);

// Parse the sign if present
double sign = 1;
if (inputStr[index] == HYPHEN) {
sign = -1;
index++;
}

CHECK_BOUNDS(inputStr, index);

// Parse the integer part
if (!isDigit(inputStr[index])) {
// Non-numeric first character after sign -- not valid
ERROR();
}

// First, check for leading zero with no decimal point
if (inputStr[index] == u'0') {
bool isZero = inputStr.length() == index + 1;
bool hasDecimalPart = inputStr.length() > index && inputStr[index + 1] == PERIOD;
if (!(isZero || hasDecimalPart)) {
ERROR();
}
}

double result = parseDigits(inputStr, index);
// Hack: Check for cases that are forbidden by the MF2 grammar
// but allowed by StringToDouble
int32_t len = inputStr.length();

if (IN_BOUNDS(inputStr, index) && inputStr[index] == PERIOD) {
index++;
CHECK_BOUNDS(inputStr, index);
if (isDigit(inputStr[index])) {
result += parseDecimalPart(inputStr, index);
} else {
// '.' not followed by a digit is an error
ERROR();
}
}

result *= sign;

if (IN_BOUNDS(inputStr, index) &&
(inputStr[index] == UPPERCASE_E || inputStr[index] == LOWERCASE_E)) {
double exponent;
bool positive = true;
index++;
CHECK_BOUNDS(inputStr, index);
// Parse sign if present
if (inputStr[index] == PLUS) {
index++;
} else if (inputStr[index] == HYPHEN) {
positive = false;
index++;
}
// Parse exponent digits
CHECK_BOUNDS(inputStr, index);
if (!isDigit(inputStr[index])) {
ERROR();
}
exponent = parseDigits(inputStr, index);
if (positive) {
result *= (exponent * 10);
} else {
result *= (exponent * -10);
}
if (len > 0 && ((inputStr[0] == '+')
|| (inputStr[0] == '0' && len > 1 && inputStr[1] != '.')
|| (inputStr[len - 1] == '.')
|| (inputStr[0] == '.'))) {
errorCode = U_MF_OPERAND_MISMATCH_ERROR;
return 0;
}

// Make sure entire input is consumed
if (index != inputStr.length()) {
ERROR();
// Otherwise, convert to double using double_conversion::StringToDoubleConverter
using namespace double_conversion;
int processedCharactersCount = 0;
StringToDoubleConverter converter(0, 0, 0, "", "");
LocalArray<char> target(new char(len + 1));
int32_t bufferLen = inputStr.extract(0, len, target.getAlias());
U_ASSERT(bufferLen == len);
double result = converter.StringToDouble(target.getAlias(),
len,
&processedCharactersCount);
if (processedCharactersCount != len) {
errorCode = U_MF_OPERAND_MISMATCH_ERROR;
}

return result;
}

Expand Down

0 comments on commit 176a232

Please sign in to comment.