Skip to content

Commit

Permalink
fix float parsing
Browse files Browse the repository at this point in the history
Fractional numbers written in scientific notation whose non-zero exponent
ended in zeros were parsed incorrectly, e.g. 1.2e10: the exponent's trailing zeros were stripped as if they belonged to the fraction.

Also tightened up scientific notation validation a bit.
  • Loading branch information
wojciech-adaptive committed May 30, 2024
1 parent beccae2 commit 432f85a
Show file tree
Hide file tree
Showing 4 changed files with 158 additions and 88 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ public DecimalFloat(final long value, final int scale)
}

/**
* Resets the encoder to the NAN value. This can checked using the {@link #isNaNValue()} method.
* Resets the encoder to the NAN value. This can be checked using the {@link #isNaNValue()} method.
*/
public void reset()
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,27 +20,28 @@ public static <Data> DecimalFloat extract(
final int offset,
final int length)
{
// Throw away trailing spaces or zeros
// Throw away trailing spaces
int workingOffset = offset;
int end = workingOffset + length;
for (int index = end - 1; charReader.isSpace(data, index) && index > workingOffset; index--)
{
end--;
}

int endDiff = 0;
for (int index = end - 1; charReader.isZero(data, index) && index > workingOffset; index--)
{
endDiff++;
}
int endOfSignificand = findEndOfSignificand(charReader, workingOffset, end, data);
final int startOfExponent = endOfSignificand + 1;

if (isFloatingPoint(charReader, workingOffset, end, endDiff, data))
if (isFloatingPoint(charReader, workingOffset, endOfSignificand, data))
{
end -= endDiff;
// Throw away trailing zeros
for (int index = endOfSignificand - 1; charReader.isZero(data, index) && index > workingOffset; index--)
{
endOfSignificand--;
}
}

// Throw away leading spaces
for (int index = workingOffset; charReader.isSpace(data, index) && index < end; index++)
for (int index = workingOffset; index < endOfSignificand && charReader.isSpace(data, index); index++)
{
workingOffset++;
}
Expand All @@ -53,78 +54,117 @@ public static <Data> DecimalFloat extract(
}

// Throw away leading zeros
for (int index = workingOffset; index < end && charReader.isZero(data, index); index++)
for (int index = workingOffset; index < endOfSignificand && charReader.isZero(data, index); index++)
{
workingOffset++;
}

int workingScale = 0;
long value = 0;
int base10exponent = 0;
boolean isScientificNotation = false;
short scaleDecrementValue = 0;
short scientificExponentMultiplier = -1;
for (int index = workingOffset; index < end; index++)
for (int index = workingOffset; index < endOfSignificand; index++)
{
final char charValue = charReader.charAt(data, index);
if (charValue == DOT)
{
// number of digits after the dot
workingScale = end - (index + 1);
scaleDecrementValue = 1;
}
else if (charValue == LOWER_CASE_E || charValue == UPPER_CASE_E)
{
isScientificNotation = true;

workingScale -= scaleDecrementValue;
}
else if (isScientificNotation && charValue == PLUS)
{
workingScale -= scaleDecrementValue;
}
else if (isScientificNotation && charValue == MINUS)
{
workingScale -= scaleDecrementValue;
scientificExponentMultiplier = 1;
workingScale = endOfSignificand - (index + 1);
}
else
{
final int digit = charReader.getDigit(data, index, charValue);
if (isScientificNotation)
{
base10exponent = base10exponent * 10 + digit;
workingScale -= scaleDecrementValue;
}
else
value = value * 10 + digit;
if (value < 0)
{
value = value * 10 + digit;
if (value < 0)
{
throw new ArithmeticException(
"Out of range: when parsing " + charReader.asString(data, offset, length));
}
throw new ArithmeticException(
"Out of range: when parsing " + charReader.asString(data, offset, length));
}
}
}

final int scale = workingScale + (scientificExponentMultiplier * base10exponent);
int exponent = 0;
final int exponentLength = end - startOfExponent;
if (exponentLength > 0)
{
// scientific notation
exponent = parseExponent(charReader, data, offset, length, startOfExponent, end);
}
else if (exponentLength == 0)
{
throw new NumberFormatException(charReader.asString(data, offset, length).toString());
}

final int scale = workingScale - exponent;
final long signedValue = negative ? -1 * value : value;
return number.set(
(scale >= 0) ? signedValue : signedValue * pow10(-scale),
Math.max(scale, 0)
(scale >= 0) ? signedValue : signedValue * pow10(-scale),
Math.max(scale, 0)
);
}

/**
 * Parses the exponent of a number written in scientific notation, i.e. the characters in
 * {@code [startOfExponent, end)} that follow the 'e' / 'E' marker.
 * <p>
 * The exponent may start with a single {@code +} or {@code -} sign, which must be followed by at
 * least one digit. A bare sign (e.g. {@code "1e+"}) is rejected in the same way that the caller
 * rejects a completely empty exponent (e.g. {@code "1e"}).
 *
 * @param charReader      accessor used to read characters and digits from {@code data}.
 * @param data            the underlying character source.
 * @param offset          start of the whole number being parsed; used only to build error messages.
 * @param length          length of the whole number being parsed; used only to build error messages.
 * @param startOfExponent index of the first exponent character; the caller must ensure it is
 *                        less than {@code end}.
 * @param end             exclusive end index of the exponent.
 * @return the signed exponent, in the range {@code [-1000, 1000]}.
 * @throws NumberFormatException if a sign is not followed by any digit, or if the magnitude of the
 *                               exponent exceeds 1000.
 */
private static <Data> int parseExponent(
    final CharReader<Data> charReader,
    final Data data,
    final int offset,
    final int length,
    final int startOfExponent,
    final int end)
{
    int exponent = 0;
    boolean negative = false;
    int position = startOfExponent;

    final char firstChar = charReader.charAt(data, position);
    if (firstChar == MINUS)
    {
        position++;
        negative = true;
    }
    else if (firstChar == PLUS)
    {
        position++;
    }

    if (position >= end)
    {
        // Only reachable when a sign consumed the last character: "1e+" / "1e-" carry no exponent
        // digits and must not be silently treated as an exponent of zero.
        throw new NumberFormatException(charReader.asString(data, offset, length).toString());
    }

    while (position < end)
    {
        final char charValue = charReader.charAt(data, position);
        // NOTE(review): getDigit is presumably responsible for rejecting non-digit characters
        // (including a second sign) - confirm against the CharReader implementations.
        final int digit = charReader.getDigit(data, position, charValue);
        position++;
        exponent = exponent * 10 + digit;
        // Checking on every digit keeps the accumulator far below Integer.MAX_VALUE, so this
        // doubles as the int overflow guard and as an arbitrary upper bound on the exponent.
        if (exponent > 1000)
        {
            throw new NumberFormatException(charReader.asString(data, offset, length).toString());
        }
    }

    return negative ? -exponent : exponent;
}

/**
 * Finds where the significand of a number stops by scanning backwards for the scientific
 * notation marker ('e' or 'E').
 * <p>
 * The scan runs from {@code end - 1} down to, but not including, {@code offset}, so a marker
 * sitting exactly at {@code offset} is not treated as an exponent separator.
 *
 * @param dataExtractor accessor used to read characters from {@code data}.
 * @param offset        lower bound of the scan (exclusive).
 * @param end           exclusive end index of the number.
 * @param data          the underlying character source.
 * @return the index of the last exponent marker found, or {@code end} when there is none.
 */
private static <Data> int findEndOfSignificand(
    final CharReader<Data> dataExtractor,
    final int offset,
    final int end,
    final Data data
)
{
    int position = end - 1;
    while (position > offset)
    {
        final char candidate = dataExtractor.charAt(data, position);
        final boolean isExponentMarker = candidate == LOWER_CASE_E || candidate == UPPER_CASE_E;
        if (isExponentMarker)
        {
            return position;
        }
        position--;
    }

    // No marker found: the significand spans the whole number.
    return end;
}

private static <Data> boolean isFloatingPoint(
final CharReader<Data> dataExtractor,
final int offset,
final int end,
final int endDiff,
final Data data
)
{
for (int index = end - endDiff - 1; index > offset; index--)
for (int index = end - 1; index >= offset; index--)
{
if (dataExtractor.charAt(data, index) == '.')
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ public static Iterable<Object[]> decimalFloatCodecData()
{"-0.9950", -995L, 3},
{"-25", -25L, 0},
{".6", 6L, 1},
{".600", 6L, 1},
{".6e0", 6L, 1},
{".6e2", 60L, 0},
{".06", 6L, 2},
Expand Down Expand Up @@ -92,6 +93,22 @@ public static Iterable<Object[]> decimalFloatCodecData()
{"0.00000001", 1, 8},
{"6456.123456789", 6456123456789L, 9},
{"6456.000000001", 6456000000001L, 9},

{"0", 0L, 0},
{"00", 0L, 0},
{"0.", 0L, 0},
{".0", 0L, 0},
{"0.0", 0L, 0},
{"00.00", 0L, 0},
{"0e0", 0L, 0},
{"00e00", 0L, 0},
{"00.00e00", 0L, 0},

{"1.0e0", 1L, 0},
{"1.0e10", 10_000_000_000L, 0},
{"1.0e+00010", 10_000_000_000L, 0},
{"1.0e-10", 1L, 10},
{"1.0e-100", 1L, 100},
});
}

Expand Down
Loading

0 comments on commit 432f85a

Please sign in to comment.