fix float parsing

Fractional numbers written in scientific notation were parsed incorrectly when their non-zero exponent ended in zeros, e.g. 1.2e10, because the exponent's trailing zeros were stripped as if they were trailing zeros of the fraction. Also tightened up scientific notation validation a bit.
real-logic · May 30, 2024 · 432f85a · 432f85a
1 parent beccae2
commit 432f85a
Show file tree

Hide file tree

Showing 4 changed files with 158 additions and 88 deletions.
diff --git a/artio-codecs/src/main/java/uk/co/real_logic/artio/fields/DecimalFloat.java b/artio-codecs/src/main/java/uk/co/real_logic/artio/fields/DecimalFloat.java
@@ -81,7 +81,7 @@ public DecimalFloat(final long value, final int scale)
     }
 
     /**
-     * Resets the encoder to the NAN value. This can checked using the {@link #isNaNValue()} method.
+     * Resets the encoder to the NAN value. This can be checked using the {@link #isNaNValue()} method.
      */
     public void reset()
     {

diff --git a/artio-codecs/src/main/java/uk/co/real_logic/artio/util/float_parsing/DecimalFloatParser.java b/artio-codecs/src/main/java/uk/co/real_logic/artio/util/float_parsing/DecimalFloatParser.java
@@ -20,27 +20,28 @@ public static <Data> DecimalFloat extract(
         final int offset,
         final int length)
     {
-        // Throw away trailing spaces or zeros
+        // Throw away trailing spaces
         int workingOffset = offset;
         int end = workingOffset + length;
         for (int index = end - 1; charReader.isSpace(data, index) && index > workingOffset; index--)
         {
             end--;
         }
 
-        int endDiff = 0;
-        for (int index = end - 1; charReader.isZero(data, index) && index > workingOffset; index--)
-        {
-            endDiff++;
-        }
+        int endOfSignificand = findEndOfSignificand(charReader, workingOffset, end, data);
+        final int startOfExponent = endOfSignificand + 1;
 
-        if (isFloatingPoint(charReader, workingOffset, end, endDiff, data))
+        if (isFloatingPoint(charReader, workingOffset, endOfSignificand, data))
         {
-            end -= endDiff;
+            // Throw away trailing zeros
+            for (int index = endOfSignificand - 1; charReader.isZero(data, index) && index > workingOffset; index--)
+            {
+                endOfSignificand--;
+            }
         }
 
         // Throw away leading spaces
-        for (int index = workingOffset; charReader.isSpace(data, index) && index < end; index++)
+        for (int index = workingOffset; index < endOfSignificand && charReader.isSpace(data, index); index++)
         {
             workingOffset++;
         }
@@ -53,78 +54,117 @@ public static <Data> DecimalFloat extract(
         }
 
         // Throw away leading zeros
-        for (int index = workingOffset; index < end && charReader.isZero(data, index); index++)
+        for (int index = workingOffset; index < endOfSignificand && charReader.isZero(data, index); index++)
         {
             workingOffset++;
         }
 
         int workingScale = 0;
         long value = 0;
-        int base10exponent = 0;
-        boolean isScientificNotation = false;
-        short scaleDecrementValue = 0;
-        short scientificExponentMultiplier = -1;
-        for (int index = workingOffset; index < end; index++)
+        for (int index = workingOffset; index < endOfSignificand; index++)
         {
             final char charValue = charReader.charAt(data, index);
             if (charValue == DOT)
             {
                 // number of digits after the dot
-                workingScale = end - (index + 1);
-                scaleDecrementValue = 1;
-            }
-            else if (charValue == LOWER_CASE_E || charValue == UPPER_CASE_E)
-            {
-                isScientificNotation = true;
-
-                workingScale -= scaleDecrementValue;
-            }
-            else if (isScientificNotation && charValue == PLUS)
-            {
-                workingScale -= scaleDecrementValue;
-            }
-            else if (isScientificNotation && charValue == MINUS)
-            {
-                workingScale -= scaleDecrementValue;
-                scientificExponentMultiplier = 1;
+                workingScale = endOfSignificand - (index + 1);
             }
             else
             {
                 final int digit = charReader.getDigit(data, index, charValue);
-                if (isScientificNotation)
-                {
-                    base10exponent = base10exponent * 10 + digit;
-                    workingScale -= scaleDecrementValue;
-                }
-                else
+                value = value * 10 + digit;
+                if (value < 0)
                 {
-                    value = value * 10 + digit;
-                    if (value < 0)
-                    {
-                        throw new ArithmeticException(
-                                "Out of range: when parsing " + charReader.asString(data, offset, length));
-                    }
+                    throw new ArithmeticException(
+                        "Out of range: when parsing " + charReader.asString(data, offset, length));
                 }
             }
         }
 
-        final int scale = workingScale + (scientificExponentMultiplier * base10exponent);
+        int exponent = 0;
+        final int exponentLength = end - startOfExponent;
+        if (exponentLength > 0)
+        {
+            // scientific notation
+            exponent = parseExponent(charReader, data, offset, length, startOfExponent, end);
+        }
+        else if (exponentLength == 0)
+        {
+            throw new NumberFormatException(charReader.asString(data, offset, length).toString());
+        }
+
+        final int scale = workingScale - exponent;
         final long signedValue = negative ? -1 * value : value;
         return number.set(
-                (scale >= 0) ? signedValue : signedValue * pow10(-scale),
-                Math.max(scale, 0)
+            (scale >= 0) ? signedValue : signedValue * pow10(-scale),
+            Math.max(scale, 0)
         );
     }
 
+    private static <Data> int parseExponent(
+        final CharReader<Data> charReader,
+        final Data data,
+        final int offset,
+        final int length,
+        final int startOfExponent,
+        final int end)
+    {
+        int exponent = 0;
+        boolean negative = false;
+        int position = startOfExponent;
+
+        final char firstChar = charReader.charAt(data, position);
+        if (firstChar == MINUS)
+        {
+            position++;
+            negative = true;
+        }
+        else if (firstChar == PLUS)
+        {
+            position++;
+        }
+
+        while (position < end)
+        {
+            final char charValue = charReader.charAt(data, position);
+            final int digit = charReader.getDigit(data, position, charValue);
+            position++;
+            exponent = exponent * 10 + digit;
+            if (exponent > 1000) // overflow and arbitrary limit check
+            {
+                throw new NumberFormatException(charReader.asString(data, offset, length).toString());
+            }
+        }
+
+        return negative ? -exponent : exponent;
+    }
+
+    private static <Data> int findEndOfSignificand(
+        final CharReader<Data> dataExtractor,
+        final int offset,
+        final int end,
+        final Data data
+    )
+    {
+        for (int index = end - 1; index > offset; index--)
+        {
+            final char charValue = dataExtractor.charAt(data, index);
+            if (charValue == LOWER_CASE_E || charValue == UPPER_CASE_E)
+            {
+                return index;
+            }
+        }
+        return end;
+    }
+
     private static <Data> boolean isFloatingPoint(
         final CharReader<Data> dataExtractor,
         final int offset,
         final int end,
-        final int endDiff,
         final Data data
     )
     {
-        for (int index = end - endDiff - 1; index > offset; index--)
+        for (int index = end - 1; index >= offset; index--)
         {
             if (dataExtractor.charAt(data, index) == '.')
             {

diff --git a/artio-codecs/src/test/java/uk/co/real_logic/artio/fields/DecimalFloatDecodingTest.java b/artio-codecs/src/test/java/uk/co/real_logic/artio/fields/DecimalFloatDecodingTest.java
@@ -62,6 +62,7 @@ public static Iterable<Object[]> decimalFloatCodecData()
             {"-0.9950", -995L, 3},
             {"-25", -25L, 0},
             {".6", 6L, 1},
+            {".600", 6L, 1},
             {".6e0", 6L, 1},
             {".6e2", 60L, 0},
             {".06", 6L, 2},
@@ -92,6 +93,22 @@ public static Iterable<Object[]> decimalFloatCodecData()
             {"0.00000001", 1, 8},
             {"6456.123456789", 6456123456789L, 9},
             {"6456.000000001", 6456000000001L, 9},
+
+            {"0", 0L, 0},
+            {"00", 0L, 0},
+            {"0.", 0L, 0},
+            {".0", 0L, 0},
+            {"0.0", 0L, 0},
+            {"00.00", 0L, 0},
+            {"0e0", 0L, 0},
+            {"00e00", 0L, 0},
+            {"00.00e00", 0L, 0},
+
+            {"1.0e0", 1L, 0},
+            {"1.0e10", 10_000_000_000L, 0},
+            {"1.0e+00010", 10_000_000_000L, 0},
+            {"1.0e-10", 1L, 10},
+            {"1.0e-100", 1L, 100},
         });
     }
-Original file line number
+Diff line change
@@ Expand Up / @@ -81,7 +81,7 @@ public DecimalFloat(final long value, final int scale) @@
         }
         /**
-         * Resets the encoder to the NAN value. This can checked using the {@link #isNaNValue()} method.
+         * Resets the encoder to the NAN value. This can be checked using the {@link #isNaNValue()} method.
          */
         public void reset()
         {
@@ Expand Down @@