fastfloat · lemire · Nov 21, 2024 · Nov 20, 2024 · Nov 20, 2024 · Nov 20, 2024
diff --git a/README.md b/README.md
diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h
@@ -283,19 +283,18 @@ template <typename UC>
 fastfloat_really_inline FASTFLOAT_CONSTEXPR20 parsed_number_string_t<UC>
 parse_number_string(UC const *p, UC const *pend,
                     parse_options_t<UC> options) noexcept {
-  chars_format const fmt = options.format;
+  chars_format const fmt = detail::adjust_for_feature_macros(options.format);
   UC const decimal_point = options.decimal_point;
 
   parsed_number_string_t<UC> answer;
   answer.valid = false;
   answer.too_many_digits = false;
+  // assume p < pend, so dereference without checks;
   answer.negative = (*p == UC('-'));
-#ifdef FASTFLOAT_ALLOWS_LEADING_PLUS // disabled by default
+  // C++17 20.19.3.(7.1) forbids a leading '+'; accepted only if opted in
   if ((*p == UC('-')) ||
-      (!uint64_t(fmt & detail::basic_json_fmt) && *p == UC('+'))) {
-#else
-  if (*p == UC('-')) { // C++17 20.19.3.(7.1) explicitly forbids '+' sign here
-#endif
+      (uint64_t(fmt & chars_format::allow_leading_plus) &&
+       !uint64_t(fmt & detail::basic_json_fmt) && *p == UC('+'))) {
     ++p;
     if (p == pend) {
       return report_parse_error<UC>(
@@ -473,7 +472,11 @@ parse_number_string(UC const *p, UC const *pend,
 
 template <typename T, typename UC>
 fastfloat_really_inline FASTFLOAT_CONSTEXPR20 from_chars_result_t<UC>
-parse_int_string(UC const *p, UC const *pend, T &value, int base) {
+parse_int_string(UC const *p, UC const *pend, T &value,
+                 parse_options_t<UC> options) {
+  chars_format const fmt = detail::adjust_for_feature_macros(options.format);
+  int const base = options.base;
+
   from_chars_result_t<UC> answer;
 
   UC const *const first = p;
@@ -484,11 +487,8 @@ parse_int_string(UC const *p, UC const *pend, T &value, int base) {
     answer.ptr = first;
     return answer;
   }
-#ifdef FASTFLOAT_ALLOWS_LEADING_PLUS // disabled by default
-  if ((*p == UC('-')) || (*p == UC('+'))) {
-#else
-  if (*p == UC('-')) {
-#endif
+  if ((*p == UC('-')) ||
+      (uint64_t(fmt & chars_format::allow_leading_plus) && (*p == UC('+')))) {
     ++p;
   }
 

diff --git a/include/fast_float/fast_float.h b/include/fast_float/fast_float.h
@@ -38,11 +38,13 @@ from_chars(UC const *first, UC const *last, T &value,
 
 /**
  * Like from_chars, but accepts an `options` argument to govern number parsing.
+ * Works for both floating-point types and integer types.
  */
 template <typename T, typename UC = char>
 FASTFLOAT_CONSTEXPR20 from_chars_result_t<UC>
 from_chars_advanced(UC const *first, UC const *last, T &value,
                     parse_options_t<UC> options) noexcept;
+
 /**
  * from_chars for integer types.
  */

diff --git a/include/fast_float/float_common.h b/include/fast_float/float_common.h
@@ -34,6 +34,8 @@ enum class chars_format : uint64_t {
   json_or_infnan = uint64_t(detail::basic_json_fmt) | fixed | scientific,
   fortran = uint64_t(detail::basic_fortran_fmt) | fixed | scientific,
   general = fixed | scientific,
+  allow_leading_plus = 1 << 7,
+  skip_white_space = 1 << 8,
 };
 
 template <typename UC> struct from_chars_result_t {
@@ -44,13 +46,15 @@ using from_chars_result = from_chars_result_t<char>;
 
 template <typename UC> struct parse_options_t {
   constexpr explicit parse_options_t(chars_format fmt = chars_format::general,
-                                     UC dot = UC('.'))
-      : format(fmt), decimal_point(dot) {}
+                                     UC dot = UC('.'), int b = 10)
+      : format(fmt), decimal_point(dot), base(b) {}
 
   /** Which number formats are accepted */
   chars_format format;
   /** The character used as decimal point */
   UC decimal_point;
+  /** The base used when parsing integers; must be in [2, 36] */
+  int base;
 };
 using parse_options = parse_options_t<char>;
 
@@ -218,12 +222,15 @@ fastfloat_really_inline constexpr bool is_supported_char_type() {
 // Compares two ASCII strings in a case insensitive manner.
 template <typename UC>
 inline FASTFLOAT_CONSTEXPR14 bool
-fastfloat_strncasecmp(UC const *input1, UC const *input2, size_t length) {
-  char running_diff{0};
+fastfloat_strncasecmp(UC const *actual_mixedcase, UC const *expected_lowercase,
+                      size_t length) {
   for (size_t i = 0; i < length; ++i) {
-    running_diff |= (char(input1[i]) ^ char(input2[i]));
+    UC const actual = actual_mixedcase[i];
+    if ((actual < 256 ? actual | 32 : actual) != expected_lowercase[i]) {
+      return false;
+    }
   }
-  return (running_diff == 0) || (running_diff == 32);
+  return true;
 }
 
 #ifndef FLT_EVAL_METHOD
@@ -674,7 +681,6 @@ to_float(bool negative, adjusted_mantissa am, T &value) {
 #endif
 }
 
-#ifdef FASTFLOAT_SKIP_WHITE_SPACE // disabled by default
 template <typename = void> struct space_lut {
   static constexpr bool value[] = {
       0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@@ -696,8 +702,12 @@ template <typename T> constexpr bool space_lut<T>::value[];
 
 #endif
 
-inline constexpr bool is_space(uint8_t c) { return space_lut<>::value[c]; }
-#endif
+// Returns true if space_lut marks c as a space. One-byte characters index
+// the table by their byte value. Wider characters must lie in 0..255, so a
+// negative wide value is rejected instead of being truncated onto a slot.
+template <typename UC> constexpr bool is_space(UC c) {
+  return (c & ~UC(0xFF)) == 0 && space_lut<>::value[uint8_t(c)];
+}
 
 template <typename UC> static constexpr uint64_t int_cmp_zeros() {
   static_assert((sizeof(UC) == 1) || (sizeof(UC) == 2) || (sizeof(UC) == 4),
@@ -839,6 +846,20 @@ operator^=(chars_format &lhs, chars_format rhs) noexcept {
   return lhs = (lhs ^ rhs);
 }
 
+namespace detail {
+// OR in the format flags implied by the deprecated FASTFLOAT_* feature macros
+constexpr chars_format adjust_for_feature_macros(chars_format fmt) {
+  return fmt
+#ifdef FASTFLOAT_ALLOWS_LEADING_PLUS
+         | chars_format::allow_leading_plus
+#endif
+#ifdef FASTFLOAT_SKIP_WHITE_SPACE
+         | chars_format::skip_white_space
+#endif
+      ;
+}
+} // namespace detail
+
 } // namespace fast_float
 
 #endif
diff --git a/include/fast_float/parse_number.h b/include/fast_float/parse_number.h
@@ -19,20 +19,18 @@ namespace detail {
  * strings a null-free and fixed.
  **/
 template <typename T, typename UC>
-from_chars_result_t<UC> FASTFLOAT_CONSTEXPR14 parse_infnan(UC const *first,
-                                                           UC const *last,
-                                                           T &value) noexcept {
+from_chars_result_t<UC>
+    FASTFLOAT_CONSTEXPR14 parse_infnan(UC const *first, UC const *last,
+                                       T &value, chars_format fmt) noexcept {
   from_chars_result_t<UC> answer{};
   answer.ptr = first;
   answer.ec = std::errc(); // be optimistic
   // assume first < last, so dereference without checks;
   bool const minusSign = (*first == UC('-'));
-#ifdef FASTFLOAT_ALLOWS_LEADING_PLUS // disabled by default
-  if ((*first == UC('-')) || (*first == UC('+'))) {
-#else
   // C++17 20.19.3.(7.1) explicitly forbids '+' sign here
-  if (*first == UC('-')) {
-#endif
+  if ((*first == UC('-')) ||
+      (uint64_t(fmt & chars_format::allow_leading_plus) &&
+       (*first == UC('+')))) {
     ++first;
   }
   if (last - first >= 3) {
@@ -284,22 +282,22 @@ from_chars_advanced(parsed_number_string_t<UC> &pns, T &value) noexcept {
 
 template <typename T, typename UC>
 FASTFLOAT_CONSTEXPR20 from_chars_result_t<UC>
-from_chars_advanced(UC const *first, UC const *last, T &value,
-                    parse_options_t<UC> options) noexcept {
+from_chars_float_advanced(UC const *first, UC const *last, T &value,
+                          parse_options_t<UC> options) noexcept {
 
   static_assert(is_supported_float_type<T>(),
                 "only some floating-point types are supported");
   static_assert(is_supported_char_type<UC>(),
                 "only char, wchar_t, char16_t and char32_t are supported");
 
-  chars_format const fmt = options.format;
+  chars_format const fmt = detail::adjust_for_feature_macros(options.format);
 
   from_chars_result_t<UC> answer;
-#ifdef FASTFLOAT_SKIP_WHITE_SPACE // disabled by default
-  while ((first != last) && fast_float::is_space(uint8_t(*first))) {
-    first++;
+  if (uint64_t(fmt & chars_format::skip_white_space)) {
+    while ((first != last) && fast_float::is_space(*first)) {
+      first++;
+    }
   }
-#endif
   if (first == last) {
     answer.ec = std::errc::invalid_argument;
     answer.ptr = first;
@@ -313,7 +311,7 @@ from_chars_advanced(UC const *first, UC const *last, T &value,
       answer.ptr = first;
       return answer;
     } else {
-      return detail::parse_infnan(first, last, value);
+      return detail::parse_infnan(first, last, value, fmt);
     }
   }
 
@@ -324,21 +322,67 @@ from_chars_advanced(UC const *first, UC const *last, T &value,
 template <typename T, typename UC, typename>
 FASTFLOAT_CONSTEXPR20 from_chars_result_t<UC>
 from_chars(UC const *first, UC const *last, T &value, int base) noexcept {
+
+  static_assert(std::is_integral<T>::value, "only integer types are supported");
+  static_assert(is_supported_char_type<UC>(),
+                "only char, wchar_t, char16_t and char32_t are supported");
+
+  parse_options_t<UC> options;
+  options.base = base;
+  return from_chars_advanced(first, last, value, options);
+}
+
+template <typename T, typename UC>
+FASTFLOAT_CONSTEXPR20 from_chars_result_t<UC>
+from_chars_int_advanced(UC const *first, UC const *last, T &value,
+                        parse_options_t<UC> options) noexcept {
+
+  static_assert(std::is_integral<T>::value, "only integer types are supported");
   static_assert(is_supported_char_type<UC>(),
                 "only char, wchar_t, char16_t and char32_t are supported");
 
+  chars_format const fmt = detail::adjust_for_feature_macros(options.format);
+  int const base = options.base;
+
   from_chars_result_t<UC> answer;
-#ifdef FASTFLOAT_SKIP_WHITE_SPACE // disabled by default
-  while ((first != last) && fast_float::is_space(uint8_t(*first))) {
-    first++;
+  if (uint64_t(fmt & chars_format::skip_white_space)) {
+    while ((first != last) && fast_float::is_space(*first)) {
+      first++;
+    }
   }
-#endif
   if (first == last || base < 2 || base > 36) {
     answer.ec = std::errc::invalid_argument;
     answer.ptr = first;
     return answer;
   }
-  return parse_int_string(first, last, value, base);
+
+  return parse_int_string(first, last, value, options);
+}
+
+template <bool> struct from_chars_advanced_caller {
+  template <typename T, typename UC>
+  FASTFLOAT_CONSTEXPR20 static from_chars_result_t<UC>
+  call(UC const *first, UC const *last, T &value,
+       parse_options_t<UC> options) noexcept {
+    return from_chars_float_advanced(first, last, value, options);
+  }
+};
+
+template <> struct from_chars_advanced_caller<false> {
+  template <typename T, typename UC>
+  FASTFLOAT_CONSTEXPR20 static from_chars_result_t<UC>
+  call(UC const *first, UC const *last, T &value,
+       parse_options_t<UC> options) noexcept {
+    return from_chars_int_advanced(first, last, value, options);
+  }
+};
+
+template <typename T, typename UC>
+FASTFLOAT_CONSTEXPR20 from_chars_result_t<UC>
+from_chars_advanced(UC const *first, UC const *last, T &value,
+                    parse_options_t<UC> options) noexcept {
+  return from_chars_advanced_caller<is_supported_float_type<T>()>::call(
+      first, last, value, options);
 }
 
 } // namespace fast_float

diff --git a/tests/BUILD.bazel b/tests/BUILD.bazel
@@ -88,6 +88,15 @@ cc_test(
     ],
 )
 
+cc_test(
+    name = "wide_char_test",
+    srcs = ["wide_char_test.cpp"],
+    deps = [
+        "//:fast_float",
+        "@doctest//doctest",
+    ],
+)
+
 cc_test(
     name = "string_test",
     srcs = ["string_test.cpp"],

diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
@@ -67,6 +67,7 @@ function(fast_float_add_cpp_test TEST_NAME)
 endfunction(fast_float_add_cpp_test)
 
 fast_float_add_cpp_test(rcppfastfloat_test)
+fast_float_add_cpp_test(wide_char_test)
 fast_float_add_cpp_test(example_test)
 fast_float_add_cpp_test(example_comma_test)
 fast_float_add_cpp_test(basictest)

diff --git a/tests/fortran.cpp b/tests/fortran.cpp
@@ -4,15 +4,14 @@
 #include <cstdlib>
 #include <iostream>
 #include <vector>
-
-#define FASTFLOAT_ALLOWS_LEADING_PLUS
-
 #include "fast_float/fast_float.h"
 
 int main_readme() {
   const std::string input = "1d+4";
   double result;
-  fast_float::parse_options options{fast_float::chars_format::fortran};
+  fast_float::parse_options options{
+      fast_float::chars_format::fortran |
+      fast_float::chars_format::allow_leading_plus};
   auto answer = fast_float::from_chars_advanced(
       input.data(), input.data() + input.size(), result, options);
   if ((answer.ec != std::errc()) || ((result != 10000))) {
@@ -32,7 +31,9 @@ int main() {
                                       "1d-1", "1d-2", "1d-3", "1d-4"};
   const std::vector<std::string> fmt3{"+1+4", "+1+3", "+1+2", "+1+1", "+1+0",
                                       "+1-1", "+1-2", "+1-3", "+1-4"};
-  const fast_float::parse_options options{fast_float::chars_format::fortran};
+  const fast_float::parse_options options{
+      fast_float::chars_format::fortran |
+      fast_float::chars_format::allow_leading_plus};
 
   for (auto const &f : fmt1) {
     auto d{std::distance(&fmt1[0], &f)};