From 6b5811ca127afb42954a51844333e0d7a999f34e Mon Sep 17 00:00:00 2001 From: Alex Huszagh Date: Sat, 14 Sep 2024 10:38:33 -0500 Subject: [PATCH 01/14] Add a benchmark for writing special numbers. --- lexical-benchmark/write-float/Cargo.toml | 5 +++ lexical-benchmark/write-float/special.rs | 47 ++++++++++++++++++++++++ 2 files changed, 52 insertions(+) create mode 100644 lexical-benchmark/write-float/special.rs diff --git a/lexical-benchmark/write-float/Cargo.toml b/lexical-benchmark/write-float/Cargo.toml index 1543c3c0..8cf01416 100644 --- a/lexical-benchmark/write-float/Cargo.toml +++ b/lexical-benchmark/write-float/Cargo.toml @@ -44,3 +44,8 @@ harness = false name = "random" path = "random.rs" harness = false + +[[bench]] +name = "special" +path = "special.rs" +harness = false diff --git a/lexical-benchmark/write-float/special.rs b/lexical-benchmark/write-float/special.rs new file mode 100644 index 00000000..d896162a --- /dev/null +++ b/lexical-benchmark/write-float/special.rs @@ -0,0 +1,47 @@ +#[macro_use] +mod input; + +use core::mem; +use core::time::Duration; + +use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use lexical_write_float::ToLexical; + +// Default random data size. +const COUNT: usize = 1000; + +// BENCHES + +macro_rules! gen_vec { + ($exp_mask:expr, $i:ident, $f:ident) => {{ + let mut vec: Vec<$f> = Vec::with_capacity(COUNT); + for _ in 0..COUNT { + let value = fastrand::$i($exp_mask..); + // NOTE: We want mem::transmute, not from_bits because we + // don't want the special handling of from_bits + vec.push(unsafe { mem::transmute::<$i, $f>(value) }); + } + vec + }}; +} + +macro_rules! bench { + ($fn:ident, $name:literal) => { + fn $fn(criterion: &mut Criterion) { + let mut group = criterion.benchmark_group($name); + group.measurement_time(Duration::from_secs(5)); + let exp32_mask: u32 = 0x7F800000; + let exp64_mask: u64 = 0x7FF0000000000000; + + let f32_data = gen_vec!(exp32_mask, u32, f32); + let f64_data = gen_vec!(exp64_mask, u64, f64); + + write_float_generator!(group, "f32", f32_data.iter(), format32); + write_float_generator!(group, "f64", f64_data.iter(), format64); + } + }; +} + +bench!(random_special, "random:special"); +criterion_group!(special_benches, random_special); +criterion_main!(special_benches); From 4fe330aff3955d06c855172159b6e11c9093aa6e Mon Sep 17 00:00:00 2001 From: Alex Huszagh Date: Sat, 14 Sep 2024 10:54:47 -0500 Subject: [PATCH 02/14] Make writing special floats have no unsafety. --- CHANGELOG | 1 + lexical-write-float/src/write.rs | 75 +++++++++++++++++--------------- 2 files changed, 41 insertions(+), 35 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 24ab8422..b3b52dd8 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -32,6 +32,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Improved performance of integer and float parsing, particularly with small integers. - Removed almost all unsafety in `lexical-util` and clearly documented the preconditions to use safely. - Removed almost all unsafety in `lexical-write-integer` and clearly documented the preconditions to use safely. +- Writing special numbers even with invalid float formats is now always memory safe. ### Removed diff --git a/lexical-write-float/src/write.rs b/lexical-write-float/src/write.rs index e5b63c68..ff8a81fe 100644 --- a/lexical-write-float/src/write.rs +++ b/lexical-write-float/src/write.rs @@ -4,19 +4,17 @@ #[cfg(feature = "f16")] use lexical_util::bf16::bf16; -use lexical_util::constants::FormattedSize; +use lexical_util::{algorithm::copy_to_dst, constants::FormattedSize}; #[cfg(feature = "f16")] use lexical_util::f16::f16; use lexical_util::format::NumberFormat; use lexical_write_integer::write::WriteInteger; +/// Select the back-end. #[cfg(not(feature = "compact"))] use crate::algorithm::write_float as write_float_decimal; #[cfg(feature = "power-of-two")] use crate::binary; -/// Select the back-end. -#[cfg(feature = "compact")] -use crate::compact::write_float as write_float_decimal; use crate::float::RawFloat; #[cfg(feature = "power-of-two")] use crate::hex; @@ -24,6 +22,39 @@ use crate::options::Options; #[cfg(feature = "radix")] use crate::radix; +/// Write an special string to the buffer. +fn write_special(bytes: &mut [u8], special: Option<&[u8]>, error: &'static str) -> usize { + // The NaN string must be <= 50 characters, so this should never panic. + if let Some(special_str) = special { + debug_assert!(special_str.len() <= 50, "special_str.len() must be <= 50"); + copy_to_dst(bytes, special_str) + } else { + // PANIC: the format does not support serializing that special. + panic!("{}", error); + } +} + + +/// Write an NaN string to the buffer. +#[inline(always)] +fn write_nan(bytes: &mut[u8], options: &Options, count: usize) -> usize { + count + write_special( + bytes, + options.nan_string(), + "NaN explicitly disabled but asked to write NaN as string." + ) +} + +/// Write an Inf string to the buffer. +#[inline(always)] +fn write_inf(bytes: &mut[u8], options: &Options, count: usize) -> usize { + count + write_special( + bytes, + options.inf_string(), + "Inf explicitly disabled but asked to write Inf as string." + ) +} + /// Write float trait. pub trait WriteFloat: RawFloat { /// Forward write integer parameters to an unoptimized backend. @@ -54,6 +85,8 @@ pub trait WriteFloat: RawFloat { where Self::Unsigned: FormattedSize + WriteInteger, { + // TODO: Make safe + // Validate our format options. let format = NumberFormat:: {}; assert!(format.is_valid()); @@ -122,38 +155,9 @@ pub trait WriteFloat: RawFloat { count + unsafe { write_float_decimal::<_, FORMAT>(float, bytes, options) } } } else if self.is_nan() { - // SAFETY: safe if the buffer is longer than the NaN string. - // The NaN string must be <= 50 characters, so safe as long as - // the options were build using safe methods. - if let Some(nan_string) = options.nan_string() { - let length = nan_string.len(); - unsafe { - let src = nan_string.as_ptr(); - let dst = &mut index_unchecked_mut!(bytes[..length]); - copy_nonoverlapping_unchecked!(dst, src, length); - } - count + length - } else { - // PANIC: cannot serialize NaN. - panic!("NaN explicitly disabled but asked to write NaN as string."); - } + write_nan(bytes, options, count) } else { - // is_inf - // SAFETY: safe if the buffer is longer than the Inf string. - // The Inf string must be <= 50 characters, so safe as long as - // the options were build using safe methods. - if let Some(inf_string) = options.inf_string() { - let length = inf_string.len(); - unsafe { - let src = inf_string.as_ptr(); - let dst = &mut index_unchecked_mut!(bytes[..length]); - copy_nonoverlapping_unchecked!(dst, src, length); - } - count + length - } else { - // PANIC: cannot serialize inf. - panic!("Inf explicitly disabled but asked to write Inf as string."); - } + write_inf(bytes, options, count) } } } @@ -174,6 +178,7 @@ macro_rules! write_float_as_f32 { unsafe fn write_float(self, bytes: &mut [u8], options: &Options) -> usize { // SAFETY: safe if `bytes` is large enough to hold the written bytes. + // TODO: Make safe unsafe { self.as_f32().write_float::(bytes, options) } } } From 1a76b3235f593ec4160e112f202e6fc60f18cf63 Mon Sep 17 00:00:00 2001 From: Alex Huszagh Date: Sat, 14 Sep 2024 11:38:10 -0500 Subject: [PATCH 03/14] Add better handling of -0 floats. --- lexical-util/src/num.rs | 9 +++++++++ lexical-write-float/src/api.rs | 30 ++++++------------------------ lexical-write-float/src/lib.rs | 9 +++++++++ lexical-write-float/src/shared.rs | 12 +++++------- lexical-write-float/src/write.rs | 31 ++++++++++++++++++++++--------- 5 files changed, 51 insertions(+), 40 deletions(-) diff --git a/lexical-util/src/num.rs b/lexical-util/src/num.rs index 5f32d659..522ba0b6 100644 --- a/lexical-util/src/num.rs +++ b/lexical-util/src/num.rs @@ -713,6 +713,15 @@ pub trait Float: Number + ops::Neg { !self.is_odd() } + /// Returns true if the float needs a negative sign when serializing it. + /// + /// This is true if it's `-0.0` or it's below 0 and not NaN. But inf values + /// need the sign. + #[inline(always)] + fn needs_negative_sign(self) -> bool { + self.is_sign_negative() && !self.is_nan() + } + /// Get exponent component from the float. #[inline(always)] fn exponent(self) -> i32 { diff --git a/lexical-write-float/src/api.rs b/lexical-write-float/src/api.rs index d118a34b..d359cd9a 100644 --- a/lexical-write-float/src/api.rs +++ b/lexical-write-float/src/api.rs @@ -4,26 +4,14 @@ #[cfg(feature = "f16")] use lexical_util::bf16::bf16; -use lexical_util::constants::FormattedSize; #[cfg(feature = "f16")] use lexical_util::f16::f16; use lexical_util::format::STANDARD; -use lexical_util::options::WriteOptions; use lexical_util::{to_lexical, to_lexical_with_options}; use crate::options::Options; use crate::write::WriteFloat; -/// Check if a buffer is sufficiently large. -#[inline(always)] -fn check_buffer(len: usize, options: &Options) -> bool -where - T: FormattedSize, -{ - let size = Options::buffer_size::(options); - len >= size -} - // API const DEFAULT_OPTIONS: Options = Options::new(); @@ -36,13 +24,10 @@ macro_rules! float_to_lexical { fn to_lexical(self, bytes: &mut [u8]) -> &mut [u8] { - // TODO: Remove, move inside - assert!(check_buffer::(bytes.len(), &DEFAULT_OPTIONS)); + let len = self.write_float::<{ STANDARD }>(bytes, &DEFAULT_OPTIONS); // SAFETY: safe since `check_buffer::(bytes.len(), &options)` passes. - unsafe { - let len = self.write_float::<{ STANDARD }>(bytes, &DEFAULT_OPTIONS); - &mut index_unchecked_mut!(bytes[..len]) - } + // TODO: Make safe + unsafe { &mut index_unchecked_mut!(bytes[..len]) } } } @@ -55,13 +40,10 @@ macro_rules! float_to_lexical { options: &Self::Options, ) -> &'a mut [u8] { - // TODO: Remove, move inside - assert!(check_buffer::(bytes.len(), &options)); + let len = self.write_float::<{ FORMAT }>(bytes, &options); // SAFETY: safe since `check_buffer::(bytes.len(), &options)` passes. - unsafe { - let len = self.write_float::<{ FORMAT }>(bytes, &options); - &mut index_unchecked_mut!(bytes[..len]) - } + // TODO: Make safe + unsafe { &mut index_unchecked_mut!(bytes[..len]) } } } )*) diff --git a/lexical-write-float/src/lib.rs b/lexical-write-float/src/lib.rs index 3e2c95d9..0664c01a 100644 --- a/lexical-write-float/src/lib.rs +++ b/lexical-write-float/src/lib.rs @@ -58,6 +58,15 @@ //! The minimum, standard, required version is 1.63.0, for const generic //! support. Older versions of lexical support older Rust versions. //! +//! # Safety +//! +//! TODO: Validate this +//! The `unsafe` usage in the options does not actually have any actual safety +//! concerns unless the format is not validated: the float formatters assume +//! NaN and Infinite strings are <= 50 characters. Creating custom format +//! specification with longer special strings and not validating the : incorrectly using the API however can cause incorrect floating +//! point specifications due to conflicting requirements. +//! //! # Design //! //! - [Algorithm Approach](https://github.com/Alexhuszagh/rust-lexical/blob/main/lexical-write-float/docs/Algorithm.md) diff --git a/lexical-write-float/src/shared.rs b/lexical-write-float/src/shared.rs index eaedfbd8..0c5d60f7 100644 --- a/lexical-write-float/src/shared.rs +++ b/lexical-write-float/src/shared.rs @@ -175,6 +175,9 @@ pub unsafe fn write_exponent( /// Detect the notation to use for the float formatter and call the appropriate /// function. /// +/// The float must be positive. This doesn't affect the safety guarantees but all +/// algorithms assume a float >0 or that is not negative 0. +/// /// - `float` - The float to write to string. /// - `format` - The formatting specification for the float. /// - `sci_exp` - The scientific exponents describing the float. @@ -199,6 +202,8 @@ macro_rules! write_float { ) => {{ use lexical_util::format::NumberFormat; + debug_assert!($float.is_sign_positive()); + let format = NumberFormat::<{ $format }> {}; let min_exp = $options.negative_exponent_break().map_or(-5, |x| x.get()); let max_exp = $options.positive_exponent_break().map_or(9, |x| x.get()); @@ -213,13 +218,6 @@ macro_rules! write_float { // Write negative exponent without scientific notation. // SAFETY: safe as long as bytes is large enough to hold all the digits. unsafe { $write_negative::<$($generic,)? FORMAT>($bytes, $($args,)*) } - } else if $float.is_sign_negative() { - // handle this as a positive, just write a leading '-' and then add 1 to our count - // # Safety: This is always safe since our buffer is much larger than 1 byte. - unsafe { index_unchecked_mut!($bytes[0]) = b'-'; } - // # Safety: This is always safe since our buffer is much larger than 1 byte. - let bytes = unsafe { &mut index_unchecked_mut!($bytes[1..]) }; - unsafe { $write_positive::<$($generic,)? FORMAT>(bytes, $($args,)*) + 1 } } else { // Write positive exponent without scientific notation. // SAFETY: safe as long as bytes is large enough to hold all the digits. diff --git a/lexical-write-float/src/write.rs b/lexical-write-float/src/write.rs index ff8a81fe..6c2e7c97 100644 --- a/lexical-write-float/src/write.rs +++ b/lexical-write-float/src/write.rs @@ -8,11 +8,14 @@ use lexical_util::{algorithm::copy_to_dst, constants::FormattedSize}; #[cfg(feature = "f16")] use lexical_util::f16::f16; use lexical_util::format::NumberFormat; +use lexical_util::options::WriteOptions; use lexical_write_integer::write::WriteInteger; /// Select the back-end. #[cfg(not(feature = "compact"))] use crate::algorithm::write_float as write_float_decimal; +#[cfg(feature = "compact")] +use crate::compact::write_float as write_float_decimal; #[cfg(feature = "power-of-two")] use crate::binary; use crate::float::RawFloat; @@ -34,7 +37,6 @@ fn write_special(bytes: &mut [u8], special: Option<&[u8]>, error: &'static str) } } - /// Write an NaN string to the buffer. #[inline(always)] fn write_nan(bytes: &mut[u8], options: &Options, count: usize) -> usize { @@ -55,8 +57,18 @@ fn write_inf(bytes: &mut[u8], options: &Options, count: usize) -> usize { ) } +/// Check if a buffer is sufficiently large. +#[inline(always)] +fn check_buffer(len: usize, options: &Options) -> bool +where + T: FormattedSize, +{ + let size = Options::buffer_size::(options); + len >= size +} + /// Write float trait. -pub trait WriteFloat: RawFloat { +pub trait WriteFloat: RawFloat + FormattedSize { /// Forward write integer parameters to an unoptimized backend. /// /// # Safety @@ -81,13 +93,12 @@ pub trait WriteFloat: RawFloat { /// [`FORMATTED_SIZE`]: lexical_util::constants::FormattedSize::FORMATTED_SIZE /// [`FORMATTED_SIZE_DECIMAL`]: lexical_util::constants::FormattedSize::FORMATTED_SIZE_DECIMAL #[cfg_attr(not(feature = "compact"), inline(always))] - unsafe fn write_float(self, bytes: &mut [u8], options: &Options) -> usize + fn write_float(self, bytes: &mut [u8], options: &Options) -> usize where Self::Unsigned: FormattedSize + WriteInteger, { - // TODO: Make safe - // Validate our format options. + assert!(check_buffer::(bytes.len(), options)); let format = NumberFormat:: {}; assert!(format.is_valid()); // Avoid any false assumptions for 128-bit floats. @@ -103,7 +114,10 @@ pub trait WriteFloat: RawFloat { } } - let (float, count, bytes) = if self < Self::ZERO { + // TODO: This doesn't handle **negative** zero... + // Well, it does it inside which defeats the point... + // TODO: Make safe + let (float, count, bytes) = if self.needs_negative_sign() { // SAFETY: safe if `bytes.len() > 1`. unsafe { index_unchecked_mut!(bytes[0]) = b'-' }; (-self, 1, unsafe { &mut index_unchecked_mut!(bytes[1..]) }) @@ -175,11 +189,10 @@ macro_rules! write_float_as_f32 { ($($t:ty)*) => ($( impl WriteFloat for $t { #[inline(always)] - unsafe fn write_float(self, bytes: &mut [u8], options: &Options) -> usize + fn write_float(self, bytes: &mut [u8], options: &Options) -> usize { // SAFETY: safe if `bytes` is large enough to hold the written bytes. - // TODO: Make safe - unsafe { self.as_f32().write_float::(bytes, options) } + self.as_f32().write_float::(bytes, options) } } )*) From e919a4171edfc7ee19b9ba5d19dc3482fa571368 Mon Sep 17 00:00:00 2001 From: Alex Huszagh Date: Sat, 14 Sep 2024 12:14:18 -0500 Subject: [PATCH 04/14] Make the dragonbox algorithm safe. This converts the core dragonbox algorithm for the float decimal writers to be completely safe. This ensures effectively all non-trivial uses of unsafe that could cause unsoundness are removed for 99% of users. --- lexical-benchmark/write-float/special.rs | 1 + lexical-write-float/src/algorithm.rs | 84 ++++++++------------ lexical-write-float/src/lib.rs | 3 +- lexical-write-float/src/shared.rs | 4 +- lexical-write-float/src/write.rs | 46 +++++------ lexical-write-float/tests/algorithm_tests.rs | 4 +- 6 files changed, 64 insertions(+), 78 deletions(-) diff --git a/lexical-benchmark/write-float/special.rs b/lexical-benchmark/write-float/special.rs index d896162a..36fa15b9 100644 --- a/lexical-benchmark/write-float/special.rs +++ b/lexical-benchmark/write-float/special.rs @@ -19,6 +19,7 @@ macro_rules! gen_vec { let value = fastrand::$i($exp_mask..); // NOTE: We want mem::transmute, not from_bits because we // don't want the special handling of from_bits + #[allow(clippy::transmute_int_to_float)] vec.push(unsafe { mem::transmute::<$i, $f>(value) }); } vec diff --git a/lexical-write-float/src/algorithm.rs b/lexical-write-float/src/algorithm.rs index 4e72313a..58b44390 100644 --- a/lexical-write-float/src/algorithm.rs +++ b/lexical-write-float/src/algorithm.rs @@ -169,24 +169,22 @@ pub unsafe fn write_float_negative_exponent= 2); - // SAFETY: safe, if we have enough bytes to write the significant digits. - unsafe { - // We write 0 digits even over the decimal point, since we might have - // to round carry over. This is rare, but it could happen, and would - // require a shift after. The good news is: if we have a shift, we - // only need to move 1 digit. - let digits = &mut index_unchecked_mut!(bytes[..cursor]); - slice_fill_unchecked!(digits, b'0'); - } + // We write 0 digits even over the decimal point, since we might have + // to round carry over. This is rare, but it could happen, and would + // require a shift after. The good news is: if we have a shift, we + // only need to move 1 digit. + let digits: &mut [u8] = &mut bytes[..cursor]; + digits.fill(b'0'); // Write out our significant digits. // SAFETY: safe, if we have enough bytes to write the significant digits. - let digits = unsafe { &mut index_unchecked_mut!(bytes[cursor..]) }; + let digits = &mut bytes[cursor..]; let digit_count = unsafe { F::write_digits(digits, fp.mant) }; // Truncate and round the significant digits. // SAFETY: safe since `cursor > 0 && cursor < digits.len()`. debug_assert!(cursor > 0); + // TODO: make safe let (digit_count, carried) = unsafe { shared::truncate_and_round_decimal(digits, digit_count, options) }; @@ -227,9 +225,10 @@ pub unsafe fn write_float_negative_exponent(float: F, shorter: bool) -> Ex // DIGITS // ------ -// NOTE: -// Dragonbox has a heavily-branched, dubiously optimized algorithm using -// fast division, that leads to no practical performance benefits in my -// benchmarks, and the division algorithm is at best ~3% faster. It also -// tries to avoid writing digits extensively, but requires division -// operations for each step regardless, which means the **actual** overhead -// of said branching likely exceeds any benefits. The code is also -// impossible to maintain, and in my benchmarks is slower (by a small -// amount) for a 32-bit mantissa, and a **lot** slower for a 64-bit -// mantissa, where we need to trim trailing zeros. +// NOTE: Dragonbox has a heavily-branched, dubiously optimized algorithm using +// fast division, that leads to no practical performance benefits in my +// benchmarks, and the division algorithm is at best ~3% faster. It also tries +// to avoid writing digits extensively, but requires division operations for +// each step regardless, which means the **actual** overhead of said branching +// likely exceeds any benefits. The code is also impossible to maintain, and in +// my benchmarks is slower (by a small amount) for a 32-bit mantissa, and a +// **lot** slower for a 64-bit mantissa, where we need to trim trailing zeros. /// Write the significant digits, when the significant digits can fit in a -/// 32-bit integer. Returns the number of digits written. This assumes any -/// trailing zeros have been removed. +/// 32-bit integer. `log10(2**32-1) < 10`, so 10 digits is always enough. /// -/// # Safety -/// -/// Safe if `bytes.len() >= 10`, since `u32::MAX` is at most 10 digits. +/// Returns the number of digits written. This assumes any trailing zeros have +/// been removed. #[inline(always)] -pub unsafe fn write_digits_u32(bytes: &mut [u8], mantissa: u32) -> usize { +pub fn write_digits_u32(bytes: &mut [u8], mantissa: u32) -> usize { debug_assert!(bytes.len() >= 10); - unsafe { mantissa.write_mantissa::(bytes) } + mantissa.write_mantissa::(bytes) } /// Write the significant digits, when the significant digits cannot fit in a @@ -808,16 +803,12 @@ pub unsafe fn write_digits_u32(bytes: &mut [u8], mantissa: u32) -> usize { /// /// Returns the number of digits written. Note that this might not be the /// same as the number of digits in the mantissa, since trailing zeros will -/// be removed. -/// -/// # Safety -/// -/// Safe if `bytes.len() >= 20`, since `u64::MAX` is at most 20 digits. +/// be removed. `log10(2**64-1) < 20`, so 20 digits is always enough. #[inline(always)] #[allow(clippy::branches_sharing_code)] -pub unsafe fn write_digits_u64(bytes: &mut [u8], mantissa: u64) -> usize { +pub fn write_digits_u64(bytes: &mut [u8], mantissa: u64) -> usize { debug_assert!(bytes.len() >= 20); - unsafe { mantissa.write_mantissa::(bytes) } + mantissa.write_mantissa::(bytes) } // EXTENDED @@ -1201,12 +1192,9 @@ pub trait DragonboxFloat: Float { fn digit_count(mantissa: u64) -> usize; /// Write the significant digits to a buffer. - /// Does not handle rounding or truncated digits. /// - /// # Safety - /// - /// Safe if `bytes` is large enough to hold a decimal string for mantissa. - unsafe fn write_digits(bytes: &mut [u8], mantissa: u64) -> usize; + /// Does not handle rounding or truncated digits. + fn write_digits(bytes: &mut [u8], mantissa: u64) -> usize; /// Get the pre-computed Dragonbox power from the exponent. /// @@ -1268,10 +1256,9 @@ impl DragonboxFloat for f32 { } #[inline(always)] - unsafe fn write_digits(bytes: &mut [u8], mantissa: u64) -> usize { + fn write_digits(bytes: &mut [u8], mantissa: u64) -> usize { debug_assert!(mantissa <= u32::MAX as u64); - // SAFETY: safe is `bytes.len() >= 10`. - unsafe { write_digits_u32(bytes, mantissa as u32) } + write_digits_u32(bytes, mantissa as u32) } #[inline(always)] @@ -1379,9 +1366,8 @@ impl DragonboxFloat for f64 { } #[inline(always)] - unsafe fn write_digits(bytes: &mut [u8], mantissa: u64) -> usize { - // SAFETY: safe if `bytes.len() >= 20`. - unsafe { write_digits_u64(bytes, mantissa) } + fn write_digits(bytes: &mut [u8], mantissa: u64) -> usize { + write_digits_u64(bytes, mantissa) } #[inline(always)] @@ -1528,7 +1514,7 @@ macro_rules! dragonbox_unimpl { } #[inline(always)] - unsafe fn write_digits(_: &mut [u8], _: u64) -> usize { + fn write_digits(_: &mut [u8], _: u64) -> usize { unimplemented!() } diff --git a/lexical-write-float/src/lib.rs b/lexical-write-float/src/lib.rs index 0664c01a..e1f71059 100644 --- a/lexical-write-float/src/lib.rs +++ b/lexical-write-float/src/lib.rs @@ -64,7 +64,8 @@ //! The `unsafe` usage in the options does not actually have any actual safety //! concerns unless the format is not validated: the float formatters assume //! NaN and Infinite strings are <= 50 characters. Creating custom format -//! specification with longer special strings and not validating the : incorrectly using the API however can cause incorrect floating +//! specification with longer special strings and not validating the : +//! incorrectly using the API however can cause incorrect floating //! point specifications due to conflicting requirements. //! //! # Design diff --git a/lexical-write-float/src/shared.rs b/lexical-write-float/src/shared.rs index 0c5d60f7..a8d1e4a2 100644 --- a/lexical-write-float/src/shared.rs +++ b/lexical-write-float/src/shared.rs @@ -175,8 +175,8 @@ pub unsafe fn write_exponent( /// Detect the notation to use for the float formatter and call the appropriate /// function. /// -/// The float must be positive. This doesn't affect the safety guarantees but all -/// algorithms assume a float >0 or that is not negative 0. +/// The float must be positive. This doesn't affect the safety guarantees but +/// all algorithms assume a float >0 or that is not negative 0. /// /// - `float` - The float to write to string. /// - `format` - The formatting specification for the float. diff --git a/lexical-write-float/src/write.rs b/lexical-write-float/src/write.rs index 6c2e7c97..e18599ff 100644 --- a/lexical-write-float/src/write.rs +++ b/lexical-write-float/src/write.rs @@ -4,20 +4,20 @@ #[cfg(feature = "f16")] use lexical_util::bf16::bf16; -use lexical_util::{algorithm::copy_to_dst, constants::FormattedSize}; #[cfg(feature = "f16")] use lexical_util::f16::f16; use lexical_util::format::NumberFormat; use lexical_util::options::WriteOptions; +use lexical_util::{algorithm::copy_to_dst, constants::FormattedSize}; use lexical_write_integer::write::WriteInteger; /// Select the back-end. #[cfg(not(feature = "compact"))] use crate::algorithm::write_float as write_float_decimal; -#[cfg(feature = "compact")] -use crate::compact::write_float as write_float_decimal; #[cfg(feature = "power-of-two")] use crate::binary; +#[cfg(feature = "compact")] +use crate::compact::write_float as write_float_decimal; use crate::float::RawFloat; #[cfg(feature = "power-of-two")] use crate::hex; @@ -39,22 +39,24 @@ fn write_special(bytes: &mut [u8], special: Option<&[u8]>, error: &'static str) /// Write an NaN string to the buffer. #[inline(always)] -fn write_nan(bytes: &mut[u8], options: &Options, count: usize) -> usize { - count + write_special( - bytes, - options.nan_string(), - "NaN explicitly disabled but asked to write NaN as string." - ) +fn write_nan(bytes: &mut [u8], options: &Options, count: usize) -> usize { + count + + write_special( + bytes, + options.nan_string(), + "NaN explicitly disabled but asked to write NaN as string.", + ) } /// Write an Inf string to the buffer. #[inline(always)] -fn write_inf(bytes: &mut[u8], options: &Options, count: usize) -> usize { - count + write_special( - bytes, - options.inf_string(), - "Inf explicitly disabled but asked to write Inf as string." - ) +fn write_inf(bytes: &mut [u8], options: &Options, count: usize) -> usize { + count + + write_special( + bytes, + options.inf_string(), + "Inf explicitly disabled but asked to write Inf as string.", + ) } /// Check if a buffer is sufficiently large. @@ -106,6 +108,7 @@ pub trait WriteFloat: RawFloat + FormattedSize { #[cfg(feature = "power-of-two")] { + // FIXME: I believe this incorrectly handles a few cases. if format.radix() != format.exponent_base() { assert!(matches!( (format.radix(), format.exponent_base()), @@ -114,17 +117,12 @@ pub trait WriteFloat: RawFloat + FormattedSize { } } - // TODO: This doesn't handle **negative** zero... - // Well, it does it inside which defeats the point... - // TODO: Make safe let (float, count, bytes) = if self.needs_negative_sign() { - // SAFETY: safe if `bytes.len() > 1`. - unsafe { index_unchecked_mut!(bytes[0]) = b'-' }; - (-self, 1, unsafe { &mut index_unchecked_mut!(bytes[1..]) }) + bytes[0] = b'-'; + (-self, 1, &mut bytes[1..]) } else if cfg!(feature = "format") && format.required_mantissa_sign() { - // SAFETY: safe if `bytes.len() > 1`. - unsafe { index_unchecked_mut!(bytes[0]) = b'+' }; - (self, 1, unsafe { &mut index_unchecked_mut!(bytes[1..]) }) + bytes[0] = b'+'; + (self, 1, &mut bytes[1..]) } else { (self, 0, bytes) }; diff --git a/lexical-write-float/tests/algorithm_tests.rs b/lexical-write-float/tests/algorithm_tests.rs index 84df4a2d..f65c3e6a 100644 --- a/lexical-write-float/tests/algorithm_tests.rs +++ b/lexical-write-float/tests/algorithm_tests.rs @@ -220,7 +220,7 @@ fn compute_right_closed_directed_test() { } fn write_digits_f32(buffer: &mut [u8], value: u64, expected: &str) { - let count = unsafe { f32::write_digits(buffer, value) }; + let count = f32::write_digits(buffer, value); let actual = unsafe { std::str::from_utf8_unchecked(&buffer[..count]) }; assert_eq!(actual, expected); } @@ -237,7 +237,7 @@ fn write_digits_f32_test() { } fn write_digits_f64(buffer: &mut [u8], value: u64, expected: &str) { - let count = unsafe { f64::write_digits(buffer, value) }; + let count = f64::write_digits(buffer, value); let actual = unsafe { std::str::from_utf8_unchecked(&buffer[..count]) }; assert_eq!(actual, expected); } From ac23885ffb920ca83aa56eb7a1d17c1ea4013528 Mon Sep 17 00:00:00 2001 From: Alex Huszagh Date: Sat, 14 Sep 2024 13:08:05 -0500 Subject: [PATCH 05/14] Remove more unsafe code. --- lexical-write-float/src/algorithm.rs | 35 +++++++++++++++++++++------- lexical-write-float/src/shared.rs | 20 ++++++---------- 2 files changed, 34 insertions(+), 21 deletions(-) diff --git a/lexical-write-float/src/algorithm.rs b/lexical-write-float/src/algorithm.rs index 58b44390..11c285e9 100644 --- a/lexical-write-float/src/algorithm.rs +++ b/lexical-write-float/src/algorithm.rs @@ -65,6 +65,7 @@ pub unsafe fn write_float( // and write the significant digits without using an intermediate buffer // in most cases. + // TODO: This should be safe now write_float!( float, FORMAT, @@ -102,7 +103,7 @@ pub unsafe fn write_float_scientific( // for the decimal point without intermediate buffers. // SAFETY: safe, if we have enough bytes to write the significant digits. let digits = unsafe { &mut index_unchecked_mut!(bytes[1..]) }; - let digit_count = unsafe { F::write_digits(digits, fp.mant) }; + let digit_count = F::write_digits(digits, fp.mant); // Truncate and round the significant digits. // SAFETY: safe since `digit_count < digits.len()`. @@ -127,7 +128,7 @@ pub unsafe fn write_float_scientific( cursor = digit_count + 1; let zeros = exact_count - digit_count; unsafe { - slice_fill_unchecked!(index_unchecked_mut!(bytes[cursor..cursor + zeros]), b'0'); + index_unchecked_mut!(bytes[cursor..cursor + zeros]).fill(b'0'); } cursor += zeros; } else if digit_count == 1 { @@ -179,7 +180,7 @@ pub unsafe fn write_float_negative_exponent 0 && cursor < digits.len()`. @@ -437,6 +438,10 @@ pub fn compute_nearest_shorter(float: F) -> ExtendedFloat80 { // Compute xi and zi. // SAFETY: safe, since value must be finite and therefore in the correct range. + // `-324 <= exponent <= 308`, so `x * log10(2) - log10(4 / 3)` must be in + // `-98 <= x <= 93`, so the final value must be in [-93, 98] (for f64). We have + // precomputed powers for [-292, 326] for f64 (same logic applies for f32) so this + // is **ALWAYS** safe. let pow5 = unsafe { F::dragonbox_power(-minus_k) }; let mut xi = F::compute_left_endpoint(&pow5, beta); let mut zi = F::compute_right_endpoint(&pow5, beta); @@ -501,6 +506,10 @@ pub fn compute_nearest_normal(float: F) -> ExtendedFloat80 { // Compute k and beta. let minus_k = floor_log10_pow2(exponent) - F::KAPPA as i32; // SAFETY: safe, since value must be finite and therefore in the correct range. + // `-324 <= exponent <= 308`, so `x * log10(2)` must be in + // `-98 <= x <= 93`, so the final value must be in [-93, 98] (for f64). We have + // precomputed powers for [-292, 326] for f64 (same logic applies for f32) so this + // is **ALWAYS** safe. let pow5 = unsafe { F::dragonbox_power(-minus_k) }; let beta = exponent + floor_log2_pow10(-minus_k); @@ -632,6 +641,9 @@ pub fn compute_left_closed_directed(float: F) -> ExtendedFloat80 { // Compute k and beta. let minus_k = floor_log10_pow2(exponent) - F::KAPPA as i32; // SAFETY: safe, since value must be finite and therefore in the correct range. + // `-324 <= exponent <= 308`, so `x * log10(2)` must be in [-98, 93] (for f64). + // We have precomputed powers for [-292, 326] for f64 (same logic applies for f32) + // so this is **ALWAYS** safe. let pow5 = unsafe { F::dragonbox_power(-minus_k) }; let beta = exponent + floor_log2_pow10(-minus_k); @@ -711,14 +723,22 @@ pub fn compute_left_closed_directed(float: F) -> ExtendedFloat80 { /// Compute the interval I = (w−,w].. #[allow(clippy::comparison_chain, clippy::if_same_then_else)] pub fn compute_right_closed_directed(float: F, shorter: bool) -> ExtendedFloat80 { + // ensure our floats have a maximum exp in the range [-324, 308]. + assert!(F::BITS <= 64, "cannot guarantee safety invariants with 128-bit floats"); + let mantissa = float.mantissa().as_u64(); let exponent = float.exponent(); // Step 1: Schubfach multiplier calculation + // Exponent must be in the range [-324, 308] // Compute k and beta. let minus_k = floor_log10_pow2(exponent - shorter as i32) - F::KAPPA as i32; + assert!(F::KAPPA <= 2); // SAFETY: safe, since value must be finite and therefore in the correct range. - let pow5 = unsafe { F::dragonbox_power(-minus_k) }; + // `-324 <= exponent <= 308`, so `x * log10(2)` must be in [-100, 92] (for f64). + // We have precomputed powers for [-292, 326] for f64 (same logic applies for f32) + // so this is **ALWAYS** safe. + let pow5: ::Power = unsafe { F::dragonbox_power(-minus_k) }; let beta = exponent + floor_log2_pow10(-minus_k); // Compute zi and deltai. @@ -766,10 +786,9 @@ pub fn compute_right_closed_directed(float: F, shorter: bool) -> Ex significand *= 10; significand -= F::div_pow10(r) as u64; - // Ensure we haven't re-assigned exponent or minus_k, since this - // is a massive potential security vulnerability. - debug_assert!(float.exponent() == exponent); - debug_assert!(minus_k == floor_log10_pow2(exponent - shorter as i32) - F::KAPPA as i32); + // Ensure we haven't re-assigned exponent or minus_k. + assert!(float.exponent() == exponent); + debug_assert!(minus_k == floor_log10_pow2(float.exponent() - shorter as i32) - F::KAPPA as i32); extended_float(significand, minus_k + F::KAPPA as i32) } diff --git a/lexical-write-float/src/shared.rs b/lexical-write-float/src/shared.rs index a8d1e4a2..1aa0e1b8 100644 --- a/lexical-write-float/src/shared.rs +++ b/lexical-write-float/src/shared.rs @@ -20,25 +20,19 @@ pub fn min_exact_digits(digit_count: usize, options: &Options) -> usize { /// /// Round up the last digit, incrementally handling all subsequent /// digits in case of overflow. -/// -/// # Safety -/// -/// Safe as long as `count <= digits.len()`. #[cfg_attr(not(feature = "compact"), inline(always))] -pub unsafe fn round_up(digits: &mut [u8], count: usize, radix: u32) -> (usize, bool) { - debug_assert!(count <= digits.len()); +pub fn round_up(digits: &mut [u8], count: usize, radix: u32) -> (usize, bool) { + debug_assert!(count <= digits.len(), "rounding up requires digits.len() >= count"); let mut index = count; let max_char = digit_to_char_const(radix - 1, radix); while index != 0 { - // SAFETY: safe if `count <= digits.len()`, since then - // `index > 0 && index <= digits.len()`. - let c = unsafe { index_unchecked!(digits[index - 1]) }; + let c = digits[index - 1]; if c < max_char { // SAFETY: safe since `index > 0 && index <= digits.len()`. let digit = char_to_valid_digit_const(c, radix); let rounded = digit_to_char_const(digit + 1, radix); - unsafe { index_unchecked_mut!(digits[index - 1]) = rounded }; + digits[index - 1] = rounded; return (index, false); } // Don't have to assign b'0' otherwise, since we're just carrying @@ -47,8 +41,7 @@ pub unsafe fn round_up(digits: &mut [u8], count: usize, radix: u32) -> (usize, b } // Means all digits were max digit: we need to round up. - // SAFETY: safe since `digits.len() > 1`. - unsafe { index_unchecked_mut!(digits[0]) = b'1' }; + digits[0] = b'1'; (1, true) } @@ -93,6 +86,7 @@ pub unsafe fn truncate_and_round_decimal( // Get the last non-truncated digit, and the remaining ones. // SAFETY: safe if `digit_count < digits.len()`, since `max_digits < // digit_count`. + // TODO: Make safe let truncated = unsafe { index_unchecked!(digits[max_digits]) }; let (digits, carried) = if truncated < b'5' { // Just truncate, going to round-down anyway. @@ -167,7 +161,7 @@ pub unsafe fn write_exponent( *cursor += unsafe { index_unchecked_mut!(bytes[*cursor]) = exponent_character; *cursor += 1; - let positive_exp = write_exponent_sign::(bytes, cursor, exp); + let positive_exp: u32 = write_exponent_sign::(bytes, cursor, exp); positive_exp.write_exponent::(&mut index_unchecked_mut!(bytes[*cursor..])) }; } From 41fd62a3d468def52ccc85c56a9a0510bf73b654 Mon Sep 17 00:00:00 2001 From: Alex Huszagh Date: Sat, 14 Sep 2024 13:39:50 -0500 Subject: [PATCH 06/14] Migrate to much more safe code. This causes ~0.5-1% regression in performance but this is well within an acceptable range for better safety guarantees. Most of the bounds checking is ellided by the compiler. --- lexical-write-float/src/algorithm.rs | 19 ++++++------------- lexical-write-float/src/shared.rs | 19 ++++++------------- 2 files changed, 12 insertions(+), 26 deletions(-) diff --git a/lexical-write-float/src/algorithm.rs b/lexical-write-float/src/algorithm.rs index 11c285e9..0ad05aa1 100644 --- a/lexical-write-float/src/algorithm.rs +++ b/lexical-write-float/src/algorithm.rs @@ -102,13 +102,11 @@ pub unsafe fn write_float_scientific( // Write the significant digits. Write at index 1, so we can shift 1 // for the decimal point without intermediate buffers. // SAFETY: safe, if we have enough bytes to write the significant digits. - let digits = unsafe { &mut index_unchecked_mut!(bytes[1..]) }; + let digits = &mut bytes[1..]; let digit_count = F::write_digits(digits, fp.mant); // Truncate and round the significant digits. - // SAFETY: safe since `digit_count < digits.len()`. - let (digit_count, carried) = - unsafe { shared::truncate_and_round_decimal(digits, digit_count, options) }; + let (digit_count, carried) = shared::truncate_and_round_decimal(digits, digit_count, options); let sci_exp = sci_exp + carried as i32; // Determine the exact number of digits to write. @@ -183,11 +181,8 @@ pub unsafe fn write_float_negative_exponent 0 && cursor < digits.len()`. - debug_assert!(cursor > 0); - // TODO: make safe - let (digit_count, carried) = - unsafe { shared::truncate_and_round_decimal(digits, digit_count, options) }; + debug_assert!(cursor > 0, "underflowed our digits"); + let (digit_count, carried) = shared::truncate_and_round_decimal(digits, digit_count, options); // Handle any trailing digits. let mut trimmed = false; @@ -261,10 +256,8 @@ pub unsafe fn write_float_positive_exponent b'5' { // Round-up always. - // SAFETY: safe if `digit_count <= digits.len()`, because `max_digits < - // digit_count`. - unsafe { round_up(digits, max_digits, 10) } + round_up(digits, max_digits, 10) } else { // Have a near-halfway case, resolve it. - // SAFETY: safe if `digit_count < digits.len()`. - let (is_odd, is_above) = unsafe { - let to_round = &index_unchecked!(digits[max_digits - 1..digit_count]); - let is_odd = index_unchecked!(to_round[0]) % 2 == 1; - let is_above = index_unchecked!(to_round[2..]).iter().any(|&x| x != b'0'); - (is_odd, is_above) - }; + let to_round = &digits[max_digits - 1..digit_count]; + let is_odd = to_round[0] % 2 == 1; + let is_above = to_round[2..].iter().any(|&x| x != b'0'); if is_odd || is_above { // SAFETY: safe if `digit_count <= digits.len()`, because `max_digits < // digit_count`. - unsafe { round_up(digits, max_digits, 10) } + round_up(digits, max_digits, 10) } else { (max_digits, false) } From 4f6a67ab64fa497016fef95406926bf477576690 Mon Sep 17 00:00:00 2001 From: Alex Huszagh Date: Sat, 14 Sep 2024 14:12:43 -0500 Subject: [PATCH 07/14] Remove all remaining potential unsoundness in the decimal writer. This in general for simple cases due to slight refactoring improves performance, however, it does have a noticeable (+2-7%) penalty in some corner cases which invoke the close-to-halfway rounding cases. That said, performance of this implementation of the Dragonbox algorithm without any unsafety always outperforms Ryu, which means it's still the fastest implementation out there even with slight regressions in corner cases. --- lexical-write-float/src/algorithm.rs | 173 ++++++++----------- lexical-write-float/src/compact.rs | 3 +- lexical-write-float/src/shared.rs | 37 ++-- lexical-write-float/tests/algorithm_tests.rs | 18 +- 4 files changed, 92 insertions(+), 139 deletions(-) diff --git a/lexical-write-float/src/algorithm.rs b/lexical-write-float/src/algorithm.rs index 0ad05aa1..fc8a11e7 100644 --- a/lexical-write-float/src/algorithm.rs +++ b/lexical-write-float/src/algorithm.rs @@ -48,7 +48,7 @@ use crate::table::*; /// is large enough to hold the significant digits. // TODO: Migrate to safe #[inline(always)] -pub unsafe fn write_float( +pub fn write_float( float: F, bytes: &mut [u8], options: &Options, @@ -65,29 +65,27 @@ pub unsafe fn write_float( // and write the significant digits without using an intermediate buffer // in most cases. + // SAFETY: All the underlying methods are safe, this just needs to have other + // API compatibility and this will remove the unsafe block. // TODO: This should be safe now - write_float!( - float, - FORMAT, - sci_exp, - options, - write_float_scientific, - write_float_positive_exponent, - write_float_negative_exponent, - generic => F, - bytes => bytes, - args => fp, sci_exp, options, - ) + unsafe { + write_float!( + float, + FORMAT, + sci_exp, + options, + write_float_scientific, + write_float_positive_exponent, + write_float_negative_exponent, + generic => F, + bytes => bytes, + args => fp, sci_exp, options, + ) + } } /// Write float to string in scientific notation. -/// -/// # Safety -/// -/// Safe as long as `bytes` is large enough to hold the number of digits -/// and the scientific notation's exponent digits. -// TODO: Migrate to safe -pub unsafe fn write_float_scientific( +pub fn write_float_scientific( bytes: &mut [u8], fp: ExtendedFloat80, sci_exp: i32, @@ -113,46 +111,35 @@ pub unsafe fn write_float_scientific( let exact_count = shared::min_exact_digits(digit_count, options); // Write any trailing digits. - // SAFETY: safe, if we have enough bytes to write the significant digits. let mut cursor: usize; - unsafe { - index_unchecked_mut!(bytes[0] = bytes[1]); - index_unchecked_mut!(bytes[1]) = decimal_point; - - if !format.no_exponent_without_fraction() && digit_count == 1 && options.trim_floats() { - cursor = 1; - } else if digit_count < exact_count { - // Adjust the number of digits written, by appending zeros. - cursor = digit_count + 1; - let zeros = exact_count - digit_count; - unsafe { - index_unchecked_mut!(bytes[cursor..cursor + zeros]).fill(b'0'); - } - cursor += zeros; - } else if digit_count == 1 { - index_unchecked_mut!(bytes[2]) = b'0'; - cursor = 3; - } else { - cursor = digit_count + 1; - } + bytes[0] = bytes[1]; + bytes[1] = decimal_point; + if !format.no_exponent_without_fraction() && digit_count == 1 && options.trim_floats() { + cursor = 1; + } else if digit_count < exact_count { + // Adjust the number of digits written, by appending zeros. + cursor = digit_count + 1; + let zeros = exact_count - digit_count; + bytes[cursor..cursor + zeros].fill(b'0'); + cursor += zeros; + } else if digit_count == 1 { + bytes[2] = b'0'; + cursor = 3; + } else { + cursor = digit_count + 1; } // Now, write our scientific notation. // SAFETY: safe since bytes must be large enough to store all digits. - unsafe { shared::write_exponent::(bytes, &mut cursor, sci_exp, options.exponent()) }; + shared::write_exponent::(bytes, &mut cursor, sci_exp, options.exponent()); cursor } /// Write negative float to string without scientific notation. -/// Has a negative exponent (shift right) and no scientific notation. /// -/// # Safety -/// -/// Safe as long as `bytes` is large enough to hold the number of -/// significant digits and the leading zeros. -// TODO: Migrate to safe -pub unsafe fn write_float_negative_exponent( +/// Has a negative exponent (shift right) and no scientific notation. +pub fn write_float_negative_exponent( bytes: &mut [u8], fp: ExtendedFloat80, sci_exp: i32, @@ -167,12 +154,12 @@ pub unsafe fn write_float_negative_exponent= 2); + debug_assert!(cursor >= 2, "must have a buffer >= 2 to write our 0 digits"); // We write 0 digits even over the decimal point, since we might have // to round carry over. This is rare, but it could happen, and would // require a shift after. The good news is: if we have a shift, we // only need to move 1 digit. - let digits: &mut [u8] = &mut bytes[..cursor]; + let digits = &mut bytes[..cursor]; digits.fill(b'0'); // Write out our significant digits. @@ -189,28 +176,25 @@ pub unsafe fn write_float_negative_exponent= 3`. unsafe { - index_unchecked_mut!(bytes[0]) = b'1'; + bytes[0] = b'1'; if options.trim_floats() { cursor = 1; trimmed = true; } else { - index_unchecked_mut!(bytes[1]) = decimal_point; - index_unchecked_mut!(bytes[2]) = b'0'; + bytes[1] = decimal_point; + bytes[2] = b'0'; cursor = 3; } } } else if carried { // Carried, so we need to remove 1 zero before our digits. - // SAFETY: safe if `bytes.len() > cursor && cursor > 0`. unsafe { - index_unchecked_mut!(bytes[1]) = decimal_point; - index_unchecked_mut!(bytes[cursor - 1] = bytes[cursor]); + bytes[1] = decimal_point; + bytes[cursor - 1] = bytes[cursor]; } } else { - // SAFETY: safe if `bytes.len() >= 2`. - unsafe { index_unchecked_mut!(bytes[1]) = decimal_point }; + bytes[1] = decimal_point; cursor += digit_count; } @@ -221,11 +205,7 @@ pub unsafe fn write_float_negative_exponent( +/// Has a positive exponent (shift left) and no scientific notation. +pub fn write_float_positive_exponent( bytes: &mut [u8], fp: ExtendedFloat80, sci_exp: i32, @@ -257,7 +233,8 @@ pub unsafe fn write_float_positive_exponent= digit_count { // Great: we have more leading digits than we wrote, can write trailing zeros // and an optional decimal point. - // SAFETY: safe if the buffer is large enough to hold the significant digits. - unsafe { - let digits = &mut index_unchecked_mut!(bytes[digit_count..leading_digits]); - digits.fill(b'0'); - } + bytes[digit_count..leading_digits].fill(b'0'); cursor = leading_digits; digit_count = leading_digits; // Only write decimal point if we're not trimming floats. if !options.trim_floats() { - // SAFETY: safe if `bytes.len() >= cursor + 2`. - unsafe { index_unchecked_mut!(bytes[cursor]) = decimal_point }; + bytes[cursor] = decimal_point; cursor += 1; - unsafe { index_unchecked_mut!(bytes[cursor]) = b'0' }; + bytes[cursor] = b'0'; cursor += 1; digit_count += 1; } else { @@ -287,20 +259,15 @@ pub unsafe fn write_float_positive_exponent count); - for i in (0..count).rev() { - index_unchecked_mut!(buf[i + 1] = buf[i]); - } + let buf = &mut bytes[leading_digits..digit_count + 1]; + assert!(buf.len() > count); + for i in (0..count).rev() { + buf[i + 1] = buf[i]; } // Now, write the decimal point. - // SAFETY: safe if the above step was safe, since `leading_digits < - // digit_count`. - unsafe { index_unchecked_mut!(bytes[leading_digits]) = decimal_point }; + bytes[leading_digits] = decimal_point; cursor = digit_count + 1; } @@ -313,11 +280,7 @@ pub unsafe fn write_float_positive_exponent digit_count { // Check if we need to write more trailing digits. let zeros = exact_count - digit_count; - // SAFETY: safe if the buffer is large enough to hold the significant digits. - unsafe { - let digits = &mut index_unchecked_mut!(bytes[cursor..cursor + zeros]); - digits.fill(b'0'); - } + bytes[cursor..cursor + zeros].fill(b'0'); cursor += zeros; } @@ -433,8 +396,8 @@ pub fn compute_nearest_shorter(float: F) -> ExtendedFloat80 { // SAFETY: safe, since value must be finite and therefore in the correct range. // `-324 <= exponent <= 308`, so `x * log10(2) - log10(4 / 3)` must be in // `-98 <= x <= 93`, so the final value must be in [-93, 98] (for f64). We have - // precomputed powers for [-292, 326] for f64 (same logic applies for f32) so this - // is **ALWAYS** safe. + // precomputed powers for [-292, 326] for f64 (same logic applies for f32) so + // this is **ALWAYS** safe. let pow5 = unsafe { F::dragonbox_power(-minus_k) }; let mut xi = F::compute_left_endpoint(&pow5, beta); let mut zi = F::compute_right_endpoint(&pow5, beta); @@ -501,8 +464,8 @@ pub fn compute_nearest_normal(float: F) -> ExtendedFloat80 { // SAFETY: safe, since value must be finite and therefore in the correct range. // `-324 <= exponent <= 308`, so `x * log10(2)` must be in // `-98 <= x <= 93`, so the final value must be in [-93, 98] (for f64). We have - // precomputed powers for [-292, 326] for f64 (same logic applies for f32) so this - // is **ALWAYS** safe. + // precomputed powers for [-292, 326] for f64 (same logic applies for f32) so + // this is **ALWAYS** safe. let pow5 = unsafe { F::dragonbox_power(-minus_k) }; let beta = exponent + floor_log2_pow10(-minus_k); @@ -635,8 +598,8 @@ pub fn compute_left_closed_directed(float: F) -> ExtendedFloat80 { let minus_k = floor_log10_pow2(exponent) - F::KAPPA as i32; // SAFETY: safe, since value must be finite and therefore in the correct range. // `-324 <= exponent <= 308`, so `x * log10(2)` must be in [-98, 93] (for f64). - // We have precomputed powers for [-292, 326] for f64 (same logic applies for f32) - // so this is **ALWAYS** safe. + // We have precomputed powers for [-292, 326] for f64 (same logic applies for + // f32) so this is **ALWAYS** safe. let pow5 = unsafe { F::dragonbox_power(-minus_k) }; let beta = exponent + floor_log2_pow10(-minus_k); @@ -729,8 +692,8 @@ pub fn compute_right_closed_directed(float: F, shorter: bool) -> Ex assert!(F::KAPPA <= 2); // SAFETY: safe, since value must be finite and therefore in the correct range. // `-324 <= exponent <= 308`, so `x * log10(2)` must be in [-100, 92] (for f64). - // We have precomputed powers for [-292, 326] for f64 (same logic applies for f32) - // so this is **ALWAYS** safe. + // We have precomputed powers for [-292, 326] for f64 (same logic applies for + // f32) so this is **ALWAYS** safe. let pow5: ::Power = unsafe { F::dragonbox_power(-minus_k) }; let beta = exponent + floor_log2_pow10(-minus_k); @@ -781,7 +744,9 @@ pub fn compute_right_closed_directed(float: F, shorter: bool) -> Ex // Ensure we haven't re-assigned exponent or minus_k. assert!(float.exponent() == exponent); - debug_assert!(minus_k == floor_log10_pow2(float.exponent() - shorter as i32) - F::KAPPA as i32); + debug_assert!( + minus_k == floor_log10_pow2(float.exponent() - shorter as i32) - F::KAPPA as i32 + ); extended_float(significand, minus_k + F::KAPPA as i32) } diff --git a/lexical-write-float/src/compact.rs b/lexical-write-float/src/compact.rs index 1fe7c3f8..9fbda0a6 100644 --- a/lexical-write-float/src/compact.rs +++ b/lexical-write-float/src/compact.rs @@ -74,8 +74,7 @@ pub fn write_float( // SAFETY: safe since `digits.len()` is large enough to always hold // the generated digits, which is always <= 18. let (start, k) = grisu(float, &mut digits); - let (end, carried) = - unsafe { shared::truncate_and_round_decimal(&mut digits, start, options) }; + let (end, carried) = shared::truncate_and_round_decimal(&mut digits, start, options); (end, k + start as i32 - end as i32, carried) }; diff --git a/lexical-write-float/src/shared.rs b/lexical-write-float/src/shared.rs index 2c111632..a79489c2 100644 --- a/lexical-write-float/src/shared.rs +++ b/lexical-write-float/src/shared.rs @@ -47,17 +47,14 @@ pub fn round_up(digits: &mut [u8], count: usize, radix: u32) -> (usize, bool) { } /// Round the number of digits based on the maximum digits, for decimal digits. +/// /// `digits` is a mutable buffer of the current digits, `digit_count` is the /// length of the written digits in `digits`, and `exp` is the decimal exponent /// relative to the digits. Returns the digit count, resulting exp, and if /// the input carried to the next digit. -/// -/// # Safety -/// -/// Safe as long as `digit_count <= digits.len()`. #[allow(clippy::comparison_chain)] #[cfg_attr(not(feature = "compact"), inline(always))] -pub unsafe fn truncate_and_round_decimal( +pub fn truncate_and_round_decimal( digits: &mut [u8], digit_count: usize, options: &Options, @@ -111,26 +108,19 @@ pub unsafe fn truncate_and_round_decimal( } /// Write the sign for the exponent. -/// -/// # Safety -/// -/// Safe if `bytes` is large enough to hold the largest possible exponent, -/// with an extra byte for the sign. #[cfg_attr(not(feature = "compact"), inline(always))] -pub unsafe fn write_exponent_sign( +pub fn write_exponent_sign( bytes: &mut [u8], cursor: &mut usize, exp: i32, ) -> u32 { let format = NumberFormat::<{ FORMAT }> {}; if exp < 0 { - // SAFETY: safe if bytes is large enough to hold the output - unsafe { index_unchecked_mut!(bytes[*cursor]) = b'-' }; + bytes[*cursor] = b'-'; *cursor += 1; exp.wrapping_neg() as u32 } else if cfg!(feature = "format") && format.required_exponent_sign() { - // SAFETY: safe if bytes is large enough to hold the output - unsafe { index_unchecked_mut!(bytes[*cursor]) = b'+' }; + bytes[*cursor] = b'+'; *cursor += 1; exp as u32 } else { @@ -139,24 +129,17 @@ pub unsafe fn write_exponent_sign( } /// Write the symbol, sign, and digits for the exponent. -/// -/// # Safety -/// -/// Safe if the buffer can hold all the significant digits and the sign -/// starting from cursor. #[cfg_attr(not(feature = "compact"), inline(always))] -pub unsafe fn write_exponent( +pub fn write_exponent( bytes: &mut [u8], cursor: &mut usize, exp: i32, exponent_character: u8, ) { - *cursor += unsafe { - index_unchecked_mut!(bytes[*cursor]) = exponent_character; - *cursor += 1; - let positive_exp: u32 = write_exponent_sign::(bytes, cursor, exp); - positive_exp.write_exponent::(&mut index_unchecked_mut!(bytes[*cursor..])) - }; + bytes[*cursor] = exponent_character; + *cursor += 1; + let positive_exp: u32 = write_exponent_sign::(bytes, cursor, exp); + *cursor += positive_exp.write_exponent::(&mut bytes[*cursor..]); } /// Detect the notation to use for the float formatter and call the appropriate diff --git a/lexical-write-float/tests/algorithm_tests.rs b/lexical-write-float/tests/algorithm_tests.rs index f65c3e6a..85930b1a 100644 --- a/lexical-write-float/tests/algorithm_tests.rs +++ b/lexical-write-float/tests/algorithm_tests.rs @@ -351,9 +351,12 @@ fn write_float_positive_exponent(mant: u64, exp: i32, options: &Options, expecte }; let digit_count = f64::digit_count(fp.mant); let sci_exp = fp.exp + digit_count as i32 - 1; - let count = unsafe { - algorithm::write_float_positive_exponent::(&mut buffer, fp, sci_exp, &options) - }; + let count = algorithm::write_float_positive_exponent::( + &mut buffer, + fp, + sci_exp, + &options, + ); let actual = unsafe { std::str::from_utf8_unchecked(&buffer[..count]) }; assert_eq!(actual, expected); } @@ -437,9 +440,12 @@ fn write_float_negative_exponent(mant: u64, exp: i32, options: &Options, expecte }; let digit_count = f64::digit_count(fp.mant); let sci_exp = fp.exp + digit_count as i32 - 1; - let count = unsafe { - algorithm::write_float_negative_exponent::(&mut buffer, fp, sci_exp, &options) - }; + let count = algorithm::write_float_negative_exponent::( + &mut buffer, + fp, + sci_exp, + &options, + ); let actual = unsafe { std::str::from_utf8_unchecked(&buffer[..count]) }; assert_eq!(actual, expected); } From b2da5f5d64bcde39ee25c3e423ed5345c20e9460 Mon Sep 17 00:00:00 2001 From: Alex Huszagh Date: Sat, 14 Sep 2024 14:27:01 -0500 Subject: [PATCH 08/14] Remove further unsafety from the overall API. This is a relatively trivial change and causes a small performance hit, but that change should be able to be mitigated with some minor refactoring. --- lexical-write-float/src/algorithm.rs | 6 ------ lexical-write-float/src/api.rs | 8 ++------ lexical-write-float/tests/algorithm_tests.rs | 18 ++++++++---------- 3 files changed, 10 insertions(+), 22 deletions(-) diff --git a/lexical-write-float/src/algorithm.rs b/lexical-write-float/src/algorithm.rs index fc8a11e7..5947d6ed 100644 --- a/lexical-write-float/src/algorithm.rs +++ b/lexical-write-float/src/algorithm.rs @@ -41,12 +41,6 @@ use crate::shared; use crate::table::*; /// Optimized float-to-string algorithm for decimal strings. -/// -/// # Safety -/// -/// Safe as long as the float isn't special (NaN or Infinity), and `bytes` -/// is large enough to hold the significant digits. -// TODO: Migrate to safe #[inline(always)] pub fn write_float( float: F, diff --git a/lexical-write-float/src/api.rs b/lexical-write-float/src/api.rs index d359cd9a..df87a41b 100644 --- a/lexical-write-float/src/api.rs +++ b/lexical-write-float/src/api.rs @@ -25,9 +25,7 @@ macro_rules! float_to_lexical { -> &mut [u8] { let len = self.write_float::<{ STANDARD }>(bytes, &DEFAULT_OPTIONS); - // SAFETY: safe since `check_buffer::(bytes.len(), &options)` passes. - // TODO: Make safe - unsafe { &mut index_unchecked_mut!(bytes[..len]) } + &mut bytes[..len] } } @@ -41,9 +39,7 @@ macro_rules! float_to_lexical { ) -> &'a mut [u8] { let len = self.write_float::<{ FORMAT }>(bytes, &options); - // SAFETY: safe since `check_buffer::(bytes.len(), &options)` passes. - // TODO: Make safe - unsafe { &mut index_unchecked_mut!(bytes[..len]) } + &mut bytes[..len] } } )*) diff --git a/lexical-write-float/tests/algorithm_tests.rs b/lexical-write-float/tests/algorithm_tests.rs index 85930b1a..7635f4fe 100644 --- a/lexical-write-float/tests/algorithm_tests.rs +++ b/lexical-write-float/tests/algorithm_tests.rs @@ -261,9 +261,7 @@ fn write_float_scientific(mant: u64, exp: i32, options: &Options, expected: &str }; let digit_count = f64::digit_count(fp.mant); let sci_exp = fp.exp + digit_count as i32 - 1; - let count = unsafe { - algorithm::write_float_scientific::(&mut buffer, fp, sci_exp, &options) - }; + let count = algorithm::write_float_scientific::(&mut buffer, fp, sci_exp, &options); let actual = unsafe { std::str::from_utf8_unchecked(&buffer[..count]) }; assert_eq!(actual, expected); } @@ -593,7 +591,7 @@ const F64_DATA: [f64; 33] = [ fn write_float(f: T, options: &Options, expected: &str) { let mut buffer = [b'\x00'; BUFFER_SIZE]; - let count = unsafe { algorithm::write_float::<_, FORMAT>(f, &mut buffer, options) }; + let count = algorithm::write_float::<_, FORMAT>(f, &mut buffer, options); let actual = unsafe { std::str::from_utf8_unchecked(&buffer[..count]) }; assert_eq!(actual, expected); } @@ -652,7 +650,7 @@ fn f32_roundtrip_test() { let mut buffer = [b'\x00'; BUFFER_SIZE]; let options = Options::builder().build().unwrap(); for &float in F32_DATA.iter() { - let count = unsafe { algorithm::write_float::<_, DECIMAL>(float, &mut buffer, &options) }; + let count = algorithm::write_float::<_, DECIMAL>(float, &mut buffer, &options); let actual = unsafe { std::str::from_utf8_unchecked(&buffer[..count]) }; let roundtrip = actual.parse::(); assert_eq!(roundtrip, Ok(float)); @@ -729,7 +727,7 @@ fn f64_roundtrip_test() { let mut buffer = [b'\x00'; BUFFER_SIZE]; let options = Options::builder().build().unwrap(); for &float in F64_DATA.iter() { - let count = unsafe { algorithm::write_float::<_, DECIMAL>(float, &mut buffer, &options) }; + let count = algorithm::write_float::<_, DECIMAL>(float, &mut buffer, &options); let actual = unsafe { std::str::from_utf8_unchecked(&buffer[..count]) }; let roundtrip = actual.parse::(); assert_eq!(roundtrip, Ok(float)); @@ -766,7 +764,7 @@ default_quickcheck! { if f.is_special() { true } else { - let count = unsafe { algorithm::write_float::<_, DECIMAL>(f, &mut buffer, &options) }; + let count = algorithm::write_float::<_, DECIMAL>(f, &mut buffer, &options); let actual = unsafe { std::str::from_utf8_unchecked(&buffer[..count]) }; let roundtrip = actual.parse::(); roundtrip == Ok(f) @@ -780,7 +778,7 @@ default_quickcheck! { if f.is_special() { true } else { - let count = unsafe { algorithm::write_float::<_, DECIMAL>(f, &mut buffer, &options) }; + let count = algorithm::write_float::<_, DECIMAL>(f, &mut buffer, &options); let actual = unsafe { std::str::from_utf8_unchecked(&buffer[..count]) }; let roundtrip = actual.parse::(); roundtrip == Ok(f) @@ -797,7 +795,7 @@ proptest! { let options = Options::builder().build().unwrap(); let f = f.abs(); if !f.is_special() { - let count = unsafe { algorithm::write_float::<_, DECIMAL>(f, &mut buffer, &options) }; + let count = algorithm::write_float::<_, DECIMAL>(f, &mut buffer, &options); let actual = unsafe { std::str::from_utf8_unchecked(&buffer[..count]) }; let roundtrip = actual.parse::(); prop_assert_eq!(roundtrip, Ok(f)) @@ -810,7 +808,7 @@ proptest! { let options = Options::builder().build().unwrap(); let f = f.abs(); if !f.is_special() { - let count = unsafe { algorithm::write_float::<_, DECIMAL>(f, &mut buffer, &options) }; + let count = algorithm::write_float::<_, DECIMAL>(f, &mut buffer, &options); let actual = unsafe { std::str::from_utf8_unchecked(&buffer[..count]) }; let roundtrip = actual.parse::(); prop_assert_eq!(roundtrip, Ok(f)) From a2434f04d6788dcc003ff06c2f9382b5ab8f0fb3 Mon Sep 17 00:00:00 2001 From: Alex Huszagh Date: Sat, 14 Sep 2024 15:09:22 -0500 Subject: [PATCH 09/14] Minor refactoring that mostly improves performance. One specific benchmark however has significant regressions, the random:big_ints/write_f32_lexical benchmark, while the f64 version does not. The benchmark is still considerably faster than ryu or fmt, however. --- lexical-write-float/src/algorithm.rs | 4 + lexical-write-float/src/api.rs | 6 +- lexical-write-float/src/binary.rs | 119 ++++++++------------------- lexical-write-float/src/write.rs | 42 +++++----- 4 files changed, 64 insertions(+), 107 deletions(-) diff --git a/lexical-write-float/src/algorithm.rs b/lexical-write-float/src/algorithm.rs index 5947d6ed..602c1694 100644 --- a/lexical-write-float/src/algorithm.rs +++ b/lexical-write-float/src/algorithm.rs @@ -79,6 +79,7 @@ pub fn write_float( } /// Write float to string in scientific notation. +#[inline] pub fn write_float_scientific( bytes: &mut [u8], fp: ExtendedFloat80, @@ -133,6 +134,7 @@ pub fn write_float_scientific( /// Write negative float to string without scientific notation. /// /// Has a negative exponent (shift right) and no scientific notation. +#[inline] pub fn write_float_negative_exponent( bytes: &mut [u8], fp: ExtendedFloat80, @@ -209,6 +211,7 @@ pub fn write_float_negative_exponent( /// Write positive float to string without scientific notation. /// /// Has a positive exponent (shift left) and no scientific notation. +#[inline] pub fn write_float_positive_exponent( bytes: &mut [u8], fp: ExtendedFloat80, @@ -380,6 +383,7 @@ pub fn compute_round(float: F) -> ExtendedFloat80 { /// Compute the interval I = [m−w,m+w] if even, otherwise, (m−w,m+w). /// This is the simple case for a finite number where only the hidden bit is /// set. +#[inline] pub fn compute_nearest_shorter(float: F) -> ExtendedFloat80 { // Compute k and beta. let exponent = float.exponent(); diff --git a/lexical-write-float/src/api.rs b/lexical-write-float/src/api.rs index df87a41b..72bc3075 100644 --- a/lexical-write-float/src/api.rs +++ b/lexical-write-float/src/api.rs @@ -24,8 +24,7 @@ macro_rules! float_to_lexical { fn to_lexical(self, bytes: &mut [u8]) -> &mut [u8] { - let len = self.write_float::<{ STANDARD }>(bytes, &DEFAULT_OPTIONS); - &mut bytes[..len] + self.write_float::<{ STANDARD }>(bytes, &DEFAULT_OPTIONS) } } @@ -38,8 +37,7 @@ macro_rules! float_to_lexical { options: &Self::Options, ) -> &'a mut [u8] { - let len = self.write_float::<{ FORMAT }>(bytes, &options); - &mut bytes[..len] + self.write_float::<{ FORMAT }>(bytes, &options) } } )*) diff --git a/lexical-write-float/src/binary.rs b/lexical-write-float/src/binary.rs index 61e7035c..23a57f1d 100644 --- a/lexical-write-float/src/binary.rs +++ b/lexical-write-float/src/binary.rs @@ -32,8 +32,8 @@ use crate::shared; /// /// Panics if exponent notation is used, and the exponent base and /// mantissa radix are not the same in `FORMAT`. -// TODO: This needs to be safe -pub unsafe fn write_float( +#[inline] +pub fn write_float( float: F, bytes: &mut [u8], options: &Options, @@ -102,18 +102,13 @@ where /// Write float to string in scientific notation. /// -/// # Safety -/// -/// Safe as long as `bytes` is large enough to hold the number of digits -/// and the scientific notation's exponent digits. -/// /// # Preconditions /// /// The mantissa must be truncated and rounded, prior to calling this, /// based on the number of maximum digits. In addition, `exponent_base` /// and `mantissa_radix` in `FORMAT` must be identical. #[cfg_attr(not(feature = "compact"), inline(always))] -pub unsafe fn write_float_scientific( +pub fn write_float_scientific( bytes: &mut [u8], mantissa: M, exp: i32, @@ -140,14 +135,11 @@ where let shl = calculate_shl(exp, bits_per_digit); let value = mantissa << shl; - // SAFETY: safe since the buffer must be larger than `M::FORMATTED_SIZE`. - let digit_count = unsafe { - let count = value.write_mantissa::(&mut index_unchecked_mut!(bytes[1..])); - index_unchecked_mut!(bytes[0] = bytes[1]); - index_unchecked_mut!(bytes[1]) = decimal_point; - let zeros = rtrim_char_count(&index_unchecked!(bytes[2..count + 1]), b'0'); - count - zeros - }; + let count = value.write_mantissa::(&mut bytes[1..]); + bytes[0] = bytes[1]; + bytes[1] = decimal_point; + let zeros = rtrim_char_count(&bytes[2..count + 1], b'0'); + let digit_count = count - zeros; // Extra 1 since we have the decimal point. let mut cursor = digit_count + 1; @@ -155,46 +147,34 @@ where let exact_count = shared::min_exact_digits(digit_count, options); // Write any trailing digits to the output. - // SAFETY: bytes since cannot be empty. if !format.no_exponent_without_fraction() && cursor == 2 && options.trim_floats() { // Need to trim floats from trailing zeros, and we have only a decimal. cursor -= 1; } else if exact_count < 2 { // Need to have at least 1 digit, the trailing `.0`. - unsafe { index_unchecked_mut!(bytes[cursor]) = b'0' }; + bytes[cursor] = b'0'; cursor += 1; } else if exact_count > digit_count { // NOTE: Neither `exact_count >= digit_count >= 2`. // We need to write `exact_count - (cursor - 1)` digits, since // cursor includes the decimal point. let digits_end = exact_count + 1; - // SAFETY: this is safe as long as the buffer was large enough - // to hold `min_significant_digits + 1`. - unsafe { - slice_fill_unchecked!(index_unchecked_mut!(bytes[cursor..digits_end]), b'0'); - } + bytes[cursor..digits_end].fill(b'0'); cursor = digits_end; } // Now, write our scientific notation. let scaled_sci_exp = scale_sci_exp(sci_exp, bits_per_digit); - // SAFETY: safe if the buffer is large enough to hold the maximum written float. - unsafe { - shared::write_exponent::(bytes, &mut cursor, scaled_sci_exp, options.exponent()) - }; + shared::write_exponent::(bytes, &mut cursor, scaled_sci_exp, options.exponent()); cursor } /// Write negative float to string without scientific notation. -/// Has a negative exponent (shift right) and no scientific notation. /// -/// # Safety -/// -/// Safe as long as `bytes` is large enough to hold the number of -/// significant digits and the leading zeros. +/// Has a negative exponent (shift right) and no scientific notation. #[cfg_attr(not(feature = "compact"), inline(always))] -pub unsafe fn write_float_negative_exponent( +pub fn write_float_negative_exponent( bytes: &mut [u8], mantissa: M, exp: i32, @@ -229,13 +209,9 @@ where let zero_digits = fast_ceildiv(zero_bits, bits_per_digit) as usize; // Write our 0 digits. - // SAFETY: safe if `bytes.len() > BUFFER_SIZE - 2`. - unsafe { - index_unchecked_mut!(bytes[0]) = b'0'; - index_unchecked_mut!(bytes[1]) = decimal_point; - let digits = &mut index_unchecked_mut!(bytes[2..zero_digits + 1]); - slice_fill_unchecked!(digits, b'0'); - } + bytes[0] = b'0'; + bytes[1] = decimal_point; + bytes[2..zero_digits + 1].fill(b'0'); let mut cursor = zero_digits + 1; // Generate our digits after the shift. Store the number of written @@ -245,11 +221,9 @@ where // SAFETY: both are safe, if the buffer is large enough to hold the significant // digits. - let digit_count = unsafe { - let count = value.write_mantissa::(&mut index_unchecked_mut!(bytes[cursor..])); - let zeros = rtrim_char_count(&index_unchecked!(bytes[cursor..cursor + count]), b'0'); - count - zeros - }; + let count = value.write_mantissa::(&mut bytes[cursor..]); + let zeros = rtrim_char_count(&bytes[cursor..cursor + count], b'0'); + let digit_count = count - zeros; cursor += digit_count; // Determine if we need to add more trailing zeros. @@ -260,11 +234,7 @@ where // the significant digits, and the result is < 1. if exact_count > digit_count { let zeros = exact_count - digit_count; - // SAFETY: safe if the buffer is large enough to hold the significant digits. - unsafe { - let digits = &mut index_unchecked_mut!(bytes[cursor..cursor + zeros]); - slice_fill_unchecked!(digits, b'0'); - } + bytes[cursor..cursor + zeros].fill(b'0'); cursor += zeros; } @@ -272,14 +242,10 @@ where } /// Write positive float to string without scientific notation. -/// Has a positive exponent (shift left) and no scientific notation. -/// -/// # Safety /// -/// Safe as long as `bytes` is large enough to hold the number of -/// significant digits and the (optional) trailing zeros. +/// Has a positive exponent (shift left) and no scientific notation. #[cfg_attr(not(feature = "compact"), inline(always))] -pub unsafe fn write_float_positive_exponent( +pub fn write_float_positive_exponent( bytes: &mut [u8], mantissa: M, exp: i32, @@ -301,12 +267,9 @@ where let shl = calculate_shl(exp, bits_per_digit); let value = mantissa << shl; - // SAFETY: safe since the buffer must be larger than `M::FORMATTED_SIZE`. - let mut digit_count = unsafe { - let count = value.write_mantissa::(bytes); - let zeros = rtrim_char_count(&index_unchecked!(bytes[..count]), b'0'); - count - zeros - }; + let count = value.write_mantissa::(bytes); + let zeros = rtrim_char_count(&bytes[..count], b'0'); + let mut digit_count = count - zeros; // Write the significant digits. // Calculate the number of digits we can write left of the decimal point. @@ -322,18 +285,13 @@ where if leading_digits >= digit_count { // We have more leading digits than digits we wrote: can write // any additional digits, and then just write the remaining zeros. - // SAFETY: safe if the buffer is large enough to hold the significant digits. - unsafe { - let digits = &mut index_unchecked_mut!(bytes[digit_count..leading_digits]); - slice_fill_unchecked!(digits, b'0'); - } + bytes[digit_count..leading_digits].fill(b'0'); cursor = leading_digits; // Only write decimal point if we're not trimming floats. if !options.trim_floats() { - // SAFETY: safe if `cursor +2 <= bytes.len()`. - unsafe { index_unchecked_mut!(bytes[cursor]) = decimal_point }; + bytes[cursor] = decimal_point; cursor += 1; - unsafe { index_unchecked_mut!(bytes[cursor]) = b'0' }; + bytes[cursor] = b'0'; cursor += 1; digit_count += 1; } else { @@ -342,17 +300,14 @@ where } else { // We have less leading digits than digits we wrote: find the // decimal point index, shift all digits right by 1, then write it. - // SAFETY: safe if the buffer is large enough to hold the significant digits. let shifted = digit_count - leading_digits; - unsafe { - let buf = &mut index_unchecked_mut!(bytes[leading_digits..digit_count + 1]); - safe_assert!(buf.len() > shifted); - for i in (0..shifted).rev() { - index_unchecked_mut!(buf[i + 1] = buf[i]); - } - index_unchecked_mut!(bytes[leading_digits]) = decimal_point; - cursor = digit_count + 1; + let buf = &mut bytes[leading_digits..digit_count + 1]; + safe_assert!(buf.len() > shifted); + for i in (0..shifted).rev() { + buf[i + 1] = buf[i]; } + bytes[leading_digits] = decimal_point; + cursor = digit_count + 1; } // Determine if we need to add more trailing zeros after a decimal point. @@ -362,11 +317,7 @@ where if !trimmed && exact_count > digit_count { // Check if we need to write more trailing digits. let zeros = exact_count - digit_count; - // SAFETY: safe if the buffer is large enough to hold the significant digits. - unsafe { - let digits = &mut index_unchecked_mut!(bytes[cursor..cursor + zeros]); - slice_fill_unchecked!(digits, b'0'); - } + [cursor..cursor + zeros].fill(b'0'); cursor += zeros; } diff --git a/lexical-write-float/src/write.rs b/lexical-write-float/src/write.rs index e18599ff..f832dc60 100644 --- a/lexical-write-float/src/write.rs +++ b/lexical-write-float/src/write.rs @@ -26,6 +26,7 @@ use crate::options::Options; use crate::radix; /// Write an special string to the buffer. +#[inline(always)] fn write_special(bytes: &mut [u8], special: Option<&[u8]>, error: &'static str) -> usize { // The NaN string must be <= 50 characters, so this should never panic. if let Some(special_str) = special { @@ -38,25 +39,27 @@ fn write_special(bytes: &mut [u8], special: Option<&[u8]>, error: &'static str) } /// Write an NaN string to the buffer. -#[inline(always)] -fn write_nan(bytes: &mut [u8], options: &Options, count: usize) -> usize { - count +#[inline] +fn write_nan<'a>(bytes: &'a mut [u8], options: &Options, count: usize) -> &'a mut [u8] { + let count = count + write_special( bytes, options.nan_string(), "NaN explicitly disabled but asked to write NaN as string.", - ) + ); + &mut bytes[..count] } /// Write an Inf string to the buffer. -#[inline(always)] -fn write_inf(bytes: &mut [u8], options: &Options, count: usize) -> usize { - count +#[inline] +fn write_inf<'a>(bytes: &'a mut [u8], options: &Options, count: usize) -> &'a mut [u8] { + let count = count + write_special( bytes, options.inf_string(), "Inf explicitly disabled but asked to write Inf as string.", - ) + ); + &mut bytes[..count] } /// Check if a buffer is sufficiently large. @@ -75,6 +78,7 @@ pub trait WriteFloat: RawFloat + FormattedSize { /// /// # Safety /// + /// # TODO: This is now safe effectively /// Safe as long as the buffer can hold [`FORMATTED_SIZE`] elements /// (or [`FORMATTED_SIZE_DECIMAL`] for decimal). If using custom digit /// precision control (such as specifying a minimum number of significant @@ -95,7 +99,7 @@ pub trait WriteFloat: RawFloat + FormattedSize { /// [`FORMATTED_SIZE`]: lexical_util::constants::FormattedSize::FORMATTED_SIZE /// [`FORMATTED_SIZE_DECIMAL`]: lexical_util::constants::FormattedSize::FORMATTED_SIZE_DECIMAL #[cfg_attr(not(feature = "compact"), inline(always))] - fn write_float(self, bytes: &mut [u8], options: &Options) -> usize + fn write_float<'a, const FORMAT: u128>(self, bytes: &'a mut [u8], options: &Options) -> &'a mut [u8] where Self::Unsigned: FormattedSize + WriteInteger, { @@ -117,7 +121,7 @@ pub trait WriteFloat: RawFloat + FormattedSize { } } - let (float, count, bytes) = if self.needs_negative_sign() { + let (float, mut count, bytes) = if self.needs_negative_sign() { bytes[0] = b'-'; (-self, 1, &mut bytes[1..]) } else if cfg!(feature = "format") && format.required_mantissa_sign() { @@ -134,14 +138,14 @@ pub trait WriteFloat: RawFloat + FormattedSize { // SAFETY: safe if the buffer can hold the significant digits let radix = format.radix(); let exponent_base = format.exponent_base(); - count - + if radix == 10 { + count += if radix == 10 { unsafe { write_float_decimal::<_, FORMAT>(float, bytes, options) } } else if radix != exponent_base { unsafe { hex::write_float::<_, FORMAT>(float, bytes, options) } } else { unsafe { binary::write_float::<_, FORMAT>(float, bytes, options) } - } + }; + &mut bytes[..count] } #[cfg(feature = "radix")] @@ -149,8 +153,7 @@ pub trait WriteFloat: RawFloat + FormattedSize { // SAFETY: safe if the buffer can hold the significant digits let radix = format.radix(); let exponent_base = format.exponent_base(); - count - + if radix == 10 { + count += if radix == 10 { unsafe { write_float_decimal::<_, FORMAT>(float, bytes, options) } } else if radix != exponent_base { unsafe { hex::write_float::<_, FORMAT>(float, bytes, options) } @@ -158,13 +161,14 @@ pub trait WriteFloat: RawFloat + FormattedSize { unsafe { binary::write_float::<_, FORMAT>(float, bytes, options) } } else { unsafe { radix::write_float::<_, FORMAT>(float, bytes, options) } - } + }; + &mut bytes[..count] } #[cfg(not(feature = "power-of-two"))] { - // SAFETY: safe if the buffer can hold the significant digits - count + unsafe { write_float_decimal::<_, FORMAT>(float, bytes, options) } + count += write_float_decimal::<_, FORMAT>(float, bytes, options); + &mut bytes[..count] } } else if self.is_nan() { write_nan(bytes, options, count) @@ -187,7 +191,7 @@ macro_rules! write_float_as_f32 { ($($t:ty)*) => ($( impl WriteFloat for $t { #[inline(always)] - fn write_float(self, bytes: &mut [u8], options: &Options) -> usize + fn write_float(self, bytes: &mut [u8], options: &Options) -> &mut [u8] { // SAFETY: safe if `bytes` is large enough to hold the written bytes. self.as_f32().write_float::(bytes, options) From a7d958388d863aaf0db8e25ebb2a3347fa36207c Mon Sep 17 00:00:00 2001 From: Alex Huszagh Date: Sat, 14 Sep 2024 16:29:33 -0500 Subject: [PATCH 10/14] Remove all unsafety in our float writers. This does have some performance penalties but performance is still excellent, and so a few patches will need to be made so our compiler can elide more checks. --- .gitignore | 1 + lexical-util/src/algorithm.rs | 2 +- lexical-write-float/src/algorithm.rs | 33 ++-- lexical-write-float/src/api.rs | 6 +- lexical-write-float/src/binary.rs | 3 +- lexical-write-float/src/compact.rs | 170 ++++++------------ lexical-write-float/src/hex.rs | 31 ++-- lexical-write-float/src/index.rs | 70 +------- lexical-write-float/src/radix.rs | 175 ++++++------------- lexical-write-float/src/shared.rs | 3 + lexical-write-float/src/write.rs | 49 +++--- lexical-write-float/tests/algorithm_tests.rs | 3 +- lexical-write-float/tests/binary_tests.rs | 55 +++--- lexical-write-float/tests/compact_tests.rs | 25 ++- lexical-write-float/tests/hex_tests.rs | 7 +- lexical-write-float/tests/radix_tests.rs | 88 +++++----- 16 files changed, 248 insertions(+), 473 deletions(-) diff --git a/.gitignore b/.gitignore index 472c3797..49f192f2 100644 --- a/.gitignore +++ b/.gitignore @@ -6,6 +6,7 @@ Cargo.lock /build *.pyc TODO.md +*.diff # Perftools perf.data diff --git a/lexical-util/src/algorithm.rs b/lexical-util/src/algorithm.rs index a819347d..a9713b8a 100644 --- a/lexical-util/src/algorithm.rs +++ b/lexical-util/src/algorithm.rs @@ -4,7 +4,7 @@ use crate::num::Integer; /// Copy bytes from source to destination. /// -/// This is only used in our compactt and radix integer formatted, so +/// This is only used in our compact and radix integer formatted, so /// performance isn't the highest consideration here. #[inline(always)] #[cfg(feature = "write")] diff --git a/lexical-write-float/src/algorithm.rs b/lexical-write-float/src/algorithm.rs index 602c1694..d96e4a68 100644 --- a/lexical-write-float/src/algorithm.rs +++ b/lexical-write-float/src/algorithm.rs @@ -58,24 +58,18 @@ pub fn write_float( // later into the algorithms, since we can determine the right path // and write the significant digits without using an intermediate buffer // in most cases. - - // SAFETY: All the underlying methods are safe, this just needs to have other - // API compatibility and this will remove the unsafe block. - // TODO: This should be safe now - unsafe { - write_float!( - float, - FORMAT, - sci_exp, - options, - write_float_scientific, - write_float_positive_exponent, - write_float_negative_exponent, - generic => F, - bytes => bytes, - args => fp, sci_exp, options, - ) - } + write_float!( + float, + FORMAT, + sci_exp, + options, + write_float_scientific, + write_float_positive_exponent, + write_float_negative_exponent, + generic => F, + bytes => bytes, + args => fp, sci_exp, options, + ) } /// Write float to string in scientific notation. @@ -155,8 +149,7 @@ pub fn write_float_negative_exponent( // to round carry over. This is rare, but it could happen, and would // require a shift after. The good news is: if we have a shift, we // only need to move 1 digit. - let digits = &mut bytes[..cursor]; - digits.fill(b'0'); + bytes[..cursor].fill(b'0'); // Write out our significant digits. // SAFETY: safe, if we have enough bytes to write the significant digits. diff --git a/lexical-write-float/src/api.rs b/lexical-write-float/src/api.rs index 72bc3075..76c3356a 100644 --- a/lexical-write-float/src/api.rs +++ b/lexical-write-float/src/api.rs @@ -24,7 +24,8 @@ macro_rules! float_to_lexical { fn to_lexical(self, bytes: &mut [u8]) -> &mut [u8] { - self.write_float::<{ STANDARD }>(bytes, &DEFAULT_OPTIONS) + let count = self.write_float::<{ STANDARD }>(bytes, &DEFAULT_OPTIONS); + &mut bytes[..count] } } @@ -37,7 +38,8 @@ macro_rules! float_to_lexical { options: &Self::Options, ) -> &'a mut [u8] { - self.write_float::<{ FORMAT }>(bytes, &options) + let count = self.write_float::<{ FORMAT }>(bytes, &options); + &mut bytes[..count] } } )*) diff --git a/lexical-write-float/src/binary.rs b/lexical-write-float/src/binary.rs index 23a57f1d..2dc30de7 100644 --- a/lexical-write-float/src/binary.rs +++ b/lexical-write-float/src/binary.rs @@ -302,7 +302,6 @@ where // decimal point index, shift all digits right by 1, then write it. let shifted = digit_count - leading_digits; let buf = &mut bytes[leading_digits..digit_count + 1]; - safe_assert!(buf.len() > shifted); for i in (0..shifted).rev() { buf[i + 1] = buf[i]; } @@ -317,7 +316,7 @@ where if !trimmed && exact_count > digit_count { // Check if we need to write more trailing digits. let zeros = exact_count - digit_count; - [cursor..cursor + zeros].fill(b'0'); + bytes[cursor..cursor + zeros].fill(b'0'); cursor += zeros; } diff --git a/lexical-write-float/src/compact.rs b/lexical-write-float/src/compact.rs index 9fbda0a6..12c93ffc 100644 --- a/lexical-write-float/src/compact.rs +++ b/lexical-write-float/src/compact.rs @@ -22,7 +22,7 @@ #![cfg(feature = "compact")] #![doc(hidden)] -use lexical_util::algorithm::rtrim_char_count; +use lexical_util::algorithm::{copy_to_dst, rtrim_char_count}; #[cfg(feature = "f16")] use lexical_util::bf16::bf16; use lexical_util::digit::digit_to_char_const; @@ -45,12 +45,6 @@ use crate::table::GRISU_POWERS_OF_TEN; /// This assumes the float is: /// 1). Non-special (NaN or Infinite). /// 2). Non-negative. -/// -/// # Safety -/// -/// Safe as long as the float isn't special (NaN or Infinity), and `bytes` -/// is large enough to hold the significant digits. -// TODO: This needs to be safe pub fn write_float( float: F, bytes: &mut [u8], @@ -71,8 +65,6 @@ pub fn write_float( digits[0] = b'0'; (1, 0, false) } else { - // SAFETY: safe since `digits.len()` is large enough to always hold - // the generated digits, which is always <= 18. let (start, k) = grisu(float, &mut digits); let (end, carried) = shared::truncate_and_round_decimal(&mut digits, start, options); (end, k + start as i32 - end as i32, carried) @@ -93,13 +85,8 @@ pub fn write_float( } /// Write float to string in scientific notation. -/// -/// # Safety -/// -/// Safe as long as `bytes` is large enough to hold the number of digits -/// and the scientific notation's exponent digits. #[allow(clippy::comparison_chain)] -pub unsafe fn write_float_scientific( +pub fn write_float_scientific( bytes: &mut [u8], digits: &mut [u8], digit_count: usize, @@ -121,51 +108,43 @@ pub unsafe fn write_float_scientific( let mut cursor: usize; bytes[0] = digits[0]; bytes[1] = decimal_point; - unsafe { - // SAFETY: safe if bytes is large enough to store all significant digits. - if !format.no_exponent_without_fraction() && digit_count == 1 && options.trim_floats() { - // No more digits and need to trim floats. - cursor = 1; - } else if digit_count < exact_count { - // Write our significant digits. - let src = digits[1..digit_count].as_ptr(); - let dst = &mut bytes[2..digit_count + 1]; - copy_nonoverlapping_unchecked!(dst, src, digit_count - 1); - cursor = digit_count + 1; - - // Adjust the number of digits written, by appending zeros. - let zeros = exact_count - digit_count; - slice_fill_unchecked!(index_unchecked_mut!(bytes[cursor..cursor + zeros]), b'0'); - cursor += zeros; - } else if digit_count == 1 { - // Write a single, trailing 0. - bytes[2] = b'0'; - cursor = 3; - } else { - // Write our significant digits. - let src = digits[1..digit_count].as_ptr(); - let dst = &mut bytes[2..digit_count + 1]; - copy_nonoverlapping_unchecked!(dst, src, digit_count - 1); - cursor = digit_count + 1; - } + if !format.no_exponent_without_fraction() && digit_count == 1 && options.trim_floats() { + // No more digits and need to trim floats. + cursor = 1; + } else if digit_count < exact_count { + // Write our significant digits. + let src = &digits[1..digit_count]; + let dst = &mut bytes[2..digit_count + 1]; + copy_to_dst(dst, src); + cursor = digit_count + 1; + + // Adjust the number of digits written, by appending zeros. + let zeros = exact_count - digit_count; + bytes[cursor..cursor + zeros].fill(b'0'); + cursor += zeros; + } else if digit_count == 1 { + // Write a single, trailing 0. + bytes[2] = b'0'; + cursor = 3; + } else { + // Write our significant digits. + let src = &digits[1..digit_count]; + let dst = &mut bytes[2..digit_count + 1]; + copy_to_dst(dst, src); + cursor = digit_count + 1; } // Now, write our scientific notation. - // SAFETY: safe since bytes must be large enough to store the largest float. - unsafe { shared::write_exponent::(bytes, &mut cursor, sci_exp, options.exponent()) }; + shared::write_exponent::(bytes, &mut cursor, sci_exp, options.exponent()); cursor } /// Write negative float to string without scientific notation. -/// Has a negative exponent (shift right) and no scientific notation. -/// -/// # Safety /// -/// Safe as long as `bytes` is large enough to hold the number of -/// significant digits and the leading zeros. +/// Has a negative exponent (shift right) and no scientific notation. #[allow(clippy::comparison_chain)] -pub unsafe fn write_float_negative_exponent( +pub fn write_float_negative_exponent( bytes: &mut [u8], digits: &mut [u8], digit_count: usize, @@ -182,24 +161,16 @@ pub unsafe fn write_float_negative_exponent( // Write our 0 digits. Note that we cannot have carried, since we previously // adjusted for carrying and rounding before. - // SAFETY: safe if `bytes.len() < BUFFER_SIZE - 2`. bytes[0] = b'0'; bytes[1] = decimal_point; - unsafe { - let digits = &mut index_unchecked_mut!(bytes[2..sci_exp + 1]); - slice_fill_unchecked!(digits, b'0'); - } + bytes[2..sci_exp + 1].fill(b'0'); let mut cursor = sci_exp + 1; // Write out significant digits. - // SAFETY: safe if the buffer is large enough to hold all the significant - // digits. - unsafe { - let src = digits.as_ptr(); - let dst = &mut bytes[cursor..cursor + digit_count]; - copy_nonoverlapping_unchecked!(dst, src, digit_count); - cursor += digit_count; - } + let src = &digits[..digit_count]; + let dst = &mut bytes[cursor..cursor + digit_count]; + copy_to_dst(dst, src); + cursor += digit_count; // Determine the exact number of digits to write. let exact_count = shared::min_exact_digits(digit_count, options); @@ -207,10 +178,7 @@ pub unsafe fn write_float_negative_exponent( // Adjust the number of digits written, based on the exact number of digits. if digit_count < exact_count { let zeros = exact_count - digit_count; - // SAFETY: safe if bytes is large enough to hold the significant digits. - unsafe { - slice_fill_unchecked!(bytes[cursor..cursor + zeros], b'0'); - } + bytes[cursor..cursor + zeros].fill(b'0'); cursor += zeros; } @@ -218,13 +186,9 @@ pub unsafe fn write_float_negative_exponent( } /// Write positive float to string without scientific notation. -/// Has a positive exponent (shift left) and no scientific notation. -/// -/// # Safety /// -/// Safe as long as `bytes` is large enough to hold the number of -/// significant digits and the (optional) trailing zeros. -pub unsafe fn write_float_positive_exponent( +/// Has a positive exponent (shift left) and no scientific notation. +pub fn write_float_positive_exponent( bytes: &mut [u8], digits: &mut [u8], mut digit_count: usize, @@ -245,19 +209,14 @@ pub unsafe fn write_float_positive_exponent( if leading_digits >= digit_count { // We have more leading digits than digits we wrote: can write // any additional digits, and then just write the remaining ones. - // SAFETY: safe if the buffer is large enough to hold the significant digits. - unsafe { - let src = digits.as_ptr(); - let dst = &mut bytes[..digit_count]; - copy_nonoverlapping_unchecked!(dst, src, digit_count); - let digits = &mut bytes[digit_count..leading_digits]; - slice_fill_unchecked!(digits, b'0'); - } + let src = &digits[..digit_count]; + let dst = &mut bytes[..digit_count]; + copy_to_dst(dst, src); + bytes[digit_count..leading_digits].fill(b'0'); cursor = leading_digits; digit_count = leading_digits; // Only write decimal point if we're not trimming floats. if !options.trim_floats() { - // SAFETY: safe if `cursor + 2 <= bytes.len()`. bytes[cursor] = decimal_point; cursor += 1; bytes[cursor] = b'0'; @@ -270,21 +229,15 @@ pub unsafe fn write_float_positive_exponent( // We have less leading digits than digits we wrote. // Write the digits before the decimal point. - // SAFETY: safe if the buffer is large enough to hold the significant digits. - unsafe { - let src = digits.as_ptr(); - let dst = &mut bytes[..leading_digits]; - copy_nonoverlapping_unchecked!(dst, src, leading_digits); - bytes[leading_digits] = decimal_point; - } + let src = &digits[..leading_digits]; + let dst = &mut bytes[..leading_digits]; + copy_to_dst(dst, src); + bytes[leading_digits] = decimal_point; // Write the digits after the decimal point. - // SAFETY: safe if the buffer is large enough to hold the significant digits. - unsafe { - let src = digits[leading_digits..digit_count].as_ptr(); - let dst = &mut bytes[leading_digits + 1..digit_count + 1]; - copy_nonoverlapping_unchecked!(dst, src, digit_count - leading_digits); - } + let src = &digits[leading_digits..digit_count]; + let dst = &mut bytes[leading_digits + 1..digit_count + 1]; + copy_to_dst(dst, src); cursor = digit_count + 1; } @@ -296,11 +249,7 @@ pub unsafe fn write_float_positive_exponent( if !trimmed && exact_count > digit_count { // Check if we need to write more trailing digits. let zeros = exact_count - digit_count; - // SAFETY: safe if the buffer is large enough to hold the significant digits. - unsafe { - let digits = &mut bytes[cursor..cursor + zeros]; - slice_fill_unchecked!(digits, b'0'); - } + bytes[cursor..cursor + zeros].fill(b'0'); cursor += zeros; } @@ -331,10 +280,6 @@ fn round_digit( } /// Generate digits from upper and lower range on rounding of number. -/// -/// # Safety -/// -/// Safe as long as the extended float does not represent a 0. pub fn generate_digits( fp: &ExtendedFloat80, upper: &ExtendedFloat80, @@ -379,7 +324,6 @@ pub fn generate_digits( } // 10 - // Guaranteed to be safe, TENS has 20 elements. let mut ten = 10; loop { part2 *= 10; @@ -388,7 +332,6 @@ pub fn generate_digits( let digit = part2 >> -one.exp; if digit != 0 || idx != 0 { - // SAFETY: safe, digits.len() == 32. // In practice, this can't exceed 18, however, we have extra digits // **just** in case, since we write technically up to 29 here // before we underflow TENS. @@ -411,10 +354,6 @@ pub fn generate_digits( /// # Preconditions /// /// `float` must not be 0, because this fails with the Grisu algorithm. -/// -/// # Safety -/// -/// Safe as long as float is not 0. pub fn grisu(float: F, digits: &mut [u8]) -> (usize, i32) { debug_assert!(float != F::ZERO); @@ -422,7 +361,6 @@ pub fn grisu(float: F, digits: &mut [u8]) -> (usize, i32) { let (lower, upper) = normalized_boundaries::(&w); normalize(&mut w); - // SAFETY: safe since upper.exp must be in the valid binary range. let (cp, ki) = cached_grisu_power(upper.exp); let w = mul(&w, &cp); @@ -434,7 +372,6 @@ pub fn grisu(float: F, digits: &mut [u8]) -> (usize, i32) { let k = -ki; - // SAFETY: safe since generate_digits can only generate 18 digits generate_digits(&w, &upper, &lower, digits, k) } @@ -540,10 +477,6 @@ pub fn mul(x: &ExtendedFloat80, y: &ExtendedFloat80) -> ExtendedFloat80 { // CACHED POWERS /// Find cached power of 10 from the exponent. -/// -/// # Safety -/// -/// Safe as long as exp is within the range [-1140, 1089] fn cached_grisu_power(exp: i32) -> (ExtendedFloat80, i32) { // Make the bounds 64 + 1 larger, since those will still work, // but the exp can be biased within that range. @@ -562,7 +495,6 @@ fn cached_grisu_power(exp: i32) -> (ExtendedFloat80, i32) { let mut idx = ((approx - FIRSTPOWER) / STEPPOWERS) as usize; loop { - // SAFETY: safe as long as the original exponent was in range. let mant = f64::grisu_power(idx); let decexp = fast_decimal_power(idx); let binexp = fast_binary_power(decexp); @@ -605,10 +537,6 @@ fn fast_decimal_power(index: usize) -> i32 { /// Trait with specialized methods for the Grisu algorithm. pub trait GrisuFloat: Float { /// Get the pre-computed Grisu power from the index. - /// - /// # Safety - /// - /// Safe as long as `index < GRISU_POWERS_OF_TEN.len()`. #[inline(always)] fn grisu_power(index: usize) -> u64 { debug_assert!(index <= GRISU_POWERS_OF_TEN.len()); diff --git a/lexical-write-float/src/hex.rs b/lexical-write-float/src/hex.rs index 79bd98f6..8966fbfe 100644 --- a/lexical-write-float/src/hex.rs +++ b/lexical-write-float/src/hex.rs @@ -50,8 +50,7 @@ use crate::shared; /// Panics if the radix for the significant digits is not 16, if /// the exponent base is not 2, or if the radix for the exponent /// digits is not 10. -// TODO: Here -pub unsafe fn write_float( +pub fn write_float( float: F, bytes: &mut [u8], options: &Options, @@ -108,6 +107,7 @@ where sci_exp = 0; } + // SAFETY: Safe, just other API methods need to be migrated in. write_float!( float, FORMAT, @@ -134,7 +134,7 @@ where /// The mantissa must be truncated and rounded, prior to calling this, /// based on the number of maximum digits. #[cfg_attr(not(feature = "compact"), inline(always))] -pub unsafe fn write_float_scientific( +pub fn write_float_scientific( bytes: &mut [u8], mantissa: M, exp: i32, @@ -163,14 +163,11 @@ where let shl = calculate_shl(exp, bits_per_digit); let value = mantissa << shl; - // SAFETY: safe since the buffer must be larger than `M::FORMATTED_SIZE`. - let digit_count = unsafe { - let count = value.write_mantissa::(&mut index_unchecked_mut!(bytes[1..])); - index_unchecked_mut!(bytes[0] = bytes[1]); - index_unchecked_mut!(bytes[1]) = decimal_point; - let zeros = rtrim_char_count(&index_unchecked!(bytes[2..count + 1]), b'0'); - count - zeros - }; + let count = value.write_mantissa::(&mut bytes[1..]); + bytes[0] = bytes[1]; + bytes[1] = decimal_point; + let zeros = rtrim_char_count(&bytes[2..count + 1], b'0'); + let digit_count = count - zeros; // Extra 1 since we have the decimal point. let mut cursor = digit_count + 1; @@ -184,27 +181,21 @@ where cursor -= 1; } else if exact_count < 2 { // Need to have at least 1 digit, the trailing `.0`. - unsafe { index_unchecked_mut!(bytes[cursor]) = b'0' }; + bytes[cursor] = b'0'; cursor += 1; } else if exact_count > digit_count { // NOTE: Neither `exact_count >= digit_count >= 2`. // We need to write `exact_count - (cursor - 1)` digits, since // cursor includes the decimal point. let digits_end = exact_count + 1; - // SAFETY: this is safe as long as the buffer was large enough - // to hold `min_significant_digits + 1`. - unsafe { - slice_fill_unchecked!(index_unchecked_mut!(bytes[cursor..digits_end]), b'0'); - } + bytes[cursor..digits_end].fill(b'0'); cursor = digits_end; } // Now, write our scientific notation. // SAFETY: safe if bytes is large enough to store all digits. let scaled_sci_exp = scale_sci_exp(sci_exp, bits_per_digit, bits_per_base); - unsafe { - shared::write_exponent::(bytes, &mut cursor, scaled_sci_exp, options.exponent()) - }; + shared::write_exponent::(bytes, &mut cursor, scaled_sci_exp, options.exponent()); cursor } diff --git a/lexical-write-float/src/index.rs b/lexical-write-float/src/index.rs index 0d6090ff..492157c6 100644 --- a/lexical-write-float/src/index.rs +++ b/lexical-write-float/src/index.rs @@ -4,17 +4,9 @@ //! and other tests and careful validation against a wide range //! of randomized input. Parsers are much trickier to validate. -#![cfg_attr(not(feature = "power-of-two"), allow(unused_macros))] +#![cfg_attr(feature = "compact", allow(unused_macros))] #![doc(hidden)] -/// Enable an assertion only when the `safe` feature is enabled. -macro_rules! safe_assert { - ($cond:expr $(,)?) => { - #[cfg(feature = "safe")] - assert!($cond); - }; -} - /// Index a buffer, without bounds checking. #[cfg(not(feature = "safe"))] macro_rules! index_unchecked { @@ -23,37 +15,6 @@ macro_rules! index_unchecked { }; } -/// Index a buffer and get a mutable reference, without bounds checking. -#[cfg(not(feature = "safe"))] -macro_rules! index_unchecked_mut { - ($x:ident[$i:expr]) => { - *$x.get_unchecked_mut($i) - }; - - ($x:ident[$i:expr] = $y:ident[$j:expr]) => { - *$x.get_unchecked_mut($i) = *$y.get_unchecked($j) - }; -} - -/// Fill a slice with a value, without bounds checking. -#[cfg(not(feature = "safe"))] -macro_rules! slice_fill_unchecked { - ($slc:expr, $value:expr) => { - // Get the length first to avoid stacked borrows, since we might - // have a complex expression for the slice calculation. - let len = $slc.len(); - core::ptr::write_bytes($slc.as_mut_ptr(), $value, len) - }; -} - -/// Copy to a slice without overlaps, without bounds checking. -#[cfg(not(feature = "safe"))] -macro_rules! copy_nonoverlapping_unchecked { - ($dst:expr, $src:expr, $srclen:expr) => { - core::ptr::copy_nonoverlapping($src, $dst.as_mut_ptr(), $srclen) - }; -} - /// Index a buffer, with bounds checking. #[cfg(feature = "safe")] macro_rules! index_unchecked { @@ -61,32 +22,3 @@ macro_rules! index_unchecked { $x[$i] }; } - -/// Index a buffer and get a mutable reference, with bounds checking. -#[cfg(feature = "safe")] -macro_rules! index_unchecked_mut { - ($x:ident[$i:expr]) => { - $x[$i] - }; - - ($x:ident[$i:expr] = $y:ident[$j:expr]) => { - $x[$i] = $y[$j] - }; -} - -/// Fill a slice with a value, with bounds checking. -#[cfg(feature = "safe")] -macro_rules! slice_fill_unchecked { - ($slc:expr, $value:expr) => { - $slc.fill($value) - }; -} - -/// Copy to a slice, with bounds checking. -#[cfg(feature = "safe")] -macro_rules! copy_nonoverlapping_unchecked { - ($dst:expr, $src:expr, $srclen:expr) => { - let slc = unsafe { core::slice::from_raw_parts($src, $srclen) }; - $dst.copy_from_slice(slc) - }; -} diff --git a/lexical-write-float/src/radix.rs b/lexical-write-float/src/radix.rs index d880c1a6..aa34eb94 100644 --- a/lexical-write-float/src/radix.rs +++ b/lexical-write-float/src/radix.rs @@ -12,7 +12,7 @@ #![cfg(feature = "radix")] #![doc(hidden)] -use lexical_util::algorithm::{ltrim_char_count, rtrim_char_count}; +use lexical_util::algorithm::{copy_to_dst, ltrim_char_count, rtrim_char_count}; use lexical_util::constants::{FormattedSize, BUFFER_SIZE}; use lexical_util::digit::{char_to_digit_const, digit_to_char_const}; use lexical_util::format::NumberFormat; @@ -31,17 +31,11 @@ use crate::shared; /// 1). Non-special (NaN or Infinite). /// 2). Non-negative. /// -/// # Safety -/// -/// Safe as long as `bytes` is large enough to hold the number of -/// significant digits, any (optional) leading or trailing zeros, -/// and the scientific exponent. -/// /// # Panics /// /// Panics if exponent notation is used. #[allow(clippy::collapsible_if)] -pub unsafe fn write_float( +pub fn write_float( float: F, bytes: &mut [u8], options: &Options, @@ -99,8 +93,7 @@ where // Write digit. let digit = fraction.as_u32(); let c = digit_to_char_const(digit, format.radix()); - // SAFETY: safe since we never write more than 1100 digits. - unsafe { index_unchecked_mut!(buffer[fraction_cursor]) = c }; + buffer[fraction_cursor] = c; fraction_cursor += 1; // Calculate remainder. fraction -= F::as_cast(digit); @@ -116,13 +109,11 @@ where break; } // Reconstruct digit. - // SAFETY: safe since we never write more than 1100 digits. - let c = unsafe { index_unchecked!(buffer[fraction_cursor]) }; + let c = buffer[fraction_cursor]; if let Some(digit) = char_to_digit_const(c, format.radix()) { let idx = digit + 1; let c = digit_to_char_const(idx, format.radix()); - // SAFETY: safe since we never write more than 1100 digits. - unsafe { index_unchecked_mut!(buffer[fraction_cursor]) = c }; + buffer[fraction_cursor] = c; fraction_cursor += 1; break; } @@ -143,9 +134,7 @@ where while (integer / base).exponent() > 0 { integer /= base; integer_cursor -= 1; - // SAFETY: safe since we never write more than 1100 digits, because - // the largest integer at `f64::MAX` is ~1024 digits. - unsafe { index_unchecked_mut!(buffer[integer_cursor]) = b'0' }; + buffer[integer_cursor] = b'0'; } loop { @@ -153,9 +142,7 @@ where integer_cursor -= 1; let idx = remainder.as_u32(); let c = digit_to_char_const(idx, format.radix()); - // SAFETY: safe since we never write more than 1100 digits, because - // the largest integer at `f64::MAX` is ~1024 digits. - unsafe { index_unchecked_mut!(buffer[integer_cursor]) = c }; + buffer[integer_cursor] = c; integer = (integer - remainder) / base; if integer <= F::ZERO { @@ -170,8 +157,7 @@ where // MUSL libm, and openlibm give us `40.0`, the correct answer. This of // course means we have off-by-1 errors, so the correct way is to trim // leading zeros, and then calculate the exponent as the offset. - // SAFETY: safe since both `integer_cursor` and `fraction_cursor` within bounds. - let digits = unsafe { &index_unchecked!(buffer[integer_cursor..fraction_cursor]) }; + let digits = &buffer[integer_cursor..fraction_cursor]; let zero_count = ltrim_char_count(digits, b'0'); let sci_exp: i32 = initial_cursor as i32 - integer_cursor as i32 - zero_count as i32 - 1; write_float!( @@ -190,18 +176,13 @@ where /// Write float to string in scientific notation. /// -/// # Safety -/// -/// Safe as long as `bytes` is large enough to hold the number of digits -/// and the scientific notation's exponent digits. -/// /// # Preconditions /// /// The mantissa must be truncated and rounded, prior to calling this, /// based on the number of maximum digits. In addition, `exponent_base` /// and `mantissa_radix` in `FORMAT` must be identical. #[cfg_attr(not(feature = "compact"), inline(always))] -pub unsafe fn write_float_scientific( +pub fn write_float_scientific( bytes: &mut [u8], sci_exp: i32, buffer: &mut [u8], @@ -225,11 +206,9 @@ pub unsafe fn write_float_scientific( integer_cursor }; let end = fraction_cursor.min(start + MAX_DIGIT_LENGTH + 1); - // SAFETY: safe since `start + digit_count <= end && end <= buffer.len()`. - let (digit_count, carried) = - unsafe { truncate_and_round(buffer, start, end, format.radix(), options) }; - // SAFETY: safe since `start + digit_count <= end`. - let digits = unsafe { &index_unchecked!(buffer[start..start + digit_count]) }; + let (mut digit_count, carried) = + truncate_and_round(buffer, start, end, format.radix(), options); + let digits = &buffer[start..start + digit_count]; // If we carried, just adjust the exponent since we will always have a // `digit_count == 1`. This means we don't have to worry about any other // digits. @@ -239,16 +218,13 @@ pub unsafe fn write_float_scientific( // Get as many digits as possible, up to `MAX_DIGIT_LENGTH+1` // since we are ignoring the digit for the first digit, // or the number of written digits. - // SAFETY: safe if the buffer is large enough to hold the significant digits. - let digit_count = unsafe { - index_unchecked_mut!(bytes[0] = digits[0]); - index_unchecked_mut!(bytes[1]) = decimal_point; - let src = digits.as_ptr().add(1); - let dst = &mut index_unchecked_mut!(bytes[2..digit_count + 1]); - copy_nonoverlapping_unchecked!(dst, src, digit_count - 1); - let zeros = rtrim_char_count(&index_unchecked!(bytes[2..digit_count + 1]), b'0'); - digit_count - zeros - }; + bytes[0] = digits[0]; + bytes[1] = decimal_point; + let src = &digits[1..digit_count]; + let dst = &mut bytes[2..digit_count + 1]; + copy_to_dst(dst, src); + let zeros = rtrim_char_count(&bytes[2..digit_count + 1], b'0'); + digit_count -= zeros; // Extra 1 since we have the decimal point. let mut cursor = digit_count + 1; @@ -262,38 +238,26 @@ pub unsafe fn write_float_scientific( cursor -= 1; } else if exact_count < 2 { // Need to have at least 1 digit, the trailing `.0`. - // SAFETY: safe as long as `cursor < bytes.len()`. - unsafe { index_unchecked_mut!(bytes[cursor]) = b'0' }; + bytes[cursor] = b'0'; cursor += 1; } else if exact_count > digit_count { // NOTE: Neither `exact_count >= digit_count >= 2`. // We need to write `exact_count - (cursor - 1)` digits, since // cursor includes the decimal point. let digits_end = exact_count + 1; - // SAFETY: this is safe as long as the buffer was large enough - // to hold `min_significant_digits + 1`. - unsafe { - slice_fill_unchecked!(index_unchecked_mut!(bytes[cursor..digits_end]), b'0'); - } + bytes[cursor..digits_end].fill(b'0'); cursor = digits_end; } // Now, write our scientific notation. - // SAFETY: safe if bytes is large enough to store the largest float with the - // smallest radix. - unsafe { shared::write_exponent::(bytes, &mut cursor, sci_exp, options.exponent()) }; + shared::write_exponent::(bytes, &mut cursor, sci_exp, options.exponent()); cursor } /// Write float to string without scientific notation. -/// -/// # Safety -/// -/// Safe as long as `bytes` is large enough to hold the number of -/// significant digits and the leading zeros. #[cfg_attr(not(feature = "compact"), inline(always))] -pub unsafe fn write_float_nonscientific( +pub fn write_float_nonscientific( bytes: &mut [u8], _: i32, buffer: &mut [u8], @@ -313,9 +277,8 @@ pub unsafe fn write_float_nonscientific( // Round and truncate the number of significant digits. let mut start = integer_cursor; let end = fraction_cursor.min(start + MAX_DIGIT_LENGTH + 1); - // SAFETY: safe since `start + digit_count <= end && end <= buffer.len()`. let (mut digit_count, carried) = - unsafe { truncate_and_round(buffer, start, end, format.radix(), options) }; + truncate_and_round(buffer, start, end, format.radix(), options); // Adjust the buffer if we carried. // Note that we can **only** carry if it overflowed through the integer @@ -323,36 +286,24 @@ pub unsafe fn write_float_nonscientific( if carried { debug_assert!(digit_count == 1); start -= 1; - // SAFETY: safe since `start > 0`, since we have 1100 bytes for integer digits, - // but the theoretical max is ~1024. - unsafe { index_unchecked_mut!(buffer[start]) = b'1' }; + buffer[start] = b'1'; } - // SAFETY: safe since `start + digit_count <= end`. - let digits = unsafe { &index_unchecked!(buffer[start..start + digit_count]) }; + let digits = &buffer[start..start + digit_count]; // Write the integer component. let integer_length = initial_cursor - start; let integer_count = digit_count.min(integer_length); - // SAFETY: safe if the buffer is large enough to hold the significant digits. - unsafe { - let src = digits.as_ptr(); - let dst = &mut index_unchecked_mut!(bytes[..integer_count]); - copy_nonoverlapping_unchecked!(dst, src, integer_count); - } + let src = &digits[..integer_count]; + let dst = &mut bytes[..integer_count]; + copy_to_dst(dst, src); if integer_count < integer_length { // We have more leading digits than digits we wrote: can write // any additional digits, and then just write the remaining zeros. - // SAFETY: safe if the buffer is large enough to hold the significant digits. - unsafe { - let digits = &mut index_unchecked_mut!(bytes[integer_count..integer_length]); - slice_fill_unchecked!(digits, b'0'); - } + bytes[integer_count..integer_length].fill(b'0'); } let mut cursor = integer_length; - - // SAFETY: safe if the buffer is large enough to hold the significant digits. - unsafe { index_unchecked_mut!(bytes[cursor]) = decimal_point }; + bytes[cursor] = decimal_point; cursor += 1; // Write the fraction component. @@ -360,25 +311,21 @@ pub unsafe fn write_float_nonscientific( // may have been truncated. // SAFETY: safe since `integer_count < digits.len()` since `digit_count < // digits.len()`. - let digits = unsafe { &index_unchecked!(digits[integer_count..]) }; + let digits = &digits[integer_count..]; let fraction_count = digit_count.saturating_sub(integer_length); if fraction_count > 0 { // Need to write additional fraction digits. - // SAFETY: safe if the buffer is large enough to hold the significant digits. - unsafe { - let src = digits.as_ptr(); - let end = cursor + fraction_count; - let dst = &mut index_unchecked_mut!(bytes[cursor..end]); - copy_nonoverlapping_unchecked!(dst, src, fraction_count); - let zeros = rtrim_char_count(&index_unchecked!(bytes[cursor..end]), b'0'); - cursor += fraction_count - zeros; - } + let src = &digits[..fraction_count]; + let end = cursor + fraction_count; + let dst = &mut bytes[cursor..end]; + copy_to_dst(dst, src); + let zeros = rtrim_char_count(&bytes[cursor..end], b'0'); + cursor += fraction_count - zeros; } else if options.trim_floats() { // Remove the decimal point, went too far. cursor -= 1; } else { - // SAFETY: safe if `cursor < bytes.len()`. - unsafe { index_unchecked_mut!(bytes[cursor]) = b'0' }; + bytes[cursor] = b'0'; cursor += 1; digit_count += 1; } @@ -393,11 +340,7 @@ pub unsafe fn write_float_nonscientific( // We need to write `exact_count - (cursor - 1)` digits, since // cursor includes the decimal point. let digits_end = cursor + exact_count - digit_count; - // SAFETY: this is safe as long as the buffer was large enough - // to hold `min_significant_digits + 1`. - unsafe { - slice_fill_unchecked!(index_unchecked_mut!(bytes[cursor..digits_end]), b'0'); - } + bytes[cursor..digits_end].fill(b'0'); cursor = digits_end; } @@ -425,13 +368,9 @@ const MAX_DIGIT_LENGTH: usize = BUFFER_SIZE - MAX_NONDIGIT_LENGTH; /// Round mantissa to the nearest value, returning only the number /// of significant digits. Returns the number of digits of the mantissa, /// and if the rounding did a full carry. -/// -/// # Safety -/// -/// Safe if `end <= buffer.len()`. #[cfg_attr(not(feature = "compact"), inline(always))] #[allow(clippy::comparison_chain)] -pub unsafe fn truncate_and_round( +pub fn truncate_and_round( buffer: &mut [u8], start: usize, end: usize, @@ -459,17 +398,15 @@ pub unsafe fn truncate_and_round( // Need to add the number of leading zeros to the digits digit_count. let max_digits = { - // SAFETY: safe since `start + max_digits < end`. - let digits = unsafe { &mut index_unchecked_mut!(buffer[start..start + max_digits]) }; + let digits = &mut buffer[start..start + max_digits]; max_digits + ltrim_char_count(digits, b'0') }; // We need to round-nearest, tie-even, so we need to handle // the truncation **here**. If the representation is above // halfway at all, we need to round up, even if 1 bit. - // SAFETY: safe since `max_digits < digit_count`, and `max_digits > 0`. - let last = unsafe { index_unchecked!(buffer[start + max_digits - 1]) }; - let first = unsafe { index_unchecked!(buffer[start + max_digits]) }; + let last = buffer[start + max_digits - 1]; + let first = buffer[start + max_digits]; let halfway = digit_to_char_const(radix / 2, radix); let rem = radix % 2; if first < halfway { @@ -477,37 +414,29 @@ pub unsafe fn truncate_and_round( (max_digits, false) } else if first > halfway { // Round-up always. - // SAFETY: safe if `start <= end, because `max_digits < digit_count`. - let digits = unsafe { &mut index_unchecked_mut!(buffer[start..start + max_digits]) }; - unsafe { shared::round_up(digits, max_digits, radix) } + let digits = &mut buffer[start..start + max_digits]; + shared::round_up(digits, max_digits, radix) } else if rem == 0 { // Even radix, our halfway point `$c00000.....`. - // SAFETY: safe if `start <= end, because `max_digits < digit_count`. - let truncated = unsafe { &index_unchecked!(buffer[start + max_digits + 1..end]) }; + let truncated = &buffer[start + max_digits + 1..end]; if truncated.iter().all(|&x| x == b'0') && last & 1 == 0 { // At an exact halfway point, and even, round-down. (max_digits, false) } else { // Above halfway or at halfway and even, round-up - // SAFETY: safe if `digit_count <= digits.len()`, because `max_digits < - // digit_count`. - let digits = unsafe { &mut index_unchecked_mut!(buffer[start..start + max_digits]) }; - unsafe { shared::round_up(digits, max_digits, radix) } + let digits = &mut buffer[start..start + max_digits]; + shared::round_up(digits, max_digits, radix) } } else { // Odd radix, our halfway point is `$c$c$c$c$c$c....`. Cannot halfway points. - // SAFETY: safe if `start <= end, because `max_digits < digit_count`. - let truncated = unsafe { &index_unchecked!(buffer[start + max_digits + 1..end]) }; + let truncated = &buffer[start + max_digits + 1..end]; for &c in truncated.iter() { if c < halfway { return (max_digits, false); } else if c > halfway { // Above halfway - // SAFETY: safe if `digit_count <= digits.len()`, because - // `max_digits < digit_count`. - let digits = - unsafe { &mut index_unchecked_mut!(buffer[start..start + max_digits]) }; - return unsafe { shared::round_up(digits, max_digits, radix) }; + let digits = &mut buffer[start..start + max_digits]; + return shared::round_up(digits, max_digits, radix); } } (max_digits, false) diff --git a/lexical-write-float/src/shared.rs b/lexical-write-float/src/shared.rs index a79489c2..a91512de 100644 --- a/lexical-write-float/src/shared.rs +++ b/lexical-write-float/src/shared.rs @@ -183,14 +183,17 @@ macro_rules! write_float { if !format.no_exponent_notation() && require_exponent { // Write digits in scientific notation. // SAFETY: safe as long as bytes is large enough to hold all the digits. + // TODO: No longer need to be unsafe unsafe { $write_scientific::<$($generic,)? FORMAT>($bytes, $($args,)*) } } else if $sci_exp < 0 { // Write negative exponent without scientific notation. // SAFETY: safe as long as bytes is large enough to hold all the digits. + // TODO: No longer need to be unsafe unsafe { $write_negative::<$($generic,)? FORMAT>($bytes, $($args,)*) } } else { // Write positive exponent without scientific notation. // SAFETY: safe as long as bytes is large enough to hold all the digits. + // TODO: No longer need to be unsafe unsafe { $write_positive::<$($generic,)? FORMAT>($bytes, $($args,)*) } } }}; diff --git a/lexical-write-float/src/write.rs b/lexical-write-float/src/write.rs index f832dc60..4249af1b 100644 --- a/lexical-write-float/src/write.rs +++ b/lexical-write-float/src/write.rs @@ -39,27 +39,23 @@ fn write_special(bytes: &mut [u8], special: Option<&[u8]>, error: &'static str) } /// Write an NaN string to the buffer. -#[inline] -fn write_nan<'a>(bytes: &'a mut [u8], options: &Options, count: usize) -> &'a mut [u8] { - let count = count +fn write_nan(bytes: &mut [u8], options: &Options, count: usize) -> usize { + count + write_special( bytes, options.nan_string(), "NaN explicitly disabled but asked to write NaN as string.", - ); - &mut bytes[..count] + ) } /// Write an Inf string to the buffer. -#[inline] -fn write_inf<'a>(bytes: &'a mut [u8], options: &Options, count: usize) -> &'a mut [u8] { - let count = count +fn write_inf(bytes: &mut [u8], options: &Options, count: usize) -> usize { + count + write_special( bytes, options.inf_string(), "Inf explicitly disabled but asked to write Inf as string.", - ); - &mut bytes[..count] + ) } /// Check if a buffer is sufficiently large. @@ -99,7 +95,7 @@ pub trait WriteFloat: RawFloat + FormattedSize { /// [`FORMATTED_SIZE`]: lexical_util::constants::FormattedSize::FORMATTED_SIZE /// [`FORMATTED_SIZE_DECIMAL`]: lexical_util::constants::FormattedSize::FORMATTED_SIZE_DECIMAL #[cfg_attr(not(feature = "compact"), inline(always))] - fn write_float<'a, const FORMAT: u128>(self, bytes: &'a mut [u8], options: &Options) -> &'a mut [u8] + fn write_float(self, bytes: &mut [u8], options: &Options) -> usize where Self::Unsigned: FormattedSize + WriteInteger, { @@ -121,7 +117,7 @@ pub trait WriteFloat: RawFloat + FormattedSize { } } - let (float, mut count, bytes) = if self.needs_negative_sign() { + let (float, count, bytes) = if self.needs_negative_sign() { bytes[0] = b'-'; (-self, 1, &mut bytes[1..]) } else if cfg!(feature = "format") && format.required_mantissa_sign() { @@ -138,14 +134,14 @@ pub trait WriteFloat: RawFloat + FormattedSize { // SAFETY: safe if the buffer can hold the significant digits let radix = format.radix(); let exponent_base = format.exponent_base(); - count += if radix == 10 { - unsafe { write_float_decimal::<_, FORMAT>(float, bytes, options) } + count + + if radix == 10 { + write_float_decimal::<_, FORMAT>(float, bytes, options) } else if radix != exponent_base { - unsafe { hex::write_float::<_, FORMAT>(float, bytes, options) } + hex::write_float::<_, FORMAT>(float, bytes, options) } else { - unsafe { binary::write_float::<_, FORMAT>(float, bytes, options) } - }; - &mut bytes[..count] + binary::write_float::<_, FORMAT>(float, bytes, options) + } } #[cfg(feature = "radix")] @@ -153,22 +149,21 @@ pub trait WriteFloat: RawFloat + FormattedSize { // SAFETY: safe if the buffer can hold the significant digits let radix = format.radix(); let exponent_base = format.exponent_base(); - count += if radix == 10 { - unsafe { write_float_decimal::<_, FORMAT>(float, bytes, options) } + count + + if radix == 10 { + write_float_decimal::<_, FORMAT>(float, bytes, options) } else if radix != exponent_base { - unsafe { hex::write_float::<_, FORMAT>(float, bytes, options) } + hex::write_float::<_, FORMAT>(float, bytes, options) } else if matches!(radix, 2 | 4 | 8 | 16 | 32) { - unsafe { binary::write_float::<_, FORMAT>(float, bytes, options) } + binary::write_float::<_, FORMAT>(float, bytes, options) } else { - unsafe { radix::write_float::<_, FORMAT>(float, bytes, options) } - }; - &mut bytes[..count] + radix::write_float::<_, FORMAT>(float, bytes, options) + } } #[cfg(not(feature = "power-of-two"))] { - count += write_float_decimal::<_, FORMAT>(float, bytes, options); - &mut bytes[..count] + count + write_float_decimal::<_, FORMAT>(float, bytes, options) } } else if self.is_nan() { write_nan(bytes, options, count) diff --git a/lexical-write-float/tests/algorithm_tests.rs b/lexical-write-float/tests/algorithm_tests.rs index 7635f4fe..c957f2e9 100644 --- a/lexical-write-float/tests/algorithm_tests.rs +++ b/lexical-write-float/tests/algorithm_tests.rs @@ -261,7 +261,8 @@ fn write_float_scientific(mant: u64, exp: i32, options: &Options, expected: &str }; let digit_count = f64::digit_count(fp.mant); let sci_exp = fp.exp + digit_count as i32 - 1; - let count = algorithm::write_float_scientific::(&mut buffer, fp, sci_exp, &options); + let count = + algorithm::write_float_scientific::(&mut buffer, fp, sci_exp, &options); let actual = unsafe { std::str::from_utf8_unchecked(&buffer[..count]) }; assert_eq!(actual, expected); } diff --git a/lexical-write-float/tests/binary_tests.rs b/lexical-write-float/tests/binary_tests.rs index d2106b59..520daafa 100644 --- a/lexical-write-float/tests/binary_tests.rs +++ b/lexical-write-float/tests/binary_tests.rs @@ -179,9 +179,8 @@ where sci_exp = 0; } - let count = unsafe { - binary::write_float_scientific::<_, FORMAT>(&mut buffer, mantissa, exp, sci_exp, options) - }; + let count = + binary::write_float_scientific::<_, FORMAT>(&mut buffer, mantissa, exp, sci_exp, options); let actual = unsafe { std::str::from_utf8_unchecked(&buffer[..count]) }; assert_eq!(actual, expected); } @@ -466,15 +465,13 @@ fn write_float_negative_exponent( let exp = f.exponent(); let sci_exp = exp + mantissa_bits - 1; - let count = unsafe { - binary::write_float_negative_exponent::<_, FORMAT>( - &mut buffer, - mantissa, - exp, - sci_exp, - options, - ) - }; + let count = binary::write_float_negative_exponent::<_, FORMAT>( + &mut buffer, + mantissa, + exp, + sci_exp, + options, + ); let actual = unsafe { std::str::from_utf8_unchecked(&buffer[..count]) }; assert_eq!(actual, expected); } @@ -709,15 +706,13 @@ fn write_float_positive_exponent( sci_exp = 0; } - let count = unsafe { - binary::write_float_positive_exponent::<_, FORMAT>( - &mut buffer, - mantissa, - exp, - sci_exp, - options, - ) - }; + let count = binary::write_float_positive_exponent::<_, FORMAT>( + &mut buffer, + mantissa, + exp, + sci_exp, + options, + ); let actual = unsafe { std::str::from_utf8_unchecked(&buffer[..count]) }; assert_eq!(actual, expected); } @@ -954,7 +949,7 @@ where ::Unsigned: WriteInteger + FormattedSize, { let mut buffer = [b'\x00'; BUFFER_SIZE]; - let count = unsafe { binary::write_float::<_, FORMAT>(f, &mut buffer, options) }; + let count = binary::write_float::<_, FORMAT>(f, &mut buffer, options); let actual = unsafe { std::str::from_utf8_unchecked(&buffer[..count]) }; assert_eq!(actual, expected); } @@ -1100,7 +1095,7 @@ default_quickcheck! { true } else { let f = f.abs(); - let count = unsafe { binary::write_float::<_, BINARY>(f, &mut buffer, &options) }; + let count = binary::write_float::<_, BINARY>(f, &mut buffer, &options); let roundtrip = parse_f32(&buffer[..count], 2, b'e'); roundtrip == f } @@ -1113,7 +1108,7 @@ default_quickcheck! { true } else { let f = f.abs(); - let count = unsafe { binary::write_float::<_, OCTAL>(f, &mut buffer, &options) }; + let count = binary::write_float::<_, OCTAL>(f, &mut buffer, &options); let roundtrip = parse_f32(&buffer[..count], 8, b'e'); roundtrip == f } @@ -1126,7 +1121,7 @@ default_quickcheck! { true } else { let f = f.abs(); - let count = unsafe { binary::write_float::<_, BINARY>(f, &mut buffer, &options) }; + let count = binary::write_float::<_, BINARY>(f, &mut buffer, &options); let roundtrip = parse_f64(&buffer[..count], 2, b'e'); roundtrip == f } @@ -1139,7 +1134,7 @@ default_quickcheck! { true } else { let f = f.abs(); - let count = unsafe { binary::write_float::<_, OCTAL>(f, &mut buffer, &options) }; + let count = binary::write_float::<_, OCTAL>(f, &mut buffer, &options); let roundtrip = parse_f64(&buffer[..count], 8, b'e'); roundtrip == f } @@ -1155,7 +1150,7 @@ proptest! { let options = Options::builder().build().unwrap(); if !f.is_special() { let f = f.abs(); - let count = unsafe { binary::write_float::<_, BINARY>(f, &mut buffer, &options) }; + let count = binary::write_float::<_, BINARY>(f, &mut buffer, &options); let roundtrip = parse_f32(&buffer[..count], 2, b'e'); prop_assert_eq!(roundtrip, f) } @@ -1167,7 +1162,7 @@ proptest! { let options = Options::builder().build().unwrap(); if !f.is_special() { let f = f.abs(); - let count = unsafe { binary::write_float::<_, OCTAL>(f, &mut buffer, &options) }; + let count = binary::write_float::<_, OCTAL>(f, &mut buffer, &options); let roundtrip = parse_f32(&buffer[..count], 8, b'e'); prop_assert_eq!(roundtrip, f) } @@ -1179,7 +1174,7 @@ proptest! { let options = Options::builder().build().unwrap(); if !f.is_special() { let f = f.abs(); - let count = unsafe { binary::write_float::<_, BINARY>(f, &mut buffer, &options) }; + let count = binary::write_float::<_, BINARY>(f, &mut buffer, &options); let roundtrip = parse_f64(&buffer[..count], 2, b'e'); prop_assert_eq!(roundtrip, f) } @@ -1191,7 +1186,7 @@ proptest! { let options = Options::builder().build().unwrap(); if !f.is_special() { let f = f.abs(); - let count = unsafe { binary::write_float::<_, OCTAL>(f, &mut buffer, &options) }; + let count = binary::write_float::<_, OCTAL>(f, &mut buffer, &options); let roundtrip = parse_f64(&buffer[..count], 8, b'e'); prop_assert_eq!(roundtrip, f) } diff --git a/lexical-write-float/tests/compact_tests.rs b/lexical-write-float/tests/compact_tests.rs index 2830e42b..8fa4fb1b 100644 --- a/lexical-write-float/tests/compact_tests.rs +++ b/lexical-write-float/tests/compact_tests.rs @@ -556,9 +556,8 @@ fn f32_roundtrip_test() { fn write_float_scientific(digits: &mut [u8], k: i32, options: &Options, expected: &str) { let mut buffer = [b'\x00'; BUFFER_SIZE]; let ndigits = digits.len(); - let count = unsafe { - compact::write_float_scientific::(&mut buffer, digits, ndigits, k, &options) - }; + let count = + compact::write_float_scientific::(&mut buffer, digits, ndigits, k, &options); let actual = unsafe { std::str::from_utf8_unchecked(&buffer[..count]) }; assert_eq!(actual, expected); } @@ -591,9 +590,13 @@ fn write_float_scientific_test() { fn write_float_positive_exponent(digits: &mut [u8], k: i32, options: &Options, expected: &str) { let mut buffer = [b'\x00'; 512]; let ndigits = digits.len(); - let count = unsafe { - compact::write_float_positive_exponent::(&mut buffer, digits, ndigits, k, &options) - }; + let count = compact::write_float_positive_exponent::( + &mut buffer, + digits, + ndigits, + k, + &options, + ); let actual = unsafe { std::str::from_utf8_unchecked(&buffer[..count]) }; assert_eq!(actual, expected); } @@ -618,9 +621,13 @@ fn write_float_positive_exponent_test() { fn write_float_negative_exponent(digits: &mut [u8], k: i32, options: &Options, expected: &str) { let mut buffer = [b'\x00'; 512]; let ndigits = digits.len(); - let count = unsafe { - compact::write_float_negative_exponent::(&mut buffer, digits, ndigits, k, &options) - }; + let count = compact::write_float_negative_exponent::( + &mut buffer, + digits, + ndigits, + k, + &options, + ); let actual = unsafe { std::str::from_utf8_unchecked(&buffer[..count]) }; assert_eq!(actual, expected); } diff --git a/lexical-write-float/tests/hex_tests.rs b/lexical-write-float/tests/hex_tests.rs index e398cd4a..da6eac67 100644 --- a/lexical-write-float/tests/hex_tests.rs +++ b/lexical-write-float/tests/hex_tests.rs @@ -50,9 +50,8 @@ where sci_exp = 0; } - let count = unsafe { - hex::write_float_scientific::<_, FORMAT>(&mut buffer, mantissa, exp, sci_exp, options) - }; + let count = + hex::write_float_scientific::<_, FORMAT>(&mut buffer, mantissa, exp, sci_exp, options); let actual = unsafe { std::str::from_utf8_unchecked(&buffer[..count]) }; assert_eq!(actual, expected); } @@ -389,7 +388,7 @@ where ::Unsigned: WriteInteger + FormattedSize, { let mut buffer = [b'\x00'; BUFFER_SIZE]; - let count = unsafe { hex::write_float::<_, FORMAT>(f, &mut buffer, options) }; + let count = hex::write_float::<_, FORMAT>(f, &mut buffer, options); let actual = unsafe { std::str::from_utf8_unchecked(&buffer[..count]) }; assert_eq!(actual, expected); } diff --git a/lexical-write-float/tests/radix_tests.rs b/lexical-write-float/tests/radix_tests.rs index fd4d964c..98adbc8f 100644 --- a/lexical-write-float/tests/radix_tests.rs +++ b/lexical-write-float/tests/radix_tests.rs @@ -95,7 +95,7 @@ where ::Unsigned: WriteInteger + FormattedSize, { let mut buffer = [b'\x00'; BUFFER_SIZE]; - let count = unsafe { radix::write_float::<_, FORMAT>(f, &mut buffer, options) }; + let count = radix::write_float::<_, FORMAT>(f, &mut buffer, options); let actual = unsafe { std::str::from_utf8_unchecked(&buffer[..count]) }; assert_eq!(actual, expected); } @@ -243,7 +243,7 @@ macro_rules! test_radix { } else { $options.clone() }; - let count = unsafe { radix::write_float::<_, FORMAT>($f, &mut $buffer, &options) }; + let count = radix::write_float::<_, FORMAT>($f, &mut $buffer, &options); let roundtrip = $parse(&$buffer[..count], $radix, options.exponent()); assert_relative_eq!($f, roundtrip, epsilon = 1e-6, max_relative = 3e-6); }}; @@ -306,12 +306,12 @@ fn base21_test() { let mut buffer = [b'\x00'; 512]; let options = Options::builder().exponent(b'^').build().unwrap(); let f = 2879632400000000000000000.0f32; - let count = unsafe { radix::write_float::<_, BASE21>(f, &mut buffer, &options) }; + let count = radix::write_float::<_, BASE21>(f, &mut buffer, &options); let roundtrip = parse_f32(&buffer[..count], 21, b'^'); assert_relative_eq!(f, roundtrip, epsilon = 1e-5, max_relative = 1e-5); let f = 48205284000000000000000000000000000000.0f32; - let count = unsafe { radix::write_float::<_, BASE21>(f, &mut buffer, &options) }; + let count = radix::write_float::<_, BASE21>(f, &mut buffer, &options); let roundtrip = parse_f32(&buffer[..count], 21, b'^'); assert_relative_eq!(f, roundtrip, epsilon = 1e-5, max_relative = 1e-5); @@ -321,17 +321,17 @@ fn base21_test() { .build() .unwrap(); let f = 105861640000000000000000000000000000000.0f32; - let count = unsafe { radix::write_float::<_, BASE21>(f, &mut buffer, &options) }; + let count = radix::write_float::<_, BASE21>(f, &mut buffer, &options); let roundtrip = parse_f32(&buffer[..count], 21, b'^'); assert_relative_eq!(f, roundtrip, epsilon = 1e-1, max_relative = 1e-1); let f = 63900220000000000000000000000000000000.0f32; - let count = unsafe { radix::write_float::<_, BASE21>(f, &mut buffer, &options) }; + let count = radix::write_float::<_, BASE21>(f, &mut buffer, &options); let roundtrip = parse_f32(&buffer[..count], 21, b'^'); assert_relative_eq!(f, roundtrip, epsilon = 1e-1, max_relative = 1e-1); let f = 48205284000000000000000000000000000000.0f32; - let count = unsafe { radix::write_float::<_, BASE21>(f, &mut buffer, &options) }; + let count = radix::write_float::<_, BASE21>(f, &mut buffer, &options); assert_eq!(b"4.C44^17", &buffer[..count]); let options = Options::builder() @@ -341,7 +341,7 @@ fn base21_test() { .build() .unwrap(); let f = 48205284000000000000000000000000000000.0f32; - let count = unsafe { radix::write_float::<_, BASE21>(f, &mut buffer, &options) }; + let count = radix::write_float::<_, BASE21>(f, &mut buffer, &options); assert_eq!(b"4C440700000000000000000000000.0", &buffer[..count]); } @@ -370,7 +370,7 @@ default_quickcheck! { true } else { let f = f.abs(); - let count = unsafe { radix::write_float::<_, BASE3>(f, &mut buffer, &options) }; + let count = radix::write_float::<_, BASE3>(f, &mut buffer, &options); let roundtrip = parse_f32(&buffer[..count], 3, b'e'); relative_eq!(f, roundtrip, epsilon=1e-6, max_relative=1e-6) } @@ -383,7 +383,7 @@ default_quickcheck! { true } else { let f = f.abs(); - let count = unsafe { radix::write_float::<_, BASE5>(f, &mut buffer, &options) }; + let count = radix::write_float::<_, BASE5>(f, &mut buffer, &options); let roundtrip = parse_f32(&buffer[..count], 5, b'e'); relative_eq!(f, roundtrip, epsilon=1e-6, max_relative=1e-6) } @@ -396,7 +396,7 @@ default_quickcheck! { true } else { let f = f.abs(); - let count = unsafe { radix::write_float::<_, BASE21>(f, &mut buffer, &options) }; + let count = radix::write_float::<_, BASE21>(f, &mut buffer, &options); let roundtrip = parse_f32(&buffer[..count], 21, b'^'); relative_eq!(f, roundtrip, epsilon=1e-5, max_relative=1e-5) } @@ -409,7 +409,7 @@ default_quickcheck! { true } else { let f = f.abs(); - let count = unsafe { radix::write_float::<_, BASE3>(f, &mut buffer, &options) }; + let count = radix::write_float::<_, BASE3>(f, &mut buffer, &options); let roundtrip = parse_f64(&buffer[..count], 3, b'e'); relative_eq!(f, roundtrip, epsilon=1e-6, max_relative=1e-6) } @@ -422,7 +422,7 @@ default_quickcheck! { true } else { let f = f.abs(); - let count = unsafe { radix::write_float::<_, BASE5>(f, &mut buffer, &options) }; + let count = radix::write_float::<_, BASE5>(f, &mut buffer, &options); let roundtrip = parse_f64(&buffer[..count], 5, b'e'); relative_eq!(f, roundtrip, epsilon=1e-6, max_relative=1e-6) } @@ -435,7 +435,7 @@ default_quickcheck! { true } else { let f = f.abs(); - let count = unsafe { radix::write_float::<_, BASE21>(f, &mut buffer, &options) }; + let count = radix::write_float::<_, BASE21>(f, &mut buffer, &options); let roundtrip = parse_f64(&buffer[..count], 21, b'^'); relative_eq!(f, roundtrip, epsilon=1e-6, max_relative=1e-6) } @@ -451,7 +451,7 @@ proptest! { let options = Options::builder().build().unwrap(); if !(is_overflow!(@f32 f)) { let f = f.abs(); - let count = unsafe { radix::write_float::<_, BASE3>(f, &mut buffer, &options) }; + let count = radix::write_float::<_, BASE3>(f, &mut buffer, &options); let roundtrip = parse_f32(&buffer[..count], 3, b'e'); let equal = relative_eq!(f, roundtrip, epsilon=1e-6, max_relative=1e-6); prop_assert!(equal) @@ -464,7 +464,7 @@ proptest! { let options = Options::builder().build().unwrap(); if !(is_overflow!(@f32 f)) { let f = f.abs(); - let count = unsafe { radix::write_float::<_, BASE5>(f, &mut buffer, &options) }; + let count = radix::write_float::<_, BASE5>(f, &mut buffer, &options); let roundtrip = parse_f32(&buffer[..count], 5, b'e'); let equal = relative_eq!(f, roundtrip, epsilon=1e-6, max_relative=1e-6); prop_assert!(equal) @@ -477,7 +477,7 @@ proptest! { let options = Options::builder().exponent(b'^').build().unwrap(); if !(is_overflow!(@f32 f)) { let f = f.abs(); - let count = unsafe { radix::write_float::<_, BASE21>(f, &mut buffer, &options) }; + let count = radix::write_float::<_, BASE21>(f, &mut buffer, &options); let roundtrip = parse_f32(&buffer[..count], 21, b'^'); let equal = relative_eq!(f, roundtrip, epsilon=1e-5, max_relative=1e-5); prop_assert!(equal) @@ -493,7 +493,7 @@ proptest! { .unwrap(); if !(is_overflow!(@f32 f)) { let f = f.abs(); - let count = unsafe { radix::write_float::<_, BASE3>(f, &mut buffer, &options) }; + let count = radix::write_float::<_, BASE3>(f, &mut buffer, &options); let roundtrip = parse_f32(&buffer[..count], 3, b'e'); let equal = relative_eq!(f, roundtrip, epsilon=1e-1, max_relative=1e-1); prop_assert!(equal) @@ -509,7 +509,7 @@ proptest! { .unwrap(); if !(is_overflow!(@f32 f)) { let f = f.abs(); - let count = unsafe { radix::write_float::<_, BASE5>(f, &mut buffer, &options) }; + let count = radix::write_float::<_, BASE5>(f, &mut buffer, &options); let roundtrip = parse_f32(&buffer[..count], 5, b'e'); let equal = relative_eq!(f, roundtrip, epsilon=1e-1, max_relative=1e-1); prop_assert!(equal) @@ -526,7 +526,7 @@ proptest! { .unwrap(); if !(is_overflow!(@f32 f)) { let f = f.abs(); - let count = unsafe { radix::write_float::<_, BASE21>(f, &mut buffer, &options) }; + let count = radix::write_float::<_, BASE21>(f, &mut buffer, &options); let roundtrip = parse_f32(&buffer[..count], 21, b'^'); let equal = relative_eq!(f, roundtrip, epsilon=1e-1, max_relative=1e-1); prop_assert!(equal) @@ -542,7 +542,7 @@ proptest! { .unwrap(); if !(is_overflow!(@f32 f)) { let f = f.abs(); - let count = unsafe { radix::write_float::<_, BASE3>(f, &mut buffer, &options) }; + let count = radix::write_float::<_, BASE3>(f, &mut buffer, &options); let roundtrip = parse_f32(&buffer[..count], 3, b'e'); let equal = relative_eq!(f, roundtrip, epsilon=1e-1, max_relative=1e-1); prop_assert!(equal) @@ -558,7 +558,7 @@ proptest! { .unwrap(); if !(is_overflow!(@f32 f)) { let f = f.abs(); - let count = unsafe { radix::write_float::<_, BASE5>(f, &mut buffer, &options) }; + let count = radix::write_float::<_, BASE5>(f, &mut buffer, &options); let roundtrip = parse_f32(&buffer[..count], 5, b'e'); let equal = relative_eq!(f, roundtrip, epsilon=1e-1, max_relative=1e-1); prop_assert!(equal) @@ -575,7 +575,7 @@ proptest! { .unwrap(); if !(is_overflow!(@f32 f)) { let f = f.abs(); - let count = unsafe { radix::write_float::<_, BASE21>(f, &mut buffer, &options) }; + let count = radix::write_float::<_, BASE21>(f, &mut buffer, &options); let roundtrip = parse_f32(&buffer[..count], 21, b'^'); let equal = relative_eq!(f, roundtrip, epsilon=1e-1, max_relative=1e-1); prop_assert!(equal) @@ -593,7 +593,7 @@ proptest! { .unwrap(); if !(is_overflow!(@f32 f)) { let f = f.abs(); - let count = unsafe { radix::write_float::<_, BASE3>(f, &mut buffer, &options) }; + let count = radix::write_float::<_, BASE3>(f, &mut buffer, &options); let roundtrip = parse_f32(&buffer[..count], 3, b'e'); let equal = relative_eq!(f, roundtrip, epsilon=1e-1, max_relative=1e-1); prop_assert!(equal) @@ -611,7 +611,7 @@ proptest! { .unwrap(); if !(is_overflow!(@f32 f)) { let f = f.abs(); - let count = unsafe { radix::write_float::<_, BASE5>(f, &mut buffer, &options) }; + let count = radix::write_float::<_, BASE5>(f, &mut buffer, &options); let roundtrip = parse_f32(&buffer[..count], 5, b'e'); let equal = relative_eq!(f, roundtrip, epsilon=1e-1, max_relative=1e-1); prop_assert!(equal) @@ -630,7 +630,7 @@ proptest! { .unwrap(); if !(is_overflow!(@f32 f)) { let f = f.abs(); - let count = unsafe { radix::write_float::<_, BASE21>(f, &mut buffer, &options) }; + let count = radix::write_float::<_, BASE21>(f, &mut buffer, &options); let roundtrip = parse_f32(&buffer[..count], 21, b'^'); let equal = relative_eq!(f, roundtrip, epsilon=1e-1, max_relative=1e-1); prop_assert!(equal) @@ -648,7 +648,7 @@ proptest! { .unwrap(); if !(is_overflow!(@f32 f)) { let f = f.abs(); - let count = unsafe { radix::write_float::<_, BASE3>(f, &mut buffer, &options) }; + let count = radix::write_float::<_, BASE3>(f, &mut buffer, &options); let roundtrip = parse_f32(&buffer[..count], 3, b'e'); let equal = relative_eq!(f, roundtrip, epsilon=1e-1, max_relative=1e-1); prop_assert!(equal) @@ -666,7 +666,7 @@ proptest! { .unwrap(); if !(is_overflow!(@f32 f)) { let f = f.abs(); - let count = unsafe { radix::write_float::<_, BASE5>(f, &mut buffer, &options) }; + let count = radix::write_float::<_, BASE5>(f, &mut buffer, &options); let roundtrip = parse_f32(&buffer[..count], 5, b'e'); let equal = relative_eq!(f, roundtrip, epsilon=1e-1, max_relative=1e-1); prop_assert!(equal) @@ -685,7 +685,7 @@ proptest! { .unwrap(); if !(is_overflow!(@f32 f)) { let f = f.abs(); - let count = unsafe { radix::write_float::<_, BASE21>(f, &mut buffer, &options) }; + let count = radix::write_float::<_, BASE21>(f, &mut buffer, &options); let roundtrip = parse_f32(&buffer[..count], 21, b'^'); let equal = relative_eq!(f, roundtrip, epsilon=1e-1, max_relative=1e-1); prop_assert!(equal) @@ -698,7 +698,7 @@ proptest! { let options = Options::builder().build().unwrap(); if !(is_overflow!(@f64 f)) { let f = f.abs(); - let count = unsafe { radix::write_float::<_, BASE3>(f, &mut buffer, &options) }; + let count = radix::write_float::<_, BASE3>(f, &mut buffer, &options); let roundtrip = parse_f64(&buffer[..count], 3, b'e'); let equal = relative_eq!(f, roundtrip, epsilon=1e-6, max_relative=1e-6); prop_assert!(equal) @@ -711,7 +711,7 @@ proptest! { let options = Options::builder().build().unwrap(); if !(is_overflow!(@f64 f)) { let f = f.abs(); - let count = unsafe { radix::write_float::<_, BASE5>(f, &mut buffer, &options) }; + let count = radix::write_float::<_, BASE5>(f, &mut buffer, &options); let roundtrip = parse_f64(&buffer[..count], 5, b'e'); let equal = relative_eq!(f, roundtrip, epsilon=1e-6, max_relative=1e-6); prop_assert!(equal) @@ -724,7 +724,7 @@ proptest! { let options = Options::builder().exponent(b'^').build().unwrap(); if !(is_overflow!(@f64 f)) { let f = f.abs(); - let count = unsafe { radix::write_float::<_, BASE21>(f, &mut buffer, &options) }; + let count = radix::write_float::<_, BASE21>(f, &mut buffer, &options); let roundtrip = parse_f64(&buffer[..count], 21, b'^'); let equal = relative_eq!(f, roundtrip, epsilon=1e-6, max_relative=1e-6); prop_assert!(equal) @@ -740,7 +740,7 @@ proptest! { .unwrap(); if !(is_overflow!(@f64 f)) { let f = f.abs(); - let count = unsafe { radix::write_float::<_, BASE3>(f, &mut buffer, &options) }; + let count = radix::write_float::<_, BASE3>(f, &mut buffer, &options); let roundtrip = parse_f64(&buffer[..count], 3, b'e'); let equal = relative_eq!(f, roundtrip, epsilon=1e-1, max_relative=1e-1); prop_assert!(equal) @@ -756,7 +756,7 @@ proptest! { .unwrap(); if !(is_overflow!(@f64 f)) { let f = f.abs(); - let count = unsafe { radix::write_float::<_, BASE5>(f, &mut buffer, &options) }; + let count = radix::write_float::<_, BASE5>(f, &mut buffer, &options); let roundtrip = parse_f64(&buffer[..count], 5, b'e'); let equal = relative_eq!(f, roundtrip, epsilon=1e-1, max_relative=1e-1); prop_assert!(equal) @@ -773,7 +773,7 @@ proptest! { .unwrap(); if !(is_overflow!(@f64 f)) { let f = f.abs(); - let count = unsafe { radix::write_float::<_, BASE21>(f, &mut buffer, &options) }; + let count = radix::write_float::<_, BASE21>(f, &mut buffer, &options); let roundtrip = parse_f64(&buffer[..count], 21, b'^'); let equal = relative_eq!(f, roundtrip, epsilon=1e-1, max_relative=1e-1); prop_assert!(equal) @@ -789,7 +789,7 @@ proptest! { .unwrap(); if !(is_overflow!(@f64 f)) { let f = f.abs(); - let count = unsafe { radix::write_float::<_, BASE3>(f, &mut buffer, &options) }; + let count = radix::write_float::<_, BASE3>(f, &mut buffer, &options); let roundtrip = parse_f64(&buffer[..count], 3, b'e'); let equal = relative_eq!(f, roundtrip, epsilon=1e-1, max_relative=1e-1); prop_assert!(equal) @@ -805,7 +805,7 @@ proptest! { .unwrap(); if !(is_overflow!(@f64 f)) { let f = f.abs(); - let count = unsafe { radix::write_float::<_, BASE5>(f, &mut buffer, &options) }; + let count = radix::write_float::<_, BASE5>(f, &mut buffer, &options); let roundtrip = parse_f64(&buffer[..count], 5, b'e'); let equal = relative_eq!(f, roundtrip, epsilon=1e-1, max_relative=1e-1); prop_assert!(equal) @@ -822,7 +822,7 @@ proptest! { .unwrap(); if !(is_overflow!(@f64 f)) { let f = f.abs(); - let count = unsafe { radix::write_float::<_, BASE21>(f, &mut buffer, &options) }; + let count = radix::write_float::<_, BASE21>(f, &mut buffer, &options); let roundtrip = parse_f64(&buffer[..count], 21, b'^'); let equal = relative_eq!(f, roundtrip, epsilon=1e-1, max_relative=1e-1); prop_assert!(equal) @@ -840,7 +840,7 @@ proptest! { .unwrap(); if !(is_overflow!(@f64 f)) { let f = f.abs(); - let count = unsafe { radix::write_float::<_, BASE3>(f, &mut buffer, &options) }; + let count = radix::write_float::<_, BASE3>(f, &mut buffer, &options); let roundtrip = parse_f64(&buffer[..count], 3, b'e'); let equal = relative_eq!(f, roundtrip, epsilon=1e-1, max_relative=1e-1); prop_assert!(equal) @@ -858,7 +858,7 @@ proptest! { .unwrap(); if !(is_overflow!(@f64 f)) { let f = f.abs(); - let count = unsafe { radix::write_float::<_, BASE5>(f, &mut buffer, &options) }; + let count = radix::write_float::<_, BASE5>(f, &mut buffer, &options); let roundtrip = parse_f64(&buffer[..count], 5, b'e'); let equal = relative_eq!(f, roundtrip, epsilon=1e-1, max_relative=1e-1); prop_assert!(equal) @@ -877,7 +877,7 @@ proptest! { .unwrap(); if !(is_overflow!(@f64 f)) { let f = f.abs(); - let count = unsafe { radix::write_float::<_, BASE21>(f, &mut buffer, &options) }; + let count = radix::write_float::<_, BASE21>(f, &mut buffer, &options); let roundtrip = parse_f64(&buffer[..count], 21, b'^'); let equal = relative_eq!(f, roundtrip, epsilon=1e-1, max_relative=1e-1); prop_assert!(equal) @@ -895,7 +895,7 @@ proptest! { .unwrap(); if !(is_overflow!(@f64 f)) { let f = f.abs(); - let count = unsafe { radix::write_float::<_, BASE3>(f, &mut buffer, &options) }; + let count = radix::write_float::<_, BASE3>(f, &mut buffer, &options); let roundtrip = parse_f64(&buffer[..count], 3, b'e'); let equal = relative_eq!(f, roundtrip, epsilon=1e-1, max_relative=1e-1); prop_assert!(equal) @@ -913,7 +913,7 @@ proptest! { .unwrap(); if !(is_overflow!(@f64 f)) { let f = f.abs(); - let count = unsafe { radix::write_float::<_, BASE5>(f, &mut buffer, &options) }; + let count = radix::write_float::<_, BASE5>(f, &mut buffer, &options); let roundtrip = parse_f64(&buffer[..count], 5, b'e'); let equal = relative_eq!(f, roundtrip, epsilon=1e-1, max_relative=1e-1); prop_assert!(equal) @@ -932,7 +932,7 @@ proptest! { .unwrap(); if !(is_overflow!(@f64 f)) { let f = f.abs(); - let count = unsafe { radix::write_float::<_, BASE21>(f, &mut buffer, &options) }; + let count = radix::write_float::<_, BASE21>(f, &mut buffer, &options); let roundtrip = parse_f64(&buffer[..count], 21, b'^'); let equal = relative_eq!(f, roundtrip, epsilon=1e-1, max_relative=1e-1); prop_assert!(equal) From a2667d5981060f733dfe9c3a732b4c75491c1be3 Mon Sep 17 00:00:00 2001 From: Alex Huszagh Date: Sat, 14 Sep 2024 16:42:53 -0500 Subject: [PATCH 11/14] Complete our safety mitigations in the float writers. Performance enhancements will soon follow to restore any lost performance in some cases which impacted it up to 3%. --- CHANGELOG | 1 + README.md | 2 -- lexical-benchmark/parse-float/Cargo.toml | 1 - lexical-benchmark/parse-float/denormal30.rs | 5 --- lexical-benchmark/parse-float/denormal6400.rs | 5 --- lexical-core/Cargo.toml | 17 --------- lexical-parse-float/Cargo.toml | 5 --- lexical-parse-float/src/bigint.rs | 8 ----- lexical-parse-float/src/fpu.rs | 1 - lexical-parse-float/src/libm.rs | 13 ------- lexical-parse-float/src/number.rs | 3 -- lexical-parse-integer/Cargo.toml | 5 --- lexical-write-float/Cargo.toml | 6 ---- lexical-write-float/src/index.rs | 9 ----- lexical-write-float/src/write.rs | 2 +- lexical-write-integer/Cargo.toml | 6 ---- lexical-write-integer/src/index.rs | 35 ------------------- lexical/Cargo.toml | 2 -- 18 files changed, 2 insertions(+), 124 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index b3b52dd8..85b11758 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -38,6 +38,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Support for mips (MIPS), mipsel (MIPS LE), mips64 (MIPS64 BE), and mips64el (MIPS64 LE) on Linux. - All `_unchecked` API methods, since the performance benefits are dubious and it makes safety invariant checking much harder. +- The `safe` and `nightly` features, since ASM is now supported by the MSRV on stable and opt-in for memory-safe indexing is no longer relevant. ## [0.8.5] 2022-06-06 diff --git a/README.md b/README.md index 2e722f05..1a16079e 100644 --- a/README.md +++ b/README.md @@ -139,8 +139,6 @@ Lexical is highly customizable, and contains numerous other optional features:
With format enabled, the number format is dictated through bitflags and masks packed into a u128. These dictate the valid syntax of parsed and written numbers, including enabling digit separators, requiring integer or fraction digits, and toggling case-sensitive exponent characters.
- **compact**:   Optimize for binary size at the expense of performance.
This minimizes the use of pre-computed tables, producing significantly smaller binaries.
-- **safe**:   Requires all array indexing to be bounds-checked. -
This has limited impact for number parsers, since they use safe indexing except where indexing without bounds checking and can general be shown to be sound. The number writers frequently use unsafe indexing, since we can easily over-estimate the number of digits in the output due to the fixed-length input. We use comprehensive fuzzing, UB detection via miri, and proving local safe invariants to ensure correctness without impacting performance.
- **f16**:   Add support for numeric conversions to-and-from 16-bit floats.
Adds f16, a half-precision IEEE-754 floating-point type, and bf16, the Brain Float 16 type, and numeric conversions to-and-from these floats. Note that since these are storage formats, and therefore do not have native arithmetic operations, all conversions are done using an intermediate f32.
diff --git a/lexical-benchmark/parse-float/Cargo.toml b/lexical-benchmark/parse-float/Cargo.toml index 04348a25..b4971e10 100644 --- a/lexical-benchmark/parse-float/Cargo.toml +++ b/lexical-benchmark/parse-float/Cargo.toml @@ -30,7 +30,6 @@ power-of-two = ["lexical-util/power-of-two", "lexical-parse-float/power-of-two"] format = ["lexical-util/format", "lexical-parse-float/format"] compact = ["lexical-util/compact", "lexical-parse-float/compact"] asm = [] -nightly = ["lexical-parse-float/nightly"] integers = ["lexical-util/integers"] floats = ["lexical-util/floats"] json = [] diff --git a/lexical-benchmark/parse-float/denormal30.rs b/lexical-benchmark/parse-float/denormal30.rs index 1549bd9a..378074ef 100644 --- a/lexical-benchmark/parse-float/denormal30.rs +++ b/lexical-benchmark/parse-float/denormal30.rs @@ -1,8 +1,3 @@ -// Inline ASM was stabilized in 1.59.0. -// FIXME: Remove when the MSRV for Rustc >= 1.59.0. -#![allow(stable_features)] -#![cfg_attr(feature = "nightly", feature(asm))] - mod black_box; use black_box::black_box; use lexical_parse_float::FromLexical; diff --git a/lexical-benchmark/parse-float/denormal6400.rs b/lexical-benchmark/parse-float/denormal6400.rs index dde1660d..744c784a 100644 --- a/lexical-benchmark/parse-float/denormal6400.rs +++ b/lexical-benchmark/parse-float/denormal6400.rs @@ -1,8 +1,3 @@ -// Inline ASM was stabilized in 1.59.0. -// FIXME: Remove when the MSRV for Rustc >= 1.59.0. -#![allow(stable_features)] -#![cfg_attr(feature = "nightly", feature(asm))] - mod black_box; use black_box::black_box; use lexical_parse_float::FromLexical; diff --git a/lexical-core/Cargo.toml b/lexical-core/Cargo.toml index da0e3b64..f251c6fa 100644 --- a/lexical-core/Cargo.toml +++ b/lexical-core/Cargo.toml @@ -100,23 +100,6 @@ compact = [ "lexical-parse-integer?/compact", "lexical-parse-float?/compact" ] -# Ensure only safe indexing is used. -# This is only relevant for the number writers, since the parsers -# are memory safe by default (and only use memory unsafety when -# is the trivial to prove correct). -safe = [ - "lexical-write-integer?/safe", - "lexical-write-float?/safe", - "lexical-parse-integer?/safe", - "lexical-parse-float?/safe" -] -# Add support for nightly-only features. -nightly = [ - "lexical-write-integer?/nightly", - "lexical-write-float?/nightly", - "lexical-parse-integer?/nightly", - "lexical-parse-float?/nightly" -] # Enable support for 16-bit floats. f16 = [ "lexical-util/f16", diff --git a/lexical-parse-float/Cargo.toml b/lexical-parse-float/Cargo.toml index 9058b12e..8843ce37 100644 --- a/lexical-parse-float/Cargo.toml +++ b/lexical-parse-float/Cargo.toml @@ -68,11 +68,6 @@ compact = [ "lexical-util/compact", "lexical-parse-integer/compact" ] -# Ensure only safe indexing is used. This is effectively a no-op, since all -# examples of potential memory unsafety are trivial to prove safe. -safe = ["lexical-parse-integer/safe"] -# Add support for nightly-only features. -nightly = ["lexical-parse-integer/nightly"] # Enable support for 16-bit floats. f16 = ["lexical-util/f16"] diff --git a/lexical-parse-float/src/bigint.rs b/lexical-parse-float/src/bigint.rs index cf8515aa..351df8ef 100644 --- a/lexical-parse-float/src/bigint.rs +++ b/lexical-parse-float/src/bigint.rs @@ -18,14 +18,6 @@ use crate::table::get_large_int_power; /// # Safety /// /// Safe if `index < array.len()`. -#[cfg(feature = "safe")] -macro_rules! index_unchecked { - ($x:ident[$i:expr]) => { - $x[$i] - }; -} - -#[cfg(not(feature = "safe"))] macro_rules! index_unchecked { ($x:ident[$i:expr]) => { // SAFETY: safe if `index < array.len()`. diff --git a/lexical-parse-float/src/fpu.rs b/lexical-parse-float/src/fpu.rs index fa093c42..657eaee1 100644 --- a/lexical-parse-float/src/fpu.rs +++ b/lexical-parse-float/src/fpu.rs @@ -6,7 +6,6 @@ //! //! It is therefore also subject to a Apache2.0/MIT license. -#![cfg(feature = "nightly")] #![doc(hidden)] pub use fpu_precision::set_precision; diff --git a/lexical-parse-float/src/libm.rs b/lexical-parse-float/src/libm.rs index 5ad6cc90..904ca6d9 100644 --- a/lexical-parse-float/src/libm.rs +++ b/lexical-parse-float/src/libm.rs @@ -28,19 +28,6 @@ /// # Safety /// /// Safe if `index < array.len()`. -#[cfg(feature = "safe")] -macro_rules! i { - ($x:ident, $i:expr) => { - $x[$i] - }; -} - -/// Index an array without bounds checking. -/// -/// # Safety -/// -/// Safe if `index < array.len()`. -#[cfg(not(feature = "safe"))] macro_rules! i { ($x:ident, $i:expr) => { unsafe { *$x.get_unchecked($i) } diff --git a/lexical-parse-float/src/number.rs b/lexical-parse-float/src/number.rs index b1d10adf..6152e962 100644 --- a/lexical-parse-float/src/number.rs +++ b/lexical-parse-float/src/number.rs @@ -8,7 +8,6 @@ use lexical_util::format::NumberFormat; use crate::float::RawFloat; -#[cfg(feature = "nightly")] use crate::fpu::set_precision; /// Representation of a number as the significant digits and exponent. @@ -65,7 +64,6 @@ impl<'a> Number<'a> { // function takes care of setting the precision on architectures which // require setting it by changing the global state (like the control word of the // x87 FPU). - #[cfg(feature = "nightly")] let _cw = set_precision::(); if self.is_fast_path::() { @@ -105,7 +103,6 @@ impl<'a> Number<'a> { let format = NumberFormat:: {}; debug_assert!(format.mantissa_radix() == format.exponent_base()); - #[cfg(feature = "nightly")] let _cw = set_precision::(); let radix = format.radix(); diff --git a/lexical-parse-integer/Cargo.toml b/lexical-parse-integer/Cargo.toml index e8cda0b6..27dbe892 100644 --- a/lexical-parse-integer/Cargo.toml +++ b/lexical-parse-integer/Cargo.toml @@ -46,11 +46,6 @@ radix = ["lexical-util/radix", "power-of-two"] format = ["lexical-util/format"] # Reduce code size at the cost of performance. compact = ["lexical-util/compact"] -# Ensure only safe indexing is used. This is a no-op, since all -# examples of potential memory unsafety are trivial to prove safe. -safe = [] -# Add support for nightly-only features. -nightly = [] # Internal only features. # Enable the lint checks. diff --git a/lexical-write-float/Cargo.toml b/lexical-write-float/Cargo.toml index 7aa24602..10dfc526 100644 --- a/lexical-write-float/Cargo.toml +++ b/lexical-write-float/Cargo.toml @@ -67,12 +67,6 @@ compact = [ "lexical-util/compact", "lexical-write-integer/compact" ] -# Ensure only safe indexing is used. -# This is not enabled by default for writers, due to the performance -# costs, and since input can be easily validated to avoid buffer overwrites. -safe = ["lexical-write-integer/safe"] -# Add support for nightly-only features. -nightly = ["lexical-write-integer/nightly"] # Enable support for 16-bit floats. f16 = ["lexical-util/f16"] diff --git a/lexical-write-float/src/index.rs b/lexical-write-float/src/index.rs index 492157c6..66028c3f 100644 --- a/lexical-write-float/src/index.rs +++ b/lexical-write-float/src/index.rs @@ -8,17 +8,8 @@ #![doc(hidden)] /// Index a buffer, without bounds checking. -#[cfg(not(feature = "safe"))] macro_rules! index_unchecked { ($x:ident[$i:expr]) => { *$x.get_unchecked($i) }; } - -/// Index a buffer, with bounds checking. -#[cfg(feature = "safe")] -macro_rules! index_unchecked { - ($x:ident[$i:expr]) => { - $x[$i] - }; -} diff --git a/lexical-write-float/src/write.rs b/lexical-write-float/src/write.rs index 4249af1b..1cc63f6d 100644 --- a/lexical-write-float/src/write.rs +++ b/lexical-write-float/src/write.rs @@ -186,7 +186,7 @@ macro_rules! write_float_as_f32 { ($($t:ty)*) => ($( impl WriteFloat for $t { #[inline(always)] - fn write_float(self, bytes: &mut [u8], options: &Options) -> &mut [u8] + fn write_float(self, bytes: &mut [u8], options: &Options) -> usize { // SAFETY: safe if `bytes` is large enough to hold the written bytes. self.as_f32().write_float::(bytes, options) diff --git a/lexical-write-integer/Cargo.toml b/lexical-write-integer/Cargo.toml index 86bc8e90..6683f90b 100644 --- a/lexical-write-integer/Cargo.toml +++ b/lexical-write-integer/Cargo.toml @@ -46,12 +46,6 @@ radix = ["lexical-util/radix", "power-of-two"] format = ["lexical-util/format"] # Reduce code size at the cost of performance. compact = ["lexical-util/compact"] -# Ensure only safe indexing is used. -# This is not enabled by default for writers, due to the performance -# costs, and since input can be easily validated to avoid buffer overwrites. -safe = [] -# Add support for nightly-only features. -nightly = [] # Internal only features. # Enable the lint checks. diff --git a/lexical-write-integer/src/index.rs b/lexical-write-integer/src/index.rs index 32206cf5..14aceb43 100644 --- a/lexical-write-integer/src/index.rs +++ b/lexical-write-integer/src/index.rs @@ -8,26 +8,16 @@ #![cfg_attr(feature = "compact", allow(unused_macros, unused_macro_rules))] #![doc(hidden)] -#[cfg(not(feature = "safe"))] macro_rules! index_unchecked { ($x:ident[$i:expr]) => { *$x.get_unchecked($i) }; } -/// Index a buffer, with bounds checking. -#[cfg(feature = "safe")] -macro_rules! index_unchecked { - ($x:ident[$i:expr]) => { - $x[$i] - }; -} - /// Index a buffer and get a mutable reference, without bounds checking. // The `($x:ident[$i:expr] = $y:ident[$j:expr])` is not used with `compact`. // The newer version of the lint is `unused_macro_rules`, but this isn't // supported until nightly-2022-05-12. -#[cfg(not(feature = "safe"))] #[allow(unknown_lints, unused_macro_rules)] macro_rules! index_unchecked_mut { ($x:ident[$i:expr]) => { @@ -40,7 +30,6 @@ macro_rules! index_unchecked_mut { } /// Fill a slice with a value, without bounds checking. -#[cfg(not(feature = "safe"))] macro_rules! slice_fill_unchecked { ($slc:expr, $value:expr) => { // Get the length first to avoid stacked borrows, since we might @@ -49,27 +38,3 @@ macro_rules! slice_fill_unchecked { core::ptr::write_bytes($slc.as_mut_ptr(), $value, len) }; } - -/// Fill a slice with a value, with bounds checking. -#[cfg(feature = "safe")] -macro_rules! slice_fill_unchecked { - ($slc:expr, $value:expr) => { - $slc.fill($value) - }; -} - -/// Index a buffer and get a mutable reference, with bounds checking. -// The `($x:ident[$i:expr] = $y:ident[$j:expr])` is not used with `compact`. -// The newer version of the lint is `unused_macro_rules`, but this isn't -// supported until nightly-2022-05-12. -#[cfg(feature = "safe")] -#[allow(unknown_lints, unused_macro_rules)] -macro_rules! index_unchecked_mut { - ($x:ident[$i:expr]) => { - $x[$i] - }; - - ($x:ident[$i:expr] = $y:ident[$j:expr]) => { - $x[$i] = $y[$j] - }; -} diff --git a/lexical/Cargo.toml b/lexical/Cargo.toml index 9d4a892e..fb5a9efb 100644 --- a/lexical/Cargo.toml +++ b/lexical/Cargo.toml @@ -27,8 +27,6 @@ path = "../lexical-core" default = ["std", "write-integers", "write-floats", "parse-integers", "parse-floats"] # Use the standard library. std = ["lexical-core/std"] -# Add support for nightly-only features. -nightly = ["lexical-core/nightly"] # Add support for writing integers. write-integers = ["lexical-core/write-integers", "write", "integers"] # Add support for writing floats. From c6f69e378c7784fe59bba981cb6207337a45f3e6 Mon Sep 17 00:00:00 2001 From: Alex Huszagh Date: Sat, 14 Sep 2024 16:47:13 -0500 Subject: [PATCH 12/14] Remove modules with unused features. --- README.md | 10 +++------- lexical-benchmark/parse-float/black_box.rs | 14 +------------- lexical-parse-float/src/lib.rs | 2 -- 3 files changed, 4 insertions(+), 22 deletions(-) diff --git a/README.md b/README.md index 1a16079e..74c88396 100644 --- a/README.md +++ b/README.md @@ -329,16 +329,12 @@ lexical-core should also work on a wide variety of other architectures and ISAs. The currently supported versions are: - v1.0.x -- v0.8.x -- v0.7.x (Maintenance) -- v0.6.x (Maintenance) + +Due to security considerations, all other versions have been yanked. **Rustc Compatibility** -- v0.8.x supports 1.63+, including stable, beta, and nightly. -- v0.8.x supports 1.51+, including stable, beta, and nightly. -- v0.7.x supports 1.37+, including stable, beta, and nightly. -- v0.6.x supports Rustc 1.24+, including stable, beta, and nightly. +- v1.0.x supports 1.63+, including stable, beta, and nightly. Please report any errors compiling a supported lexical-core version on a compatible Rustc version. diff --git a/lexical-benchmark/parse-float/black_box.rs b/lexical-benchmark/parse-float/black_box.rs index 19cbcf83..3c283c1d 100644 --- a/lexical-benchmark/parse-float/black_box.rs +++ b/lexical-benchmark/parse-float/black_box.rs @@ -1,5 +1,4 @@ // Optimized black box using the nicer assembly syntax. -#[cfg(feature = "asm")] pub fn black_box(mut dummy: f64) -> f64 { // THe `asm!` macro was stabilized in 1.59.0. use core::arch::asm; @@ -11,15 +10,4 @@ pub fn black_box(mut dummy: f64) -> f64 { ); dummy } -} - -// Optimized black box using the nicer assembly syntax. -#[cfg(not(feature = "asm"))] -#[allow(forgetting_copy_types)] -pub fn black_box(dummy: f64) -> f64 { - unsafe { - let x = core::ptr::read_volatile(&dummy); - core::mem::forget(dummy); - x - } -} +} \ No newline at end of file diff --git a/lexical-parse-float/src/lib.rs b/lexical-parse-float/src/lib.rs index 028f666b..035bbddf 100644 --- a/lexical-parse-float/src/lib.rs +++ b/lexical-parse-float/src/lib.rs @@ -30,8 +30,6 @@ //! * `radix` - Add support for strings of any radix. //! * `format` - Add support for parsing custom integer formats. //! * `compact` - Reduce code size at the cost of performance. -//! * `safe` - Ensure only memory-safe indexing is used. -//! * `nightly` - Enable assembly instructions to control FPU rounding modes. //! //! # Note //! From 70cc5575c9774c46430139efcfd9416904f54a33 Mon Sep 17 00:00:00 2001 From: Alex Huszagh Date: Sat, 14 Sep 2024 17:01:28 -0500 Subject: [PATCH 13/14] Do more housekeeping for documentation. --- lexical-util/src/skip.rs | 4 ++-- lexical-write-float/src/algorithm.rs | 24 ++++++++----------- lexical-write-float/src/lib.rs | 1 - lexical-write-float/src/shared.rs | 12 +++------- lexical-write-integer/src/compact.rs | 36 ++++++++++------------------ lexical-write-integer/src/decimal.rs | 6 ++--- lexical-write-integer/src/index.rs | 6 ----- scripts/unsafe.sh | 4 +++- 8 files changed, 34 insertions(+), 59 deletions(-) diff --git a/lexical-util/src/skip.rs b/lexical-util/src/skip.rs index 3231c759..5b208702 100644 --- a/lexical-util/src/skip.rs +++ b/lexical-util/src/skip.rs @@ -789,12 +789,12 @@ macro_rules! skip_iterator_bytesiter_base { #[inline(always)] fn read_u32(&self) -> Option { - unsafe { self.byte.read_u32() } + self.byte.read_u32() } #[inline(always)] fn read_u64(&self) -> Option { - unsafe { self.byte.read_u64() } + self.byte.read_u64() } #[inline(always)] diff --git a/lexical-write-float/src/algorithm.rs b/lexical-write-float/src/algorithm.rs index d96e4a68..c8b4e666 100644 --- a/lexical-write-float/src/algorithm.rs +++ b/lexical-write-float/src/algorithm.rs @@ -165,23 +165,19 @@ pub fn write_float_negative_exponent( if carried && cursor == 2 { // Rounded-up, and carried to the first byte, so instead of having // 0.9999, we have 1.0. - unsafe { - bytes[0] = b'1'; - if options.trim_floats() { - cursor = 1; - trimmed = true; - } else { - bytes[1] = decimal_point; - bytes[2] = b'0'; - cursor = 3; - } + bytes[0] = b'1'; + if options.trim_floats() { + cursor = 1; + trimmed = true; + } else { + bytes[1] = decimal_point; + bytes[2] = b'0'; + cursor = 3; } } else if carried { // Carried, so we need to remove 1 zero before our digits. - unsafe { - bytes[1] = decimal_point; - bytes[cursor - 1] = bytes[cursor]; - } + bytes[1] = decimal_point; + bytes[cursor - 1] = bytes[cursor]; } else { bytes[1] = decimal_point; cursor += digit_count; diff --git a/lexical-write-float/src/lib.rs b/lexical-write-float/src/lib.rs index e1f71059..c90160aa 100644 --- a/lexical-write-float/src/lib.rs +++ b/lexical-write-float/src/lib.rs @@ -60,7 +60,6 @@ //! //! # Safety //! -//! TODO: Validate this //! The `unsafe` usage in the options does not actually have any actual safety //! concerns unless the format is not validated: the float formatters assume //! NaN and Infinite strings are <= 50 characters. Creating custom format diff --git a/lexical-write-float/src/shared.rs b/lexical-write-float/src/shared.rs index a91512de..0a6559ea 100644 --- a/lexical-write-float/src/shared.rs +++ b/lexical-write-float/src/shared.rs @@ -182,19 +182,13 @@ macro_rules! write_float { let require_exponent = format.required_exponent_notation() || outside_break; if !format.no_exponent_notation() && require_exponent { // Write digits in scientific notation. - // SAFETY: safe as long as bytes is large enough to hold all the digits. - // TODO: No longer need to be unsafe - unsafe { $write_scientific::<$($generic,)? FORMAT>($bytes, $($args,)*) } + $write_scientific::<$($generic,)? FORMAT>($bytes, $($args,)*) } else if $sci_exp < 0 { // Write negative exponent without scientific notation. - // SAFETY: safe as long as bytes is large enough to hold all the digits. - // TODO: No longer need to be unsafe - unsafe { $write_negative::<$($generic,)? FORMAT>($bytes, $($args,)*) } + $write_negative::<$($generic,)? FORMAT>($bytes, $($args,)*) } else { // Write positive exponent without scientific notation. - // SAFETY: safe as long as bytes is large enough to hold all the digits. - // TODO: No longer need to be unsafe - unsafe { $write_positive::<$($generic,)? FORMAT>($bytes, $($args,)*) } + $write_positive::<$($generic,)? FORMAT>($bytes, $($args,)*) } }}; } diff --git a/lexical-write-integer/src/compact.rs b/lexical-write-integer/src/compact.rs index 619452d6..f5c368f7 100644 --- a/lexical-write-integer/src/compact.rs +++ b/lexical-write-integer/src/compact.rs @@ -27,31 +27,21 @@ pub trait Compact: UnsignedInteger + FormattedSize { let mut digits: [u8; 128] = [0u8; 128]; let mut index = digits.len(); - // SAFETY: safe as long as buffer is large enough to hold the max value. - // We never read unwritten values, and we never assume the data is initialized. - // Need at least 128-bits, at least as many as the bits in the current type. - // Since we make our safety variants inside, this is always safe. - // - // The logic is this: each iteration we remove a digit from the end, decrement - // the index, and assign it to the buffer. Since the longest digits possible - // would be radix 2, log2(128) == 128, so at most 128 digits. - let slc = unsafe { - // Decode all but the last digit. - let radix = Self::from_u32(radix); - let mut value = self; - while value >= radix { - let r = value % radix; - value /= radix; - index -= 1; - index_unchecked_mut!(digits[index]) = digit_to_char(u32::as_cast(r)); - } - - // Decode last digit. + // Decode all but the last digit. + let radix = Self::from_u32(radix); + let mut value = self; + while value >= radix { let r = value % radix; + value /= radix; index -= 1; - index_unchecked_mut!(digits[index]) = digit_to_char(u32::as_cast(r)); - &index_unchecked_mut!(digits[index..]) - }; + digits[index] = digit_to_char(u32::as_cast(r)); + } + + // Decode last digit. + let r = value % radix; + index -= 1; + digits[index] = digit_to_char(u32::as_cast(r)); + let slc = &digits[index..]; copy_to_dst(buffer, slc) } } diff --git a/lexical-write-integer/src/decimal.rs b/lexical-write-integer/src/decimal.rs index 61605e65..4abc056b 100644 --- a/lexical-write-integer/src/decimal.rs +++ b/lexical-write-integer/src/decimal.rs @@ -102,7 +102,7 @@ pub fn fast_digit_count(x: u32) -> usize { // SAFETY: always safe, since fast_log2 will always return a value // <= 32. This is because the range of values from `ctlz(x | 1)` is // `[0, 31]`, so `32 - 1 - ctlz(x | 1)` must be in the range `[0, 31]`. - let shift = unsafe { index_unchecked!(TABLE[fast_log2(x)]) }; + let shift = TABLE[fast_log2(x)]; let count = (x as u64 + shift) >> 32; count as usize } @@ -259,7 +259,7 @@ macro_rules! decimal_impl { impl Decimal for $t { #[inline(always)] fn decimal(self, buffer: &mut [u8]) -> usize { - // SAFETY: safe as long as buffer is large enough to hold the max value. + // SAFETY: safe since we've guaranteed the buffer is large enough to hold the max value. let count = self.digit_count(); assert!(count <= buffer.len()); unsafe { @@ -276,7 +276,7 @@ decimal_impl! { u32 u64 } impl Decimal for u128 { #[inline(always)] fn decimal(self, buffer: &mut [u8]) -> usize { - // SAFETY: safe as long as buffer is large enough to hold the max value. + // SAFETY: safe since we've guaranteed the buffer is large enough to hold the max value. let count = self.digit_count(); assert!(count <= buffer.len()); unsafe { diff --git a/lexical-write-integer/src/index.rs b/lexical-write-integer/src/index.rs index 14aceb43..52bcd48d 100644 --- a/lexical-write-integer/src/index.rs +++ b/lexical-write-integer/src/index.rs @@ -8,12 +8,6 @@ #![cfg_attr(feature = "compact", allow(unused_macros, unused_macro_rules))] #![doc(hidden)] -macro_rules! index_unchecked { - ($x:ident[$i:expr]) => { - *$x.get_unchecked($i) - }; -} - /// Index a buffer and get a mutable reference, without bounds checking. // The `($x:ident[$i:expr] = $y:ident[$j:expr])` is not used with `compact`. // The newer version of the lint is `unused_macro_rules`, but this isn't diff --git a/scripts/unsafe.sh b/scripts/unsafe.sh index f2acf741..6b25a976 100755 --- a/scripts/unsafe.sh +++ b/scripts/unsafe.sh @@ -1,5 +1,7 @@ #!/bin/bash # Print diagnostics on the amount of unsafe code used. +# NOTE: This is currently unused since cargo-count is +# unmaintained and requires an old version of clap. set -e @@ -10,7 +12,7 @@ count() { } # Change to our project home. -script_dir=`dirname "${BASH_SOURCE[0]}"` +script_dir=$(dirname "${BASH_SOURCE[0]}") cd "$script_dir"/ count "lexical-util" count "lexical-parse-integer" From 7df1e74b56ecc516b75637c1660422e301c956aa Mon Sep 17 00:00:00 2001 From: Alex Huszagh Date: Sat, 14 Sep 2024 17:41:09 -0500 Subject: [PATCH 14/14] Make clippy happy (they deserve it). --- lexical-benchmark/parse-float/black_box.rs | 2 +- lexical-write-integer/src/decimal.rs | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/lexical-benchmark/parse-float/black_box.rs b/lexical-benchmark/parse-float/black_box.rs index 3c283c1d..92641a5d 100644 --- a/lexical-benchmark/parse-float/black_box.rs +++ b/lexical-benchmark/parse-float/black_box.rs @@ -10,4 +10,4 @@ pub fn black_box(mut dummy: f64) -> f64 { ); dummy } -} \ No newline at end of file +} diff --git a/lexical-write-integer/src/decimal.rs b/lexical-write-integer/src/decimal.rs index 4abc056b..835e58ac 100644 --- a/lexical-write-integer/src/decimal.rs +++ b/lexical-write-integer/src/decimal.rs @@ -276,7 +276,8 @@ decimal_impl! { u32 u64 } impl Decimal for u128 { #[inline(always)] fn decimal(self, buffer: &mut [u8]) -> usize { - // SAFETY: safe since we've guaranteed the buffer is large enough to hold the max value. + // SAFETY: safe since we've guaranteed the buffer is large enough to hold the + // max value. let count = self.digit_count(); assert!(count <= buffer.len()); unsafe {