From f8310b99805d37beace45904c230e4857ea9fcae Mon Sep 17 00:00:00 2001 From: Augustin Villetard Date: Fri, 27 Sep 2024 06:45:17 +0900 Subject: [PATCH] dev: optimized bitshifts by using a lookup table for powers of two --- crates/utils/src/constants.cairo | 261 +++++++++++++++++++++++++++++++ crates/utils/src/math.cairo | 31 +++- 2 files changed, 290 insertions(+), 2 deletions(-) diff --git a/crates/utils/src/constants.cairo b/crates/utils/src/constants.cairo index 78ce66bb3..0b4b1c676 100644 --- a/crates/utils/src/constants.cairo +++ b/crates/utils/src/constants.cairo @@ -210,6 +210,267 @@ pub const POW_2: [ 0x80000000000000000000000000000000 ]; +pub const POW_2_256: [ + u256 + ; 256] = [ + 0x1, + 0x2, + 0x4, + 0x8, + 0x10, + 0x20, + 0x40, + 0x80, + 0x100, + 0x200, + 0x400, + 0x800, + 0x1000, + 0x2000, + 0x4000, + 0x8000, + 0x10000, + 0x20000, + 0x40000, + 0x80000, + 0x100000, + 0x200000, + 0x400000, + 0x800000, + 0x1000000, + 0x2000000, + 0x4000000, + 0x8000000, + 0x10000000, + 0x20000000, + 0x40000000, + 0x80000000, + 0x100000000, + 0x200000000, + 0x400000000, + 0x800000000, + 0x1000000000, + 0x2000000000, + 0x4000000000, + 0x8000000000, + 0x10000000000, + 0x20000000000, + 0x40000000000, + 0x80000000000, + 0x100000000000, + 0x200000000000, + 0x400000000000, + 0x800000000000, + 0x1000000000000, + 0x2000000000000, + 0x4000000000000, + 0x8000000000000, + 0x10000000000000, + 0x20000000000000, + 0x40000000000000, + 0x80000000000000, + 0x100000000000000, + 0x200000000000000, + 0x400000000000000, + 0x800000000000000, + 0x1000000000000000, + 0x2000000000000000, + 0x4000000000000000, + 0x8000000000000000, + 0x10000000000000000, + 0x20000000000000000, + 0x40000000000000000, + 0x80000000000000000, + 0x100000000000000000, + 0x200000000000000000, + 0x400000000000000000, + 0x800000000000000000, + 0x1000000000000000000, + 0x2000000000000000000, + 0x4000000000000000000, + 0x8000000000000000000, + 0x10000000000000000000, + 0x20000000000000000000, + 0x40000000000000000000, + 
0x80000000000000000000, + 0x100000000000000000000, + 0x200000000000000000000, + 0x400000000000000000000, + 0x800000000000000000000, + 0x1000000000000000000000, + 0x2000000000000000000000, + 0x4000000000000000000000, + 0x8000000000000000000000, + 0x10000000000000000000000, + 0x20000000000000000000000, + 0x40000000000000000000000, + 0x80000000000000000000000, + 0x100000000000000000000000, + 0x200000000000000000000000, + 0x400000000000000000000000, + 0x800000000000000000000000, + 0x1000000000000000000000000, + 0x2000000000000000000000000, + 0x4000000000000000000000000, + 0x8000000000000000000000000, + 0x10000000000000000000000000, + 0x20000000000000000000000000, + 0x40000000000000000000000000, + 0x80000000000000000000000000, + 0x100000000000000000000000000, + 0x200000000000000000000000000, + 0x400000000000000000000000000, + 0x800000000000000000000000000, + 0x1000000000000000000000000000, + 0x2000000000000000000000000000, + 0x4000000000000000000000000000, + 0x8000000000000000000000000000, + 0x10000000000000000000000000000, + 0x20000000000000000000000000000, + 0x40000000000000000000000000000, + 0x80000000000000000000000000000, + 0x100000000000000000000000000000, + 0x200000000000000000000000000000, + 0x400000000000000000000000000000, + 0x800000000000000000000000000000, + 0x1000000000000000000000000000000, + 0x2000000000000000000000000000000, + 0x4000000000000000000000000000000, + 0x8000000000000000000000000000000, + 0x10000000000000000000000000000000, + 0x20000000000000000000000000000000, + 0x40000000000000000000000000000000, + 0x80000000000000000000000000000000, + 0x100000000000000000000000000000000, + 0x200000000000000000000000000000000, + 0x400000000000000000000000000000000, + 0x800000000000000000000000000000000, + 0x1000000000000000000000000000000000, + 0x2000000000000000000000000000000000, + 0x4000000000000000000000000000000000, + 0x8000000000000000000000000000000000, + 0x10000000000000000000000000000000000, + 0x20000000000000000000000000000000000, + 
0x40000000000000000000000000000000000, + 0x80000000000000000000000000000000000, + 0x100000000000000000000000000000000000, + 0x200000000000000000000000000000000000, + 0x400000000000000000000000000000000000, + 0x800000000000000000000000000000000000, + 0x1000000000000000000000000000000000000, + 0x2000000000000000000000000000000000000, + 0x4000000000000000000000000000000000000, + 0x8000000000000000000000000000000000000, + 0x10000000000000000000000000000000000000, + 0x20000000000000000000000000000000000000, + 0x40000000000000000000000000000000000000, + 0x80000000000000000000000000000000000000, + 0x100000000000000000000000000000000000000, + 0x200000000000000000000000000000000000000, + 0x400000000000000000000000000000000000000, + 0x800000000000000000000000000000000000000, + 0x1000000000000000000000000000000000000000, + 0x2000000000000000000000000000000000000000, + 0x4000000000000000000000000000000000000000, + 0x8000000000000000000000000000000000000000, + 0x10000000000000000000000000000000000000000, + 0x20000000000000000000000000000000000000000, + 0x40000000000000000000000000000000000000000, + 0x80000000000000000000000000000000000000000, + 0x100000000000000000000000000000000000000000, + 0x200000000000000000000000000000000000000000, + 0x400000000000000000000000000000000000000000, + 0x800000000000000000000000000000000000000000, + 0x1000000000000000000000000000000000000000000, + 0x2000000000000000000000000000000000000000000, + 0x4000000000000000000000000000000000000000000, + 0x8000000000000000000000000000000000000000000, + 0x10000000000000000000000000000000000000000000, + 0x20000000000000000000000000000000000000000000, + 0x40000000000000000000000000000000000000000000, + 0x80000000000000000000000000000000000000000000, + 0x100000000000000000000000000000000000000000000, + 0x200000000000000000000000000000000000000000000, + 0x400000000000000000000000000000000000000000000, + 0x800000000000000000000000000000000000000000000, + 0x1000000000000000000000000000000000000000000000, + 
0x2000000000000000000000000000000000000000000000, + 0x4000000000000000000000000000000000000000000000, + 0x8000000000000000000000000000000000000000000000, + 0x10000000000000000000000000000000000000000000000, + 0x20000000000000000000000000000000000000000000000, + 0x40000000000000000000000000000000000000000000000, + 0x80000000000000000000000000000000000000000000000, + 0x100000000000000000000000000000000000000000000000, + 0x200000000000000000000000000000000000000000000000, + 0x400000000000000000000000000000000000000000000000, + 0x800000000000000000000000000000000000000000000000, + 0x1000000000000000000000000000000000000000000000000, + 0x2000000000000000000000000000000000000000000000000, + 0x4000000000000000000000000000000000000000000000000, + 0x8000000000000000000000000000000000000000000000000, + 0x10000000000000000000000000000000000000000000000000, + 0x20000000000000000000000000000000000000000000000000, + 0x40000000000000000000000000000000000000000000000000, + 0x80000000000000000000000000000000000000000000000000, + 0x100000000000000000000000000000000000000000000000000, + 0x200000000000000000000000000000000000000000000000000, + 0x400000000000000000000000000000000000000000000000000, + 0x800000000000000000000000000000000000000000000000000, + 0x1000000000000000000000000000000000000000000000000000, + 0x2000000000000000000000000000000000000000000000000000, + 0x4000000000000000000000000000000000000000000000000000, + 0x8000000000000000000000000000000000000000000000000000, + 0x10000000000000000000000000000000000000000000000000000, + 0x20000000000000000000000000000000000000000000000000000, + 0x40000000000000000000000000000000000000000000000000000, + 0x80000000000000000000000000000000000000000000000000000, + 0x100000000000000000000000000000000000000000000000000000, + 0x200000000000000000000000000000000000000000000000000000, + 0x400000000000000000000000000000000000000000000000000000, + 0x800000000000000000000000000000000000000000000000000000, + 
0x1000000000000000000000000000000000000000000000000000000, + 0x2000000000000000000000000000000000000000000000000000000, + 0x4000000000000000000000000000000000000000000000000000000, + 0x8000000000000000000000000000000000000000000000000000000, + 0x10000000000000000000000000000000000000000000000000000000, + 0x20000000000000000000000000000000000000000000000000000000, + 0x40000000000000000000000000000000000000000000000000000000, + 0x80000000000000000000000000000000000000000000000000000000, + 0x100000000000000000000000000000000000000000000000000000000, + 0x200000000000000000000000000000000000000000000000000000000, + 0x400000000000000000000000000000000000000000000000000000000, + 0x800000000000000000000000000000000000000000000000000000000, + 0x1000000000000000000000000000000000000000000000000000000000, + 0x2000000000000000000000000000000000000000000000000000000000, + 0x4000000000000000000000000000000000000000000000000000000000, + 0x8000000000000000000000000000000000000000000000000000000000, + 0x10000000000000000000000000000000000000000000000000000000000, + 0x20000000000000000000000000000000000000000000000000000000000, + 0x40000000000000000000000000000000000000000000000000000000000, + 0x80000000000000000000000000000000000000000000000000000000000, + 0x100000000000000000000000000000000000000000000000000000000000, + 0x200000000000000000000000000000000000000000000000000000000000, + 0x400000000000000000000000000000000000000000000000000000000000, + 0x800000000000000000000000000000000000000000000000000000000000, + 0x1000000000000000000000000000000000000000000000000000000000000, + 0x2000000000000000000000000000000000000000000000000000000000000, + 0x4000000000000000000000000000000000000000000000000000000000000, + 0x8000000000000000000000000000000000000000000000000000000000000, + 0x10000000000000000000000000000000000000000000000000000000000000, + 0x20000000000000000000000000000000000000000000000000000000000000, + 0x40000000000000000000000000000000000000000000000000000000000000, + 
0x80000000000000000000000000000000000000000000000000000000000000, + 0x100000000000000000000000000000000000000000000000000000000000000, + 0x200000000000000000000000000000000000000000000000000000000000000, + 0x400000000000000000000000000000000000000000000000000000000000000, + 0x800000000000000000000000000000000000000000000000000000000000000, + 0x1000000000000000000000000000000000000000000000000000000000000000, + 0x2000000000000000000000000000000000000000000000000000000000000000, + 0x4000000000000000000000000000000000000000000000000000000000000000, + 0x8000000000000000000000000000000000000000000000000000000000000000, +]; + pub const POW_2_0: u128 = 0x1; pub const POW_2_8: u128 = 0x100; pub const POW_2_16: u128 = 0x10000; diff --git a/crates/utils/src/math.cairo b/crates/utils/src/math.cairo index 6e77fdbc9..fbfa1ecf1 100644 --- a/crates/utils/src/math.cairo +++ b/crates/utils/src/math.cairo @@ -1,7 +1,8 @@ use core::integer::{u512}; -use core::num::traits::{Zero, One, BitSize, OverflowingAdd, OverflowingMul}; +use core::num::traits::{Zero, One, BitSize, OverflowingAdd, OverflowingMul, Bounded}; use core::panic_with_felt252; use core::traits::{BitAnd}; +use utils::constants::POW_2_256; // === Exponentiation === @@ -236,6 +237,8 @@ impl BitshiftImpl< +PartialOrd, +BitSize, +TryInto, + +TryInto, + +TryInto, > of Bitshift { fn shl(self: T, shift: T) -> T { // if we shift by more than nb_bits of T, the result is 0 @@ -243,6 +246,12 @@ impl BitshiftImpl< if shift > BitSize::::bits().try_into().unwrap() - One::one() { panic_with_felt252('mul Overflow'); } + // if the shift is within the bit size of u256 (<= 255 bits), + // use the POW_2 lookup table to get 2^shift for efficient multiplication + if shift.try_into().unwrap() <= BitSize::::bits() - One::::one() { + return self * (*POW_2_256.span().at(shift.try_into().unwrap())).try_into().unwrap(); + } + // for shifts greater than 255 bits, perform the shift manually let two = One::one() + One::one(); self * two.pow(shift) } @@ 
-252,6 +261,10 @@ impl BitshiftImpl< if shift > BitSize::::bits().try_into().unwrap() - One::one() { panic_with_felt252('mul Overflow'); } + // use the POW_2_256 lookup table to get 2^shift when the shift is within the bound checked below + if shift.try_into().unwrap() <= BitSize::::bits() - One::::one() { + return self / (*POW_2_256.span().at(shift.try_into().unwrap())).try_into().unwrap(); + } let two = One::one() + One::one(); self / two.pow(shift) } @@ -300,17 +313,31 @@ pub impl WrappingBitshiftImpl< +OverflowingMul, +WrappingExponentiation, +BitSize, + +Bounded, +TryInto, + +TryInto, + +TryInto, + +Into > of WrappingBitshift { fn wrapping_shl(self: T, shift: T) -> T { + if shift.try_into().unwrap() <= BitSize::::bits() - One::::one() { + let pow_2: u256 = (*POW_2_256.span().at(shift.try_into().unwrap())); + let pow2_mod_t: u256 = pow_2 % Bounded::::MAX.into(); + let (result, _) = self.overflowing_mul(pow2_mod_t.try_into().unwrap()); + return result; + } let two = One::::one() + One::::one(); let (result, _) = self.overflowing_mul(two.wrapping_pow(shift)); result } fn wrapping_shr(self: T, shift: T) -> T { + if shift.try_into().unwrap() <= BitSize::::bits() - One::::one() { + let pow_2: u256 = (*POW_2_256.span().at(shift.try_into().unwrap())); + let pow2_mod_t: u256 = pow_2 % Bounded::::MAX.into(); + return self / pow2_mod_t.try_into().unwrap(); + } let two = One::::one() + One::::one(); - if shift > BitSize::::bits().try_into().unwrap() - One::one() { return Zero::zero(); }