From 5ac4b3ec1c99c65572709758e4fb3d77a0b32c16 Mon Sep 17 00:00:00 2001
From: Tony Arcieri <bascule@gmail.com>
Date: Sat, 30 Nov 2024 12:54:52 -0700
Subject: [PATCH] Leverage `const_mut_refs`; MSRV 1.83 (#667)

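Replaces macro-based code sharing between the stack-allocated and
heap-allocated `*Uint` types with `const fn`s that take `&mut [Limb]`
arguments. This relies on `const_mut_refs`, which was stabilized in
Rust 1.83, hence the MSRV bump from 1.81 to 1.83.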
---
 .github/workflows/crypto-bigint.yml |  13 +-
 Cargo.toml                          |   2 +-
 README.md                           |   4 +-
 src/modular/reduction.rs            |  83 ++++++------
 src/uint/mul.rs                     | 191 ++++++++++++++--------------
 5 files changed, 145 insertions(+), 148 deletions(-)

diff --git a/.github/workflows/crypto-bigint.yml b/.github/workflows/crypto-bigint.yml
index b319809f..f4edaa27 100644
--- a/.github/workflows/crypto-bigint.yml
+++ b/.github/workflows/crypto-bigint.yml
@@ -20,7 +20,7 @@ jobs:
     strategy:
       matrix:
         rust:
-          - 1.81.0 # MSRV
+          - 1.83.0 # MSRV
           - stable
         target:
           - thumbv7em-none-eabi
@@ -48,7 +48,7 @@ jobs:
         include:
           # 32-bit Linux
           - target: i686-unknown-linux-gnu
-            rust: 1.81.0 # MSRV
+            rust: 1.83.0 # MSRV
             deps: sudo apt update && sudo apt install gcc-multilib
           - target: i686-unknown-linux-gnu
             rust: stable
@@ -56,7 +56,7 @@ jobs:
 
           # 64-bit Linux
           - target: x86_64-unknown-linux-gnu
-            rust: 1.81.0 # MSRV
+            rust: 1.83.0 # MSRV
           - target: x86_64-unknown-linux-gnu
             rust: stable
     steps:
@@ -105,6 +105,9 @@ jobs:
         with:
           toolchain: nightly
       - run: cargo update -Z minimal-versions
+      - uses: dtolnay/rust-toolchain@master
+        with:
+          toolchain: stable
       - run: cargo +stable build --release --all-features
 
   miri:
@@ -136,7 +139,7 @@ jobs:
       - uses: actions/checkout@v4
       - uses: dtolnay/rust-toolchain@master
         with:
-          toolchain: 1.81.0
+          toolchain: stable
           components: clippy
       - run: cargo clippy --all --all-features -- -D warnings
 
@@ -156,7 +159,7 @@ jobs:
       - uses: actions/checkout@v4
       - uses: dtolnay/rust-toolchain@master
         with:
-          toolchain: 1.81.0
+          toolchain: stable
       - run: cargo build --benches
       - run: cargo build --all-features --benches
 
diff --git a/Cargo.toml b/Cargo.toml
index 2b3cdd40..9a0f6a4d 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -14,7 +14,7 @@ keywords = ["arbitrary", "crypto", "bignum", "integer", "precision"]
 readme = "README.md"
 resolver = "2"
 edition = "2021"
-rust-version = "1.81"
+rust-version = "1.83"
 
 [dependencies]
 subtle = { version = "2.6", default-features = false }
diff --git a/README.md b/README.md
index 1da82ab7..321c49b1 100644
--- a/README.md
+++ b/README.md
@@ -39,7 +39,7 @@ microcontrollers).
 
 ## Minimum Supported Rust Version
 
-This crate requires **Rust 1.81** at a minimum.
+This crate requires **Rust 1.83** at a minimum.
 
 We may change the MSRV in the future, but it will be accompanied by a minor
 version bump.
@@ -68,7 +68,7 @@ dual licensed as above, without any additional terms or conditions.
 [build-image]: https://github.com/RustCrypto/crypto-bigint/actions/workflows/crypto-bigint.yml/badge.svg
 [build-link]: https://github.com/RustCrypto/crypto-bigint/actions/workflows/crypto-bigint.yml
 [license-image]: https://img.shields.io/badge/license-Apache2.0/MIT-blue.svg
-[rustc-image]: https://img.shields.io/badge/rustc-1.81+-blue.svg
+[rustc-image]: https://img.shields.io/badge/rustc-1.83+-blue.svg
 [chat-image]: https://img.shields.io/badge/zulip-join_chat-blue.svg
 [chat-link]: https://rustcrypto.zulipchat.com/#narrow/stream/300602-crypto-bigint
 
diff --git a/src/modular/reduction.rs b/src/modular/reduction.rs
index b53bc8a0..688389fe 100644
--- a/src/modular/reduction.rs
+++ b/src/modular/reduction.rs
@@ -5,43 +5,46 @@ use crate::{Limb, Odd, Uint};
 #[cfg(feature = "alloc")]
 use {crate::BoxedUint, subtle::Choice};
 
-/// Implement the Montgomery reduction algorithm.
-///
-/// This is implemented as a macro to abstract over `const fn` and boxed use cases, since the latter
-/// needs mutable references and thus the unstable `const_mut_refs` feature (rust-lang/rust#57349).
-// TODO(tarcieri): change this into a `const fn` when `const_mut_refs` is stable
-macro_rules! impl_montgomery_reduction {
-    ($upper:expr, $lower:expr, $modulus:expr, $mod_neg_inv:expr, $limbs:expr) => {{
-        let mut meta_carry = Limb::ZERO;
-        let mut new_sum;
-
-        let mut i = 0;
-        while i < $limbs {
-            let u = $lower[i].wrapping_mul($mod_neg_inv);
-
-            let (_, mut carry) = $lower[i].mac(u, $modulus[0], Limb::ZERO);
-            let mut new_limb;
-
-            let mut j = 1;
-            while j < ($limbs - i) {
-                (new_limb, carry) = $lower[i + j].mac(u, $modulus[j], carry);
-                $lower[i + j] = new_limb;
-                j += 1;
-            }
-            while j < $limbs {
-                (new_limb, carry) = $upper[i + j - $limbs].mac(u, $modulus[j], carry);
-                $upper[i + j - $limbs] = new_limb;
-                j += 1;
-            }
-
-            (new_sum, meta_carry) = $upper[i].adc(carry, meta_carry);
-            $upper[i] = new_sum;
-
-            i += 1;
+/// Algorithm 14.32 in Handbook of Applied Cryptography <https://cacr.uwaterloo.ca/hac/about/chap14.pdf>
+const fn montgomery_reduction_inner(
+    upper: &mut [Limb],
+    lower: &mut [Limb],
+    modulus: &[Limb],
+    mod_neg_inv: Limb,
+) -> Limb {
+    let nlimbs = modulus.len();
+    debug_assert!(nlimbs == upper.len());
+    debug_assert!(nlimbs == lower.len());
+
+    let mut meta_carry = Limb::ZERO;
+    let mut new_sum;
+
+    let mut i = 0;
+    while i < nlimbs {
+        let u = lower[i].wrapping_mul(mod_neg_inv);
+
+        let (_, mut carry) = lower[i].mac(u, modulus[0], Limb::ZERO);
+        let mut new_limb;
+
+        let mut j = 1;
+        while j < (nlimbs - i) {
+            (new_limb, carry) = lower[i + j].mac(u, modulus[j], carry);
+            lower[i + j] = new_limb;
+            j += 1;
         }
+        while j < nlimbs {
+            (new_limb, carry) = upper[i + j - nlimbs].mac(u, modulus[j], carry);
+            upper[i + j - nlimbs] = new_limb;
+            j += 1;
+        }
+
+        (new_sum, meta_carry) = upper[i].adc(carry, meta_carry);
+        upper[i] = new_sum;
+
+        i += 1;
+    }
 
-        meta_carry
-    }};
+    meta_carry
 }
 
 /// Algorithm 14.32 in Handbook of Applied Cryptography <https://cacr.uwaterloo.ca/hac/about/chap14.pdf>
@@ -51,12 +54,11 @@ pub const fn montgomery_reduction<const LIMBS: usize>(
     mod_neg_inv: Limb,
 ) -> Uint<LIMBS> {
     let (mut lower, mut upper) = *lower_upper;
-    let meta_carry = impl_montgomery_reduction!(
-        upper.limbs,
-        lower.limbs,
+    let meta_carry = montgomery_reduction_inner(
+        &mut upper.limbs,
+        &mut lower.limbs,
         &modulus.0.limbs,
         mod_neg_inv,
-        LIMBS
     );
 
     // Division is simply taking the upper half of the limbs
@@ -79,8 +81,7 @@ pub(crate) fn montgomery_reduction_boxed_mut(
     debug_assert_eq!(out.nlimbs(), modulus.nlimbs());
 
     let (lower, upper) = x.limbs.split_at_mut(modulus.nlimbs());
-    let meta_carry =
-        impl_montgomery_reduction!(upper, lower, &modulus.limbs, mod_neg_inv, modulus.nlimbs());
+    let meta_carry = montgomery_reduction_inner(upper, lower, &modulus.limbs, mod_neg_inv);
 
     out.limbs.copy_from_slice(upper);
     let borrow = out.sbb_assign(modulus, Limb::ZERO);
diff --git a/src/uint/mul.rs b/src/uint/mul.rs
index 4a122e96..3a2c3110 100644
--- a/src/uint/mul.rs
+++ b/src/uint/mul.rs
@@ -9,125 +9,118 @@ use subtle::CtOption;
 
 pub(crate) mod karatsuba;
 
-/// Implement the core schoolbook multiplication algorithm.
+/// Schoolbook multiplication a.k.a. long multiplication, i.e. the traditional method taught in
+/// schools.
 ///
-/// This is implemented as a macro to abstract over `const fn` and boxed use cases, since the latter
-/// needs mutable references and thus the unstable `const_mut_refs` feature (rust-lang/rust#57349).
-///
-/// It allows us to have a single place (this module) to improve the multiplication implementation
-/// which will also be reused for `BoxedUint`.
-// TODO(tarcieri): change this into a `const fn` when `const_mut_refs` is stable
-macro_rules! impl_schoolbook_multiplication {
-    ($lhs:expr, $rhs:expr, $lo:expr, $hi:expr) => {{
-        if $lhs.len() != $lo.len() || $rhs.len() != $hi.len() {
-            panic!("schoolbook multiplication length mismatch");
-        }
-
-        let mut i = 0;
-        while i < $lhs.len() {
-            let mut j = 0;
-            let mut carry = Limb::ZERO;
-            let xi = $lhs[i];
-
-            while j < $rhs.len() {
-                let k = i + j;
+/// The most efficient method for small numbers.
+const fn schoolbook_multiplication(lhs: &[Limb], rhs: &[Limb], lo: &mut [Limb], hi: &mut [Limb]) {
+    if lhs.len() != lo.len() || rhs.len() != hi.len() {
+        panic!("schoolbook multiplication length mismatch");
+    }
 
-                if k >= $lhs.len() {
-                    ($hi[k - $lhs.len()], carry) = $hi[k - $lhs.len()].mac(xi, $rhs[j], carry);
-                } else {
-                    ($lo[k], carry) = $lo[k].mac(xi, $rhs[j], carry);
-                }
+    let mut i = 0;
+    while i < lhs.len() {
+        let mut j = 0;
+        let mut carry = Limb::ZERO;
+        let xi = lhs[i];
 
-                j += 1;
-            }
+        while j < rhs.len() {
+            let k = i + j;
 
-            if i + j >= $lhs.len() {
-                $hi[i + j - $lhs.len()] = carry;
+            if k >= lhs.len() {
+                (hi[k - lhs.len()], carry) = hi[k - lhs.len()].mac(xi, rhs[j], carry);
             } else {
-                $lo[i + j] = carry;
+                (lo[k], carry) = lo[k].mac(xi, rhs[j], carry);
             }
-            i += 1;
+
+            j += 1;
         }
-    }};
+
+        if i + j >= lhs.len() {
+            hi[i + j - lhs.len()] = carry;
+        } else {
+            lo[i + j] = carry;
+        }
+        i += 1;
+    }
 }
 
-/// Implement the schoolbook method for squaring.
+/// Schoolbook method of squaring.
 ///
 /// Like schoolbook multiplication, but only considering half of the multiplication grid.
-// TODO: change this into a `const fn` when `const_mut_refs` is stable.
-macro_rules! impl_schoolbook_squaring {
-    ($limbs:expr, $lo:expr, $hi:expr) => {{
-        // Translated from https://github.com/ucbrise/jedi-pairing/blob/c4bf151/include/core/bigint.hpp#L410
-        //
-        // Permission to relicense the resulting translation as Apache 2.0 + MIT was given
-        // by the original author Sam Kumar: https://github.com/RustCrypto/crypto-bigint/pull/133#discussion_r1056870411
-
-        if $limbs.len() != $lo.len() || $lo.len() != $hi.len() {
-            panic!("schoolbook squaring length mismatch");
-        }
-
-        let mut i = 1;
-        while i < $limbs.len() {
-            let mut j = 0;
-            let mut carry = Limb::ZERO;
-            let xi = $limbs[i];
+pub(crate) const fn schoolbook_squaring(limbs: &[Limb], lo: &mut [Limb], hi: &mut [Limb]) {
+    // Translated from https://github.com/ucbrise/jedi-pairing/blob/c4bf151/include/core/bigint.hpp#L410
+    //
+    // Permission to relicense the resulting translation as Apache 2.0 + MIT was given
+    // by the original author Sam Kumar: https://github.com/RustCrypto/crypto-bigint/pull/133#discussion_r1056870411
+
+    if limbs.len() != lo.len() || lo.len() != hi.len() {
+        panic!("schoolbook squaring length mismatch");
+    }
 
-            while j < i {
-                let k = i + j;
+    let mut i = 1;
+    while i < limbs.len() {
+        let mut j = 0;
+        let mut carry = Limb::ZERO;
+        let xi = limbs[i];
 
-                if k >= $limbs.len() {
-                    ($hi[k - $limbs.len()], carry) = $hi[k - $limbs.len()].mac(xi, $limbs[j], carry);
-                } else {
-                    ($lo[k], carry) = $lo[k].mac(xi, $limbs[j], carry);
-                }
+        while j < i {
+            let k = i + j;
 
-                j += 1;
-            }
-
-            if (2 * i) < $limbs.len() {
-                $lo[2 * i] = carry;
+            if k >= limbs.len() {
+                (hi[k - limbs.len()], carry) = hi[k - limbs.len()].mac(xi, limbs[j], carry);
             } else {
-                $hi[2 * i - $limbs.len()] = carry;
+                (lo[k], carry) = lo[k].mac(xi, limbs[j], carry);
             }
 
-            i += 1;
+            j += 1;
         }
 
-        // Double the current result, this accounts for the other half of the multiplication grid.
-        // The top word is empty, so we use a special purpose shl.
-        let mut carry = Limb::ZERO;
-        let mut i = 0;
-        while i < $limbs.len() {
-            ($lo[i].0, carry) = ($lo[i].0 << 1 | carry.0, $lo[i].shr(Limb::BITS - 1));
-            i += 1;
+        if (2 * i) < limbs.len() {
+            lo[2 * i] = carry;
+        } else {
+            hi[2 * i - limbs.len()] = carry;
         }
-        i = 0;
-        while i < $limbs.len() - 1 {
-            ($hi[i].0, carry) = ($hi[i].0 << 1 | carry.0, $hi[i].shr(Limb::BITS - 1));
-            i += 1;
-        }
-        $hi[$limbs.len() - 1] = carry;
 
-        // Handle the diagonal of the multiplication grid, which finishes the multiplication grid.
-        let mut carry = Limb::ZERO;
-        let mut i = 0;
-        while i < $limbs.len() {
-            let xi = $limbs[i];
-            if (i * 2) < $limbs.len() {
-                ($lo[i * 2], carry) = $lo[i * 2].mac(xi, xi, carry);
-            } else {
-                ($hi[i * 2 - $limbs.len()], carry) = $hi[i * 2 - $limbs.len()].mac(xi, xi, carry);
-            }
+        i += 1;
+    }
 
-            if (i * 2 + 1) < $limbs.len() {
-                ($lo[i * 2 + 1], carry) = $lo[i * 2 + 1].overflowing_add(carry);
-            } else {
-                ($hi[i * 2 + 1 - $limbs.len()], carry) = $hi[i * 2 + 1 - $limbs.len()].overflowing_add(carry);
-            }
+    // Double the current result; this accounts for the other half of the multiplication grid.
+    // The top word is empty, so we use a special-purpose shl.
+    let mut carry = Limb::ZERO;
+    let mut i = 0;
+    while i < limbs.len() {
+        (lo[i].0, carry) = (lo[i].0 << 1 | carry.0, lo[i].shr(Limb::BITS - 1));
+        i += 1;
+    }
 
-            i += 1;
+    let mut i = 0;
+    while i < limbs.len() - 1 {
+        (hi[i].0, carry) = (hi[i].0 << 1 | carry.0, hi[i].shr(Limb::BITS - 1));
+        i += 1;
+    }
+    hi[limbs.len() - 1] = carry;
+
+    // Add the diagonal terms (the squares `xi * xi`), which completes the multiplication grid.
+    let mut carry = Limb::ZERO;
+    let mut i = 0;
+    while i < limbs.len() {
+        let xi = limbs[i];
+        if (i * 2) < limbs.len() {
+            (lo[i * 2], carry) = lo[i * 2].mac(xi, xi, carry);
+        } else {
+            (hi[i * 2 - limbs.len()], carry) = hi[i * 2 - limbs.len()].mac(xi, xi, carry);
         }
-    }};
+
+        if (i * 2 + 1) < limbs.len() {
+            (lo[i * 2 + 1], carry) = lo[i * 2 + 1].overflowing_add(carry);
+        } else {
+            (hi[i * 2 + 1 - limbs.len()], carry) =
+                hi[i * 2 + 1 - limbs.len()].overflowing_add(carry);
+        }
+
+        i += 1;
+    }
 }
 
 impl<const LIMBS: usize> Uint<LIMBS> {
@@ -316,7 +309,7 @@ pub(crate) const fn uint_mul_limbs<const LIMBS: usize, const RHS_LIMBS: usize>(
     debug_assert!(lhs.len() == LIMBS && rhs.len() == RHS_LIMBS);
     let mut lo: Uint<LIMBS> = Uint::<LIMBS>::ZERO;
     let mut hi = Uint::<RHS_LIMBS>::ZERO;
-    impl_schoolbook_multiplication!(lhs, rhs, lo.limbs, hi.limbs);
+    schoolbook_multiplication(lhs, rhs, &mut lo.limbs, &mut hi.limbs);
     (lo, hi)
 }
 
@@ -327,7 +320,7 @@ pub(crate) const fn uint_square_limbs<const LIMBS: usize>(
 ) -> (Uint<LIMBS>, Uint<LIMBS>) {
     let mut lo = Uint::<LIMBS>::ZERO;
     let mut hi = Uint::<LIMBS>::ZERO;
-    impl_schoolbook_squaring!(limbs, lo.limbs, hi.limbs);
+    schoolbook_squaring(limbs, &mut lo.limbs, &mut hi.limbs);
     (lo, hi)
 }
 
@@ -336,7 +329,7 @@ pub(crate) const fn uint_square_limbs<const LIMBS: usize>(
 pub(crate) fn mul_limbs(lhs: &[Limb], rhs: &[Limb], out: &mut [Limb]) {
     debug_assert_eq!(lhs.len() + rhs.len(), out.len());
     let (lo, hi) = out.split_at_mut(lhs.len());
-    impl_schoolbook_multiplication!(lhs, rhs, lo, hi);
+    schoolbook_multiplication(lhs, rhs, lo, hi);
 }
 
 /// Wrapper function used by `BoxedUint`
@@ -344,7 +337,7 @@ pub(crate) fn mul_limbs(lhs: &[Limb], rhs: &[Limb], out: &mut [Limb]) {
 pub(crate) fn square_limbs(limbs: &[Limb], out: &mut [Limb]) {
     debug_assert_eq!(limbs.len() * 2, out.len());
     let (lo, hi) = out.split_at_mut(limbs.len());
-    impl_schoolbook_squaring!(limbs, lo, hi);
+    schoolbook_squaring(limbs, lo, hi);
 }
 
 #[cfg(test)]