Skip to content

Commit

Permalink
arithmetic: Avoid heap & simplify alignment logic in `elem_exp_constt…
Browse files Browse the repository at this point in the history
…ime`.

Avoid allocating on the heap. Let the compiler do the alignment
instead of manually aligning the start of the table.
  • Loading branch information
briansmith committed Jan 27, 2025
1 parent c5475ed commit 8615a46
Show file tree
Hide file tree
Showing 3 changed files with 84 additions and 16 deletions.
1 change: 1 addition & 0 deletions src/arithmetic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ mod constant;
pub mod bigint;

pub(crate) mod inout;
mod limb512aligned;
pub mod montgomery;

mod n0;
Expand Down
52 changes: 36 additions & 16 deletions src/arithmetic/bigint.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,14 +42,13 @@ pub(crate) use self::{
modulusvalue::OwnedModulusValue,
private_exponent::PrivateExponent,
};
use super::{inout::AliasingSlices3, montgomery::*, LimbSliceError, MAX_LIMBS};
use super::{inout::AliasingSlices3, limb512aligned, montgomery::*, LimbSliceError, MAX_LIMBS};
use crate::{
bits::BitLength,
c,
error::{self, LenMismatchError},
limb::{self, Limb, LIMB_BITS},
};
use alloc::vec;
use core::{
marker::PhantomData,
num::{NonZeroU64, NonZeroUsize},
Expand Down Expand Up @@ -410,6 +409,11 @@ pub(crate) fn elem_exp_vartime<M>(
acc
}

// 4096 bits is the maximum modulus size we support for elem_exp_consttime;
// dividing by `LIMB_BITS` expresses that maximum as a number of limbs.
const ELEM_EXP_CONSTTIME_MAX_MODULUS_LIMBS: usize = 4096 / LIMB_BITS;
// Compile-time check that the maximum modulus size is a whole number of
// `limb512aligned` chunks.
const _LIMBS_PER_CHUNK_DIVIDES_ELEM_EXP_CONSTTIME_MAX_MODULUS_LIMBS: () =
assert!(ELEM_EXP_CONSTTIME_MAX_MODULUS_LIMBS % limb512aligned::LIMBS_PER_CHUNK == 0);

#[cfg(not(target_arch = "x86_64"))]
pub fn elem_exp_consttime<M>(
base: Elem<M, R>,
Expand All @@ -422,8 +426,18 @@ pub fn elem_exp_consttime<M>(
const TABLE_ENTRIES: usize = 1 << WINDOW_BITS;

let num_limbs = m.limbs().len();
if num_limbs % limb512aligned::LIMBS_PER_CHUNK != 0 {
return Err(error::Unspecified);
}
let cpe = num_limbs / limb512aligned::LIMBS_PER_CHUNK; // chunks per entry.

let mut table = vec![0; TABLE_ENTRIES * num_limbs];
type Storage = limb512aligned::Limb512AlignedStorage<
{ ELEM_EXP_CONSTTIME_MAX_MODULUS_LIMBS * TABLE_ENTRIES },
>;
let mut table = Storage::zeroed();
let table = table
.as_flattened_mut(TABLE_ENTRIES, cpe)
.ok_or_else(|| error::Unspecified)?;

fn gather<M>(table: &[Limb], acc: &mut Elem<M, R>, i: Window) {
prefixed_extern! {
Expand Down Expand Up @@ -463,9 +477,9 @@ pub fn elem_exp_consttime<M>(
}

// table[0] = base**0 (i.e. 1).
m.oneR(entry_mut(&mut table, 0, num_limbs));
m.oneR(entry_mut(table, 0, num_limbs));

entry_mut(&mut table, 1, num_limbs).copy_from_slice(&base.limbs);
entry_mut(table, 1, num_limbs).copy_from_slice(&base.limbs);
for i in 2..TABLE_ENTRIES {
let (src1, src2) = if i % 2 == 0 {
(i / 2, i / 2)
Expand Down Expand Up @@ -503,7 +517,7 @@ pub fn elem_exp_consttime<M>(
exponent: &PrivateExponent,
m: &Modulus<M>,
) -> Result<Elem<M, Unencoded>, error::Unspecified> {
use crate::{cpu, limb::LIMB_BYTES};
use crate::cpu;

// Pretty much all the math here requires CPU feature detection to have
// been done. `cpu_features` isn't threaded through all the internal
Expand All @@ -516,23 +530,29 @@ pub fn elem_exp_consttime<M>(
// inputs `tmp`, `am`, and `np` that immediately follow the table. All the
// awkwardness here stems from trying to use the assembly code like OpenSSL
// does.
const MOD_EXP_CTIME_ALIGN: usize = 64;

use crate::limb::{LeakyWindow, Window};

const WINDOW_BITS: usize = 5;
const TABLE_ENTRIES: usize = 1 << WINDOW_BITS;

let num_limbs = m.limbs().len();

const ALIGNMENT: usize = 64;
assert_eq!(ALIGNMENT % LIMB_BYTES, 0);
let mut table = vec![0; ((TABLE_ENTRIES + 3) * num_limbs) + ALIGNMENT];
let (table, state) = {
let misalignment = (table.as_ptr() as usize) % ALIGNMENT;
let table = &mut table[((ALIGNMENT - misalignment) / LIMB_BYTES)..];
assert_eq!((table.as_ptr() as usize) % ALIGNMENT, 0);
table.split_at_mut(TABLE_ENTRIES * num_limbs)
};
if num_limbs % limb512aligned::LIMBS_PER_CHUNK != 0 {
return Err(error::Unspecified);
}
let cpe = num_limbs / limb512aligned::LIMBS_PER_CHUNK; // chunks per entry.

const TABLE_ENTRIES_PLUS_3: usize = TABLE_ENTRIES + 3;
type Storage = limb512aligned::Limb512AlignedStorage<
{ ELEM_EXP_CONSTTIME_MAX_MODULUS_LIMBS * TABLE_ENTRIES_PLUS_3 },
>;
let mut table = Storage::zeroed();
let table = table
.as_flattened_mut(TABLE_ENTRIES_PLUS_3, cpe)
.ok_or_else(|| error::Unspecified)?;
assert_eq!((table.as_ptr() as usize) % MOD_EXP_CTIME_ALIGN, 0);
let (table, state) = table.split_at_mut(TABLE_ENTRIES * num_limbs);

fn scatter(table: &mut [Limb], acc: &[Limb], i: LeakyWindow, num_limbs: usize) {
prefixed_extern! {
Expand Down
47 changes: 47 additions & 0 deletions src/arithmetic/limb512aligned.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
// Copyright 2025 Brian Smith.
//
// Permission to use, copy, modify, and/or distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice appear in all copies.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY
// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
// OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

use crate::limb::{Limb, LIMB_BITS};
use core::mem::size_of;

// Storage for `N` limbs whose start is aligned to 64 bytes (512 bits).
//
// Some x86_64 assembly is written under the assumption that some of its
// input data and/or temporary storage is aligned to `MOD_EXP_CTIME_ALIGN`
// bytes, which was/is 64 in OpenSSL.
//
// We use this in the non-X86-64 implementation of exponentiation as well,
// with the hope of converging the two implementations into one.
#[repr(C, align(64))]
pub struct Limb512AlignedStorage<const N: usize>([Limb; N]);

// Compile-time check that 64 (the byte alignment requested for
// `Limb512AlignedStorage`) is a whole multiple of the size of a `Limb`.
const _LIMB_SIZE_DIVIDES_ALIGNMENT: () = assert!(64 % size_of::<Limb>() == 0);

// Number of limbs in one 512-bit chunk.
pub(super) const LIMBS_PER_CHUNK: usize = 512 / LIMB_BITS;

impl<const N: usize> Limb512AlignedStorage<N> {
    /// Compile-time proof that the storage holds a whole number of
    /// `LIMBS_PER_CHUNK`-limb chunks.
    ///
    /// The assertion is evaluated for each concrete `N` for which this
    /// constant is referenced, so an invalid `N` fails the build instead of
    /// panicking at runtime.
    const N_IS_MULTIPLE_OF_LIMBS_PER_CHUNK: () = assert!(N % LIMBS_PER_CHUNK == 0);

    /// Returns storage in which all `N` limbs are zero.
    ///
    /// `N` must be a multiple of `LIMBS_PER_CHUNK`; this is checked at
    /// compile time.
    pub fn zeroed() -> Self {
        // Referencing the associated constant forces the assertion to be
        // evaluated for this particular `N`.
        let () = Self::N_IS_MULTIPLE_OF_LIMBS_PER_CHUNK;
        Self([0; N])
    }

    /// Returns the leading
    /// `num_entries * chunks_per_entry * LIMBS_PER_CHUNK` limbs as one
    /// contiguous mutable slice.
    ///
    /// Returns `None` if that product overflows `usize` or is larger than
    /// `N`.
    pub fn as_flattened_mut(
        &mut self,
        num_entries: usize,
        chunks_per_entry: usize,
    ) -> Option<&mut [Limb]> {
        // Checked arithmetic so an overflowing request is rejected rather
        // than wrapping around to a smaller, in-bounds length.
        let total_chunks = num_entries.checked_mul(chunks_per_entry)?;
        let total_limbs = total_chunks.checked_mul(LIMBS_PER_CHUNK)?;
        // `get_mut` performs the bounds check against `N`.
        self.0.get_mut(..total_limbs)
    }
}

0 comments on commit 8615a46

Please sign in to comment.