Skip to content

Commit

Permalink
Move x86_64-mont5 dispatching to Rust (Merge BoringSSL 3efe2eb)
Browse files Browse the repository at this point in the history
  • Loading branch information
briansmith committed Jan 27, 2025
2 parents 468a4e8 + 3efe2eb commit f1f2faa
Showing 1 changed file with 39 additions and 52 deletions.
91 changes: 39 additions & 52 deletions crypto/fipsmodule/bn/asm/x86_64-mont5.pl
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@
# output, so this isn't useful anyway.
$addx = 1;

# int bn_mul_mont_gather5(
# int bn_mul_mont_gather5_nohw(
$rp="%rdi"; # BN_ULONG *rp,
$ap="%rsi"; # const BN_ULONG *ap,
$bp="%rdx"; # const BN_ULONG *bp,
Expand All @@ -72,29 +72,17 @@
$code=<<___;
.text

.extern OPENSSL_ia32cap_P

.globl bn_mul_mont_gather5
.type bn_mul_mont_gather5,\@function,6
.globl bn_mul_mont_gather5_nohw
.type bn_mul_mont_gather5_nohw,\@function,6
.align 64
bn_mul_mont_gather5:
bn_mul_mont_gather5_nohw:
.cfi_startproc
_CET_ENDBR
# num is declared as an int, a 32-bit parameter, so the upper half is
# undefined. Zero the upper half to normalize it.
mov ${num}d,${num}d
mov %rsp,%rax
.cfi_def_cfa_register %rax
test \$7,${num}d
jnz .Lmul_enter
___
$code.=<<___ if ($addx);
leaq OPENSSL_ia32cap_P(%rip),%r11
mov 8(%r11),%r11d
___
$code.=<<___;
jmp .Lmul4x_enter

.align 16
.Lmul_enter:
movd `($win64?56:8)`(%rsp),%xmm5 # load 7th argument
push %rbx
.cfi_push %rbx
Expand Down Expand Up @@ -454,27 +442,21 @@
.Lmul_epilogue:
ret
.cfi_endproc
.size bn_mul_mont_gather5,.-bn_mul_mont_gather5
.size bn_mul_mont_gather5_nohw,.-bn_mul_mont_gather5_nohw
___
{{{
my @A=("%r10","%r11");
my @N=("%r13","%rdi");
$code.=<<___;
.globl bn_mul4x_mont_gather5
.type bn_mul4x_mont_gather5,\@function,6
.align 32
bn_mul4x_mont_gather5:
.cfi_startproc
_CET_ENDBR
.byte 0x67
mov %rsp,%rax
.cfi_def_cfa_register %rax
.Lmul4x_enter:
___
$code.=<<___ if ($addx);
and \$0x80108,%r11d
cmp \$0x80108,%r11d # check for AD*X+BMI2+BMI1
je .Lmulx4x_enter
___
$code.=<<___;
push %rbx
.cfi_push %rbx
push %rbp
Expand All @@ -490,6 +472,9 @@
.Lmul4x_prologue:

.byte 0x67
# num is declared as an int, a 32-bit parameter, so the upper half is
# undefined. It is important that this write to ${num}, which zeros the
# upper half, comes before the first access.
shl \$3,${num}d # convert $num to bytes
lea ($num,$num,2),%r10 # 3*$num in bytes
neg $num # -$num
Expand Down Expand Up @@ -1079,7 +1064,7 @@
}}}
{{{
######################################################################
# void bn_power5(
# void bn_power5_nohw(
my $rptr="%rdi"; # BN_ULONG *rptr,
my $aptr="%rsi"; # const BN_ULONG *aptr,
my $bptr="%rdx"; # const void *table,
Expand All @@ -1094,23 +1079,14 @@
my ($a0,$a1,$ai)=("%r14","%r15","%rbx");

$code.=<<___;
.globl bn_power5
.type bn_power5,\@function,6
.globl bn_power5_nohw
.type bn_power5_nohw,\@function,6
.align 32
bn_power5:
bn_power5_nohw:
.cfi_startproc
_CET_ENDBR
mov %rsp,%rax
.cfi_def_cfa_register %rax
___
$code.=<<___ if ($addx);
leaq OPENSSL_ia32cap_P(%rip),%r11
mov 8(%r11),%r11d
and \$0x80108,%r11d
cmp \$0x80108,%r11d # check for AD*X+BMI2+BMI1
je .Lpowerx5_enter
___
$code.=<<___;
push %rbx
.cfi_push %rbx
push %rbp
Expand All @@ -1125,6 +1101,9 @@
.cfi_push %r15
.Lpower5_prologue:

# num is declared as an int, a 32-bit parameter, so the upper half is
# undefined. It is important that this write to ${num}, which zeros the
# upper half, come before the first access.
shl \$3,${num}d # convert $num to bytes
lea ($num,$num,2),%r10d # 3*$num
neg $num
Expand Down Expand Up @@ -1233,7 +1212,7 @@
.Lpower5_epilogue:
ret
.cfi_endproc
.size bn_power5,.-bn_power5
.size bn_power5_nohw,.-bn_power5_nohw

.globl bn_sqr8x_internal
.hidden bn_sqr8x_internal
Expand Down Expand Up @@ -2108,13 +2087,14 @@
my $bp="%rdx"; # restore original value

$code.=<<___;
.globl bn_mulx4x_mont_gather5
.type bn_mulx4x_mont_gather5,\@function,6
.align 32
bn_mulx4x_mont_gather5:
.cfi_startproc
_CET_ENDBR
mov %rsp,%rax
.cfi_def_cfa_register %rax
.Lmulx4x_enter:
push %rbx
.cfi_push %rbx
push %rbp
Expand All @@ -2129,6 +2109,9 @@
.cfi_push %r15
.Lmulx4x_prologue:

# num is declared as an int, a 32-bit parameter, so the upper half is
# undefined. It is important that this write to ${num}, which zeros the
# upper half, comes before the first access.
shl \$3,${num}d # convert $num to bytes
lea ($num,$num,2),%r10 # 3*$num in bytes
neg $num # -$num
Expand Down Expand Up @@ -2583,7 +2566,7 @@
___
} {
######################################################################
# void bn_power5(
# void bn_powerx5(
my $rptr="%rdi"; # BN_ULONG *rptr,
my $aptr="%rsi"; # const BN_ULONG *aptr,
my $bptr="%rdx"; # const void *table,
Expand All @@ -2598,13 +2581,14 @@
my ($a0,$a1,$ai)=("%r14","%r15","%rbx");

$code.=<<___;
.globl bn_powerx5
.type bn_powerx5,\@function,6
.align 32
bn_powerx5:
.cfi_startproc
_CET_ENDBR
mov %rsp,%rax
.cfi_def_cfa_register %rax
.Lpowerx5_enter:
push %rbx
.cfi_push %rbx
push %rbp
Expand All @@ -2619,6 +2603,9 @@
.cfi_push %r15
.Lpowerx5_prologue:

# num is declared as an int, a 32-bit parameter, so the upper half is
# undefined. It is important that this write to ${num}, which zeros the
# upper half, comes before the first access.
shl \$3,${num}d # convert $num to bytes
lea ($num,$num,2),%r10 # 3*$num in bytes
neg $num
Expand Down Expand Up @@ -3705,17 +3692,17 @@

.section .pdata
.align 4
.rva .LSEH_begin_bn_mul_mont_gather5
.rva .LSEH_end_bn_mul_mont_gather5
.rva .LSEH_info_bn_mul_mont_gather5
.rva .LSEH_begin_bn_mul_mont_gather5_nohw
.rva .LSEH_end_bn_mul_mont_gather5_nohw
.rva .LSEH_info_bn_mul_mont_gather5_nohw

.rva .LSEH_begin_bn_mul4x_mont_gather5
.rva .LSEH_end_bn_mul4x_mont_gather5
.rva .LSEH_info_bn_mul4x_mont_gather5

.rva .LSEH_begin_bn_power5
.rva .LSEH_end_bn_power5
.rva .LSEH_info_bn_power5
.rva .LSEH_begin_bn_power5_nohw
.rva .LSEH_end_bn_power5_nohw
.rva .LSEH_info_bn_power5_nohw
___
$code.=<<___ if ($addx);
.rva .LSEH_begin_bn_mulx4x_mont_gather5
Expand All @@ -3733,7 +3720,7 @@

.section .xdata
.align 8
.LSEH_info_bn_mul_mont_gather5:
.LSEH_info_bn_mul_mont_gather5_nohw:
.byte 9,0,0,0
.rva mul_handler
.rva .Lmul_body,.Lmul_body,.Lmul_epilogue # HandlerData[]
Expand All @@ -3743,7 +3730,7 @@
.rva mul_handler
.rva .Lmul4x_prologue,.Lmul4x_body,.Lmul4x_epilogue # HandlerData[]
.align 8
.LSEH_info_bn_power5:
.LSEH_info_bn_power5_nohw:
.byte 9,0,0,0
.rva mul_handler
.rva .Lpower5_prologue,.Lpower5_body,.Lpower5_epilogue # HandlerData[]
Expand Down

0 comments on commit f1f2faa

Please sign in to comment.