Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Vector crypto additional #1748

Draft
wants to merge 5 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion disasm/disasm.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2157,7 +2157,8 @@ void disassembler_t::add_instructions(const isa_parser_t* isa)
#undef DISASM_VECTOR_VV_VX_VIU_ZIMM6
}

if (isa->extension_enabled(EXT_ZVBC)) {
if (isa->extension_enabled(EXT_ZVBC) ||
isa->extension_enabled(EXT_ZVBC32E)) {
#define DISASM_VECTOR_VV_VX(name) \
DEFINE_VECTOR_VV(name##_vv); \
DEFINE_VECTOR_VX(name##_vx)
Expand All @@ -2175,6 +2176,13 @@ void disassembler_t::add_instructions(const isa_parser_t* isa)
DEFINE_VECTOR_VV(vghsh_vv);
}

if (isa->extension_enabled(EXT_ZVKGS)) {
// Despite its suffix, the vgmul.vs instruction
// is really ".v", with the form "vgmul.vs vd, vs2".
DEFINE_VECTOR_V(vgmul_vs);
// vghsh.vs keeps the regular three-operand form "vghsh.vs vd, vs2, vs1".
DEFINE_VECTOR_VV(vghsh_vs);
}

if (isa->extension_enabled(EXT_ZVKNED)) {
// Despite their suffixes, the vaes*.{vv,vs} instructions
// are really ".v", with the form "<op>.{vv,vs} vd, vs2".
Expand Down
4 changes: 4 additions & 0 deletions disasm/isa_parser.cc
Original file line number Diff line number Diff line change
Expand Up @@ -253,12 +253,16 @@ isa_parser_t::isa_parser_t(const char* str, const char *priv)
extension_table[EXT_ZVBB] = true;
} else if (ext_str == "zvbc") {
extension_table[EXT_ZVBC] = true;
} else if (ext_str == "zvbc32e") {
extension_table[EXT_ZVBC32E] = true;
} else if (ext_str == "zvfbfmin") {
extension_table[EXT_ZVFBFMIN] = true;
} else if (ext_str == "zvfbfwma") {
extension_table[EXT_ZVFBFWMA] = true;
} else if (ext_str == "zvkg") {
extension_table[EXT_ZVKG] = true;
} else if (ext_str == "zvkgs") {
extension_table[EXT_ZVKGS] = true;
} else if (ext_str == "zvkn") {
extension_table[EXT_ZVBB] = true;
extension_table[EXT_ZVKNED] = true;
Expand Down
1 change: 1 addition & 0 deletions riscv/decode_macros.h
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,7 @@ static inline bool is_aligned(const unsigned val, const unsigned pos)
#define require_rv32 require(xlen == 32)
#define require_extension(s) require(p->extension_enabled(s))
#define require_either_extension(A,B) require(p->extension_enabled(A) || p->extension_enabled(B));
// Requires that extension A is enabled and condition cA holds, or that
// extension B is enabled and condition cB holds. Each "extension && condition"
// pair is parenthesized explicitly so the grouping does not rely on operator
// precedence (and does not trigger -Wparentheses). cA and cB may be arbitrary
// boolean expressions; they are wrapped in parentheses before use.
#define require_either_extension_condition(A,cA, B, cB) require((p->extension_enabled(A) && (cA)) || (p->extension_enabled(B) && (cB)));
#define require_impl(s) require(p->supports_impl(s))
#define require_fs require(STATE.sstatus->enabled(SSTATUS_FS))
#define require_fp STATE.fflags->verify_permissions(insn, false)
Expand Down
13 changes: 11 additions & 2 deletions riscv/encoding.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

/*
* This file is auto-generated by running 'make' in
* https://github.com/riscv/riscv-opcodes (c55d30f)
* https://github.com/riscv/riscv-opcodes (f1d708d)
*/

#ifndef RISCV_CSR_ENCODING_H
Expand Down Expand Up @@ -110,14 +110,17 @@
#define DCSR_CAUSE_STEP 4
#define DCSR_CAUSE_HALT 5
#define DCSR_CAUSE_GROUP 6
#define DCSR_CAUSE_EXTCAUSE 7

#define DCSR_EXTCAUSE_CRITERR 0

#define MCONTROL_TYPE(xlen) (0xfULL<<((xlen)-4))
#define MCONTROL_DMODE(xlen) (1ULL<<((xlen)-5))
#define MCONTROL_MASKMAX(xlen) (0x3fULL<<((xlen)-11))

#define MCONTROL_SELECT (1<<19)
#define MCONTROL_TIMING (1<<18)
#define MCONTROL_ACTION (0x3f<<12)
#define MCONTROL_ACTION (0xf<<12)
#define MCONTROL_CHAIN (1<<11)
#define MCONTROL_MATCH (0xf<<7)
#define MCONTROL_M (1<<6)
Expand Down Expand Up @@ -1840,8 +1843,12 @@
#define MASK_VFWSUB_WF 0xfc00707f
#define MATCH_VFWSUB_WV 0xd8001057
#define MASK_VFWSUB_WV 0xfc00707f
#define MATCH_VGHSH_VS 0x8e002077
#define MASK_VGHSH_VS 0xfe00707f
#define MATCH_VGHSH_VV 0xb2002077
#define MASK_VGHSH_VV 0xfe00707f
#define MATCH_VGMUL_VS 0xa608a077
#define MASK_VGMUL_VS 0xfe0ff07f
#define MATCH_VGMUL_VV 0xa208a077
#define MASK_VGMUL_VV 0xfe0ff07f
#define MATCH_VID_V 0x5008a057
Expand Down Expand Up @@ -3672,7 +3679,9 @@ DECLARE_INSN(vfwsub_vf, MATCH_VFWSUB_VF, MASK_VFWSUB_VF)
DECLARE_INSN(vfwsub_vv, MATCH_VFWSUB_VV, MASK_VFWSUB_VV)
DECLARE_INSN(vfwsub_wf, MATCH_VFWSUB_WF, MASK_VFWSUB_WF)
DECLARE_INSN(vfwsub_wv, MATCH_VFWSUB_WV, MASK_VFWSUB_WV)
DECLARE_INSN(vghsh_vs, MATCH_VGHSH_VS, MASK_VGHSH_VS)
DECLARE_INSN(vghsh_vv, MATCH_VGHSH_VV, MASK_VGHSH_VV)
DECLARE_INSN(vgmul_vs, MATCH_VGMUL_VS, MASK_VGMUL_VS)
DECLARE_INSN(vgmul_vv, MATCH_VGMUL_VV, MASK_VGMUL_VV)
DECLARE_INSN(vid_v, MATCH_VID_V, MASK_VID_V)
DECLARE_INSN(viota_m, MATCH_VIOTA_M, MASK_VIOTA_M)
Expand Down
8 changes: 4 additions & 4 deletions riscv/insns/vclmul_vv.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@

#include "zvk_ext_macros.h"

require_zvbc;
require(P.VU.vsew == 64);
require_any_zvbc;
require_either_extension_condition(EXT_ZVBC, P.VU.vsew == 64, EXT_ZVBC32E, P.VU.vsew == 32 || P.VU.vsew == 16 || P.VU.vsew == 8);

VI_VV_ULOOP
({
// Perform a carryless multiplication 64bx64b on each 64b element,
// return the low 64b of the 128b product.
// Perform a carryless multiplication SEW-bit x SEW-bit on each SEW-bit element,
// return the low SEW bits of the (2.SEW)-bit product.
// <https://en.wikipedia.org/wiki/Carry-less_product>
vd = 0;
for (std::size_t bit_idx = 0; bit_idx < sew; ++bit_idx) {
Expand Down
8 changes: 4 additions & 4 deletions riscv/insns/vclmul_vx.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@

#include "zvk_ext_macros.h"

require_zvbc;
require(P.VU.vsew == 64);
require_any_zvbc;
require_either_extension_condition(EXT_ZVBC, P.VU.vsew == 64, EXT_ZVBC32E, P.VU.vsew == 32 || P.VU.vsew == 16 || P.VU.vsew == 8);

VI_VX_ULOOP
({
// Perform a carryless multiplication 64bx64b on each 64b element,
// return the low 64b of the 128b product.
// Perform a carryless multiplication SEW-bit x SEW-bit on each SEW-bit element,
// return the low SEW bits of the (2.SEW)-bit product.
// <https://en.wikipedia.org/wiki/Carry-less_product>
vd = 0;
for (std::size_t bit_idx = 0; bit_idx < sew; ++bit_idx) {
Expand Down
8 changes: 4 additions & 4 deletions riscv/insns/vclmulh_vv.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@

#include "zvk_ext_macros.h"

require_zvbc;
require(P.VU.vsew == 64);
require_any_zvbc;
require_either_extension_condition(EXT_ZVBC, P.VU.vsew == 64, EXT_ZVBC32E, P.VU.vsew == 32 || P.VU.vsew == 16 || P.VU.vsew == 8);

VI_VV_ULOOP
({
// Perform a carryless multiplication 64bx64b on each 64b element,
// return the high 64b of the 128b product.
// Perform a carryless multiplication SEW-bit x SEW-bit on each SEW-bit element,
// return the high SEW bits of the (2.SEW)-bit product.
// <https://en.wikipedia.org/wiki/Carry-less_product>
vd = 0;
for (std::size_t bit_idx = 1; bit_idx < sew; ++bit_idx) {
Expand Down
8 changes: 4 additions & 4 deletions riscv/insns/vclmulh_vx.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@

#include "zvk_ext_macros.h"

require_zvbc;
require(P.VU.vsew == 64);
require_any_zvbc;
require_either_extension_condition(EXT_ZVBC, P.VU.vsew == 64, EXT_ZVBC32E, P.VU.vsew == 32 || P.VU.vsew == 16 || P.VU.vsew == 8);

VI_VX_ULOOP
({
// Perform a carryless multiplication 64bx64b on each 64b element,
// return the high 64b of the 128b product.
// Perform a carryless multiplication SEW-bit x SEW-bit on each SEW-bit element,
// return the high SEW bits of the (2.SEW)-bit product.
// <https://en.wikipedia.org/wiki/Carry-less_product>
vd = 0;
for (std::size_t bit_idx = 1; bit_idx < sew; ++bit_idx) {
Expand Down
46 changes: 46 additions & 0 deletions riscv/insns/vghsh_vs.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
// vghsh.vs vd, vs2, vs1
//
// Vector-scalar variant of the GHASH add-multiply step. For every 128-bit
// element group i processed by the loop:
//   vd[i] = brev8( brev8(vd[i] ^ vs1[i]) * brev8(vs2[0]) )
// where '*' is a carry-less 128-bit multiplication, reduced with the
// polynomial noted in the loop body. The multiplier always comes from
// element group 0 of vs2 (the "scalar" operand).

#include "zvk_ext_macros.h"

require_zvkgs;
require(P.VU.vsew == 32);
require_egw_fits(128);

VI_ZVK_VD_VS1_VS2_NOOPERANDS_PRELOOP_EGU32x4_NOVM_LOOP(
  {},
  // This statement will be executed before the first execution
  // of the loop, and only if the loop is going to be entered.
  // We cannot use a block ( { ... } ) since we want the variables declared
  // here to be visible in the loop block.
  // We capture the "scalar", vs2's first element group, by copy, even though
  // the "no overlap" constraint means that vs2 should remain constant
  // during the loop. H holds the bit-reversed scalar and must NOT be
  // modified by the loop body, since it is shared by all element groups.
  EGU32x4_t H = P.VU.elt_group<EGU32x4_t>(vs2_num, 0); EGU32x4_BREV8(H);,
  {
    EGU32x4_t Y = P.VU.elt_group<EGU32x4_t>(vd_num, idx_eg);  // Current partial hash
    EGU32x4_t X = P.VU.elt_group<EGU32x4_t>(vs1_num, idx_eg); // Block cipher output

    // Per-element-group working copy of the multiplier. The multiplication
    // below shifts/reduces it 128 times; doing that on H itself would leave
    // a corrupted multiplier for every element group after the first one.
    EGU32x4_t H_shifted = H;

    EGU32x4_t Z = {};

    // S = brev8(Y ^ X)
    EGU32x4_t S;
    EGU32x4_XOR(S, Y, X);
    EGU32x4_BREV8(S);

    for (int bit = 0; bit < 128; bit++) {
      if (EGU32x4_ISSET(S, bit)) {
        EGU32x4_XOREQ(Z, H_shifted);
      }

      const bool reduce = EGU32x4_ISSET(H_shifted, 127);
      EGU32x4_LSHIFT(H_shifted); // Left shift by 1.
      if (reduce) {
        H_shifted[0] ^= 0x87; // Reduce using x^7 + x^2 + x^1 + 1 polynomial
      }
    }
    EGU32x4_BREV8(Z);
    // Update the destination register.
    EGU32x4_t &vd = P.VU.elt_group<EGU32x4_t>(vd_num, idx_eg, true);
    EGU32x4_COPY(vd, Z);
  }
);
41 changes: 41 additions & 0 deletions riscv/insns/vgmul_vs.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
// vgmul.vs vd, vs2
//
// Vector-scalar Galois-field multiply. For every 128-bit element group i
// processed by the loop:
//   vd[i] = brev8( brev8(vd[i]) * brev8(vs2[0]) )
// where '*' is a carry-less 128-bit multiplication, reduced with the
// polynomial noted in the loop body. The multiplicand always comes from
// element group 0 of vs2 (the "scalar" operand).

#include "zvk_ext_macros.h"

require_zvkgs;
require(P.VU.vsew == 32);
require_egw_fits(128);

VI_ZVK_VD_VS2_NOOPERANDS_PRELOOP_EGU32x4_NOVM_LOOP(
  {},
  // This statement will be executed before the first execution
  // of the loop, and only if the loop is going to be entered.
  // We cannot use a block ( { ... } ) since we want the variables declared
  // here to be visible in the loop block.
  // We capture the "scalar", vs2's first element group, by copy, even though
  // the "no overlap" constraint means that vs2 should remain constant
  // during the loop. H holds the bit-reversed scalar and must NOT be
  // modified by the loop body, since it is shared by all element groups.
  EGU32x4_t H = P.VU.elt_group<EGU32x4_t>(vs2_num, 0); EGU32x4_BREV8(H);,
  {
    EGU32x4_t Y = P.VU.elt_group<EGU32x4_t>(vd_num, idx_eg); // Multiplier
    EGU32x4_BREV8(Y);

    // Per-element-group working copy of the multiplicand. The multiplication
    // below shifts/reduces it 128 times; doing that on H itself would leave
    // a corrupted multiplicand for every element group after the first one.
    EGU32x4_t H_shifted = H;

    EGU32x4_t Z = {};

    for (int bit = 0; bit < 128; bit++) {
      if (EGU32x4_ISSET(Y, bit)) {
        EGU32x4_XOREQ(Z, H_shifted);
      }

      const bool reduce = EGU32x4_ISSET(H_shifted, 127);
      EGU32x4_LSHIFT(H_shifted); // Left shift by 1.
      if (reduce) {
        H_shifted[0] ^= 0x87; // Reduce using x^7 + x^2 + x^1 + 1 polynomial
      }
    }
    EGU32x4_BREV8(Z);
    // Update the destination register.
    EGU32x4_t &vd = P.VU.elt_group<EGU32x4_t>(vd_num, idx_eg, true);
    EGU32x4_COPY(vd, Z);
  }
);
2 changes: 2 additions & 0 deletions riscv/isa_parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,11 @@ typedef enum {
EXT_ZILSD,
EXT_ZVBB,
EXT_ZVBC,
EXT_ZVBC32E,
EXT_ZVFBFMIN,
EXT_ZVFBFWMA,
EXT_ZVKG,
EXT_ZVKGS,
EXT_ZVKNED,
EXT_ZVKNHA,
EXT_ZVKNHB,
Expand Down
9 changes: 9 additions & 0 deletions riscv/riscv.mk.in
Original file line number Diff line number Diff line change
Expand Up @@ -1033,6 +1033,10 @@ riscv_insn_ext_zvkg= \
vghsh_vv \
vgmul_vv \

# Instruction implementations gated on the Zvkgs extension
# (the vector-scalar ".vs" forms of vghsh and vgmul).
riscv_insn_ext_zvkgs= \
vghsh_vs \
vgmul_vs \

riscv_insn_ext_zvkned = \
vaesdf_vs \
vaesdf_vv \
Expand Down Expand Up @@ -1091,6 +1095,10 @@ riscv_insn_ext_zvk = \
$(riscv_insn_ext_zvksed) \
$(riscv_insn_ext_zvksh) \

# Aggregates the instruction lists of the "vector crypto additional"
# extensions so they can be appended to riscv_insn_list as one group.
# NOTE(review): riscv_insn_ext_zvbc32e is not defined in any hunk visible
# here; if it is defined nowhere, make expands it to the empty string.
# Confirm whether it should be defined or the reference dropped.
riscv_insn_ext_zvka = \
$(riscv_insn_ext_zvbc32e) \
$(riscv_insn_ext_zvkgs) \

riscv_insn_list = \
$(riscv_insn_ext_i) \
$(riscv_insn_ext_c) \
Expand All @@ -1117,6 +1125,7 @@ riscv_insn_list = \
$(riscv_insn_ext_zfh_zfa) \
$(riscv_insn_ext_zicond) \
$(riscv_insn_ext_zvk) \
$(riscv_insn_ext_zvka) \
$(riscv_insn_priv) \
$(riscv_insn_smrnmi) \
$(riscv_insn_svinval) \
Expand Down
32 changes: 32 additions & 0 deletions riscv/zvk_ext_macros.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,23 @@
require_extension(EXT_ZVBC); \
} while (0)

// Ensures that the ZVBC32e extension (vector carryless multiplication
// with 32-bit elements) is present, and the vector unit is enabled
// and in a valid state.
// The vector-unit check (require_vector) runs before the extension check.
// Expands to a single do/while(0) statement, so callers terminate it
// with ';'.
#define require_zvbc32e \
do { \
require_vector(true); \
require_extension(EXT_ZVBC32E); \
} while (0)

// Ensures that at least one of the vector carryless multiplication
// extensions (ZVBC or ZVBC32e) is present, and that the vector unit is
// enabled and in a valid state.
// This only checks extension presence; it places no constraint on SEW,
// which callers must check separately.
#define require_any_zvbc \
do { \
require_vector(true); \
require_either_extension(EXT_ZVBC, EXT_ZVBC32E); \
} while (0)

// Ensures that the ZVKG extension (vector Galois Field Multiplication)
// is present, and the vector unit is enabled and in a valid state.
#define require_zvkg \
Expand All @@ -37,6 +54,14 @@
require_extension(EXT_ZVKG); \
} while (0)

// Ensures that the ZVKGS extension (vector Galois Field Multiplication
// with vector-scalar variant) is present, and the vector unit is
// enabled and in a valid state.
// The vector-unit check (require_vector) runs before the extension check.
// Expands to a single do/while(0) statement, so callers terminate it
// with ';'.
#define require_zvkgs \
do { \
require_vector(true); \
require_extension(EXT_ZVKGS); \
} while (0)
// Ensures that a ZVK extension supporting SHA-256 is present.
// For SHA-256, this support is present in either Zvknha or Zvknhb.
// Also ensures that the vector unit is enabled and in a valid state.
Expand Down Expand Up @@ -932,6 +957,13 @@
(DST)[bidx] = (SRC)[bidx]; \
}

// Copies an EGU32x4_t value from 'SRC' into 'DST', one element at a time
// (indices 0 to 3).
// 'DST' and 'SRC' are each evaluated once per element, so they should be
// side-effect-free expressions. The macro expands to a bare 'for'
// statement that declares its own loop index 'bidx'.
#define EGU32x4_COPY(DST, SRC) \
for (std::size_t bidx = 0; bidx < 4; ++bidx) { \
(DST)[bidx] = (SRC)[bidx]; \
}


// Performs "MUT_A ^= CONST_B;", i.e., xor of the bytes
// in A (mutated) with the bytes in B (unchanged).
#define EGU8x16_XOREQ(MUT_A, CONST_B) \
Expand Down