From 4d332116bf78819ac329b48741e7c9b1dc0332a5 Mon Sep 17 00:00:00 2001 From: Nicolas Brunie Date: Mon, 23 Oct 2023 20:05:56 -0400 Subject: [PATCH 1/5] [Zvbc32e] enabling 32-bit vclmul/vclmulh --- disasm/disasm.cc | 3 ++- disasm/isa_parser.cc | 2 ++ riscv/decode_macros.h | 1 + riscv/insns/vclmul_vv.h | 8 ++++---- riscv/insns/vclmul_vx.h | 8 ++++---- riscv/insns/vclmulh_vv.h | 4 ++-- riscv/insns/vclmulh_vx.h | 8 ++++---- riscv/isa_parser.h | 2 ++ riscv/zvk_ext_macros.h | 25 +++++++++++++++++++++++++ 9 files changed, 46 insertions(+), 15 deletions(-) diff --git a/disasm/disasm.cc b/disasm/disasm.cc index c3ba62a690..b19f7cafc6 100644 --- a/disasm/disasm.cc +++ b/disasm/disasm.cc @@ -2157,7 +2157,8 @@ void disassembler_t::add_instructions(const isa_parser_t* isa) #undef DISASM_VECTOR_VV_VX_VIU_ZIMM6 } - if (isa->extension_enabled(EXT_ZVBC)) { + if (isa->extension_enabled(EXT_ZVBC) || + isa->extension_enabled(EXT_ZVBC32E)) { #define DISASM_VECTOR_VV_VX(name) \ DEFINE_VECTOR_VV(name##_vv); \ DEFINE_VECTOR_VX(name##_vx) diff --git a/disasm/isa_parser.cc b/disasm/isa_parser.cc index 79203dfbe2..e9618aad0e 100644 --- a/disasm/isa_parser.cc +++ b/disasm/isa_parser.cc @@ -253,6 +253,8 @@ isa_parser_t::isa_parser_t(const char* str, const char *priv) extension_table[EXT_ZVBB] = true; } else if (ext_str == "zvbc") { extension_table[EXT_ZVBC] = true; + } else if (ext_str == "zvbc32e") { + extension_table[EXT_ZVBC32E] = true; } else if (ext_str == "zvfbfmin") { extension_table[EXT_ZVFBFMIN] = true; } else if (ext_str == "zvfbfwma") { diff --git a/riscv/decode_macros.h b/riscv/decode_macros.h index ffb334cf5c..8d098bf62c 100644 --- a/riscv/decode_macros.h +++ b/riscv/decode_macros.h @@ -163,6 +163,7 @@ static inline bool is_aligned(const unsigned val, const unsigned pos) #define require_rv32 require(xlen == 32) #define require_extension(s) require(p->extension_enabled(s)) #define require_either_extension(A,B) require(p->extension_enabled(A) || p->extension_enabled(B)); +#define 
require_either_extension_condition(A,cA, B, cB) require(p->extension_enabled(A) && (cA) || p->extension_enabled(B) && (cB)); #define require_impl(s) require(p->supports_impl(s)) #define require_fs require(STATE.sstatus->enabled(SSTATUS_FS)) #define require_fp STATE.fflags->verify_permissions(insn, false) diff --git a/riscv/insns/vclmul_vv.h b/riscv/insns/vclmul_vv.h index 8957738adc..4cee57e062 100644 --- a/riscv/insns/vclmul_vv.h +++ b/riscv/insns/vclmul_vv.h @@ -2,13 +2,13 @@ #include "zvk_ext_macros.h" -require_zvbc; -require(P.VU.vsew == 64); +require_any_zvbc; +require_either_extension_condition(EXT_ZVBC, P.VU.vsew == 64, EXT_ZVBC32E, P.VU.vsew == 32); VI_VV_ULOOP ({ - // Perform a carryless multiplication 64bx64b on each 64b element, - // return the low 64b of the 128b product. + // Perform a carryless multiplication SEW-bit x SEW-bit on each SEW-bit element, + // return the low SEW bits of the (2.SEW)-bit product. // vd = 0; for (std::size_t bit_idx = 0; bit_idx < sew; ++bit_idx) { diff --git a/riscv/insns/vclmul_vx.h b/riscv/insns/vclmul_vx.h index 1df7a3a2a4..060d30a985 100644 --- a/riscv/insns/vclmul_vx.h +++ b/riscv/insns/vclmul_vx.h @@ -2,13 +2,13 @@ #include "zvk_ext_macros.h" -require_zvbc; -require(P.VU.vsew == 64); +require_any_zvbc; +require_either_extension_condition(EXT_ZVBC, P.VU.vsew == 64, EXT_ZVBC32E, P.VU.vsew == 32); VI_VX_ULOOP ({ - // Perform a carryless multiplication 64bx64b on each 64b element, - // return the low 64b of the 128b product. + // Perform a carryless multiplication SEW-bit x SEW-bit on each SEW-bit element, + // return the low SEW bits of the (2.SEW)-bit product. 
// vd = 0; for (std::size_t bit_idx = 0; bit_idx < sew; ++bit_idx) { diff --git a/riscv/insns/vclmulh_vv.h b/riscv/insns/vclmulh_vv.h index 6a54bcfaa6..cb9e45df17 100644 --- a/riscv/insns/vclmulh_vv.h +++ b/riscv/insns/vclmulh_vv.h @@ -2,8 +2,8 @@ #include "zvk_ext_macros.h" -require_zvbc; -require(P.VU.vsew == 64); +require_any_zvbc; +require_either_extension_condition(EXT_ZVBC, P.VU.vsew == 64, EXT_ZVBC32E, P.VU.vsew == 32); VI_VV_ULOOP ({ diff --git a/riscv/insns/vclmulh_vx.h b/riscv/insns/vclmulh_vx.h index e874d1df68..8628a6bac6 100644 --- a/riscv/insns/vclmulh_vx.h +++ b/riscv/insns/vclmulh_vx.h @@ -2,13 +2,13 @@ #include "zvk_ext_macros.h" -require_zvbc; -require(P.VU.vsew == 64); +require_any_zvbc; +require_either_extension_condition(EXT_ZVBC, P.VU.vsew == 64, EXT_ZVBC32E, P.VU.vsew == 32); VI_VX_ULOOP ({ - // Perform a carryless multiplication 64bx64b on each 64b element, - // return the high 64b of the 128b product. + // Perform a carryless multiplication SEW-bit x SEW-bit on each SEW-bit element, + // return the high SEW bits of the (2.SEW)-bit product. // vd = 0; for (std::size_t bit_idx = 1; bit_idx < sew; ++bit_idx) { diff --git a/riscv/isa_parser.h b/riscv/isa_parser.h index 45f637c640..d6d9b39b14 100644 --- a/riscv/isa_parser.h +++ b/riscv/isa_parser.h @@ -57,9 +57,11 @@ typedef enum { EXT_ZILSD, EXT_ZVBB, EXT_ZVBC, + EXT_ZVBC32E, EXT_ZVFBFMIN, EXT_ZVFBFWMA, EXT_ZVKG, + EXT_ZVKGS, EXT_ZVKNED, EXT_ZVKNHA, EXT_ZVKNHB, diff --git a/riscv/zvk_ext_macros.h b/riscv/zvk_ext_macros.h index f094629835..6fd385d6c4 100644 --- a/riscv/zvk_ext_macros.h +++ b/riscv/zvk_ext_macros.h @@ -29,6 +29,23 @@ require_extension(EXT_ZVBC); \ } while (0) +// Ensures that the ZVBC32e extension (vector carryless multiplication +// with 32-bit elements) is present, and the vector unit is enabled +// and in a valid state. 
+#define require_zvbc32e \ + do { \ + require_vector(true); \ + require_extension(EXT_ZVBC32E); \ + } while (0) + +// Ensures that at least one ZVBC extension (vector carryless multiplication) +// is present, and the vector unit is enabled and in a valid state. +#define require_any_zvbc \ + do { \ + require_vector(true); \ + require_either_extension(EXT_ZVBC, EXT_ZVBC32E); \ + } while (0) + // Ensures that the ZVKG extension (vector Galois Field Multiplication) // is present, and the vector unit is enabled and in a valid state. #define require_zvkg \ @@ -37,6 +54,14 @@ require_extension(EXT_ZVKG); \ } while (0) +// Ensures that the ZVKGS extension (vector Galois Field Multiplication +// with vector-scalar variant) is present, and the vector unit is +// enabled and in a valid state. +#define require_zvkgs \ + do { \ + require_vector(true); \ + require_extension(EXT_ZVKGS); \ + } while (0) // Ensures that a ZVK extension supporting SHA-256 is present. // For SHA-256, this support is present in either Zvknha or Zvknhb. // Also ensures that the vector unit is enabled and in a valid state. From 7d9f333c4eb948825c9c8f12faf4edd75e362d80 Mon Sep 17 00:00:00 2001 From: Nicolas Brunie Date: Wed, 31 Jan 2024 19:28:11 -0800 Subject: [PATCH 2/5] [Zvbc32e] updating vclmulh_vv comment --- riscv/insns/vclmulh_vv.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/riscv/insns/vclmulh_vv.h b/riscv/insns/vclmulh_vv.h index cb9e45df17..9bf7c429f8 100644 --- a/riscv/insns/vclmulh_vv.h +++ b/riscv/insns/vclmulh_vv.h @@ -7,8 +7,8 @@ require_either_extension_condition(EXT_ZVBC, P.VU.vsew == 64, EXT_ZVBC32E, P.VU. VI_VV_ULOOP ({ - // Perform a carryless multiplication 64bx64b on each 64b element, - // return the high 64b of the 128b product. + // Perform a carryless multiplication SEW-bit x SEW-bit on each SEW-bit element, + // return the high SEW bits of the (2.SEW)-bit product.
// vd = 0; for (std::size_t bit_idx = 1; bit_idx < sew; ++bit_idx) { From 927a086456b100729a7c96d0e3a1d4f0a9a3ec5c Mon Sep 17 00:00:00 2001 From: Nicolas Brunie Date: Tue, 23 Jul 2024 12:46:34 -0700 Subject: [PATCH 3/5] Importing new encoding.h with additional vector crypto instructions --- riscv/encoding.h | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/riscv/encoding.h b/riscv/encoding.h index 675b4f6438..0d33ebcb88 100644 --- a/riscv/encoding.h +++ b/riscv/encoding.h @@ -4,7 +4,7 @@ /* * This file is auto-generated by running 'make' in - * https://github.com/riscv/riscv-opcodes (c55d30f) + * https://github.com/riscv/riscv-opcodes (f1d708d) */ #ifndef RISCV_CSR_ENCODING_H @@ -110,6 +110,9 @@ #define DCSR_CAUSE_STEP 4 #define DCSR_CAUSE_HALT 5 #define DCSR_CAUSE_GROUP 6 +#define DCSR_CAUSE_EXTCAUSE 7 + +#define DCSR_EXTCAUSE_CRITERR 0 #define MCONTROL_TYPE(xlen) (0xfULL<<((xlen)-4)) #define MCONTROL_DMODE(xlen) (1ULL<<((xlen)-5)) @@ -117,7 +120,7 @@ #define MCONTROL_SELECT (1<<19) #define MCONTROL_TIMING (1<<18) -#define MCONTROL_ACTION (0x3f<<12) +#define MCONTROL_ACTION (0xf<<12) #define MCONTROL_CHAIN (1<<11) #define MCONTROL_MATCH (0xf<<7) #define MCONTROL_M (1<<6) @@ -1840,8 +1843,12 @@ #define MASK_VFWSUB_WF 0xfc00707f #define MATCH_VFWSUB_WV 0xd8001057 #define MASK_VFWSUB_WV 0xfc00707f +#define MATCH_VGHSH_VS 0x8e002077 +#define MASK_VGHSH_VS 0xfe00707f #define MATCH_VGHSH_VV 0xb2002077 #define MASK_VGHSH_VV 0xfe00707f +#define MATCH_VGMUL_VS 0xa608a077 +#define MASK_VGMUL_VS 0xfe0ff07f #define MATCH_VGMUL_VV 0xa208a077 #define MASK_VGMUL_VV 0xfe0ff07f #define MATCH_VID_V 0x5008a057 @@ -3672,7 +3679,9 @@ DECLARE_INSN(vfwsub_vf, MATCH_VFWSUB_VF, MASK_VFWSUB_VF) DECLARE_INSN(vfwsub_vv, MATCH_VFWSUB_VV, MASK_VFWSUB_VV) DECLARE_INSN(vfwsub_wf, MATCH_VFWSUB_WF, MASK_VFWSUB_WF) DECLARE_INSN(vfwsub_wv, MATCH_VFWSUB_WV, MASK_VFWSUB_WV) +DECLARE_INSN(vghsh_vs, MATCH_VGHSH_VS, MASK_VGHSH_VS) DECLARE_INSN(vghsh_vv, MATCH_VGHSH_VV, 
MASK_VGHSH_VV) +DECLARE_INSN(vgmul_vs, MATCH_VGMUL_VS, MASK_VGMUL_VS) DECLARE_INSN(vgmul_vv, MATCH_VGMUL_VV, MASK_VGMUL_VV) DECLARE_INSN(vid_v, MATCH_VID_V, MASK_VID_V) DECLARE_INSN(viota_m, MATCH_VIOTA_M, MASK_VIOTA_M) From 78edff2a326b6b677a46e3e9c5cc1c98cdcab4fd Mon Sep 17 00:00:00 2001 From: Nicolas Brunie Date: Wed, 24 Jul 2024 15:49:57 -0700 Subject: [PATCH 4/5] Adding support for Zvkgs's vgmul.vs and vghsh.vs --- disasm/disasm.cc | 7 +++++++ disasm/isa_parser.cc | 2 ++ riscv/insns/vghsh_vs.h | 46 ++++++++++++++++++++++++++++++++++++++++++ riscv/insns/vgmul_vs.h | 41 +++++++++++++++++++++++++++++++++++++ riscv/riscv.mk.in | 9 +++++++++ riscv/zvk_ext_macros.h | 7 +++++++ 6 files changed, 112 insertions(+) create mode 100644 riscv/insns/vghsh_vs.h create mode 100644 riscv/insns/vgmul_vs.h diff --git a/disasm/disasm.cc b/disasm/disasm.cc index b19f7cafc6..cac4e003fe 100644 --- a/disasm/disasm.cc +++ b/disasm/disasm.cc @@ -2176,6 +2176,13 @@ void disassembler_t::add_instructions(const isa_parser_t* isa) DEFINE_VECTOR_VV(vghsh_vv); } + if (isa->extension_enabled(EXT_ZVKGS)) { + // Despite its suffix, the vgmul.vs instruction + // is really ".v", with the form "vgmul.vs vd, vs2". + DEFINE_VECTOR_V(vgmul_vs); + DEFINE_VECTOR_VV(vghsh_vs); + } + if (isa->extension_enabled(EXT_ZVKNED)) { // Despite their suffixes, the vaes*.{vv,vs} instructions // are really ".v", with the form ".{vv,vs} vd, vs2".
diff --git a/disasm/isa_parser.cc b/disasm/isa_parser.cc index e9618aad0e..e53fe8fa2b 100644 --- a/disasm/isa_parser.cc +++ b/disasm/isa_parser.cc @@ -261,6 +261,8 @@ isa_parser_t::isa_parser_t(const char* str, const char *priv) extension_table[EXT_ZVFBFWMA] = true; } else if (ext_str == "zvkg") { extension_table[EXT_ZVKG] = true; + } else if (ext_str == "zvkgs") { + extension_table[EXT_ZVKGS] = true; } else if (ext_str == "zvkn") { extension_table[EXT_ZVBB] = true; extension_table[EXT_ZVKNED] = true; diff --git a/riscv/insns/vghsh_vs.h b/riscv/insns/vghsh_vs.h new file mode 100644 index 0000000000..ced25122d6 --- /dev/null +++ b/riscv/insns/vghsh_vs.h @@ -0,0 +1,46 @@ +// vghsh.vs vd, vs2, vs1 + +#include "zvk_ext_macros.h" + +require_zvkgs; +require(P.VU.vsew == 32); +require_egw_fits(128); + +VI_ZVK_VD_VS1_VS2_NOOPERANDS_PRELOOP_EGU32x4_NOVM_LOOP( + {}, + // This statement will be executed before the first execution + // of the loop, and only if the loop is going to be entered. + // We cannot use a block ( { ... } ) since we want the variables declared + // here to be visible in the loop block. + // We capture the "scalar", vs2's first element, by copy, even though + // the "no overlap" constraint means that vs2 should remain constant + // during the loop. NOTE(review): the loop body below shifts and reduces H in place, so element groups after the first appear to see a modified H -- confirm, and take a fresh copy of H per element group if so. + EGU32x4_t H = P.VU.elt_group(vs2_num, 0); EGU32x4_BREV8(H);, + { + EGU32x4_t Y = P.VU.elt_group(vd_num, idx_eg);; // Current partial hash + EGU32x4_t X = P.VU.elt_group(vs1_num, idx_eg);; // Block cipher output + + EGU32x4_t Z = {}; + + // S = brev8(Y ^ X) + EGU32x4_t S; + EGU32x4_XOR(S, Y, X); + EGU32x4_BREV8(S); + + for (int bit = 0; bit < 128; bit++) { + if (EGU32x4_ISSET(S, bit)) { + EGU32x4_XOREQ(Z, H); + } + + const bool reduce = EGU32x4_ISSET(H, 127); + EGU32x4_LSHIFT(H); // Left shift by 1. + if (reduce) { + H[0] ^= 0x87; // Reduce using x^7 + x^2 + x^1 + 1 polynomial + } + } + EGU32x4_BREV8(Z); + // Update the destination register.
+ EGU32x4_t &vd = P.VU.elt_group(vd_num, idx_eg, true); + EGU32x4_COPY(vd, Z); + } +); diff --git a/riscv/insns/vgmul_vs.h b/riscv/insns/vgmul_vs.h new file mode 100644 index 0000000000..80a097d122 --- /dev/null +++ b/riscv/insns/vgmul_vs.h @@ -0,0 +1,41 @@ +// vgmul.vs vd, vs2 + +#include "zvk_ext_macros.h" + +require_zvkgs; +require(P.VU.vsew == 32); +require_egw_fits(128); + +VI_ZVK_VD_VS2_NOOPERANDS_PRELOOP_EGU32x4_NOVM_LOOP( + {}, + // This statement will be executed before the first execution + // of the loop, and only if the loop is going to be entered. + // We cannot use a block ( { ... } ) since we want the variables declared + // here to be visible in the loop block. + // We capture the "scalar", vs2's first element, by copy, even though + // the "no overlap" constraint means that vs2 should remain constant + // during the loop. NOTE(review): the loop body below shifts and reduces H in place, so element groups after the first appear to see a modified H -- confirm, and take a fresh copy of H per element group if so. + EGU32x4_t H = P.VU.elt_group(vs2_num, 0); EGU32x4_BREV8(H); + , + { + EGU32x4_t Y = P.VU.elt_group(vd_num, idx_eg); // Multiplier + EGU32x4_BREV8(Y); + EGU32x4_t Z = {}; + + for (int bit = 0; bit < 128; bit++) { + if (EGU32x4_ISSET(Y, bit)) { + EGU32x4_XOREQ(Z, H); + } + + bool reduce = EGU32x4_ISSET(H, 127); + EGU32x4_LSHIFT(H); // Left shift by 1 + if (reduce) { + H[0] ^= 0x87; // Reduce using x^7 + x^2 + x^1 + 1 polynomial + } + } + EGU32x4_BREV8(Z); + // Update the destination register.
+ EGU32x4_t &vd = P.VU.elt_group(vd_num, idx_eg, true); + EGU32x4_COPY(vd, Z); + } +); diff --git a/riscv/riscv.mk.in b/riscv/riscv.mk.in index 60723b58e6..4d08e4b209 100644 --- a/riscv/riscv.mk.in +++ b/riscv/riscv.mk.in @@ -1033,6 +1033,10 @@ riscv_insn_ext_zvkg= \ vghsh_vv \ vgmul_vv \ +riscv_insn_ext_zvkgs= \ + vghsh_vs \ + vgmul_vs \ + riscv_insn_ext_zvkned = \ vaesdf_vs \ vaesdf_vv \ @@ -1091,6 +1095,10 @@ riscv_insn_ext_zvk = \ $(riscv_insn_ext_zvksed) \ $(riscv_insn_ext_zvksh) \ +riscv_insn_ext_zvka = \ + $(riscv_insn_ext_zvbc32e) \ + $(riscv_insn_ext_zvkgs) \ + riscv_insn_list = \ $(riscv_insn_ext_i) \ $(riscv_insn_ext_c) \ @@ -1117,6 +1125,7 @@ riscv_insn_list = \ $(riscv_insn_ext_zfh_zfa) \ $(riscv_insn_ext_zicond) \ $(riscv_insn_ext_zvk) \ + $(riscv_insn_ext_zvka) \ $(riscv_insn_priv) \ $(riscv_insn_smrnmi) \ $(riscv_insn_svinval) \ diff --git a/riscv/zvk_ext_macros.h b/riscv/zvk_ext_macros.h index 6fd385d6c4..d762c98629 100644 --- a/riscv/zvk_ext_macros.h +++ b/riscv/zvk_ext_macros.h @@ -957,6 +957,13 @@ (DST)[bidx] = (SRC)[bidx]; \ } +// Copies a EGU32x4_t value from 'SRC' into 'DST'. +#define EGU32x4_COPY(DST, SRC) \ + for (std::size_t bidx = 0; bidx < 4; ++bidx) { \ + (DST)[bidx] = (SRC)[bidx]; \ + } + + // Performs "MUT_A ^= CONST_B;", i.e., xor of the bytes // in A (mutated) with the bytes in B (unchanged). 
#define EGU8x16_XOREQ(MUT_A, CONST_B) \ From 21a280b12aab17ae382752f716b288fd91bf7174 Mon Sep 17 00:00:00 2001 From: Nicolas Brunie Date: Sun, 6 Oct 2024 13:22:14 -0700 Subject: [PATCH 5/5] Extending Zvbc32e vclmul[h] to SEW 8 and 16 --- riscv/insns/vclmul_vv.h | 2 +- riscv/insns/vclmul_vx.h | 2 +- riscv/insns/vclmulh_vv.h | 2 +- riscv/insns/vclmulh_vx.h | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/riscv/insns/vclmul_vv.h b/riscv/insns/vclmul_vv.h index 4cee57e062..ca1a64d37a 100644 --- a/riscv/insns/vclmul_vv.h +++ b/riscv/insns/vclmul_vv.h @@ -3,7 +3,7 @@ #include "zvk_ext_macros.h" require_any_zvbc; -require_either_extension_condition(EXT_ZVBC, P.VU.vsew == 64, EXT_ZVBC32E, P.VU.vsew == 32); +require_either_extension_condition(EXT_ZVBC, P.VU.vsew == 64, EXT_ZVBC32E, P.VU.vsew == 32 || P.VU.vsew == 16 || P.VU.vsew == 8); VI_VV_ULOOP ({ diff --git a/riscv/insns/vclmul_vx.h b/riscv/insns/vclmul_vx.h index 060d30a985..a929bda00f 100644 --- a/riscv/insns/vclmul_vx.h +++ b/riscv/insns/vclmul_vx.h @@ -3,7 +3,7 @@ #include "zvk_ext_macros.h" require_any_zvbc; -require_either_extension_condition(EXT_ZVBC, P.VU.vsew == 64, EXT_ZVBC32E, P.VU.vsew == 32); +require_either_extension_condition(EXT_ZVBC, P.VU.vsew == 64, EXT_ZVBC32E, P.VU.vsew == 32 || P.VU.vsew == 16 || P.VU.vsew == 8); VI_VX_ULOOP ({ diff --git a/riscv/insns/vclmulh_vv.h b/riscv/insns/vclmulh_vv.h index 9bf7c429f8..12d5952003 100644 --- a/riscv/insns/vclmulh_vv.h +++ b/riscv/insns/vclmulh_vv.h @@ -3,7 +3,7 @@ #include "zvk_ext_macros.h" require_any_zvbc; -require_either_extension_condition(EXT_ZVBC, P.VU.vsew == 64, EXT_ZVBC32E, P.VU.vsew == 32); +require_either_extension_condition(EXT_ZVBC, P.VU.vsew == 64, EXT_ZVBC32E, P.VU.vsew == 32 || P.VU.vsew == 16 || P.VU.vsew == 8); VI_VV_ULOOP ({ diff --git a/riscv/insns/vclmulh_vx.h b/riscv/insns/vclmulh_vx.h index 8628a6bac6..77625672a1 100644 --- a/riscv/insns/vclmulh_vx.h +++ b/riscv/insns/vclmulh_vx.h @@ -3,7 +3,7 @@ #include 
"zvk_ext_macros.h" require_any_zvbc; -require_either_extension_condition(EXT_ZVBC, P.VU.vsew == 64, EXT_ZVBC32E, P.VU.vsew == 32); +require_either_extension_condition(EXT_ZVBC, P.VU.vsew == 64, EXT_ZVBC32E, P.VU.vsew == 32 || P.VU.vsew == 16 || P.VU.vsew == 8); VI_VX_ULOOP ({