diff --git a/gen/gen_code.cpp b/gen/gen_code.cpp index cdf015cf..52baff96 100644 --- a/gen/gen_code.cpp +++ b/gen/gen_code.cpp @@ -2020,21 +2020,21 @@ void put64() uint64_t type1; uint64_t type2; uint8_t code; - int n; + int idx; } tbl[] = { - { "aesdec128kl", T_F3|T_0F38, T_MUST_EVEX|T_F3, 0xDD, 2 }, - { "aesdec256kl", T_F3|T_0F38, T_MUST_EVEX|T_F3, 0xDF, 2 }, + { "aesdec128kl", T_F3|T_0F38, T_MUST_EVEX|T_F3, 0xDD, 8 }, + { "aesdec256kl", T_F3|T_0F38, T_MUST_EVEX|T_F3, 0xDF, 8 }, + { "aesdecwide128kl", T_F3|T_0F38, T_MUST_EVEX|T_F3, 0xD8, 1 }, + { "aesdecwide256kl", T_F3|T_0F38, T_MUST_EVEX|T_F3, 0xD8, 3 }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl *p = &tbl[i]; std::string s1 = type2String(p->type1); std::string s2 = type2String(p->type2); - switch (p->n) { - case 1: - break; - case 2: + if (p->idx == 8) { printf("void %s(const Xmm& x, const Address& addr) { opAESKL(&x, addr, %s, %s, 0x%02X); }\n", p->name, s1.c_str(), s2.c_str(), p->code); - break; + } else { + printf("void %s(const Address& addr) { opAESKL(&xmm%d, addr, %s, %s, 0x%02X); }\n", p->name, p->idx, s1.c_str(), s2.c_str(), p->code); } } } diff --git a/test/apx.cpp b/test/apx.cpp index fc29653d..ab3ca0ae 100644 --- a/test/apx.cpp +++ b/test/apx.cpp @@ -1783,9 +1783,15 @@ CYBOZU_TEST_AUTO(aeskl) aesdec128kl(xmm15, ptr[rax+rcx*4+0x12]); aesdec128kl(xmm15, ptr[r30+r29*8+0x34]); - aesdec256kl(xmm15, ptr[rax+rcx*4+0x12]); aesdec256kl(xmm15, ptr[r30+r29*8+0x34]); + + aesdecwide128kl(ptr[rax+rcx*4+0x12]); + aesdecwide128kl(ptr[r30+r29*8+0x34]); + + aesdecwide256kl(ptr[rax+rcx*4+0x12]); + aesdecwide256kl(ptr[r30+r29*8+0x34]); + } } c; const uint8_t tbl[] = { @@ -1795,6 +1801,12 @@ CYBOZU_TEST_AUTO(aeskl) // aesdec256kl 0xf3, 0x44, 0x0f, 0x38, 0xdf, 0x7c, 0x88, 0x12, 0x62, 0x1c, 0x7a, 0x08, 0xdf, 0x7c, 0xee, 0x34, + // aesdecwide128kl + 0xf3, 0x0f, 0x38, 0xd8, 0x4c, 0x88, 0x12, + 0x62, 0x9c, 0x7a, 0x08, 0xd8, 0x4c, 0xee, 0x34, 0xf3, + // aesdecwide256kl + 0x0f, 0x38, 0xd8, 0x5c, 0x88, 0x12, + 0x62, 0x9c, 0x7a, 0x08, 0xd8, 0x5c, 0xee, 0x34, }; const size_t n = sizeof(tbl); CYBOZU_TEST_EQUAL(c.getSize(), n); diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h index 2ea7699e..915f938d 100644 --- a/xbyak/xbyak_mnemonic.h +++ b/xbyak/xbyak_mnemonic.h @@ -1928,6 +1928,8 @@ void cmpsxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opRRO(r void cmpzxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opRRO(r1, r2, addr, T_APX|T_66|T_0F38, 0xE4); } void aesdec128kl(const Xmm& x, const Address& addr) { opAESKL(&x, addr, T_F3|T_0F38, T_F3|T_MUST_EVEX, 0xDD); } void aesdec256kl(const Xmm& x, const Address& addr) { opAESKL(&x, addr, T_F3|T_0F38, T_F3|T_MUST_EVEX, 0xDF); } +void aesdecwide128kl(const Address& addr) { opAESKL(&xmm1, addr, T_F3|T_0F38, T_F3|T_MUST_EVEX, 0xD8); } +void aesdecwide256kl(const Address& addr) { opAESKL(&xmm3, addr, T_F3|T_0F38, T_F3|T_MUST_EVEX, 0xD8); } void ldtilecfg(const Address& addr) { if (opROO(Reg(), addr, tmm0, T_APX|T_0F38|T_W0, 0x49)) return; opVex(tmm0, &tmm0, addr, T_0F38|T_W0, 0x49); } void sttilecfg(const Address& addr) { if (opROO(Reg(), addr, tmm0, T_APX|T_66|T_0F38|T_W0, 0x49)) return; opVex(tmm0, &tmm0, addr, T_66|T_0F38 | T_W0, 0x49); } void tileloadd(const Tmm& tm, const Address& addr) { opAMX(tm, addr, T_F2|T_0F38|T_W0, 0x4B); }