Skip to content

Commit

Permalink
Merge branch 'dev'
Browse files Browse the repository at this point in the history
  • Loading branch information
herumi committed Oct 13, 2024
2 parents b597cc4 + 7c54020 commit d067f0d
Show file tree
Hide file tree
Showing 23 changed files with 1,699 additions and 113 deletions.
7 changes: 6 additions & 1 deletion .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,11 @@ jobs:
steps:
- uses: actions/checkout@v4
- run: apt -y update
- run: apt -y install g++-multilib libboost-dev make nasm yasm
- run: apt -y install g++-multilib libboost-dev make nasm yasm wget xz-utils python3
- run: make test
- run: make -C sample CXXFLAGS="-DXBYAK_NO_EXCEPTION"
- run: |
cd test
wget https://downloadmirror.intel.com/831748/sde-external-9.44.0-2024-08-22-lin.tar.xz
tar xvf sde-external-9.44.0-2024-08-22-lin.tar.xz
env XED=sde-external-9.44.0-2024-08-22-lin/xed64 make xed_test
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
/build* # cmake
*CVS
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
cmake_minimum_required(VERSION 3.5)

project(xbyak LANGUAGES CXX VERSION 7.09.1)
project(xbyak LANGUAGES CXX VERSION 7.10)

file(GLOB headers xbyak/*.h)

Expand Down
1 change: 1 addition & 0 deletions doc/changelog.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# History

* 2024/Oct/13 ver 7.10 support AVX10 integer and fp16 vnni, media new instructions. setDefaultEncoding is extended.
* 2024/Oct/10 ver 7.09.1 fix the names of vpcompressb and vpcompressw
* 2024/Oct/08 ver 7.09 support YMM embedded rounding of AVX10.2 and fix some mnemonics with {sae}/{er}.
* 2024/Oct/07 ver 7.08 support rdfsbase etc.
Expand Down
33 changes: 26 additions & 7 deletions doc/usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -106,18 +106,37 @@ vcvtpd2dq xmm19, [eax+32]{1to4} --> vcvtpd2dq(xmm19, yword_b [eax+32]);
vfpclassps k5{k3}, zword [rax+64], 5 --> vfpclassps(k5|k3, zword [rax+64], 5); // specify m512
vfpclasspd k5{k3}, [rax+64]{1to2}, 5 --> vfpclasspd(k5|k3, xword_b [rax+64], 5); // broadcast 64-bit to 128-bit
vfpclassps k5{k3}, [rax+64]{1to4}, 5 --> vfpclassps(k5|k3, yword_b [rax+64], 5); // broadcast 64-bit to 256-bit
```

## Selecting AVX512-VNNI, AVX-VNNI, AVX-VNNI-INT8 etc.
Some mnemonics have two types of encodings: VEX and EVEX.
The functions for these mnemonics include an optional parameter as the last argument to specify the encoding.
The default behavior depends on the order in which the instruction was introduced (whether VEX or EVEX came first),
and can be specified using setDefaultEncoding.

vpdpbusd(xm0, xm1, xm2); // default encoding is EVEX
```
vpdpbusd(xm0, xm1, xm2); // default encoding: EVEX (AVX512-VNNI)
vpdpbusd(xm0, xm1, xm2, EvexEncoding); // same as the above
vpdpbusd(xm0, xm1, xm2, VexEncoding); // VEX encoding
vpdpbusd(xm0, xm1, xm2, VexEncoding); // VEX (AVX-VNNI)
setDefaultEncoding(VexEncoding); // default encoding is VEX
vpdpbusd(xm0, xm1, xm2); // VEX encoding
vpdpbusd(xm0, xm1, xm2); // VEX
vmpsadbw(xm1, xm3, xm15, 3); // default encoding: VEX (AVX-VNNI)
vmpsadbw(xm1, xm3, xm15, 3, VexEncoding); // same as the above
vmpsadbw(xm1, xm3, xm15, 3, EvexEncoding); // EVEX (AVX10.2)
setDefaultEncoding(VexEncoding, EvexEncoding); // the 2nd argument (avx10Enc) sets the default for vmpsadbw
vmpsadbw(xm1, xm3, xm15, 3); // EVEX
```

- setDefaultEncoding(PreferredEncoding encoding);
- Set the default encoding to select EVEX or VEX.
- The default value is EvexEncoding.
- This function affects only an instruction that has a PreferredEncoding argument such as vpdpbusd.
- `setDefaultEncoding(PreferredEncoding vnniEnc = EvexEncoding, PreferredEncoding avx10Enc = VexEncoding)`
Controls the default encoding of mnemonics that take an `Xbyak::PreferredEncoding` parameter.

param|vnniEnc|avx10Enc
-|-|-
EvexEncoding|AVX512-VNNI|AVX10.2
VexEncoding|AVX-VNNI|AVX-VNNI-INT8
default|EvexEncoding|VexEncoding
mnemonic|vpdpbusd, vpdpbusds, vpdpwssd, vpdpwssds|vmpsadbw, vpdpbssd, vpdpbssds, vpdpbsud, vpdpbsuds, vpdpbuud, vpdpbuuds, vpdpwsud, vpdpwsuds, vpdpwusd, vpdpwusds, vpdpwuud, vpdpwuuds

### Remark
* `k1`, ..., `k7` are opmask registers.
Expand Down
137 changes: 135 additions & 2 deletions gen/gen_avx512.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -137,8 +137,6 @@ void putVcmp()
printf("void %s(const Opmask& k, const Xmm& x, const Operand& op%s) { opAVX_K_X_XM(k, x, op, %s, 0x%02X%s); }\n"
, p->name, p->hasIMM ? ", uint8_t imm" : "", s.c_str(), p->code, p->hasIMM ? ", imm" : "");
}
puts("void vcomish(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_MAP5 | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N2, 0x2F); }");
puts("void vucomish(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_MAP5 | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N2, 0x2E); }");
}

void putVcmpAlias()
Expand Down Expand Up @@ -198,6 +196,19 @@ void putX_XM()
{ 0x7C, "vcvttph2w", T_66 | T_MAP5 | T_MUST_EVEX | T_YMM | T_EW0 | T_B16 | T_SAE_Z },
{ 0x7D, "vcvtuw2ph", T_F2 | T_MAP5 | T_MUST_EVEX | T_YMM | T_EW0 | T_B16 | T_ER_Z },
{ 0x7D, "vcvtw2ph", T_F3 | T_MAP5 | T_MUST_EVEX | T_YMM | T_EW0 | T_B16 | T_ER_Z },

{ 0x51, "vsqrtnepbf16", T_MUST_EVEX | T_66 | T_MAP5 | T_EW0 | T_YMM | T_B16 },

{ 0x2F, "vcomish", T_MUST_EVEX | T_MAP5 | T_EW0 | T_SAE_X | T_N2 },
{ 0x2E, "vucomish", T_MUST_EVEX | T_MAP5 | T_EW0 | T_SAE_X | T_N2 },

{ 0x2F, "vcomxsd", T_MUST_EVEX | T_F3 | T_0F | T_EW1 | T_SAE_X | T_N8 },
{ 0x2F, "vcomxsh", T_MUST_EVEX | T_F2 | T_MAP5 | T_EW0 | T_SAE_X | T_N2 },
{ 0x2F, "vcomxss", T_MUST_EVEX | T_F2 | T_0F | T_EW0 | T_SAE_X | T_N4 },

{ 0x2E, "vucomxsd", T_MUST_EVEX | T_F3 | T_0F | T_EW1 | T_SAE_X | T_N8 },
{ 0x2E, "vucomxsh", T_MUST_EVEX | T_F2 | T_MAP5 | T_EW0 | T_SAE_X | T_N2 },
{ 0x2E, "vucomxss", T_MUST_EVEX | T_F2 | T_0F | T_EW0 | T_SAE_X | T_N4 },
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl *p = &tbl[i];
Expand Down Expand Up @@ -316,6 +327,9 @@ void putX_X_XM_IMM()
{ 0x77, "vpermi2ps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false },
{ 0x77, "vpermi2pd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },

{ 0xB4, "vpmadd52luq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
{ 0xB5, "vpmadd52huq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },

{ 0x25, "vpternlogd", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, true },
{ 0x25, "vpternlogq", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, true },

Expand Down Expand Up @@ -401,6 +415,38 @@ void putX_X_XM_IMM()
{ 0x5A, "vcvtsh2sd", T_F3 | T_MAP5 | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N2, false },
{ 0x13, "vcvtsh2ss", T_MAP6 | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N2, false },
{ 0x1D, "vcvtss2sh", T_MAP5 | T_MUST_EVEX | T_EW0 | T_ER_X | T_N4, false },

{ 0x58, "vaddnepbf16", T_MUST_EVEX | T_66 | T_MAP5 | T_EW0 | T_YMM | T_B16, false },
{ 0x5E, "vdivnepbf16", T_MUST_EVEX | T_66 | T_MAP5 | T_EW0 | T_YMM | T_B16, false },
{ 0x5F, "vmaxpbf16", T_MUST_EVEX | T_66 | T_MAP5 | T_EW0 | T_YMM | T_B16, false },
{ 0x5D, "vminpbf16", T_MUST_EVEX | T_66 | T_MAP5 | T_EW0 | T_YMM | T_B16, false },
{ 0x59, "vmulnepbf16", T_MUST_EVEX | T_66 | T_MAP5 | T_EW0 | T_YMM | T_B16, false },
{ 0x2C, "vscalefpbf16", T_MUST_EVEX | T_MAP6 | T_EW0 | T_YMM | T_B16,false },
{ 0x5C, "vsubnepbf16", T_MUST_EVEX | T_66 | T_MAP5 | T_EW0 | T_YMM | T_B16, false },

{ 0x98, "vfmadd132nepbf16", T_MUST_EVEX | T_MAP6 | T_EW0 | T_YMM | T_B16, false },
{ 0xA8, "vfmadd213nepbf16", T_MUST_EVEX | T_MAP6 | T_EW0 | T_YMM | T_B16, false },
{ 0xB8, "vfmadd231nepbf16", T_MUST_EVEX | T_MAP6 | T_EW0 | T_YMM | T_B16, false },

{ 0x9C, "vfnmadd132nepbf16", T_MUST_EVEX | T_MAP6 | T_EW0 | T_YMM | T_B16, false },
{ 0xAC, "vfnmadd213nepbf16", T_MUST_EVEX | T_MAP6 | T_EW0 | T_YMM | T_B16, false },
{ 0xBC, "vfnmadd231nepbf16", T_MUST_EVEX | T_MAP6 | T_EW0 | T_YMM | T_B16, false },

{ 0x9A, "vfmsub132nepbf16", T_MUST_EVEX | T_MAP6 | T_EW0 | T_YMM | T_B16, false },
{ 0xAA, "vfmsub213nepbf16", T_MUST_EVEX | T_MAP6 | T_EW0 | T_YMM | T_B16, false },
{ 0xBA, "vfmsub231nepbf16", T_MUST_EVEX | T_MAP6 | T_EW0 | T_YMM | T_B16, false },

{ 0x9E, "vfnmsub132nepbf16", T_MUST_EVEX | T_MAP6 | T_EW0 | T_YMM | T_B16, false },
{ 0xAE, "vfnmsub213nepbf16", T_MUST_EVEX | T_MAP6 | T_EW0 | T_YMM | T_B16, false },
{ 0xBE, "vfnmsub231nepbf16", T_MUST_EVEX | T_MAP6 | T_EW0 | T_YMM | T_B16, false },

{ 0x67, "vcvt2ps2phx", T_MUST_EVEX | T_66 | T_0F38 | T_EW0 | T_YMM | T_B32 | T_ER_Y | T_ER_Z, false },
{ 0x74, "vcvtne2ph2bf8", T_MUST_EVEX | T_F2 | T_0F38 | T_EW0 | T_YMM | T_B16 | T_N1, false },
{ 0x74, "vcvtne2ph2bf8s", T_MUST_EVEX | T_F2 | T_MAP5 | T_EW0 | T_YMM | T_B16 | T_N1, false },
{ 0x18, "vcvtne2ph2hf8", T_MUST_EVEX | T_F2 | T_MAP5 | T_EW0 | T_YMM | T_B16 | T_N1, false },
{ 0x1B, "vcvtne2ph2hf8s", T_MUST_EVEX | T_F2 | T_MAP5 | T_EW0 | T_YMM | T_B16 | T_N1, false },

{ 0x52, "vdpphps", T_MUST_EVEX | T_0F38 | T_EW0 | T_YMM | T_B32, false },
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl *p = &tbl[i];
Expand All @@ -410,6 +456,45 @@ void putX_X_XM_IMM()
}
}

// Print one member-function definition per table entry for mnemonics that
// accept a trailing PreferredEncoding argument. Each generated function
// forwards to opEncoding() with the shared type flags, the opcode, the
// requested encoding, the immediate (or NONE), the VEX-only flags, the
// EVEX-only flags and the `sel` value taken from the table.
void putX_X_XM_IMM_AVX10()
{
	struct Entry {
		uint8_t code;       // opcode byte, printed as 0x%02X
		const char *name;   // name of the generated function
		uint64_t type;      // flags passed for both encodings
		uint64_t typeVex;   // extra flags passed as the VEX type argument
		uint64_t typeEvex;  // extra flags passed as the EVEX type argument
		int sel;            // forwarded verbatim as the last opEncoding() argument
		bool hasIMM;        // true when the generated function takes `uint8_t imm`
	};
	const Entry entries[] = {
		// vpdpb[su,uu,ss]d[,s]
		{ 0x50, "vpdpbssd", T_F2|T_0F38|T_YMM, T_W0, T_EW0|T_B32, 1, false },
		{ 0x51, "vpdpbssds", T_F2|T_0F38|T_YMM, T_W0, T_EW0|T_B32, 1, false },
		{ 0x50, "vpdpbsud", T_F3|T_0F38|T_YMM, T_W0, T_EW0|T_B32, 1, false },
		{ 0x51, "vpdpbsuds", T_F3|T_0F38|T_YMM, T_W0, T_EW0|T_B32, 1, false },
		{ 0x50, "vpdpbuud", T_0F38|T_YMM, T_W0, T_EW0|T_B32, 1, false },
		{ 0x51, "vpdpbuuds", T_0F38|T_YMM, T_W0, T_EW0|T_B32, 1, false },

		// vpdpw[su,us,uu]d[,s]
		{ 0xD2, "vpdpwsud", T_F3|T_0F38|T_YMM, T_W0, T_EW0|T_B32, 1, false },
		{ 0xD3, "vpdpwsuds", T_F3|T_0F38|T_YMM, T_W0, T_EW0|T_B32, 1, false },
		{ 0xD2, "vpdpwusd", T_66|T_0F38|T_YMM, T_W0, T_EW0|T_B32, 1, false },
		{ 0xD3, "vpdpwusds", T_66|T_0F38|T_YMM, T_W0, T_EW0|T_B32, 1, false },
		{ 0xD2, "vpdpwuud", T_0F38|T_YMM, T_W0, T_EW0|T_B32, 1, false },
		{ 0xD3, "vpdpwuuds", T_0F38|T_YMM, T_W0, T_EW0|T_B32, 1, false },

		{ 0x42, "vmpsadbw", T_0F3A|T_YMM, T_66|T_W0|T_YMM, T_F3|T_0F3A|T_EW0|T_B32, 1, true },
	};
	const size_t entryNum = NUM_OF_ARRAY(entries);
	for (size_t idx = 0; idx < entryNum; idx++) {
		const Entry& e = entries[idx];
		const std::string commonType = type2String(e.type);
		const std::string vexType = type2String(e.typeVex);
		const std::string evexType = type2String(e.typeEvex);
		// An immediate adds a parameter to the signature and is forwarded by
		// name; otherwise the literal NONE is emitted in its place.
		const char *immParam = e.hasIMM ? ", uint8_t imm" : "";
		const char *immArg = e.hasIMM ? "imm" : "NONE";
		printf("void %s(const Xmm& x1, const Xmm& x2, const Operand& op%s, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, %s, 0x%02X, encoding, %s, %s, %s, %d); }\n",
			e.name,
			immParam,
			commonType.c_str(),
			e.code,
			immArg,
			vexType.c_str(),
			evexType.c_str(),
			e.sel);
	}
}

void putShift()
{
const struct Tbl {
Expand Down Expand Up @@ -571,6 +656,8 @@ void putCvt()

{ 0x2A, "vcvtsi2sh", T_F3 | T_MAP5 | T_MUST_EVEX | T_ER_R | T_M_K, 6 },
{ 0x7B, "vcvtusi2sh", T_F3 | T_MAP5 | T_MUST_EVEX | T_ER_R | T_M_K, 6 },

{ 0x72, "vcvtneps2bf16", T_MUST_EVEX | T_F3 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_B32, 2 },
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl& p = tbl[i];
Expand Down Expand Up @@ -758,6 +845,15 @@ void putX_XM_IMM()

{ 0x62, "vpexpandb", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_N1, false },
{ 0x62, "vpexpandw", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z | T_N2, false },

{ 0x2F, "vcomsbf16", T_MUST_EVEX | T_66 | T_MAP5 | T_EW0 | T_N2, false },
{ 0x42, "vgetexppbf16", T_MUST_EVEX | T_66 | T_MAP5 | T_EW0 | T_YMM | T_B16, false },
{ 0x26, "vgetmantpbf16", T_MUST_EVEX | T_F2 | T_0F3A | T_EW0 | T_YMM | T_B16, true },
{ 0x4C, "vrcppbf16", T_MUST_EVEX | T_MAP6 | T_EW0 | T_YMM | T_B16, false },
{ 0x56, "vreducenepbf16", T_MUST_EVEX | T_F2 | T_0F3A | T_EW0 | T_YMM | T_B16, true },
{ 0x08, "vrndscalenepbf16", T_MUST_EVEX | T_F2 | T_0F3A | T_EW0 | T_YMM | T_B16, true },
{ 0x4E, "vrsqrtpbf16", T_MUST_EVEX | T_MAP6 | T_EW0 | T_YMM | T_B16, false },
{ 0x2C, "vscalefpbf16", T_MUST_EVEX | T_MAP6 | T_EW0 | T_YMM | T_B16, false },
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl *p = &tbl[i];
Expand Down Expand Up @@ -952,6 +1048,41 @@ void putFP16()
putFP16_2();
}

void putAVX10_2()
{
puts("void vcmppbf16(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opVex(k, &x, op, T_MUST_EVEX|T_F2|T_0F3A|T_EW0|T_YMM|T_B16, 0xC2, imm); }");
puts("void vfpclasspbf16(const Opmask& k, const Operand& op, uint8_t imm) { opVex(k.changeBit(op.getBit()), 0, op, T_MUST_EVEX|T_F2|T_0F3A|T_EW0|T_YMM|T_B16, 0x66, imm); }");

const struct Tbl {
uint8_t code;
const char *name;
uint64_t type;
} tbl1[] = {
{ 0x74, "vcvtbiasph2bf8", T_MUST_EVEX | T_0F38 | T_EW0 |T_YMM | T_B16 },
{ 0x74, "vcvtbiasph2bf8s", T_MUST_EVEX | T_MAP5 | T_EW0 |T_YMM | T_B16 },
{ 0x18, "vcvtbiasph2hf8", T_MUST_EVEX | T_MAP5 | T_EW0 | T_YMM | T_B16 },
{ 0x1B, "vcvtbiasph2hf8s", T_MUST_EVEX | T_MAP5 | T_EW0 | T_YMM | T_B16 },
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl1); i++) {
const Tbl *p = &tbl1[i];
std::string s = type2String(p->type);
printf("void %s(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt6(x1, x2, op, %s, 0x%02X); }\n" , p->name, s.c_str(), p->code);
}
puts("void vcvthf82ph(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_MUST_EVEX | T_F2 | T_MAP5 | T_EW0 | T_YMM | T_N1, 0x1E); }");

const Tbl tbl2[] = {
{ 0x74, "vcvtneph2bf8", T_MUST_EVEX | T_F3 | T_0F38 | T_EW0 | T_YMM | T_B16 },
{ 0x74, "vcvtneph2bf8s", T_MUST_EVEX | T_F3 |T_MAP5 | T_EW0 | T_YMM | T_B16 },
{ 0x18, "vcvtneph2hf8", T_MUST_EVEX | T_F3 |T_MAP5 | T_EW0 | T_YMM | T_B16 },
{ 0x1B, "vcvtneph2hf8s", T_MUST_EVEX | T_F3 |T_MAP5 | T_EW0 | T_YMM | T_B16 },
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl2); i++) {
const Tbl *p = &tbl2[i];
std::string s = type2String(p->type);
printf("void %s(const Xmm& x, const Operand& op) { opCvt2(x, op, %s, 0x%02X); }\n" , p->name, s.c_str(), p->code);
}
}

int main(int argc, char *[])
{
bool only64bit = argc == 2;
Expand All @@ -966,6 +1097,7 @@ int main(int argc, char *[])
putM_X();
putXM_X();
putX_X_XM_IMM();
putX_X_XM_IMM_AVX10();
putShift();
putExtractInsert();
putCvt();
Expand All @@ -977,4 +1109,5 @@ int main(int argc, char *[])
putScatter();
putV4FMA();
putFP16();
putAVX10_2();
}
33 changes: 16 additions & 17 deletions gen/gen_code.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ void putX_X_XM(bool omitOnly)
{ 0x0C, "blendps", T_0F3A | T_66 | T_W0 | T_YMM, true, true, 3 },
{ 0x41, "dppd", T_0F3A | T_66 | T_W0, true, true, 3 },
{ 0x40, "dpps", T_0F3A | T_66 | T_W0 | T_YMM, true, true, 3 },
{ 0x42, "mpsadbw", T_0F3A | T_66 | T_W0 | T_YMM, true, true, 3 },
{ 0x42, "mpsadbw", T_0F3A | T_66 | T_W0 | T_YMM, true, true, 1 },
{ 0x0E, "pblendw", T_0F3A | T_66 | T_W0 | T_YMM, true, true, 3 },
{ 0x02, "pblendd", T_0F3A | T_66 | T_W0 | T_YMM, true, true, 2 },
{ 0x0B, "roundsd", T_0F3A | T_66 | T_W0, true, true, 3 },
Expand Down Expand Up @@ -1802,7 +1802,6 @@ void put()
const Tbl& p = tbl[i];
printf("void %s(const Xmm& x, const Address& addr) { opVex(x, 0, addr, %s, 0x%02X); }\n", p.name, type2String(p.type).c_str(), p.code);
}
printf("void vcvtneps2bf16(const Xmm& x, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opCvt2(x, op, %s|orEvexIf(encoding), 0x72); }\n", type2String(T_F3 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_B32).c_str());
}
// haswell gpr(reg, reg, r/m)
{
Expand Down Expand Up @@ -1893,8 +1892,6 @@ void put()
{ 0x51, "vpdpbusds", T_66 | T_0F38 | T_YMM | T_EW0 | T_SAE_Z | T_B32},
{ 0x52, "vpdpwssd", T_66 | T_0F38 | T_YMM | T_EW0 | T_SAE_Z | T_B32},
{ 0x53, "vpdpwssds", T_66 | T_0F38 | T_YMM | T_EW0 | T_SAE_Z | T_B32},
{ 0xB4, "vpmadd52luq", T_66 | T_0F38 | T_YMM | T_EW1 | T_B64 },
{ 0xB5, "vpmadd52huq", T_66 | T_0F38 | T_YMM | T_EW1 | T_B64 },
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl *p = &tbl[i];
Expand All @@ -1904,32 +1901,34 @@ void put()
}
// avx-vnni-int8
// avx-vnni-int16
#if 0
{
const struct Tbl {
uint8_t code;
const char *name;
uint64_t type;
} tbl[] = {
{ 0x50, "vpdpbssd", T_F2 | T_0F38 | T_W0 | T_YMM },
{ 0x51, "vpdpbssds", T_F2 | T_0F38 | T_W0 | T_YMM },
{ 0x50, "vpdpbsud", T_F3 | T_0F38 | T_W0 | T_YMM },
{ 0x51, "vpdpbsuds", T_F3 | T_0F38 | T_W0 | T_YMM },
{ 0x50, "vpdpbuud", T_0F38 | T_W0 | T_YMM },
{ 0x51, "vpdpbuuds", T_0F38 | T_W0 | T_YMM },

{ 0xD2, "vpdpwsud", T_F3 | T_0F38 | T_W0 | T_YMM },
{ 0xD3, "vpdpwsuds", T_F3 | T_0F38 | T_W0 | T_YMM },
{ 0xD2, "vpdpwusd", T_66 | T_0F38 | T_W0 | T_YMM },
{ 0xD3, "vpdpwusds", T_66 | T_0F38 | T_W0 | T_YMM },
{ 0xD2, "vpdpwuud", T_0F38 | T_W0 | T_YMM },
{ 0xD3, "vpdpwuuds", T_0F38 | T_W0 | T_YMM },
// { 0x50, "vpdpbssd", T_F2 | T_0F38 | T_W0 | T_YMM },
// { 0x51, "vpdpbssds", T_F2 | T_0F38 | T_W0 | T_YMM },
// { 0x50, "vpdpbsud", T_F3 | T_0F38 | T_W0 | T_YMM },
// { 0x51, "vpdpbsuds", T_F3 | T_0F38 | T_W0 | T_YMM },
// { 0x50, "vpdpbuud", T_0F38 | T_W0 | T_YMM },
// { 0x51, "vpdpbuuds", T_0F38 | T_W0 | T_YMM },

// { 0xD2, "vpdpwsud", T_F3 | T_0F38 | T_W0 | T_YMM },
// { 0xD3, "vpdpwsuds", T_F3 | T_0F38 | T_W0 | T_YMM },
// { 0xD2, "vpdpwusd", T_66 | T_0F38 | T_W0 | T_YMM },
// { 0xD3, "vpdpwusds", T_66 | T_0F38 | T_W0 | T_YMM },
// { 0xD2, "vpdpwuud", T_0F38 | T_W0 | T_YMM },
// { 0xD3, "vpdpwuuds", T_0F38 | T_W0 | T_YMM },
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl *p = &tbl[i];
std::string s = type2String(p->type);
printf("void %s(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, %s, 0x%02X); }\n", p->name, s.c_str(), p->code);
}
}
#endif
}

void put32()
Expand Down
2 changes: 1 addition & 1 deletion meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
project(
'xbyak',
'cpp',
version: '7.09.1',
version: '7.10',
license: 'BSD-3-Clause',
default_options: 'b_ndebug=if-release'
)
Expand Down
2 changes: 1 addition & 1 deletion readme.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@

# Xbyak 7.09.1 [![Badge Build]][Build Status]
# Xbyak 7.10 [![Badge Build]][Build Status]

*A C++ JIT assembler for x86 (IA32), x64 (AMD64, x86-64)*

Expand Down
2 changes: 1 addition & 1 deletion readme.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@

C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 7.09.1
C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 7.10

-----------------------------------------------------------------------------
◎概要
Expand Down
4 changes: 2 additions & 2 deletions test/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -60,9 +60,9 @@ apx: apx.cpp $(XBYAK_INC)
avx10_test: avx10_test.cpp $(XBYAK_INC)
$(CXX) $(CFLAGS) avx10_test.cpp -o $@ -DXBYAK64

TEST_FILES=avx10.txt misc.txt
TEST_FILES=old.txt new-ymm.txt bf16.txt comp.txt convert.txt
xed_test:
@for target in $(addprefix target/, $(TEST_FILES)); do ./test_by_xed.sh $$target; done
@for target in $(addprefix avx10/, $(TEST_FILES)); do ./test_by_xed.sh $$target; done

test_nm: normalize_prefix $(TARGET)
$(MAKE) -C ../gen
Expand Down
Loading

0 comments on commit d067f0d

Please sign in to comment.