-
Notifications
You must be signed in to change notification settings - Fork 11.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[AMDGPU][MC][True16] Support VOP2 instructions with true16 format #115233
base: main
Are you sure you want to change the base?
[AMDGPU][MC][True16] Support VOP2 instructions with true16 format #115233
Conversation
595efc8
to
902962a
Compare
@llvm/pr-subscribers-mc Author: Brox Chen (broxigarchen) Changes: Support true16 format for VOP2 instructions in MC. This patch updates the true16 and fake16 vop_profile for the following instructions and updates the asm/dasm tests: Patch is 101.14 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/115233.diff 17 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index fdef9865b82c06..f4eec66fbb6157 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -345,6 +345,25 @@ static DecodeStatus decodeOperand_VSrcT16_Lo128(MCInst &Inst, unsigned Imm,
(AMDGPU::OperandSemantics)OperandSemantics));
}
+template <AMDGPUDisassembler::OpWidthTy OpWidth, unsigned ImmWidth,
+ unsigned OperandSemantics>
+static DecodeStatus
+decodeOperand_VSrcT16_Lo128_Deferred(MCInst &Inst, unsigned Imm,
+ uint64_t /*Addr*/,
+ const MCDisassembler *Decoder) {
+ const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
+ assert(isUInt<9>(Imm) && "9-bit encoding expected");
+ // Imm with IS_VGPR set yields the operand built by createVGPR16Operand; any other Imm is handed to decodeNonVGPRSrcOp.
+ if (Imm & AMDGPU::EncValues::IS_VGPR) {
+ bool IsHi = Imm & (1 << 7); // bit 7 of Imm is forwarded as the IsHi argument
+ unsigned RegIdx = Imm & 0x7f; // low 7 bits of Imm form the register index
+ return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
+ }
+ return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(
+ OpWidth, Imm & 0xFF, true, ImmWidth, // only the low 8 bits of Imm are passed; NOTE(review): `true` presumably requests a mandatory/deferred literal - confirm against decodeNonVGPRSrcOp
+ (AMDGPU::OperandSemantics)OperandSemantics));
+}
+
template <AMDGPUDisassembler::OpWidthTy OpWidth, unsigned ImmWidth,
unsigned OperandSemantics>
static DecodeStatus decodeOperand_VSrcT16(MCInst &Inst, unsigned Imm,
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index 852430129251c6..49be750acab960 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -1161,7 +1161,8 @@ def SSrcOrLds_b32 : SrcRegOrImm9 <SRegOrLds_32, "OPW32", "OPERAND_REG_IMM_INT32"
class SrcRegOrImmDeferred9<RegisterClass regClass, string opWidth,
string operandType, int immWidth, int OperandSemantics>
: RegOrImmOperand<regClass, operandType> {
- let DecoderMethod = "decodeSrcRegOrImmDeferred9<AMDGPUDisassembler::" #
+ string DecoderMethodName = "decodeSrcRegOrImmDeferred9";
+ let DecoderMethod = DecoderMethodName # "<AMDGPUDisassembler::" #
opWidth # ", " # immWidth # ", " # OperandSemantics # ">";
}
@@ -1222,6 +1223,13 @@ def VSrc_bf16_Deferred : SrcRegOrImmDeferred9<VS_32, "OPW16", "OPERAND_REG_IMM_B
def VSrc_f16_Deferred : SrcRegOrImmDeferred9<VS_32, "OPW16", "OPERAND_REG_IMM_FP16_DEFERRED", 16, OperandSemantics.FP16>;
def VSrc_f32_Deferred : SrcRegOrImmDeferred9<VS_32, "OPW32", "OPERAND_REG_IMM_FP32_DEFERRED", 32, OperandSemantics.FP32>;
+// True 16 Operands
+def VSrcT_f16_Lo128_Deferred : SrcRegOrImmDeferred9<VS_16_Lo128, "OPW16",
+ "OPERAND_REG_IMM_FP16_DEFERRED", 16, OperandSemantics.FP16> {
+ let DecoderMethodName = "decodeOperand_VSrcT16_Lo128_Deferred"; // replaces the class default name "decodeSrcRegOrImmDeferred9"
+ let EncoderMethod = "getMachineOpValueT16Lo128"; // NOTE(review): encoder is defined outside this diff excerpt - confirm it handles the hi/lo half encoding
+}
+
def VSrcFake16_bf16_Lo128_Deferred
: SrcRegOrImmDeferred9<VS_32_Lo128, "OPW16", "OPERAND_REG_IMM_BF16_DEFERRED", 16, OperandSemantics.BF16>;
def VSrcFake16_f16_Lo128_Deferred
diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index 925b60561c9d68..df41e3ceefe9e1 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -374,6 +374,12 @@ class VOP_MADAK <ValueType vt> : VOP_MADK_Base<vt> {
}
def VOP_MADAK_F16 : VOP_MADAK <f16>;
+def VOP_MADAK_F16_t16 : VOP_MADAK <f16> {
+ let IsTrue16 = 1;
+ let IsRealTrue16 = 1;
+ let DstRC = getVALUDstForVT<DstVT, 1/*IsTrue16*/, 0/*IsVOP3Encoding*/>.ret;
+ let Ins32 = (ins VSrcT_f16_Lo128_Deferred:$src0, VGPRSrc_16_Lo128:$src1, ImmOpType:$imm); // operand order: src0, src1, imm
+}
def VOP_MADAK_F16_fake16 : VOP_MADAK <f16> {
let IsTrue16 = 1;
let DstRC = getVALUDstForVT_fake16<DstVT>.ret;
@@ -399,6 +405,12 @@ class VOP_MADMK <ValueType vt> : VOP_MADK_Base<vt> {
}
def VOP_MADMK_F16 : VOP_MADMK <f16>;
+def VOP_MADMK_F16_t16 : VOP_MADMK <f16> {
+ let IsTrue16 = 1;
+ let IsRealTrue16 = 1;
+ let DstRC = getVALUDstForVT<DstVT, 1/*IsTrue16*/, 0/*IsVOP3Encoding*/>.ret;
+ let Ins32 = (ins VSrcT_f16_Lo128_Deferred:$src0, ImmOpType:$imm, VGPRSrc_16_Lo128:$src1); // operand order: src0, imm, src1 (imm precedes src1 here)
+}
def VOP_MADMK_F16_fake16 : VOP_MADMK <f16> {
let IsTrue16 = 1;
let DstRC = getVALUDstForVT_fake16<DstVT>.ret;
@@ -467,6 +479,42 @@ class VOP_MAC <ValueType vt0, ValueType vt1=vt0> : VOPProfile <[vt0, vt1, vt1, v
}
def VOP_MAC_F16 : VOP_MAC <f16>;
+def VOP_MAC_F16_t16 : VOP_MAC <f16> {
+ let IsTrue16 = 1;
+ let IsRealTrue16 = 1;
+ let HasOpSel = 1;
+ let DstRC = VOPDstOperand_t16Lo128;
+ let Src0RC32 = getVOPSrc0ForVT<Src0VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
+ let Src1RC32 = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
+ let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret:$src2);
+ let Src0DPP = getVregSrcForVT<Src0VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
+ let Src1DPP = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
+ let Src2DPP = getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
+ let Src0ModDPP = getSrcModDPP_t16<Src0VT, 0/*IsFake16*/>.ret;
+ let Src1ModDPP = getSrcModDPP_t16<Src1VT, 0/*IsFake16*/>.ret;
+ let Src2ModDPP = getSrcModDPP_t16<Src2VT, 0/*IsFake16*/>.ret;
+ let InsDPP = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
+ Src1ModDPP:$src1_modifiers, Src1DPP:$src1,
+ getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret:$src2, // stub argument
+ dpp_ctrl:$dpp_ctrl, DppRowMask:$row_mask,
+ DppBankMask:$bank_mask, DppBoundCtrl:$bound_ctrl);
+ let InsDPP8 = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
+ Src1ModDPP:$src1_modifiers, Src1DPP:$src1,
+ getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret:$src2, // stub argument
+ dpp8:$dpp8, Dpp8FI:$fi);
+ let DstRC64 = getVALUDstForVT<DstVT, 1/*IsTrue16*/, 1/*IsVOP3Encoding*/>.ret;
+ let Src0RC64 = getVOP3SrcForVT<Src0VT, 1/*IsTrue16*/>.ret;
+ let Src1RC64 = getVOP3SrcForVT<Src1VT, 1/*IsTrue16*/>.ret;
+ let Src0VOP3DPP = VGPRSrc_16; // fixed operand class, whereas Src1/Src2 below go through getVOP3DPPSrcForVT
+ let Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT, 0/*IsFake16*/>.ret;
+ let Src2VOP3DPP = getVOP3DPPSrcForVT<Src2VT, 0/*IsFake16*/>.ret;
+ let Src0ModVOP3DPP = getSrc0ModVOP3DPP<Src0VT, DstVT, 0/*IsFake16*/>.ret;
+ let Src1ModVOP3DPP = getSrcModVOP3DPP<Src1VT, 0/*IsFake16*/>.ret;
+ let Src2ModVOP3DPP = getSrcModVOP3DPP<Src2VT, 0/*IsFake16*/>.ret;
+ let Src0Mod = getSrc0Mod<Src0VT, DstVT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
+ let Src1Mod = getSrcMod<Src1VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
+ let Src2Mod = getSrcMod<Src2VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
+}
def VOP_MAC_F16_fake16 : VOP_MAC <f16> {
let IsTrue16 = 1;
let DstRC = getVALUDstForVT_fake16<DstVT>.ret;
@@ -998,6 +1046,9 @@ let FPDPRounding = 1, isReMaterializable = 1, FixedSize = 1 in {
let SubtargetPredicate = isGFX10Plus, True16Predicate = NotHasTrue16BitInsts in {
def V_FMAMK_F16 : VOP2_Pseudo <"v_fmamk_f16", VOP_MADMK_F16, [], "">;
}
+let True16Predicate = UseRealTrue16Insts in {
+ def V_FMAMK_F16_t16 : VOP2_Pseudo <"v_fmamk_f16_t16", VOP_MADMK_F16_t16, [], "">;
+}
let True16Predicate = UseFakeTrue16Insts in {
def V_FMAMK_F16_fake16 : VOP2_Pseudo <"v_fmamk_f16_fake16", VOP_MADMK_F16_fake16, [], "">;
}
@@ -1006,6 +1057,9 @@ let isCommutable = 1 in {
let SubtargetPredicate = isGFX10Plus, True16Predicate = NotHasTrue16BitInsts in {
def V_FMAAK_F16 : VOP2_Pseudo <"v_fmaak_f16", VOP_MADAK_F16, [], "">;
}
+let True16Predicate = UseRealTrue16Insts in {
+ def V_FMAAK_F16_t16 : VOP2_Pseudo <"v_fmaak_f16_t16", VOP_MADAK_F16_t16, [], "">;
+}
let True16Predicate = UseFakeTrue16Insts in {
def V_FMAAK_F16_fake16 : VOP2_Pseudo <"v_fmaak_f16_fake16", VOP_MADAK_F16_fake16, [], "">;
}
@@ -1020,6 +1074,9 @@ let SubtargetPredicate = isGFX10Plus in {
let True16Predicate = NotHasTrue16BitInsts in {
defm V_FMAC_F16 : VOP2Inst <"v_fmac_f16", VOP_MAC_F16>;
}
+let True16Predicate = UseRealTrue16Insts in {
+ defm V_FMAC_F16_t16 : VOP2Inst <"v_fmac_f16_t16", VOP_MAC_F16_t16>;
+}
let True16Predicate = UseFakeTrue16Insts in {
defm V_FMAC_F16_fake16 : VOP2Inst <"v_fmac_f16_fake16", VOP_MAC_F16_fake16>;
}
@@ -1692,8 +1749,8 @@ multiclass VOP3Only_Realtriple_t16_gfx11_gfx12<bits<10> op, string asmName, stri
VOP3_Realtriple_t16_gfx12<op, asmName, OpName, "", /*IsSingle*/1>;
multiclass VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<bits<10> op, string asmName, string OpName = NAME> {
- defm OpName#"_t16": VOP3Only_Realtriple_t16_gfx11_gfx12<op, asmName, OpName#"_t16">;
- defm OpName#"_fake16": VOP3Only_Realtriple_t16_gfx11_gfx12<op, asmName, OpName#"_fake16">;
+ defm _t16: VOP3Only_Realtriple_t16_gfx11_gfx12<op, asmName, OpName#"_t16">;
+ defm _fake16: VOP3Only_Realtriple_t16_gfx11_gfx12<op, asmName, OpName#"_fake16">;
}
multiclass VOP3beOnly_Realtriple_gfx11_gfx12<bits<10> op> :
@@ -1712,7 +1769,14 @@ multiclass VOP2Only_Real_MADK_t16_and_fake16_gfx11_gfx12<bits<6> op, string asmN
multiclass VOP2_Real_FULL_t16_gfx11_gfx12<bits<6> op, string asmName,
string opName = NAME> :
- VOP2_Real_FULL_with_name_gfx11_gfx12<op, opName, asmName>;
+ VOP2_Real_FULL_with_name<GFX11Gen, op, opName, asmName>,
+ VOP2_Real_FULL_with_name<GFX12Gen, op, opName, asmName>;
+
+multiclass VOP2_Real_FULL_t16_and_fake16_gfx11_gfx12<bits<6> op, string asmName,
+ string opName = NAME> {
+ defm _t16: VOP2_Real_FULL_t16_gfx11_gfx12<op, asmName, opName#"_t16">; // same op and asmName; opName gets the "_t16" suffix
+ defm _fake16: VOP2_Real_FULL_t16_gfx11_gfx12<op, asmName, opName#"_fake16">; // same op and asmName; opName gets the "_fake16" suffix
+}
multiclass VOP2_Real_FULL_gfx11<bits<6> op> :
VOP2_Real_FULL<GFX11Gen, op>;
@@ -1747,15 +1811,15 @@ defm V_SUBREV_F16_t16 : VOP2_Real_FULL_t16_gfx11_gfx12<0x034, "v_subrev_f16
defm V_SUBREV_F16_fake16 : VOP2_Real_FULL_t16_gfx11_gfx12<0x034, "v_subrev_f16">;
defm V_MUL_F16_t16 : VOP2_Real_FULL_t16_gfx11_gfx12<0x035, "v_mul_f16">;
defm V_MUL_F16_fake16 : VOP2_Real_FULL_t16_gfx11_gfx12<0x035, "v_mul_f16">;
-defm V_FMAC_F16_fake16 : VOP2_Real_FULL_t16_gfx11_gfx12<0x036, "v_fmac_f16">;
+defm V_FMAC_F16 : VOP2_Real_FULL_t16_and_fake16_gfx11_gfx12<0x036, "v_fmac_f16">;
defm V_LDEXP_F16_t16 : VOP2_Real_FULL_t16_gfx11_gfx12<0x03b, "v_ldexp_f16">;
defm V_LDEXP_F16_fake16 : VOP2_Real_FULL_t16_gfx11_gfx12<0x03b, "v_ldexp_f16">;
defm V_MAX_F16_t16 : VOP2_Real_FULL_t16_gfx11<0x039, "v_max_f16">;
defm V_MAX_F16_fake16 : VOP2_Real_FULL_t16_gfx11<0x039, "v_max_f16">;
defm V_MIN_F16_t16 : VOP2_Real_FULL_t16_gfx11<0x03a, "v_min_f16">;
defm V_MIN_F16_fake16 : VOP2_Real_FULL_t16_gfx11<0x03a, "v_min_f16">;
-defm V_FMAMK_F16_fake16 : VOP2Only_Real_MADK_t16_gfx11_gfx12<0x037, "v_fmamk_f16">;
-defm V_FMAAK_F16_fake16 : VOP2Only_Real_MADK_t16_gfx11_gfx12<0x038, "v_fmaak_f16">;
+defm V_FMAMK_F16 : VOP2Only_Real_MADK_t16_and_fake16_gfx11_gfx12<0x037, "v_fmamk_f16">;
+defm V_FMAAK_F16 : VOP2Only_Real_MADK_t16_and_fake16_gfx11_gfx12<0x038, "v_fmaak_f16">;
// VOP3 only.
defm V_CNDMASK_B16 : VOP3Only_Realtriple_gfx11_gfx12<0x25d>;
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop2.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop2.s
index 2a4b3ea2017013..0f2852fc531ed4 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop2.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop2.s
@@ -648,50 +648,56 @@ v_dot2c_f32_f16 v5, src_scc, v2
v_dot2c_f32_f16 v255, 0xfe0b, v255
// GFX11: encoding: [0xff,0xfe,0xff,0x05,0x0b,0xfe,0x00,0x00]
-v_fmaak_f16 v5, v1, v2, 0xfe0b
+v_fmaak_f16 v5.l, v1.l, v2.l, 0xfe0b
// GFX11: encoding: [0x01,0x05,0x0a,0x70,0x0b,0xfe,0x00,0x00]
-v_fmaak_f16 v5, v127, v2, 0xfe0b
+v_fmaak_f16 v5.l, v127.l, v2.l, 0xfe0b
// GFX11: encoding: [0x7f,0x05,0x0a,0x70,0x0b,0xfe,0x00,0x00]
-v_fmaak_f16 v5, s1, v2, 0xfe0b
+v_fmaak_f16 v5.l, v1.h, v2.l, 0xfe0b
+// GFX11: encoding: [0x81,0x05,0x0a,0x70,0x0b,0xfe,0x00,0x00]
+
+v_fmaak_f16 v5.l, v127.h, v2.l, 0xfe0b
+// GFX11: encoding: [0xff,0x05,0x0a,0x70,0x0b,0xfe,0x00,0x00]
+
+v_fmaak_f16 v5.l, s1, v2.l, 0xfe0b
// GFX11: encoding: [0x01,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
-v_fmaak_f16 v5, s105, v2, 0xfe0b
+v_fmaak_f16 v5.l, s105, v2.l, 0xfe0b
// GFX11: encoding: [0x69,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
-v_fmaak_f16 v5, vcc_lo, v2, 0xfe0b
+v_fmaak_f16 v5.l, vcc_lo, v2.l, 0xfe0b
// GFX11: encoding: [0x6a,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
-v_fmaak_f16 v5, vcc_hi, v2, 0xfe0b
+v_fmaak_f16 v5.l, vcc_hi, v2.l, 0xfe0b
// GFX11: encoding: [0x6b,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
-v_fmaak_f16 v5, ttmp15, v2, 0xfe0b
+v_fmaak_f16 v5.l, ttmp15, v2.l, 0xfe0b
// GFX11: encoding: [0x7b,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
-v_fmaak_f16 v5, m0, v2, 0xfe0b
+v_fmaak_f16 v5.l, m0, v2.l, 0xfe0b
// GFX11: encoding: [0x7d,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
-v_fmaak_f16 v5, exec_lo, v2, 0xfe0b
+v_fmaak_f16 v5.l, exec_lo, v2.l, 0xfe0b
// GFX11: encoding: [0x7e,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
-v_fmaak_f16 v5, exec_hi, v2, 0xfe0b
+v_fmaak_f16 v5.l, exec_hi, v2.l, 0xfe0b
// GFX11: encoding: [0x7f,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
-v_fmaak_f16 v5, null, v2, 0xfe0b
+v_fmaak_f16 v5.l, null, v2.l, 0xfe0b
// GFX11: encoding: [0x7c,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
-v_fmaak_f16 v5, -1, v2, 0xfe0b
+v_fmaak_f16 v5.l, -1, v2.l, 0xfe0b
// GFX11: encoding: [0xc1,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
-v_fmaak_f16 v5, 0.5, v2, 0xfe0b
-// GFX11: encoding: [0xf0,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
+v_fmaak_f16 v127.l, 0.5, v127.l, 0xfe0b
+// GFX11: encoding: [0xf0,0xfe,0xfe,0x70,0x0b,0xfe,0x00,0x00]
-v_fmaak_f16 v5, src_scc, v2, 0xfe0b
-// GFX11: encoding: [0xfd,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
+v_fmaak_f16 v5.h, src_scc, v2.h, 0xfe0b
+// GFX11: encoding: [0xfd,0x04,0x0b,0x71,0x0b,0xfe,0x00,0x00]
-v_fmaak_f16 v127, 0xfe0b, v127, 0xfe0b
-// GFX11: encoding: [0xff,0xfe,0xfe,0x70,0x0b,0xfe,0x00,0x00]
+v_fmaak_f16 v127.h, 0xfe0b, v127.h, 0xfe0b
+// GFX11: encoding: [0xff,0xfe,0xff,0x71,0x0b,0xfe,0x00,0x00]
v_fmaak_f32 v5, v1, v2, 0xaf123456
// GFX11: encoding: [0x01,0x05,0x0a,0x5a,0x56,0x34,0x12,0xaf]
@@ -783,50 +789,56 @@ v_fmac_dx9_zero_f32 v5, src_scc, v2
v_fmac_dx9_zero_f32 v255, 0xaf123456, v255
// GFX11: encoding: [0xff,0xfe,0xff,0x0d,0x56,0x34,0x12,0xaf]
-v_fmac_f16 v5, v1, v2
+v_fmac_f16 v5.l, v1.l, v2.l
// GFX11: encoding: [0x01,0x05,0x0a,0x6c]
-v_fmac_f16 v5, v127, v2
+v_fmac_f16 v5.l, v127.l, v2.l
// GFX11: encoding: [0x7f,0x05,0x0a,0x6c]
-v_fmac_f16 v5, s1, v2
+v_fmac_f16 v5.l, v1.h, v2.l
+// GFX11: encoding: [0x81,0x05,0x0a,0x6c]
+
+v_fmac_f16 v5.l, v127.h, v2.l
+// GFX11: encoding: [0xff,0x05,0x0a,0x6c]
+
+v_fmac_f16 v5.l, s1, v2.l
// GFX11: encoding: [0x01,0x04,0x0a,0x6c]
-v_fmac_f16 v5, s105, v2
+v_fmac_f16 v5.l, s105, v2.l
// GFX11: encoding: [0x69,0x04,0x0a,0x6c]
-v_fmac_f16 v5, vcc_lo, v2
+v_fmac_f16 v5.l, vcc_lo, v2.l
// GFX11: encoding: [0x6a,0x04,0x0a,0x6c]
-v_fmac_f16 v5, vcc_hi, v2
+v_fmac_f16 v5.l, vcc_hi, v2.l
// GFX11: encoding: [0x6b,0x04,0x0a,0x6c]
-v_fmac_f16 v5, ttmp15, v2
+v_fmac_f16 v5.l, ttmp15, v2.l
// GFX11: encoding: [0x7b,0x04,0x0a,0x6c]
-v_fmac_f16 v5, m0, v2
+v_fmac_f16 v5.l, m0, v2.l
// GFX11: encoding: [0x7d,0x04,0x0a,0x6c]
-v_fmac_f16 v5, exec_lo, v2
+v_fmac_f16 v5.l, exec_lo, v2.l
// GFX11: encoding: [0x7e,0x04,0x0a,0x6c]
-v_fmac_f16 v5, exec_hi, v2
+v_fmac_f16 v5.l, exec_hi, v2.l
// GFX11: encoding: [0x7f,0x04,0x0a,0x6c]
-v_fmac_f16 v5, null, v2
+v_fmac_f16 v5.l, null, v2.l
// GFX11: encoding: [0x7c,0x04,0x0a,0x6c]
-v_fmac_f16 v5, -1, v2
+v_fmac_f16 v5.l, -1, v2.l
// GFX11: encoding: [0xc1,0x04,0x0a,0x6c]
-v_fmac_f16 v5, 0.5, v2
-// GFX11: encoding: [0xf0,0x04,0x0a,0x6c]
+v_fmac_f16 v127.l, 0.5, v127.l
+// GFX11: encoding: [0xf0,0xfe,0xfe,0x6c]
-v_fmac_f16 v5, src_scc, v2
-// GFX11: encoding: [0xfd,0x04,0x0a,0x6c]
+v_fmac_f16 v5.h, src_scc, v2.h
+// GFX11: encoding: [0xfd,0x04,0x0b,0x6d]
-v_fmac_f16 v127, 0xfe0b, v127
-// GFX11: encoding: [0xff,0xfe,0xfe,0x6c,0x0b,0xfe,0x00,0x00]
+v_fmac_f16 v127.h, 0xfe0b, v127.h
+// GFX11: encoding: [0xff,0xfe,0xff,0x6d,0x0b,0xfe,0x00,0x00]
v_fmac_f32 v5, v1, v2
// GFX11: encoding: [0x01,0x05,0x0a,0x56]
@@ -918,50 +930,56 @@ v_fmac_legacy_f32 v5, src_scc, v2
v_fmac_legacy_f32 v255, 0xaf123456, v255
// GFX11: encoding: [0xff,0xfe,0xff,0x0d,0x56,0x34,0x12,0xaf]
-v_fmamk_f16 v5, v1, 0xfe0b, v3
+v_fmamk_f16 v5.l, v1.l, 0xfe0b, v3.l
// GFX11: encoding: [0x01,0x07,0x0a,0x6e,0x0b,0xfe,0x00,0x00]
-v_fmamk_f16 v5, v127, 0xfe0b, v3
+v_fmamk_f16 v5.l, v127.l, 0xfe0b, v3.l
// GFX11: encoding: [0x7f,0x07,0x0a,0x6e,0x0b,0xfe,0x00,0x00]
-v_fmamk_f16 v5, s1, 0xfe0b, v3
+v_fmamk_f16 v5.l, v1.h, 0xfe0b, v3.l
+// GFX11: encoding: [0x81,0x07,0x0a,0x6e,0x0b,0xfe,0x00,0x00]
+
+v_fmamk_f16 v5.l, v127.h, 0xfe0b, v3.l
+// GFX11: encoding: [0xff,0x07,0x0a,0x6e,0x0b,0xfe,0x00,0x00]
+
+v_fmamk_f16 v5.l, s1, 0xfe0b, v3.l
// GFX11: encoding: [0x01,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00]
-v_fmamk_f16 v5, s105, 0xfe0b, v3
+v_fmamk_f16 v5.l, s105, 0xfe0b, v3.l
// GFX11: encoding: [0x69,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00]
-v_fmamk_f16 v5, vcc_lo, 0xfe0b, v3
+v_fmamk_f16 v5.l, vcc_lo, 0xfe0b, v3.l
// GFX11: encoding: [0x6a,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00]
-v_fmamk_f16 v5, vcc_hi, 0xfe0b, v3
+v_fmamk_f16 v5.l, vcc_hi, 0xfe0b, v3.l
// GFX11: encoding: [0x6b,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00]
-v_fmamk_f16 v5, ttmp15, 0xfe0b, v3
+v_fmamk_f16 v5.l, ttmp15, 0xfe0b, v3.l
// GFX11: encoding: [0x7b,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00]
-v_fmamk_f16 v5, m0, 0xfe0b, v3
+v_fmamk_f16 v5.l, m0, 0xfe0b, v3.l
// GFX11: encoding: [0x7d,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00]
-v_fmamk_f16 v5, exec_lo, 0xfe0b, v3
+v_fmamk_f16 v5.l, exec_lo, 0xfe0b, v3.l
// GFX11: encoding: [0x7e,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00]
-v_fmamk_f16 v5, exec_hi, 0xfe0b, v3
+v_fmamk_f16 v5.l, exec_hi, 0xfe0b, v3.l
// GFX11: encoding: [0x7f,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00]
-v_fmamk_f16 v5, null, 0xfe0b, v3
+v_fmamk_f16 v5.l, null, 0xfe0b, v3.l
// GFX11: encoding: [0x7c,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00]
-v_fmamk_f16 v5, -1, 0xfe0b, v3
+v_fmamk_f16 v5.l, -1, 0xfe0b, v3.l
// GFX11: encoding: [0xc1,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00]
-v_fmamk_f16 v5, 0.5, 0xfe0b, v3
-// GFX11: encoding: [0xf0,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00]
+v_fmamk_f16 v127.l, 0.5, 0xfe0b, v127.l
+// GFX11: encoding: [0xf0,0xfe,0xfe,0x6e,0x0b,0xfe,0x00,0x00]
-v_fmamk_f16 v5, src_scc, 0xfe0b, v3
-// GFX11: encoding: [0xfd,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00]
+v_fmamk_f16 v5.h, src_scc, 0xfe0b, v3.h
+// GFX11: encoding: [0xfd,0x06,0x0b,0x6f,0x0b,0xfe,0x00,0x00]
-v_fmamk_f16 v127, 0xfe0b, 0xfe0b, v127
-// GFX11: encoding: [0xff,0xfe,0xfe,0x6e,0x0b,0xfe,0x00,0x00]
+v_fmamk_f16 v127.h, 0xfe0b, 0xfe0b, v127.h
+// GFX11: encoding: [0xff,0xfe,0xff,0x6f,0x0b,0xfe,0x00,0x00]
v_fmamk_f32 v5, v1, 0xaf123456, v3
// GFX11: encoding: [0x01,0x07,0x0a,0x58,0x56,0x34,0x12,0xaf]
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop2_dpp16.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop2_dpp16.s
index 3eff00bb96e475..f40278cb9c42e7 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop2_dpp16.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop2_dpp16.s
@@ -629,47 +629,47 @@ v_dot2c_f32_f16 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 f
v_dot2c_f32_f16 v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
// GFX11: encoding: [0xfa,0xfe,0xff,0x05,0xff,0x6f,0xf5,0x30]
-v_fmac_f16 v5, v1, v2 quad_perm:[3,2,1,0]
+v_fmac_f16 v5.l, v1.l, v2.l quad_perm:[3,2,1,0]
// GFX11: encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x1b,0x00,0xff]
-v_fmac_f16 v5, v1, v2 quad_perm:[0,1,2,3]
+v_fmac_f16 v5.l, v1.l, v2.l quad_perm:[0,1,2,3]
// GFX11: encoding: [0xfa,0x04,0x0a,0x6c,0x01,0xe4,0x00,0xff]
-v_fmac_f16 v5, v1, v2 row_mirror
+v_fmac_f16 v5.l, v1.l, v2.l row_mirror
// GFX11: encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x40,0x01,0xff]
-v_fmac_f16 v5, v1, v2 row_half_mirror
+v_fmac_f16 v5.l, v1.l, v2.l row_half_mirror
// GFX11: encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x41,0x01,0xff]
-v_fmac_f16 v5, v1, v2 row_shl:1
+v_fmac_f16 v5.l, v1.l, v2.l row_shl:1
// GFX11: encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x01,0x01,0xff]
-v_fmac_f16 v5, v1, v2 row_shl:15
+v_fmac_f16 v5.l, v...
[truncated]
|
@llvm/pr-subscribers-backend-amdgpu Author: Brox Chen (broxigarchen) Changes: Support true16 format for VOP2 instructions in MC. This patch updates the true16 and fake16 vop_profile for the following instructions and updates the asm/dasm tests: Patch is 101.14 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/115233.diff 17 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index fdef9865b82c06..f4eec66fbb6157 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -345,6 +345,25 @@ static DecodeStatus decodeOperand_VSrcT16_Lo128(MCInst &Inst, unsigned Imm,
(AMDGPU::OperandSemantics)OperandSemantics));
}
+template <AMDGPUDisassembler::OpWidthTy OpWidth, unsigned ImmWidth,
+ unsigned OperandSemantics>
+static DecodeStatus
+decodeOperand_VSrcT16_Lo128_Deferred(MCInst &Inst, unsigned Imm,
+ uint64_t /*Addr*/,
+ const MCDisassembler *Decoder) {
+ const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
+ assert(isUInt<9>(Imm) && "9-bit encoding expected");
+ // Imm with IS_VGPR set yields the operand built by createVGPR16Operand; any other Imm is handed to decodeNonVGPRSrcOp.
+ if (Imm & AMDGPU::EncValues::IS_VGPR) {
+ bool IsHi = Imm & (1 << 7); // bit 7 of Imm is forwarded as the IsHi argument
+ unsigned RegIdx = Imm & 0x7f; // low 7 bits of Imm form the register index
+ return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
+ }
+ return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(
+ OpWidth, Imm & 0xFF, true, ImmWidth, // only the low 8 bits of Imm are passed; NOTE(review): `true` presumably requests a mandatory/deferred literal - confirm against decodeNonVGPRSrcOp
+ (AMDGPU::OperandSemantics)OperandSemantics));
+}
+
template <AMDGPUDisassembler::OpWidthTy OpWidth, unsigned ImmWidth,
unsigned OperandSemantics>
static DecodeStatus decodeOperand_VSrcT16(MCInst &Inst, unsigned Imm,
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index 852430129251c6..49be750acab960 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -1161,7 +1161,8 @@ def SSrcOrLds_b32 : SrcRegOrImm9 <SRegOrLds_32, "OPW32", "OPERAND_REG_IMM_INT32"
class SrcRegOrImmDeferred9<RegisterClass regClass, string opWidth,
string operandType, int immWidth, int OperandSemantics>
: RegOrImmOperand<regClass, operandType> {
- let DecoderMethod = "decodeSrcRegOrImmDeferred9<AMDGPUDisassembler::" #
+ string DecoderMethodName = "decodeSrcRegOrImmDeferred9";
+ let DecoderMethod = DecoderMethodName # "<AMDGPUDisassembler::" #
opWidth # ", " # immWidth # ", " # OperandSemantics # ">";
}
@@ -1222,6 +1223,13 @@ def VSrc_bf16_Deferred : SrcRegOrImmDeferred9<VS_32, "OPW16", "OPERAND_REG_IMM_B
def VSrc_f16_Deferred : SrcRegOrImmDeferred9<VS_32, "OPW16", "OPERAND_REG_IMM_FP16_DEFERRED", 16, OperandSemantics.FP16>;
def VSrc_f32_Deferred : SrcRegOrImmDeferred9<VS_32, "OPW32", "OPERAND_REG_IMM_FP32_DEFERRED", 32, OperandSemantics.FP32>;
+// True 16 Operands
+def VSrcT_f16_Lo128_Deferred : SrcRegOrImmDeferred9<VS_16_Lo128, "OPW16",
+ "OPERAND_REG_IMM_FP16_DEFERRED", 16, OperandSemantics.FP16> {
+ let DecoderMethodName = "decodeOperand_VSrcT16_Lo128_Deferred"; // replaces the class default name "decodeSrcRegOrImmDeferred9"
+ let EncoderMethod = "getMachineOpValueT16Lo128"; // NOTE(review): encoder is defined outside this diff excerpt - confirm it handles the hi/lo half encoding
+}
+
def VSrcFake16_bf16_Lo128_Deferred
: SrcRegOrImmDeferred9<VS_32_Lo128, "OPW16", "OPERAND_REG_IMM_BF16_DEFERRED", 16, OperandSemantics.BF16>;
def VSrcFake16_f16_Lo128_Deferred
diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index 925b60561c9d68..df41e3ceefe9e1 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -374,6 +374,12 @@ class VOP_MADAK <ValueType vt> : VOP_MADK_Base<vt> {
}
def VOP_MADAK_F16 : VOP_MADAK <f16>;
+def VOP_MADAK_F16_t16 : VOP_MADAK <f16> {
+ let IsTrue16 = 1;
+ let IsRealTrue16 = 1;
+ let DstRC = getVALUDstForVT<DstVT, 1/*IsTrue16*/, 0/*IsVOP3Encoding*/>.ret;
+ let Ins32 = (ins VSrcT_f16_Lo128_Deferred:$src0, VGPRSrc_16_Lo128:$src1, ImmOpType:$imm); // operand order: src0, src1, imm
+}
def VOP_MADAK_F16_fake16 : VOP_MADAK <f16> {
let IsTrue16 = 1;
let DstRC = getVALUDstForVT_fake16<DstVT>.ret;
@@ -399,6 +405,12 @@ class VOP_MADMK <ValueType vt> : VOP_MADK_Base<vt> {
}
def VOP_MADMK_F16 : VOP_MADMK <f16>;
+def VOP_MADMK_F16_t16 : VOP_MADMK <f16> {
+ let IsTrue16 = 1;
+ let IsRealTrue16 = 1;
+ let DstRC = getVALUDstForVT<DstVT, 1/*IsTrue16*/, 0/*IsVOP3Encoding*/>.ret;
+ let Ins32 = (ins VSrcT_f16_Lo128_Deferred:$src0, ImmOpType:$imm, VGPRSrc_16_Lo128:$src1); // operand order: src0, imm, src1 (imm precedes src1 here)
+}
def VOP_MADMK_F16_fake16 : VOP_MADMK <f16> {
let IsTrue16 = 1;
let DstRC = getVALUDstForVT_fake16<DstVT>.ret;
@@ -467,6 +479,42 @@ class VOP_MAC <ValueType vt0, ValueType vt1=vt0> : VOPProfile <[vt0, vt1, vt1, v
}
def VOP_MAC_F16 : VOP_MAC <f16>;
+def VOP_MAC_F16_t16 : VOP_MAC <f16> {
+ let IsTrue16 = 1;
+ let IsRealTrue16 = 1;
+ let HasOpSel = 1;
+ let DstRC = VOPDstOperand_t16Lo128;
+ let Src0RC32 = getVOPSrc0ForVT<Src0VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
+ let Src1RC32 = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
+ let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret:$src2);
+ let Src0DPP = getVregSrcForVT<Src0VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
+ let Src1DPP = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
+ let Src2DPP = getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
+ let Src0ModDPP = getSrcModDPP_t16<Src0VT, 0/*IsFake16*/>.ret;
+ let Src1ModDPP = getSrcModDPP_t16<Src1VT, 0/*IsFake16*/>.ret;
+ let Src2ModDPP = getSrcModDPP_t16<Src2VT, 0/*IsFake16*/>.ret;
+ let InsDPP = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
+ Src1ModDPP:$src1_modifiers, Src1DPP:$src1,
+ getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret:$src2, // stub argument
+ dpp_ctrl:$dpp_ctrl, DppRowMask:$row_mask,
+ DppBankMask:$bank_mask, DppBoundCtrl:$bound_ctrl);
+ let InsDPP8 = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
+ Src1ModDPP:$src1_modifiers, Src1DPP:$src1,
+ getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret:$src2, // stub argument
+ dpp8:$dpp8, Dpp8FI:$fi);
+ let DstRC64 = getVALUDstForVT<DstVT, 1/*IsTrue16*/, 1/*IsVOP3Encoding*/>.ret;
+ let Src0RC64 = getVOP3SrcForVT<Src0VT, 1/*IsTrue16*/>.ret;
+ let Src1RC64 = getVOP3SrcForVT<Src1VT, 1/*IsTrue16*/>.ret;
+ let Src0VOP3DPP = VGPRSrc_16; // fixed operand class, whereas Src1/Src2 below go through getVOP3DPPSrcForVT
+ let Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT, 0/*IsFake16*/>.ret;
+ let Src2VOP3DPP = getVOP3DPPSrcForVT<Src2VT, 0/*IsFake16*/>.ret;
+ let Src0ModVOP3DPP = getSrc0ModVOP3DPP<Src0VT, DstVT, 0/*IsFake16*/>.ret;
+ let Src1ModVOP3DPP = getSrcModVOP3DPP<Src1VT, 0/*IsFake16*/>.ret;
+ let Src2ModVOP3DPP = getSrcModVOP3DPP<Src2VT, 0/*IsFake16*/>.ret;
+ let Src0Mod = getSrc0Mod<Src0VT, DstVT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
+ let Src1Mod = getSrcMod<Src1VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
+ let Src2Mod = getSrcMod<Src2VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
+}
def VOP_MAC_F16_fake16 : VOP_MAC <f16> {
let IsTrue16 = 1;
let DstRC = getVALUDstForVT_fake16<DstVT>.ret;
@@ -998,6 +1046,9 @@ let FPDPRounding = 1, isReMaterializable = 1, FixedSize = 1 in {
let SubtargetPredicate = isGFX10Plus, True16Predicate = NotHasTrue16BitInsts in {
def V_FMAMK_F16 : VOP2_Pseudo <"v_fmamk_f16", VOP_MADMK_F16, [], "">;
}
+let True16Predicate = UseRealTrue16Insts in {
+ def V_FMAMK_F16_t16 : VOP2_Pseudo <"v_fmamk_f16_t16", VOP_MADMK_F16_t16, [], "">;
+}
let True16Predicate = UseFakeTrue16Insts in {
def V_FMAMK_F16_fake16 : VOP2_Pseudo <"v_fmamk_f16_fake16", VOP_MADMK_F16_fake16, [], "">;
}
@@ -1006,6 +1057,9 @@ let isCommutable = 1 in {
let SubtargetPredicate = isGFX10Plus, True16Predicate = NotHasTrue16BitInsts in {
def V_FMAAK_F16 : VOP2_Pseudo <"v_fmaak_f16", VOP_MADAK_F16, [], "">;
}
+let True16Predicate = UseRealTrue16Insts in {
+ def V_FMAAK_F16_t16 : VOP2_Pseudo <"v_fmaak_f16_t16", VOP_MADAK_F16_t16, [], "">;
+}
let True16Predicate = UseFakeTrue16Insts in {
def V_FMAAK_F16_fake16 : VOP2_Pseudo <"v_fmaak_f16_fake16", VOP_MADAK_F16_fake16, [], "">;
}
@@ -1020,6 +1074,9 @@ let SubtargetPredicate = isGFX10Plus in {
let True16Predicate = NotHasTrue16BitInsts in {
defm V_FMAC_F16 : VOP2Inst <"v_fmac_f16", VOP_MAC_F16>;
}
+let True16Predicate = UseRealTrue16Insts in {
+ defm V_FMAC_F16_t16 : VOP2Inst <"v_fmac_f16_t16", VOP_MAC_F16_t16>;
+}
let True16Predicate = UseFakeTrue16Insts in {
defm V_FMAC_F16_fake16 : VOP2Inst <"v_fmac_f16_fake16", VOP_MAC_F16_fake16>;
}
@@ -1692,8 +1749,8 @@ multiclass VOP3Only_Realtriple_t16_gfx11_gfx12<bits<10> op, string asmName, stri
VOP3_Realtriple_t16_gfx12<op, asmName, OpName, "", /*IsSingle*/1>;
multiclass VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<bits<10> op, string asmName, string OpName = NAME> {
- defm OpName#"_t16": VOP3Only_Realtriple_t16_gfx11_gfx12<op, asmName, OpName#"_t16">;
- defm OpName#"_fake16": VOP3Only_Realtriple_t16_gfx11_gfx12<op, asmName, OpName#"_fake16">;
+ defm _t16: VOP3Only_Realtriple_t16_gfx11_gfx12<op, asmName, OpName#"_t16">;
+ defm _fake16: VOP3Only_Realtriple_t16_gfx11_gfx12<op, asmName, OpName#"_fake16">;
}
multiclass VOP3beOnly_Realtriple_gfx11_gfx12<bits<10> op> :
@@ -1712,7 +1769,14 @@ multiclass VOP2Only_Real_MADK_t16_and_fake16_gfx11_gfx12<bits<6> op, string asmN
multiclass VOP2_Real_FULL_t16_gfx11_gfx12<bits<6> op, string asmName,
string opName = NAME> :
- VOP2_Real_FULL_with_name_gfx11_gfx12<op, opName, asmName>;
+ VOP2_Real_FULL_with_name<GFX11Gen, op, opName, asmName>,
+ VOP2_Real_FULL_with_name<GFX12Gen, op, opName, asmName>;
+
+multiclass VOP2_Real_FULL_t16_and_fake16_gfx11_gfx12<bits<6> op, string asmName,
+ string opName = NAME> {
+ defm _t16: VOP2_Real_FULL_t16_gfx11_gfx12<op, asmName, opName#"_t16">; // same op and asmName; opName gets the "_t16" suffix
+ defm _fake16: VOP2_Real_FULL_t16_gfx11_gfx12<op, asmName, opName#"_fake16">; // same op and asmName; opName gets the "_fake16" suffix
+}
multiclass VOP2_Real_FULL_gfx11<bits<6> op> :
VOP2_Real_FULL<GFX11Gen, op>;
@@ -1747,15 +1811,15 @@ defm V_SUBREV_F16_t16 : VOP2_Real_FULL_t16_gfx11_gfx12<0x034, "v_subrev_f16
defm V_SUBREV_F16_fake16 : VOP2_Real_FULL_t16_gfx11_gfx12<0x034, "v_subrev_f16">;
defm V_MUL_F16_t16 : VOP2_Real_FULL_t16_gfx11_gfx12<0x035, "v_mul_f16">;
defm V_MUL_F16_fake16 : VOP2_Real_FULL_t16_gfx11_gfx12<0x035, "v_mul_f16">;
-defm V_FMAC_F16_fake16 : VOP2_Real_FULL_t16_gfx11_gfx12<0x036, "v_fmac_f16">;
+defm V_FMAC_F16 : VOP2_Real_FULL_t16_and_fake16_gfx11_gfx12<0x036, "v_fmac_f16">;
defm V_LDEXP_F16_t16 : VOP2_Real_FULL_t16_gfx11_gfx12<0x03b, "v_ldexp_f16">;
defm V_LDEXP_F16_fake16 : VOP2_Real_FULL_t16_gfx11_gfx12<0x03b, "v_ldexp_f16">;
defm V_MAX_F16_t16 : VOP2_Real_FULL_t16_gfx11<0x039, "v_max_f16">;
defm V_MAX_F16_fake16 : VOP2_Real_FULL_t16_gfx11<0x039, "v_max_f16">;
defm V_MIN_F16_t16 : VOP2_Real_FULL_t16_gfx11<0x03a, "v_min_f16">;
defm V_MIN_F16_fake16 : VOP2_Real_FULL_t16_gfx11<0x03a, "v_min_f16">;
-defm V_FMAMK_F16_fake16 : VOP2Only_Real_MADK_t16_gfx11_gfx12<0x037, "v_fmamk_f16">;
-defm V_FMAAK_F16_fake16 : VOP2Only_Real_MADK_t16_gfx11_gfx12<0x038, "v_fmaak_f16">;
+defm V_FMAMK_F16 : VOP2Only_Real_MADK_t16_and_fake16_gfx11_gfx12<0x037, "v_fmamk_f16">;
+defm V_FMAAK_F16 : VOP2Only_Real_MADK_t16_and_fake16_gfx11_gfx12<0x038, "v_fmaak_f16">;
// VOP3 only.
defm V_CNDMASK_B16 : VOP3Only_Realtriple_gfx11_gfx12<0x25d>;
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop2.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop2.s
index 2a4b3ea2017013..0f2852fc531ed4 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop2.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop2.s
@@ -648,50 +648,56 @@ v_dot2c_f32_f16 v5, src_scc, v2
v_dot2c_f32_f16 v255, 0xfe0b, v255
// GFX11: encoding: [0xff,0xfe,0xff,0x05,0x0b,0xfe,0x00,0x00]
-v_fmaak_f16 v5, v1, v2, 0xfe0b
+v_fmaak_f16 v5.l, v1.l, v2.l, 0xfe0b
// GFX11: encoding: [0x01,0x05,0x0a,0x70,0x0b,0xfe,0x00,0x00]
-v_fmaak_f16 v5, v127, v2, 0xfe0b
+v_fmaak_f16 v5.l, v127.l, v2.l, 0xfe0b
// GFX11: encoding: [0x7f,0x05,0x0a,0x70,0x0b,0xfe,0x00,0x00]
-v_fmaak_f16 v5, s1, v2, 0xfe0b
+v_fmaak_f16 v5.l, v1.h, v2.l, 0xfe0b
+// GFX11: encoding: [0x81,0x05,0x0a,0x70,0x0b,0xfe,0x00,0x00]
+
+v_fmaak_f16 v5.l, v127.h, v2.l, 0xfe0b
+// GFX11: encoding: [0xff,0x05,0x0a,0x70,0x0b,0xfe,0x00,0x00]
+
+v_fmaak_f16 v5.l, s1, v2.l, 0xfe0b
// GFX11: encoding: [0x01,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
-v_fmaak_f16 v5, s105, v2, 0xfe0b
+v_fmaak_f16 v5.l, s105, v2.l, 0xfe0b
// GFX11: encoding: [0x69,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
-v_fmaak_f16 v5, vcc_lo, v2, 0xfe0b
+v_fmaak_f16 v5.l, vcc_lo, v2.l, 0xfe0b
// GFX11: encoding: [0x6a,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
-v_fmaak_f16 v5, vcc_hi, v2, 0xfe0b
+v_fmaak_f16 v5.l, vcc_hi, v2.l, 0xfe0b
// GFX11: encoding: [0x6b,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
-v_fmaak_f16 v5, ttmp15, v2, 0xfe0b
+v_fmaak_f16 v5.l, ttmp15, v2.l, 0xfe0b
// GFX11: encoding: [0x7b,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
-v_fmaak_f16 v5, m0, v2, 0xfe0b
+v_fmaak_f16 v5.l, m0, v2.l, 0xfe0b
// GFX11: encoding: [0x7d,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
-v_fmaak_f16 v5, exec_lo, v2, 0xfe0b
+v_fmaak_f16 v5.l, exec_lo, v2.l, 0xfe0b
// GFX11: encoding: [0x7e,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
-v_fmaak_f16 v5, exec_hi, v2, 0xfe0b
+v_fmaak_f16 v5.l, exec_hi, v2.l, 0xfe0b
// GFX11: encoding: [0x7f,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
-v_fmaak_f16 v5, null, v2, 0xfe0b
+v_fmaak_f16 v5.l, null, v2.l, 0xfe0b
// GFX11: encoding: [0x7c,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
-v_fmaak_f16 v5, -1, v2, 0xfe0b
+v_fmaak_f16 v5.l, -1, v2.l, 0xfe0b
// GFX11: encoding: [0xc1,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
-v_fmaak_f16 v5, 0.5, v2, 0xfe0b
-// GFX11: encoding: [0xf0,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
+v_fmaak_f16 v127.l, 0.5, v127.l, 0xfe0b
+// GFX11: encoding: [0xf0,0xfe,0xfe,0x70,0x0b,0xfe,0x00,0x00]
-v_fmaak_f16 v5, src_scc, v2, 0xfe0b
-// GFX11: encoding: [0xfd,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00]
+v_fmaak_f16 v5.h, src_scc, v2.h, 0xfe0b
+// GFX11: encoding: [0xfd,0x04,0x0b,0x71,0x0b,0xfe,0x00,0x00]
-v_fmaak_f16 v127, 0xfe0b, v127, 0xfe0b
-// GFX11: encoding: [0xff,0xfe,0xfe,0x70,0x0b,0xfe,0x00,0x00]
+v_fmaak_f16 v127.h, 0xfe0b, v127.h, 0xfe0b
+// GFX11: encoding: [0xff,0xfe,0xff,0x71,0x0b,0xfe,0x00,0x00]
v_fmaak_f32 v5, v1, v2, 0xaf123456
// GFX11: encoding: [0x01,0x05,0x0a,0x5a,0x56,0x34,0x12,0xaf]
@@ -783,50 +789,56 @@ v_fmac_dx9_zero_f32 v5, src_scc, v2
v_fmac_dx9_zero_f32 v255, 0xaf123456, v255
// GFX11: encoding: [0xff,0xfe,0xff,0x0d,0x56,0x34,0x12,0xaf]
-v_fmac_f16 v5, v1, v2
+v_fmac_f16 v5.l, v1.l, v2.l
// GFX11: encoding: [0x01,0x05,0x0a,0x6c]
-v_fmac_f16 v5, v127, v2
+v_fmac_f16 v5.l, v127.l, v2.l
// GFX11: encoding: [0x7f,0x05,0x0a,0x6c]
-v_fmac_f16 v5, s1, v2
+v_fmac_f16 v5.l, v1.h, v2.l
+// GFX11: encoding: [0x81,0x05,0x0a,0x6c]
+
+v_fmac_f16 v5.l, v127.h, v2.l
+// GFX11: encoding: [0xff,0x05,0x0a,0x6c]
+
+v_fmac_f16 v5.l, s1, v2.l
// GFX11: encoding: [0x01,0x04,0x0a,0x6c]
-v_fmac_f16 v5, s105, v2
+v_fmac_f16 v5.l, s105, v2.l
// GFX11: encoding: [0x69,0x04,0x0a,0x6c]
-v_fmac_f16 v5, vcc_lo, v2
+v_fmac_f16 v5.l, vcc_lo, v2.l
// GFX11: encoding: [0x6a,0x04,0x0a,0x6c]
-v_fmac_f16 v5, vcc_hi, v2
+v_fmac_f16 v5.l, vcc_hi, v2.l
// GFX11: encoding: [0x6b,0x04,0x0a,0x6c]
-v_fmac_f16 v5, ttmp15, v2
+v_fmac_f16 v5.l, ttmp15, v2.l
// GFX11: encoding: [0x7b,0x04,0x0a,0x6c]
-v_fmac_f16 v5, m0, v2
+v_fmac_f16 v5.l, m0, v2.l
// GFX11: encoding: [0x7d,0x04,0x0a,0x6c]
-v_fmac_f16 v5, exec_lo, v2
+v_fmac_f16 v5.l, exec_lo, v2.l
// GFX11: encoding: [0x7e,0x04,0x0a,0x6c]
-v_fmac_f16 v5, exec_hi, v2
+v_fmac_f16 v5.l, exec_hi, v2.l
// GFX11: encoding: [0x7f,0x04,0x0a,0x6c]
-v_fmac_f16 v5, null, v2
+v_fmac_f16 v5.l, null, v2.l
// GFX11: encoding: [0x7c,0x04,0x0a,0x6c]
-v_fmac_f16 v5, -1, v2
+v_fmac_f16 v5.l, -1, v2.l
// GFX11: encoding: [0xc1,0x04,0x0a,0x6c]
-v_fmac_f16 v5, 0.5, v2
-// GFX11: encoding: [0xf0,0x04,0x0a,0x6c]
+v_fmac_f16 v127.l, 0.5, v127.l
+// GFX11: encoding: [0xf0,0xfe,0xfe,0x6c]
-v_fmac_f16 v5, src_scc, v2
-// GFX11: encoding: [0xfd,0x04,0x0a,0x6c]
+v_fmac_f16 v5.h, src_scc, v2.h
+// GFX11: encoding: [0xfd,0x04,0x0b,0x6d]
-v_fmac_f16 v127, 0xfe0b, v127
-// GFX11: encoding: [0xff,0xfe,0xfe,0x6c,0x0b,0xfe,0x00,0x00]
+v_fmac_f16 v127.h, 0xfe0b, v127.h
+// GFX11: encoding: [0xff,0xfe,0xff,0x6d,0x0b,0xfe,0x00,0x00]
v_fmac_f32 v5, v1, v2
// GFX11: encoding: [0x01,0x05,0x0a,0x56]
@@ -918,50 +930,56 @@ v_fmac_legacy_f32 v5, src_scc, v2
v_fmac_legacy_f32 v255, 0xaf123456, v255
// GFX11: encoding: [0xff,0xfe,0xff,0x0d,0x56,0x34,0x12,0xaf]
-v_fmamk_f16 v5, v1, 0xfe0b, v3
+v_fmamk_f16 v5.l, v1.l, 0xfe0b, v3.l
// GFX11: encoding: [0x01,0x07,0x0a,0x6e,0x0b,0xfe,0x00,0x00]
-v_fmamk_f16 v5, v127, 0xfe0b, v3
+v_fmamk_f16 v5.l, v127.l, 0xfe0b, v3.l
// GFX11: encoding: [0x7f,0x07,0x0a,0x6e,0x0b,0xfe,0x00,0x00]
-v_fmamk_f16 v5, s1, 0xfe0b, v3
+v_fmamk_f16 v5.l, v1.h, 0xfe0b, v3.l
+// GFX11: encoding: [0x81,0x07,0x0a,0x6e,0x0b,0xfe,0x00,0x00]
+
+v_fmamk_f16 v5.l, v127.h, 0xfe0b, v3.l
+// GFX11: encoding: [0xff,0x07,0x0a,0x6e,0x0b,0xfe,0x00,0x00]
+
+v_fmamk_f16 v5.l, s1, 0xfe0b, v3.l
// GFX11: encoding: [0x01,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00]
-v_fmamk_f16 v5, s105, 0xfe0b, v3
+v_fmamk_f16 v5.l, s105, 0xfe0b, v3.l
// GFX11: encoding: [0x69,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00]
-v_fmamk_f16 v5, vcc_lo, 0xfe0b, v3
+v_fmamk_f16 v5.l, vcc_lo, 0xfe0b, v3.l
// GFX11: encoding: [0x6a,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00]
-v_fmamk_f16 v5, vcc_hi, 0xfe0b, v3
+v_fmamk_f16 v5.l, vcc_hi, 0xfe0b, v3.l
// GFX11: encoding: [0x6b,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00]
-v_fmamk_f16 v5, ttmp15, 0xfe0b, v3
+v_fmamk_f16 v5.l, ttmp15, 0xfe0b, v3.l
// GFX11: encoding: [0x7b,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00]
-v_fmamk_f16 v5, m0, 0xfe0b, v3
+v_fmamk_f16 v5.l, m0, 0xfe0b, v3.l
// GFX11: encoding: [0x7d,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00]
-v_fmamk_f16 v5, exec_lo, 0xfe0b, v3
+v_fmamk_f16 v5.l, exec_lo, 0xfe0b, v3.l
// GFX11: encoding: [0x7e,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00]
-v_fmamk_f16 v5, exec_hi, 0xfe0b, v3
+v_fmamk_f16 v5.l, exec_hi, 0xfe0b, v3.l
// GFX11: encoding: [0x7f,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00]
-v_fmamk_f16 v5, null, 0xfe0b, v3
+v_fmamk_f16 v5.l, null, 0xfe0b, v3.l
// GFX11: encoding: [0x7c,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00]
-v_fmamk_f16 v5, -1, 0xfe0b, v3
+v_fmamk_f16 v5.l, -1, 0xfe0b, v3.l
// GFX11: encoding: [0xc1,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00]
-v_fmamk_f16 v5, 0.5, 0xfe0b, v3
-// GFX11: encoding: [0xf0,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00]
+v_fmamk_f16 v127.l, 0.5, 0xfe0b, v127.l
+// GFX11: encoding: [0xf0,0xfe,0xfe,0x6e,0x0b,0xfe,0x00,0x00]
-v_fmamk_f16 v5, src_scc, 0xfe0b, v3
-// GFX11: encoding: [0xfd,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00]
+v_fmamk_f16 v5.h, src_scc, 0xfe0b, v3.h
+// GFX11: encoding: [0xfd,0x06,0x0b,0x6f,0x0b,0xfe,0x00,0x00]
-v_fmamk_f16 v127, 0xfe0b, 0xfe0b, v127
-// GFX11: encoding: [0xff,0xfe,0xfe,0x6e,0x0b,0xfe,0x00,0x00]
+v_fmamk_f16 v127.h, 0xfe0b, 0xfe0b, v127.h
+// GFX11: encoding: [0xff,0xfe,0xff,0x6f,0x0b,0xfe,0x00,0x00]
v_fmamk_f32 v5, v1, 0xaf123456, v3
// GFX11: encoding: [0x01,0x07,0x0a,0x58,0x56,0x34,0x12,0xaf]
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop2_dpp16.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop2_dpp16.s
index 3eff00bb96e475..f40278cb9c42e7 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop2_dpp16.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop2_dpp16.s
@@ -629,47 +629,47 @@ v_dot2c_f32_f16 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 f
v_dot2c_f32_f16 v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
// GFX11: encoding: [0xfa,0xfe,0xff,0x05,0xff,0x6f,0xf5,0x30]
-v_fmac_f16 v5, v1, v2 quad_perm:[3,2,1,0]
+v_fmac_f16 v5.l, v1.l, v2.l quad_perm:[3,2,1,0]
// GFX11: encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x1b,0x00,0xff]
-v_fmac_f16 v5, v1, v2 quad_perm:[0,1,2,3]
+v_fmac_f16 v5.l, v1.l, v2.l quad_perm:[0,1,2,3]
// GFX11: encoding: [0xfa,0x04,0x0a,0x6c,0x01,0xe4,0x00,0xff]
-v_fmac_f16 v5, v1, v2 row_mirror
+v_fmac_f16 v5.l, v1.l, v2.l row_mirror
// GFX11: encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x40,0x01,0xff]
-v_fmac_f16 v5, v1, v2 row_half_mirror
+v_fmac_f16 v5.l, v1.l, v2.l row_half_mirror
// GFX11: encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x41,0x01,0xff]
-v_fmac_f16 v5, v1, v2 row_shl:1
+v_fmac_f16 v5.l, v1.l, v2.l row_shl:1
// GFX11: encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x01,0x01,0xff]
-v_fmac_f16 v5, v1, v2 row_shl:15
+v_fmac_f16 v5.l, v...
[truncated]
|
@@ -467,6 +479,42 @@ class VOP_MAC <ValueType vt0, ValueType vt1=vt0> : VOPProfile <[vt0, vt1, vt1, v | |||
} | |||
|
|||
def VOP_MAC_F16 : VOP_MAC <f16>; | |||
def VOP_MAC_F16_t16 : VOP_MAC <f16> { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is a lot of copy paste. Is there a way to avoid replicating so many lets?
Support true16 format for VOP2 instructions in MC
This patch updates the true16 and fake16 vop_profile for the following instructions and updates the asm/dasm tests:
v_fmac_f16
v_fmamk_f16
v_fmaak_f16
It seems the vop2_t16_promote.s files were not updated with the true16 flag in the previous batch update. They will be updated separately.