Skip to content

Commit

Permalink
[RISCV] Remove {s,u}int_to_fp custom op action for f16/bf16 (#111471)
Browse files Browse the repository at this point in the history
It turns out that {s,u}int_to_fp nodes get their operation action from
their operand's type, not the result type, so we don't need to set it
for fp16 or bf16. vp_{s,u}int_to_fp uses the result type though so we
need to keep it.

This also means that we can lower int_to_fp for fixed length bf16
vectors already, so this adds tests for that.

The cost model test changes are due to BasicTTIImpl's getCastInstrCost
not taking into account that int_to_fp needs its legal type swapped.
This can be fixed in a later patch, but its worth noting that the
affected types in the tests currently crash when lowered anyway (due to
them needing split at LMUL > 8)
  • Loading branch information
lukel97 authored Oct 10, 2024
1 parent 6779376 commit a3cd269
Show file tree
Hide file tree
Showing 4 changed files with 189 additions and 18 deletions.
9 changes: 3 additions & 6 deletions llvm/lib/Target/RISCV/RISCVISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1071,9 +1071,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT,
Custom);
setOperationAction(ISD::SELECT_CC, VT, Expand);
setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::VP_SINT_TO_FP,
ISD::VP_UINT_TO_FP},
VT, Custom);
setOperationAction({ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP}, VT, Custom);
setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::CONCAT_VECTORS,
ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR,
ISD::VECTOR_DEINTERLEAVE, ISD::VECTOR_INTERLEAVE,
Expand Down Expand Up @@ -1343,9 +1341,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(
{ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
Custom);
setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP,
ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP},
VT, Custom);
setOperationAction({ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP}, VT,
Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
if (Subtarget.hasStdExtZfhmin()) {
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/Analysis/CostModel/RISCV/cast-half.ll
Original file line number Diff line number Diff line change
Expand Up @@ -842,7 +842,7 @@ define void @sitofp() {
; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %v64i64_v64f16 = sitofp <64 x i64> undef to <64 x half>
; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %v64i1_v64f16 = sitofp <64 x i1> undef to <64 x half>
; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %v128i8_v128f16 = sitofp <128 x i8> undef to <128 x half>
; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v128i16_v128f16 = sitofp <128 x i16> undef to <128 x half>
; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v128i16_v128f16 = sitofp <128 x i16> undef to <128 x half>
; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %v128i32_v128f16 = sitofp <128 x i32> undef to <128 x half>
; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %v128i64_v128f16 = sitofp <128 x i64> undef to <128 x half>
; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %v128i1_v128f16 = sitofp <128 x i1> undef to <128 x half>
Expand Down Expand Up @@ -988,7 +988,7 @@ define void @sitofp() {
; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %v64i64_v64f16 = sitofp <64 x i64> undef to <64 x half>
; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %v64i1_v64f16 = sitofp <64 x i1> undef to <64 x half>
; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %v128i8_v128f16 = sitofp <128 x i8> undef to <128 x half>
; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v128i16_v128f16 = sitofp <128 x i16> undef to <128 x half>
; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v128i16_v128f16 = sitofp <128 x i16> undef to <128 x half>
; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %v128i32_v128f16 = sitofp <128 x i32> undef to <128 x half>
; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %v128i64_v128f16 = sitofp <128 x i64> undef to <128 x half>
; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %v128i1_v128f16 = sitofp <128 x i1> undef to <128 x half>
Expand Down Expand Up @@ -1208,7 +1208,7 @@ define void @uitofp() {
; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %v64i64_v64f16 = uitofp <64 x i64> undef to <64 x half>
; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %v64i1_v64f16 = uitofp <64 x i1> undef to <64 x half>
; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %v128i8_v128f16 = uitofp <128 x i8> undef to <128 x half>
; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v128i16_v128f16 = uitofp <128 x i16> undef to <128 x half>
; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v128i16_v128f16 = uitofp <128 x i16> undef to <128 x half>
; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %v128i32_v128f16 = uitofp <128 x i32> undef to <128 x half>
; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %v128i64_v128f16 = uitofp <128 x i64> undef to <128 x half>
; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %v128i1_v128f16 = uitofp <128 x i1> undef to <128 x half>
Expand Down Expand Up @@ -1354,7 +1354,7 @@ define void @uitofp() {
; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %v64i64_v64f16 = uitofp <64 x i64> undef to <64 x half>
; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %v64i1_v64f16 = uitofp <64 x i1> undef to <64 x half>
; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %v128i8_v128f16 = uitofp <128 x i8> undef to <128 x half>
; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v128i16_v128f16 = uitofp <128 x i16> undef to <128 x half>
; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v128i16_v128f16 = uitofp <128 x i16> undef to <128 x half>
; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %v128i32_v128f16 = uitofp <128 x i32> undef to <128 x half>
; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %v128i64_v128f16 = uitofp <128 x i64> undef to <128 x half>
; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %v128i1_v128f16 = uitofp <128 x i1> undef to <128 x half>
Expand Down
66 changes: 62 additions & 4 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,ZVFH32
; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,ZVFH64
; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfhmin,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMIN32
; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfhmin,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMIN64
; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+zvfbfmin,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,ZVFH32
; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+zvfbfmin,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,ZVFH64
; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfhmin,+zvfbfmin,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMIN32
; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfhmin,+zvfbfmin,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMIN64

define void @fp2si_v2f32_v2i32(ptr %x, ptr %y) {
; CHECK-LABEL: fp2si_v2f32_v2i32:
Expand Down Expand Up @@ -432,6 +432,64 @@ define void @fp2ui_v8f32_v8i64(ptr %x, ptr %y) {
ret void
}

define void @fp2si_v2bf16_v2i64(ptr %x, ptr %y) {
; CHECK-LABEL: fp2si_v2bf16_v2i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT: vfwcvt.rtz.x.f.v v8, v9
; CHECK-NEXT: vse64.v v8, (a1)
; CHECK-NEXT: ret
%a = load <2 x bfloat>, ptr %x
%d = fptosi <2 x bfloat> %a to <2 x i64>
store <2 x i64> %d, ptr %y
ret void
}

define void @fp2ui_v2bf16_v2i64(ptr %x, ptr %y) {
; CHECK-LABEL: fp2ui_v2bf16_v2i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT: vfwcvt.rtz.xu.f.v v8, v9
; CHECK-NEXT: vse64.v v8, (a1)
; CHECK-NEXT: ret
%a = load <2 x bfloat>, ptr %x
%d = fptoui <2 x bfloat> %a to <2 x i64>
store <2 x i64> %d, ptr %y
ret void
}

define <2 x i1> @fp2si_v2bf16_v2i1(<2 x bfloat> %x) {
; CHECK-LABEL: fp2si_v2bf16_v2i1:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
; CHECK-NEXT: vfncvt.rtz.x.f.w v8, v9
; CHECK-NEXT: vand.vi v8, v8, 1
; CHECK-NEXT: vmsne.vi v0, v8, 0
; CHECK-NEXT: ret
%z = fptosi <2 x bfloat> %x to <2 x i1>
ret <2 x i1> %z
}

define <2 x i1> @fp2ui_v2bf16_v2i1(<2 x bfloat> %x) {
; CHECK-LABEL: fp2ui_v2bf16_v2i1:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
; CHECK-NEXT: vfncvt.rtz.xu.f.w v8, v9
; CHECK-NEXT: vand.vi v8, v8, 1
; CHECK-NEXT: vmsne.vi v0, v8, 0
; CHECK-NEXT: ret
%z = fptoui <2 x bfloat> %x to <2 x i1>
ret <2 x i1> %z
}

define void @fp2si_v2f16_v2i64(ptr %x, ptr %y) {
; CHECK-LABEL: fp2si_v2f16_v2i64:
; CHECK: # %bb.0:
Expand Down
124 changes: 120 additions & 4 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,ZVFH32
; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,ZVFH64
; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfhmin,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMIN32
; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfhmin,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMIN64
; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+zvfbfmin,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,ZVFH32
; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+zvfbfmin,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,ZVFH64
; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfhmin,+zvfbfmin,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMIN32
; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfhmin,+zvfbfmin,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMIN64

define void @si2fp_v2i32_v2f32(ptr %x, ptr %y) {
; CHECK-LABEL: si2fp_v2i32_v2f32:
Expand Down Expand Up @@ -418,6 +418,122 @@ define <8 x double> @ui2fp_v8i1_v8f64(<8 x i1> %x) {
ret <8 x double> %z
}

define void @si2fp_v2i64_v2bf16(ptr %x, ptr %y) {
; CHECK-LABEL: si2fp_v2i64_v2bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: vfncvt.f.x.w v9, v8
; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
; CHECK-NEXT: vse16.v v8, (a1)
; CHECK-NEXT: ret
%a = load <2 x i64>, ptr %x
%d = sitofp <2 x i64> %a to <2 x bfloat>
store <2 x bfloat> %d, ptr %y
ret void
}

define void @ui2fp_v2i64_v2bf16(ptr %x, ptr %y) {
; CHECK-LABEL: ui2fp_v2i64_v2bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: vfncvt.f.xu.w v9, v8
; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
; CHECK-NEXT: vse16.v v8, (a1)
; CHECK-NEXT: ret
%a = load <2 x i64>, ptr %x
%d = uitofp <2 x i64> %a to <2 x bfloat>
store <2 x bfloat> %d, ptr %y
ret void
}

define <2 x bfloat> @si2fp_v2i1_v2bf16(<2 x i1> %x) {
; CHECK-LABEL: si2fp_v2i1_v2bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: vmerge.vim v8, v8, -1, v0
; CHECK-NEXT: vfwcvt.f.x.v v9, v8
; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
; CHECK-NEXT: ret
%z = sitofp <2 x i1> %x to <2 x bfloat>
ret <2 x bfloat> %z
}

define <2 x bfloat> @ui2fp_v2i1_v2bf16(<2 x i1> %x) {
; CHECK-LABEL: ui2fp_v2i1_v2bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: vfwcvt.f.xu.v v9, v8
; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
; CHECK-NEXT: ret
%z = uitofp <2 x i1> %x to <2 x bfloat>
ret <2 x bfloat> %z
}

define void @si2fp_v8i64_v8bf16(ptr %x, ptr %y) {
; CHECK-LABEL: si2fp_v8i64_v8bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: vfncvt.f.x.w v12, v8
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
; CHECK-NEXT: vse16.v v8, (a1)
; CHECK-NEXT: ret
%a = load <8 x i64>, ptr %x
%d = sitofp <8 x i64> %a to <8 x bfloat>
store <8 x bfloat> %d, ptr %y
ret void
}

define void @ui2fp_v8i64_v8bf16(ptr %x, ptr %y) {
; CHECK-LABEL: ui2fp_v8i64_v8bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: vfncvt.f.xu.w v12, v8
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
; CHECK-NEXT: vse16.v v8, (a1)
; CHECK-NEXT: ret
%a = load <8 x i64>, ptr %x
%d = uitofp <8 x i64> %a to <8 x bfloat>
store <8 x bfloat> %d, ptr %y
ret void
}

define <8 x bfloat> @si2fp_v8i1_v8bf16(<8 x i1> %x) {
; CHECK-LABEL: si2fp_v8i1_v8bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: vmerge.vim v8, v8, -1, v0
; CHECK-NEXT: vfwcvt.f.x.v v10, v8
; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
; CHECK-NEXT: ret
%z = sitofp <8 x i1> %x to <8 x bfloat>
ret <8 x bfloat> %z
}

define <8 x bfloat> @ui2fp_v8i1_v8bf16(<8 x i1> %x) {
; CHECK-LABEL: ui2fp_v8i1_v8bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: vfwcvt.f.xu.v v10, v8
; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
; CHECK-NEXT: ret
%z = uitofp <8 x i1> %x to <8 x bfloat>
ret <8 x bfloat> %z
}

define void @si2fp_v2i64_v2f16(ptr %x, ptr %y) {
; CHECK-LABEL: si2fp_v2i64_v2f16:
; CHECK: # %bb.0:
Expand Down

0 comments on commit a3cd269

Please sign in to comment.