[RISCV] Lower fixed-length {insert,extract}_vector_elt on zvfhmin/zvfbfmin (#114927)

RISCVTargetLowering::lower{INSERT,EXTRACT}_VECTOR_ELT already handles
f16 and bf16 scalable vectors after #110221, so we can reuse it for
fixed-length vectors.
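
A hedged sketch of what this enables (the function below is hypothetical, not taken from the patch): a fixed-length extractelement on an f16/bf16 vector now goes through the same custom lowering as the scalable-vector case instead of the generic stack expansion, so on +zvfbfmin it should lower to a vslidedown plus a move of the raw element bits through a GPR (vmv.x.s followed by fmv.h.x), matching the updated test checks below.

define bfloat @sketch_extract_v8bf16(<8 x bfloat> %v) {
  %e = extractelement <8 x bfloat> %v, i32 3
  ret bfloat %e
}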
lukel97 authored Nov 5, 2024
1 parent a8f8089 commit aea6b25
Showing 6 changed files with 1,758 additions and 971 deletions.
9 changes: 4 additions & 5 deletions llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1333,7 +1333,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
         // expansion to a build_vector of 0s.
         setOperationAction(ISD::UNDEF, VT, Custom);
 
-        setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
+        setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT,
+                            ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
                             ISD::EXTRACT_SUBVECTOR, ISD::VECTOR_REVERSE,
                             ISD::VECTOR_SHUFFLE, ISD::VECTOR_COMPRESS},
                            VT, Custom);
@@ -1404,10 +1405,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
           continue;
         }
 
-        setOperationAction({ISD::BUILD_VECTOR,
-                            ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT,
-                            ISD::SCALAR_TO_VECTOR},
-                           VT, Custom);
+        setOperationAction({ISD::BUILD_VECTOR, ISD::SCALAR_TO_VECTOR}, VT,
+                           Custom);
 
         setOperationAction(
             {ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER}, VT, Custom);
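
A hedged companion sketch (hypothetical IR, not part of the diff): with INSERT_VECTOR_ELT/EXTRACT_VECTOR_ELT moved into the common Custom block above, a fixed-length insertelement under +zvfhmin is also custom-lowered rather than expanded, e.g. by moving the scalar bits into a GPR (fmv.x.h) and sliding them into place in the vector.

define <8 x half> @sketch_insert_v8f16(<8 x half> %v, half %s) {
  %r = insertelement <8 x half> %v, half %s, i32 5
  ret <8 x half> %r
}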
167 changes: 143 additions & 24 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll
@@ -1,8 +1,11 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32NOM
-; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zvfh,+f,+d,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32M
-; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64NOM
-; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zvfh,+f,+d,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64M
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zvfh,+zfbfmin,+zvfbfmin,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,RV32,RV32NOM
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zvfh,+zfbfmin,+zvfbfmin,+f,+d,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,RV32,RV32M
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zvfh,+zfbfmin,+zvfbfmin,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,RV64,RV64NOM
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zvfh,+zfbfmin,+zvfbfmin,+f,+d,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,RV64,RV64M
+
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+f,+d,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,RV32,RV32M
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+f,+d,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,RV64,RV64M
 
 define i8 @extractelt_v16i8(ptr %x) nounwind {
 ; CHECK-LABEL: extractelt_v16i8:
@@ -66,14 +69,37 @@ define i64 @extractelt_v2i64(ptr %x) nounwind {
   ret i64 %b
 }
 
-define half @extractelt_v8f16(ptr %x) nounwind {
-; CHECK-LABEL: extractelt_v8f16:
+define bfloat @extractelt_v8bf16(ptr %x) nounwind {
+; CHECK-LABEL: extractelt_v8bf16:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
 ; CHECK-NEXT: vle16.v v8, (a0)
 ; CHECK-NEXT: vslidedown.vi v8, v8, 7
-; CHECK-NEXT: vfmv.f.s fa0, v8
+; CHECK-NEXT: vmv.x.s a0, v8
+; CHECK-NEXT: fmv.h.x fa0, a0
 ; CHECK-NEXT: ret
+  %a = load <8 x bfloat>, ptr %x
+  %b = extractelement <8 x bfloat> %a, i32 7
+  ret bfloat %b
+}
+
+define half @extractelt_v8f16(ptr %x) nounwind {
+; ZVFH-LABEL: extractelt_v8f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFH-NEXT: vle16.v v8, (a0)
+; ZVFH-NEXT: vslidedown.vi v8, v8, 7
+; ZVFH-NEXT: vfmv.f.s fa0, v8
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: extractelt_v8f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-NEXT: vslidedown.vi v8, v8, 7
+; ZVFHMIN-NEXT: vmv.x.s a0, v8
+; ZVFHMIN-NEXT: fmv.h.x fa0, a0
+; ZVFHMIN-NEXT: ret
   %a = load <8 x half>, ptr %x
   %b = extractelement <8 x half> %a, i32 7
   ret half %b
@@ -171,15 +197,40 @@ define i64 @extractelt_v4i64(ptr %x) nounwind {
   ret i64 %b
 }
 
-define half @extractelt_v16f16(ptr %x) nounwind {
-; CHECK-LABEL: extractelt_v16f16:
+define bfloat @extractelt_v16bf16(ptr %x) nounwind {
+; CHECK-LABEL: extractelt_v16bf16:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
 ; CHECK-NEXT: vle16.v v8, (a0)
 ; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
 ; CHECK-NEXT: vslidedown.vi v8, v8, 7
-; CHECK-NEXT: vfmv.f.s fa0, v8
+; CHECK-NEXT: vmv.x.s a0, v8
+; CHECK-NEXT: fmv.h.x fa0, a0
 ; CHECK-NEXT: ret
+  %a = load <16 x bfloat>, ptr %x
+  %b = extractelement <16 x bfloat> %a, i32 7
+  ret bfloat %b
+}
+
+define half @extractelt_v16f16(ptr %x) nounwind {
+; ZVFH-LABEL: extractelt_v16f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVFH-NEXT: vle16.v v8, (a0)
+; ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; ZVFH-NEXT: vslidedown.vi v8, v8, 7
+; ZVFH-NEXT: vfmv.f.s fa0, v8
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: extractelt_v16f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vslidedown.vi v8, v8, 7
+; ZVFHMIN-NEXT: vmv.x.s a0, v8
+; ZVFHMIN-NEXT: fmv.h.x fa0, a0
+; ZVFHMIN-NEXT: ret
   %a = load <16 x half>, ptr %x
   %b = extractelement <16 x half> %a, i32 7
   ret half %b
@@ -398,15 +449,49 @@ define i64 @extractelt_v2i64_idx(ptr %x, i32 zeroext %idx) nounwind {
   ret i64 %c
 }
 
-define half @extractelt_v8f16_idx(ptr %x, i32 zeroext %idx) nounwind {
-; CHECK-LABEL: extractelt_v8f16_idx:
+define bfloat @extractelt_v8bf16_idx(ptr %x, i32 zeroext %idx) nounwind {
+; CHECK-LABEL: extractelt_v8bf16_idx:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
 ; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vfadd.vv v8, v8, v8
-; CHECK-NEXT: vslidedown.vx v8, v8, a1
-; CHECK-NEXT: vfmv.f.s fa0, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vfadd.vv v8, v10, v10
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
+; CHECK-NEXT: vslidedown.vx v8, v10, a1
+; CHECK-NEXT: vmv.x.s a0, v8
+; CHECK-NEXT: fmv.h.x fa0, a0
 ; CHECK-NEXT: ret
+  %a = load <8 x bfloat>, ptr %x
+  %b = fadd <8 x bfloat> %a, %a
+  %c = extractelement <8 x bfloat> %b, i32 %idx
+  ret bfloat %c
+}
+
+define half @extractelt_v8f16_idx(ptr %x, i32 zeroext %idx) nounwind {
+; ZVFH-LABEL: extractelt_v8f16_idx:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFH-NEXT: vle16.v v8, (a0)
+; ZVFH-NEXT: vfadd.vv v8, v8, v8
+; ZVFH-NEXT: vslidedown.vx v8, v8, a1
+; ZVFH-NEXT: vfmv.f.s fa0, v8
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: extractelt_v8f16_idx:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfadd.vv v8, v10, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
+; ZVFHMIN-NEXT: vslidedown.vx v8, v10, a1
+; ZVFHMIN-NEXT: vmv.x.s a0, v8
+; ZVFHMIN-NEXT: fmv.h.x fa0, a0
+; ZVFHMIN-NEXT: ret
   %a = load <8 x half>, ptr %x
   %b = fadd <8 x half> %a, %a
   %c = extractelement <8 x half> %b, i32 %idx
@@ -513,15 +598,49 @@ define i64 @extractelt_v4i64_idx(ptr %x, i32 zeroext %idx) nounwind {
   ret i64 %c
 }
 
-define half @extractelt_v16f16_idx(ptr %x, i32 zeroext %idx) nounwind {
-; CHECK-LABEL: extractelt_v16f16_idx:
+define bfloat @extractelt_v16bf16_idx(ptr %x, i32 zeroext %idx) nounwind {
+; CHECK-LABEL: extractelt_v16bf16_idx:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
 ; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vfadd.vv v8, v8, v8
-; CHECK-NEXT: vslidedown.vx v8, v8, a1
-; CHECK-NEXT: vfmv.f.s fa0, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vfadd.vv v8, v12, v12
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v12, v8
+; CHECK-NEXT: vslidedown.vx v8, v12, a1
+; CHECK-NEXT: vmv.x.s a0, v8
+; CHECK-NEXT: fmv.h.x fa0, a0
 ; CHECK-NEXT: ret
+  %a = load <16 x bfloat>, ptr %x
+  %b = fadd <16 x bfloat> %a, %a
+  %c = extractelement <16 x bfloat> %b, i32 %idx
+  ret bfloat %c
+}
+
+define half @extractelt_v16f16_idx(ptr %x, i32 zeroext %idx) nounwind {
+; ZVFH-LABEL: extractelt_v16f16_idx:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVFH-NEXT: vle16.v v8, (a0)
+; ZVFH-NEXT: vfadd.vv v8, v8, v8
+; ZVFH-NEXT: vslidedown.vx v8, v8, a1
+; ZVFH-NEXT: vfmv.f.s fa0, v8
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: extractelt_v16f16_idx:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFHMIN-NEXT: vfadd.vv v8, v12, v12
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v8
+; ZVFHMIN-NEXT: vslidedown.vx v8, v12, a1
+; ZVFHMIN-NEXT: vmv.x.s a0, v8
+; ZVFHMIN-NEXT: fmv.h.x fa0, a0
+; ZVFHMIN-NEXT: ret
   %a = load <16 x half>, ptr %x
   %b = fadd <16 x half> %a, %a
   %c = extractelement <16 x half> %b, i32 %idx
@@ -939,8 +1058,8 @@ define i32 @extractelt_mul_v4i32(<4 x i32> %x) {
 define i32 @extractelt_sdiv_v4i32(<4 x i32> %x) {
 ; RV32NOM-LABEL: extractelt_sdiv_v4i32:
 ; RV32NOM: # %bb.0:
-; RV32NOM-NEXT: lui a0, %hi(.LCPI42_0)
-; RV32NOM-NEXT: addi a0, a0, %lo(.LCPI42_0)
+; RV32NOM-NEXT: lui a0, %hi(.LCPI46_0)
+; RV32NOM-NEXT: addi a0, a0, %lo(.LCPI46_0)
 ; RV32NOM-NEXT: vsetivli zero, 4, e32, m1, ta, ma
 ; RV32NOM-NEXT: vle32.v v9, (a0)
 ; RV32NOM-NEXT: vmulh.vv v9, v8, v9
@@ -975,8 +1094,8 @@ define i32 @extractelt_sdiv_v4i32(<4 x i32> %x) {
 ;
 ; RV64NOM-LABEL: extractelt_sdiv_v4i32:
 ; RV64NOM: # %bb.0:
-; RV64NOM-NEXT: lui a0, %hi(.LCPI42_0)
-; RV64NOM-NEXT: addi a0, a0, %lo(.LCPI42_0)
+; RV64NOM-NEXT: lui a0, %hi(.LCPI46_0)
+; RV64NOM-NEXT: addi a0, a0, %lo(.LCPI46_0)
 ; RV64NOM-NEXT: vsetivli zero, 4, e32, m1, ta, ma
 ; RV64NOM-NEXT: vle32.v v9, (a0)
 ; RV64NOM-NEXT: vmulh.vv v9, v8, v9
48 changes: 4 additions & 44 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec-bf16.ll
@@ -21,58 +21,18 @@ define <4 x bfloat> @splat_idx_v4bf16(<4 x bfloat> %v, i64 %idx) {
 ;
 ; RV32-ZFBFMIN-LABEL: splat_idx_v4bf16:
 ; RV32-ZFBFMIN: # %bb.0:
-; RV32-ZFBFMIN-NEXT: addi sp, sp, -48
-; RV32-ZFBFMIN-NEXT: .cfi_def_cfa_offset 48
-; RV32-ZFBFMIN-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
-; RV32-ZFBFMIN-NEXT: .cfi_offset ra, -4
-; RV32-ZFBFMIN-NEXT: csrr a1, vlenb
-; RV32-ZFBFMIN-NEXT: sub sp, sp, a1
-; RV32-ZFBFMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 1 * vlenb
-; RV32-ZFBFMIN-NEXT: addi a1, sp, 32
-; RV32-ZFBFMIN-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill
-; RV32-ZFBFMIN-NEXT: andi a0, a0, 3
-; RV32-ZFBFMIN-NEXT: li a1, 2
-; RV32-ZFBFMIN-NEXT: call __mulsi3
-; RV32-ZFBFMIN-NEXT: addi a1, sp, 16
-; RV32-ZFBFMIN-NEXT: add a0, a1, a0
-; RV32-ZFBFMIN-NEXT: addi a2, sp, 32
-; RV32-ZFBFMIN-NEXT: vl1r.v v8, (a2) # Unknown-size Folded Reload
 ; RV32-ZFBFMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; RV32-ZFBFMIN-NEXT: vse16.v v8, (a1)
-; RV32-ZFBFMIN-NEXT: lh a0, 0(a0)
+; RV32-ZFBFMIN-NEXT: vslidedown.vx v8, v8, a0
+; RV32-ZFBFMIN-NEXT: vmv.x.s a0, v8
 ; RV32-ZFBFMIN-NEXT: vmv.v.x v8, a0
-; RV32-ZFBFMIN-NEXT: csrr a0, vlenb
-; RV32-ZFBFMIN-NEXT: add sp, sp, a0
-; RV32-ZFBFMIN-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
-; RV32-ZFBFMIN-NEXT: addi sp, sp, 48
 ; RV32-ZFBFMIN-NEXT: ret
 ;
 ; RV64-ZFBFMIN-LABEL: splat_idx_v4bf16:
 ; RV64-ZFBFMIN: # %bb.0:
-; RV64-ZFBFMIN-NEXT: addi sp, sp, -48
-; RV64-ZFBFMIN-NEXT: .cfi_def_cfa_offset 48
-; RV64-ZFBFMIN-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64-ZFBFMIN-NEXT: .cfi_offset ra, -8
-; RV64-ZFBFMIN-NEXT: csrr a1, vlenb
-; RV64-ZFBFMIN-NEXT: sub sp, sp, a1
-; RV64-ZFBFMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 1 * vlenb
-; RV64-ZFBFMIN-NEXT: addi a1, sp, 32
-; RV64-ZFBFMIN-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill
-; RV64-ZFBFMIN-NEXT: andi a0, a0, 3
-; RV64-ZFBFMIN-NEXT: li a1, 2
-; RV64-ZFBFMIN-NEXT: call __muldi3
-; RV64-ZFBFMIN-NEXT: addi a1, sp, 16
-; RV64-ZFBFMIN-NEXT: add a0, a1, a0
-; RV64-ZFBFMIN-NEXT: addi a2, sp, 32
-; RV64-ZFBFMIN-NEXT: vl1r.v v8, (a2) # Unknown-size Folded Reload
 ; RV64-ZFBFMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; RV64-ZFBFMIN-NEXT: vse16.v v8, (a1)
-; RV64-ZFBFMIN-NEXT: lh a0, 0(a0)
+; RV64-ZFBFMIN-NEXT: vslidedown.vx v8, v8, a0
+; RV64-ZFBFMIN-NEXT: vmv.x.s a0, v8
 ; RV64-ZFBFMIN-NEXT: vmv.v.x v8, a0
-; RV64-ZFBFMIN-NEXT: csrr a0, vlenb
-; RV64-ZFBFMIN-NEXT: add sp, sp, a0
-; RV64-ZFBFMIN-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64-ZFBFMIN-NEXT: addi sp, sp, 48
 ; RV64-ZFBFMIN-NEXT: ret
   %x = extractelement <4 x bfloat> %v, i64 %idx
   %ins = insertelement <4 x bfloat> poison, bfloat %x, i32 0
48 changes: 4 additions & 44 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
@@ -201,58 +201,18 @@ define <4 x half> @splat_idx_v4f16(<4 x half> %v, i64 %idx) {
 ;
 ; RV32-ZFHMIN-LABEL: splat_idx_v4f16:
 ; RV32-ZFHMIN: # %bb.0:
-; RV32-ZFHMIN-NEXT: addi sp, sp, -48
-; RV32-ZFHMIN-NEXT: .cfi_def_cfa_offset 48
-; RV32-ZFHMIN-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
-; RV32-ZFHMIN-NEXT: .cfi_offset ra, -4
-; RV32-ZFHMIN-NEXT: csrr a1, vlenb
-; RV32-ZFHMIN-NEXT: sub sp, sp, a1
-; RV32-ZFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 1 * vlenb
-; RV32-ZFHMIN-NEXT: addi a1, sp, 32
-; RV32-ZFHMIN-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill
-; RV32-ZFHMIN-NEXT: andi a0, a0, 3
-; RV32-ZFHMIN-NEXT: li a1, 2
-; RV32-ZFHMIN-NEXT: call __mulsi3
-; RV32-ZFHMIN-NEXT: addi a1, sp, 16
-; RV32-ZFHMIN-NEXT: add a0, a1, a0
-; RV32-ZFHMIN-NEXT: addi a2, sp, 32
-; RV32-ZFHMIN-NEXT: vl1r.v v8, (a2) # Unknown-size Folded Reload
 ; RV32-ZFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; RV32-ZFHMIN-NEXT: vse16.v v8, (a1)
-; RV32-ZFHMIN-NEXT: lh a0, 0(a0)
+; RV32-ZFHMIN-NEXT: vslidedown.vx v8, v8, a0
+; RV32-ZFHMIN-NEXT: vmv.x.s a0, v8
 ; RV32-ZFHMIN-NEXT: vmv.v.x v8, a0
-; RV32-ZFHMIN-NEXT: csrr a0, vlenb
-; RV32-ZFHMIN-NEXT: add sp, sp, a0
-; RV32-ZFHMIN-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
-; RV32-ZFHMIN-NEXT: addi sp, sp, 48
 ; RV32-ZFHMIN-NEXT: ret
 ;
 ; RV64-ZFHMIN-LABEL: splat_idx_v4f16:
 ; RV64-ZFHMIN: # %bb.0:
-; RV64-ZFHMIN-NEXT: addi sp, sp, -48
-; RV64-ZFHMIN-NEXT: .cfi_def_cfa_offset 48
-; RV64-ZFHMIN-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64-ZFHMIN-NEXT: .cfi_offset ra, -8
-; RV64-ZFHMIN-NEXT: csrr a1, vlenb
-; RV64-ZFHMIN-NEXT: sub sp, sp, a1
-; RV64-ZFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 1 * vlenb
-; RV64-ZFHMIN-NEXT: addi a1, sp, 32
-; RV64-ZFHMIN-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill
-; RV64-ZFHMIN-NEXT: andi a0, a0, 3
-; RV64-ZFHMIN-NEXT: li a1, 2
-; RV64-ZFHMIN-NEXT: call __muldi3
-; RV64-ZFHMIN-NEXT: addi a1, sp, 16
-; RV64-ZFHMIN-NEXT: add a0, a1, a0
-; RV64-ZFHMIN-NEXT: addi a2, sp, 32
-; RV64-ZFHMIN-NEXT: vl1r.v v8, (a2) # Unknown-size Folded Reload
 ; RV64-ZFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; RV64-ZFHMIN-NEXT: vse16.v v8, (a1)
-; RV64-ZFHMIN-NEXT: lh a0, 0(a0)
+; RV64-ZFHMIN-NEXT: vslidedown.vx v8, v8, a0
+; RV64-ZFHMIN-NEXT: vmv.x.s a0, v8
 ; RV64-ZFHMIN-NEXT: vmv.v.x v8, a0
-; RV64-ZFHMIN-NEXT: csrr a0, vlenb
-; RV64-ZFHMIN-NEXT: add sp, sp, a0
-; RV64-ZFHMIN-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64-ZFHMIN-NEXT: addi sp, sp, 48
 ; RV64-ZFHMIN-NEXT: ret
   %x = extractelement <4 x half> %v, i64 %idx
   %ins = insertelement <4 x half> poison, half %x, i32 0