-
Notifications
You must be signed in to change notification settings - Fork 11.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[LLVM][SVE] Extend dup(extract_elt(v,i)) isel patterns to cover all combinations. #115189
Open
paulwalker-arm
wants to merge
2
commits into
llvm:main
Choose a base branch
from
paulwalker-arm:sve-dup-lane
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
+622
−48
Conversation
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[LLVM][SVE] Extend dup(extract_elt(v,i)) isel patterns to cover all combinations. Adds missing bfloat patterns for unpacked scalable vectors. Adds patterns for splatting extracts from fixed length vectors.
@llvm/pr-subscribers-backend-aarch64 Author: Paul Walker (paulwalker-arm) Changes: Adds missing bfloat patterns for unpacked scalable vectors. Adds patterns for splatting extracts from fixed length vectors. Patch is 36.75 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/115189.diff 2 Files Affected:
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 5cfcc01afd20f3..f542c7a34ad60e 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -59,6 +59,57 @@ class SVEType<ValueType VT> {
!eq(VT, nxv8f16): nxv2f16,
!eq(VT, nxv8bf16): nxv2bf16,
true : untyped);
+
+ // The 64-bit vector subreg of VT.
+ ValueType DSub = !cond(
+ !eq(VT, nxv16i8): v8i8,
+ !eq(VT, nxv8i16): v4i16,
+ !eq(VT, nxv4i32): v2i32,
+ !eq(VT, nxv2i64): v1i64,
+ !eq(VT, nxv2f16): v4f16,
+ !eq(VT, nxv4f16): v4f16,
+ !eq(VT, nxv8f16): v4f16,
+ !eq(VT, nxv2f32): v2f32,
+ !eq(VT, nxv4f32): v2f32,
+ !eq(VT, nxv2f64): v1f64,
+ !eq(VT, nxv2bf16): v4bf16,
+ !eq(VT, nxv4bf16): v4bf16,
+ !eq(VT, nxv8bf16): v4bf16,
+ true : untyped);
+
+ // The 128-bit vector subreg of VT.
+ ValueType ZSub = !cond(
+ !eq(VT, nxv16i8): v16i8,
+ !eq(VT, nxv8i16): v8i16,
+ !eq(VT, nxv4i32): v4i32,
+ !eq(VT, nxv2i64): v2i64,
+ !eq(VT, nxv2f16): v8f16,
+ !eq(VT, nxv4f16): v8f16,
+ !eq(VT, nxv8f16): v8f16,
+ !eq(VT, nxv2f32): v4f32,
+ !eq(VT, nxv4f32): v4f32,
+ !eq(VT, nxv2f64): v2f64,
+ !eq(VT, nxv2bf16): v8bf16,
+ !eq(VT, nxv4bf16): v8bf16,
+ !eq(VT, nxv8bf16): v8bf16,
+ true : untyped);
+
+ // The legal scalar used to hold a vector element.
+ ValueType EltAsScalar = !cond(
+ !eq(VT, nxv16i8): i32,
+ !eq(VT, nxv8i16): i32,
+ !eq(VT, nxv4i32): i32,
+ !eq(VT, nxv2i64): i64,
+ !eq(VT, nxv2f16): f16,
+ !eq(VT, nxv4f16): f16,
+ !eq(VT, nxv8f16): f16,
+ !eq(VT, nxv2f32): f32,
+ !eq(VT, nxv4f32): f32,
+ !eq(VT, nxv2f64): f64,
+ !eq(VT, nxv2bf16): bf16,
+ !eq(VT, nxv4bf16): bf16,
+ !eq(VT, nxv8bf16): bf16,
+ true : untyped);
}
def SDT_AArch64Setcc : SDTypeProfile<1, 4, [
@@ -1402,29 +1453,67 @@ multiclass sve_int_perm_dup_i<string asm> {
def : InstAlias<"mov $Zd, $Qn",
(!cast<Instruction>(NAME # _Q) ZPR128:$Zd, FPR128asZPR:$Qn, 0), 2>;
- // Duplicate extracted element of vector into all vector elements
+ // Duplicate an extracted vector element across a vector.
+
def : Pat<(nxv16i8 (splat_vector (i32 (vector_extract (nxv16i8 ZPR:$vec), sve_elm_idx_extdup_b:$index)))),
(!cast<Instruction>(NAME # _B) ZPR:$vec, sve_elm_idx_extdup_b:$index)>;
- def : Pat<(nxv8i16 (splat_vector (i32 (vector_extract (nxv8i16 ZPR:$vec), sve_elm_idx_extdup_h:$index)))),
- (!cast<Instruction>(NAME # _H) ZPR:$vec, sve_elm_idx_extdup_h:$index)>;
- def : Pat<(nxv4i32 (splat_vector (i32 (vector_extract (nxv4i32 ZPR:$vec), sve_elm_idx_extdup_s:$index)))),
- (!cast<Instruction>(NAME # _S) ZPR:$vec, sve_elm_idx_extdup_s:$index)>;
- def : Pat<(nxv2i64 (splat_vector (i64 (vector_extract (nxv2i64 ZPR:$vec), sve_elm_idx_extdup_d:$index)))),
- (!cast<Instruction>(NAME # _D) ZPR:$vec, sve_elm_idx_extdup_d:$index)>;
- def : Pat<(nxv8f16 (splat_vector (f16 (vector_extract (nxv8f16 ZPR:$vec), sve_elm_idx_extdup_h:$index)))),
- (!cast<Instruction>(NAME # _H) ZPR:$vec, sve_elm_idx_extdup_h:$index)>;
- def : Pat<(nxv8bf16 (splat_vector (bf16 (vector_extract (nxv8bf16 ZPR:$vec), sve_elm_idx_extdup_h:$index)))),
- (!cast<Instruction>(NAME # _H) ZPR:$vec, sve_elm_idx_extdup_h:$index)>;
- def : Pat<(nxv4f16 (splat_vector (f16 (vector_extract (nxv4f16 ZPR:$vec), sve_elm_idx_extdup_s:$index)))),
- (!cast<Instruction>(NAME # _S) ZPR:$vec, sve_elm_idx_extdup_s:$index)>;
- def : Pat<(nxv2f16 (splat_vector (f16 (vector_extract (nxv2f16 ZPR:$vec), sve_elm_idx_extdup_d:$index)))),
- (!cast<Instruction>(NAME # _D) ZPR:$vec, sve_elm_idx_extdup_d:$index)>;
- def : Pat<(nxv4f32 (splat_vector (f32 (vector_extract (nxv4f32 ZPR:$vec), sve_elm_idx_extdup_s:$index)))),
- (!cast<Instruction>(NAME # _S) ZPR:$vec, sve_elm_idx_extdup_s:$index)>;
- def : Pat<(nxv2f32 (splat_vector (f32 (vector_extract (nxv2f32 ZPR:$vec), sve_elm_idx_extdup_d:$index)))),
- (!cast<Instruction>(NAME # _D) ZPR:$vec, sve_elm_idx_extdup_d:$index)>;
- def : Pat<(nxv2f64 (splat_vector (f64 (vector_extract (nxv2f64 ZPR:$vec), sve_elm_idx_extdup_d:$index)))),
+ def : Pat<(nxv16i8 (splat_vector (i32 (vector_extract (v16i8 V128:$vec), sve_elm_idx_extdup_b:$index)))),
+ (!cast<Instruction>(NAME # _B) (SUBREG_TO_REG (i64 0), $vec, zsub), sve_elm_idx_extdup_b:$index)>;
+ def : Pat<(nxv16i8 (splat_vector (i32 (vector_extract (v8i8 V64:$vec), sve_elm_idx_extdup_b:$index)))),
+ (!cast<Instruction>(NAME # _B) (SUBREG_TO_REG (i64 0), $vec, dsub), sve_elm_idx_extdup_b:$index)>;
+
+ foreach VT = [nxv8i16, nxv2f16, nxv4f16, nxv8f16, nxv2bf16, nxv4bf16, nxv8bf16] in {
+ def : Pat<(VT (splat_vector (SVEType<VT>.EltAsScalar (vector_extract (SVEType<VT>.Packed ZPR:$vec), sve_elm_idx_extdup_h:$index)))),
+ (!cast<Instruction>(NAME # _H) ZPR:$vec, sve_elm_idx_extdup_h:$index)>;
+ def : Pat<(VT (splat_vector (SVEType<VT>.EltAsScalar (vector_extract (SVEType<VT>.ZSub V128:$vec), sve_elm_idx_extdup_h:$index)))),
+ (!cast<Instruction>(NAME # _H) (SUBREG_TO_REG (i64 0), $vec, zsub), sve_elm_idx_extdup_h:$index)>;
+ def : Pat<(VT (splat_vector (SVEType<VT>.EltAsScalar (vector_extract (SVEType<VT>.DSub V64:$vec), sve_elm_idx_extdup_h:$index)))),
+ (!cast<Instruction>(NAME # _H) (SUBREG_TO_REG (i64 0), $vec, dsub), sve_elm_idx_extdup_h:$index)>;
+ }
+
+ foreach VT = [nxv4i32, nxv2f32, nxv4f32 ] in {
+ def : Pat<(VT (splat_vector (SVEType<VT>.EltAsScalar (vector_extract (SVEType<VT>.Packed ZPR:$vec), sve_elm_idx_extdup_s:$index)))),
+ (!cast<Instruction>(NAME # _S) ZPR:$vec, sve_elm_idx_extdup_s:$index)>;
+ def : Pat<(VT (splat_vector (SVEType<VT>.EltAsScalar (vector_extract (SVEType<VT>.ZSub V128:$vec), sve_elm_idx_extdup_s:$index)))),
+ (!cast<Instruction>(NAME # _S) (SUBREG_TO_REG (i64 0), $vec, zsub), sve_elm_idx_extdup_s:$index)>;
+ def : Pat<(VT (splat_vector (SVEType<VT>.EltAsScalar (vector_extract (SVEType<VT>.DSub V64:$vec), sve_elm_idx_extdup_s:$index)))),
+ (!cast<Instruction>(NAME # _S) (SUBREG_TO_REG (i64 0), $vec, dsub), sve_elm_idx_extdup_s:$index)>;
+ }
+
+ foreach VT = [nxv2i64, nxv2f64] in {
+ def : Pat<(VT (splat_vector (SVEType<VT>.EltAsScalar (vector_extract (VT ZPR:$vec), sve_elm_idx_extdup_d:$index)))),
+ (!cast<Instruction>(NAME # _D) ZPR:$vec, sve_elm_idx_extdup_d:$index)>;
+ def : Pat<(VT (splat_vector (SVEType<VT>.EltAsScalar (vector_extract (SVEType<VT>.ZSub V128:$vec), sve_elm_idx_extdup_d:$index)))),
+ (!cast<Instruction>(NAME # _D) (SUBREG_TO_REG (i64 0), $vec, zsub), sve_elm_idx_extdup_d:$index)>;
+ def : Pat<(VT (splat_vector (SVEType<VT>.EltAsScalar (vector_extract (SVEType<VT>.DSub V64:$vec), sve_elm_idx_extdup_d:$index)))),
+ (!cast<Instruction>(NAME # _D) (SUBREG_TO_REG (i64 0), $vec, dsub), sve_elm_idx_extdup_d:$index)>;
+ }
+
+ // When extracting from an unpacked vector the index must be scaled to account
+ // for the "holes" in the underlying packed vector type. We get the scaling
+ // for free by "promoting" the element type to one whose underlying vector type
+ // is packed.
+
+ foreach VT = [nxv2f16, nxv4f16, nxv8f16] in {
+ def : Pat<(VT (splat_vector (f16 (vector_extract (nxv4f16 ZPR:$vec), sve_elm_idx_extdup_s:$index)))),
+ (!cast<Instruction>(NAME # _S) ZPR:$vec, sve_elm_idx_extdup_s:$index)>;
+ def : Pat<(VT (splat_vector (f16 (vector_extract (nxv2f16 ZPR:$vec), sve_elm_idx_extdup_d:$index)))),
+ (!cast<Instruction>(NAME # _D) ZPR:$vec, sve_elm_idx_extdup_d:$index)>;
+ }
+
+ foreach VT = [nxv2bf16, nxv4bf16, nxv8bf16] in {
+ def : Pat<(VT (splat_vector (bf16 (vector_extract (nxv4bf16 ZPR:$vec), sve_elm_idx_extdup_s:$index)))),
+ (!cast<Instruction>(NAME # _S) ZPR:$vec, sve_elm_idx_extdup_s:$index)>;
+ def : Pat<(VT (splat_vector (bf16 (vector_extract (nxv2bf16 ZPR:$vec), sve_elm_idx_extdup_d:$index)))),
+ (!cast<Instruction>(NAME # _D) ZPR:$vec, sve_elm_idx_extdup_d:$index)>;
+ }
+
+ foreach VT = [nxv2f32, nxv4f32] in {
+ def : Pat<(VT (splat_vector (f32 (vector_extract (nxv2f32 ZPR:$vec), sve_elm_idx_extdup_d:$index)))),
(!cast<Instruction>(NAME # _D) ZPR:$vec, sve_elm_idx_extdup_d:$index)>;
+ }
+
+ // Duplicate an indexed 128-bit segment across a vector.
def : Pat<(nxv16i8 (AArch64duplane128 nxv16i8:$Op1, i64:$imm)),
(!cast<Instruction>(NAME # _Q) $Op1, $imm)>;
diff --git a/llvm/test/CodeGen/AArch64/aarch64-dup-extract-scalable.ll b/llvm/test/CodeGen/AArch64/aarch64-dup-extract-scalable.ll
index 8c9661730f1f94..0cf8aec52fe258 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-dup-extract-scalable.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-dup-extract-scalable.ll
@@ -1,8 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple aarch64-none-linux-gnu -mattr=+sve | FileCheck %s
-define <vscale x 16 x i8> @dup_extract_i8(<vscale x 16 x i8> %data) {
-; CHECK-LABEL: dup_extract_i8:
+define <vscale x 16 x i8> @dup_extract_nxv16i8_nxv16i8(<vscale x 16 x i8> %data) {
+; CHECK-LABEL: dup_extract_nxv16i8_nxv16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.b, z0.b[1]
; CHECK-NEXT: ret
@@ -12,8 +12,32 @@ define <vscale x 16 x i8> @dup_extract_i8(<vscale x 16 x i8> %data) {
ret <vscale x 16 x i8> %.splat
}
-define <vscale x 8 x i16> @dup_extract_i16(<vscale x 8 x i16> %data) {
-; CHECK-LABEL: dup_extract_i16:
+define <vscale x 16 x i8> @dup_extract_nxv16i8_v16i8(<16 x i8> %data) {
+; CHECK-LABEL: dup_extract_nxv16i8_v16i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT: mov z0.b, z0.b[1]
+; CHECK-NEXT: ret
+ %1 = extractelement <16 x i8> %data, i8 1
+ %.splatinsert = insertelement <vscale x 16 x i8> poison, i8 %1, i32 0
+ %.splat = shufflevector <vscale x 16 x i8> %.splatinsert, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+ ret <vscale x 16 x i8> %.splat
+}
+
+define <vscale x 16 x i8> @dup_extract_nxv16i8_v8i8(<8 x i8> %data) {
+; CHECK-LABEL: dup_extract_nxv16i8_v8i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT: mov z0.b, z0.b[1]
+; CHECK-NEXT: ret
+ %1 = extractelement <8 x i8> %data, i8 1
+ %.splatinsert = insertelement <vscale x 16 x i8> poison, i8 %1, i32 0
+ %.splat = shufflevector <vscale x 16 x i8> %.splatinsert, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+ ret <vscale x 16 x i8> %.splat
+}
+
+define <vscale x 8 x i16> @dup_extract_nxv8i16_nxv8i16(<vscale x 8 x i16> %data) {
+; CHECK-LABEL: dup_extract_nxv8i16_nxv8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.h, z0.h[1]
; CHECK-NEXT: ret
@@ -23,8 +47,32 @@ define <vscale x 8 x i16> @dup_extract_i16(<vscale x 8 x i16> %data) {
ret <vscale x 8 x i16> %.splat
}
-define <vscale x 4 x i32> @dup_extract_i32(<vscale x 4 x i32> %data) {
-; CHECK-LABEL: dup_extract_i32:
+define <vscale x 8 x i16> @dup_extract_nxv8i16_v8i16(<8 x i16> %data) {
+; CHECK-LABEL: dup_extract_nxv8i16_v8i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT: mov z0.h, z0.h[1]
+; CHECK-NEXT: ret
+ %1 = extractelement <8 x i16> %data, i16 1
+ %.splatinsert = insertelement <vscale x 8 x i16> poison, i16 %1, i32 0
+ %.splat = shufflevector <vscale x 8 x i16> %.splatinsert, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+ ret <vscale x 8 x i16> %.splat
+}
+
+define <vscale x 8 x i16> @dup_extract_nxv8i16_v4i16(<4 x i16> %data) {
+; CHECK-LABEL: dup_extract_nxv8i16_v4i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT: mov z0.h, z0.h[1]
+; CHECK-NEXT: ret
+ %1 = extractelement <4 x i16> %data, i16 1
+ %.splatinsert = insertelement <vscale x 8 x i16> poison, i16 %1, i32 0
+ %.splat = shufflevector <vscale x 8 x i16> %.splatinsert, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+ ret <vscale x 8 x i16> %.splat
+}
+
+define <vscale x 4 x i32> @dup_extract_nxv4i32_nxv4i32(<vscale x 4 x i32> %data) {
+; CHECK-LABEL: dup_extract_nxv4i32_nxv4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.s, z0.s[1]
; CHECK-NEXT: ret
@@ -34,8 +82,32 @@ define <vscale x 4 x i32> @dup_extract_i32(<vscale x 4 x i32> %data) {
ret <vscale x 4 x i32> %.splat
}
-define <vscale x 2 x i64> @dup_extract_i64(<vscale x 2 x i64> %data) {
-; CHECK-LABEL: dup_extract_i64:
+define <vscale x 4 x i32> @dup_extract_nxv4i32_v4i32(<4 x i32> %data) {
+; CHECK-LABEL: dup_extract_nxv4i32_v4i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT: mov z0.s, z0.s[1]
+; CHECK-NEXT: ret
+ %1 = extractelement <4 x i32> %data, i32 1
+ %.splatinsert = insertelement <vscale x 4 x i32> poison, i32 %1, i32 0
+ %.splat = shufflevector <vscale x 4 x i32> %.splatinsert, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+ ret <vscale x 4 x i32> %.splat
+}
+
+define <vscale x 4 x i32> @dup_extract_nxv4i32_v2i32(<2 x i32> %data) {
+; CHECK-LABEL: dup_extract_nxv4i32_v2i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT: mov z0.s, z0.s[1]
+; CHECK-NEXT: ret
+ %1 = extractelement <2 x i32> %data, i32 1
+ %.splatinsert = insertelement <vscale x 4 x i32> poison, i32 %1, i32 0
+ %.splat = shufflevector <vscale x 4 x i32> %.splatinsert, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+ ret <vscale x 4 x i32> %.splat
+}
+
+define <vscale x 2 x i64> @dup_extract_nxv2i64_nxv2i64(<vscale x 2 x i64> %data) {
+; CHECK-LABEL: dup_extract_nxv2i64_nxv2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.d, z0.d[1]
; CHECK-NEXT: ret
@@ -45,8 +117,31 @@ define <vscale x 2 x i64> @dup_extract_i64(<vscale x 2 x i64> %data) {
ret <vscale x 2 x i64> %.splat
}
-define <vscale x 8 x half> @dup_extract_f16(<vscale x 8 x half> %data) {
-; CHECK-LABEL: dup_extract_f16:
+define <vscale x 2 x i64> @dup_extract_nxv2i64_v2i64(<2 x i64> %data) {
+; CHECK-LABEL: dup_extract_nxv2i64_v2i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT: mov z0.d, z0.d[1]
+; CHECK-NEXT: ret
+ %1 = extractelement <2 x i64> %data, i64 1
+ %.splatinsert = insertelement <vscale x 2 x i64> poison, i64 %1, i32 0
+ %.splat = shufflevector <vscale x 2 x i64> %.splatinsert, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+ ret <vscale x 2 x i64> %.splat
+}
+
+define <vscale x 2 x i64> @dup_extract_nxv2i64_v1i64(<1 x i64> %data) {
+; CHECK-LABEL: dup_extract_nxv2i64_v1i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z0.d, x8
+; CHECK-NEXT: ret
+ %1 = extractelement <1 x i64> %data, i64 1
+ %.splatinsert = insertelement <vscale x 2 x i64> poison, i64 %1, i32 0
+ %.splat = shufflevector <vscale x 2 x i64> %.splatinsert, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+ ret <vscale x 2 x i64> %.splat
+}
+
+define <vscale x 8 x half> @dup_extract_nxv8f16_nxv8f16(<vscale x 8 x half> %data) {
+; CHECK-LABEL: dup_extract_nxv8f16_nxv8f16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.h, z0.h[1]
; CHECK-NEXT: ret
@@ -56,19 +151,133 @@ define <vscale x 8 x half> @dup_extract_f16(<vscale x 8 x half> %data) {
ret <vscale x 8 x half> %.splat
}
-define <vscale x 4 x half> @dup_extract_f16_4(<vscale x 4 x half> %data) {
-; CHECK-LABEL: dup_extract_f16_4:
+define <vscale x 8 x half> @dup_extract_nxv8f16_nxv4f16(<vscale x 4 x half> %data) {
+; CHECK-LABEL: dup_extract_nxv8f16_nxv4f16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.s, z0.s[1]
; CHECK-NEXT: ret
%1 = extractelement <vscale x 4 x half> %data, i16 1
+ %.splatinsert = insertelement <vscale x 8 x half> poison, half %1, i32 0
+ %.splat = shufflevector <vscale x 8 x half> %.splatinsert, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
+ ret <vscale x 8 x half> %.splat
+}
+
+define <vscale x 8 x half> @dup_extract_nxv8f16_nxv2f16(<vscale x 2 x half> %data) {
+; CHECK-LABEL: dup_extract_nxv8f16_nxv2f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z0.d, z0.d[1]
+; CHECK-NEXT: ret
+ %1 = extractelement <vscale x 2 x half> %data, i16 1
+ %.splatinsert = insertelement <vscale x 8 x half> poison, half %1, i32 0
+ %.splat = shufflevector <vscale x 8 x half> %.splatinsert, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
+ ret <vscale x 8 x half> %.splat
+}
+
+define <vscale x 8 x half> @dup_extract_nxv8f16_v8f16(<8 x half> %data) {
+; CHECK-LABEL: dup_extract_nxv8f16_v8f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT: mov z0.h, z0.h[1]
+; CHECK-NEXT: ret
+ %1 = extractelement <8 x half> %data, i16 1
+ %.splatinsert = insertelement <vscale x 8 x half> poison, half %1, i32 0
+ %.splat = shufflevector <vscale x 8 x half> %.splatinsert, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
+ ret <vscale x 8 x half> %.splat
+}
+
+define <vscale x 8 x half> @dup_extract_nxv8f16_v4f16(<4 x half> %data) {
+; CHECK-LABEL: dup_extract_nxv8f16_v4f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT: mov z0.h, z0.h[1]
+; CHECK-NEXT: ret
+ %1 = extractelement <4 x half> %data, i16 1
+ %.splatinsert = insertelement <vscale x 8 x half> poison, half %1, i32 0
+ %.splat = shufflevector <vscale x 8 x half> %.splatinsert, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
+ ret <vscale x 8 x half> %.splat
+}
+
+define <vscale x 4 x half> @dup_extract_nxv4f16_nxv8f16(<vscale x 8 x half> %data) {
+; CHECK-LABEL: dup_extract_nxv4f16_nxv8f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z0.h, z0.h[1]
+; CHECK-NEXT: ret
+ %1 = extractelement <vscale x 8 x half> %data, i16 1
+ %.splatinsert = insertelement <vscale x 4 x half> poison, half %1, i32 0
+ %.splat = shufflevector <vscale x 4 x half> %.splatinsert, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
+ ret <vscale x 4 x half> %.splat
+}
+
+define <vscale x 4 x half> @dup_extract_nxv4f16_nxv4f16(<vscale x 4 x half> %data) {
+; CHECK-LABEL: dup_extract_nxv4f16_nxv4f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z0.s, z0.s[1]
+; CHECK-NEXT: ret
+ %1 = extractelement <vscale x 4 x half> %data, i16 1
+ %.splatinsert = insertelement <vscale x 4 x half> poison, half %1, i32 0
+ %.splat = shufflevector <vscale x 4 x half> %.splatinsert, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
+ ret <vscale x 4 x half> %.splat
+}
+
+define <vscale x 4 x half> @dup_extract_nxv4f16_nxv2f16(<vscale x 2 x half> %data) {
+; CHECK-LABEL: dup_extract_nxv4f16_nxv2f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z0.d, z0.d[1]
+; CHECK-NEXT: ret
+ %1 = extractelement <vscale x 2 x half> %data, i16 1
+ %.splatinsert = insertelement <vscale x 4 x half> poison, half %1, i32 0
+ %.splat = shufflevector <vscale x 4 x half> %.splatinsert, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
+ ret <vscale x 4 x half> %.splat
+}
+
+define <vscale x 4 x half> @dup_extract_nxv4f16_v8f16(<8 x half> %data) {
+; CHECK-LABEL: dup_extract_nxv4f16_v8f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT: mov z0.h, z0.h[1]
+; CHECK-NEXT: ret
+ %1 = extractelement <8 x half> %data, i16 1
+ %.splatinsert = insertelement <vscale x 4 x half> poison, half %1, i32 0
+ %.splat = shufflevector <vscale x 4 x half> %.splatinsert, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
+ ret <vscale x 4 x half> %.splat
+}
+
+define <vscale x 4 x half> @dup_extract_nxv4f16_v4f16(<4 x half> %data) {
+; CHECK-LABEL: dup_extract_nxv4f16_v4f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT: mov z0.h, z0.h[1]
+; CHECK-NEXT: ret
+ %1 = extractelement <4 x half> %data, i16 1
%.splatinsert = insertelement <vscale x 4 x half> poison, half %1, i32 0
%.splat = shufflevector <vscale x 4 x half> %.splatinsert, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
ret <vscale x 4 x half> %.splat
}
-define <vscale x 2 x half> @dup...
[truncated]
|
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Adds missing bfloat patterns for unpacked scalable vectors.
Adds patterns for splatting extracts from fixed length vectors.