Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[LLVM][SVE] Extend dup(extract_elt(v,i)) isel patterns to cover all combinations. #115189

Open
wants to merge 2 commits into
base: main
Choose a base branch
from

Conversation

paulwalker-arm
Copy link
Collaborator

Adds missing bfloat patterns for unpacked scalable vectors.
Adds patterns for splatting extracts from fixed length vectors.

[LLVM][SVE] Extend dup(extract_elt(v,i)) isel patterns to cover all combinations.

Adds missing bfloat patterns for unpacked scalable vectors.
Adds patterns for splatting extracts from fixed length vectors.
@llvmbot
Copy link
Collaborator

llvmbot commented Nov 6, 2024

@llvm/pr-subscribers-backend-aarch64

Author: Paul Walker (paulwalker-arm)

Changes

Adds missing bfloat patterns for unpacked scalable vectors.
Adds patterns for splatting extracts from fixed length vectors.


Patch is 36.75 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/115189.diff

2 Files Affected:

  • (modified) llvm/lib/Target/AArch64/SVEInstrFormats.td (+109-20)
  • (modified) llvm/test/CodeGen/AArch64/aarch64-dup-extract-scalable.ll (+513-28)
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 5cfcc01afd20f3..f542c7a34ad60e 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -59,6 +59,57 @@ class SVEType<ValueType VT> {
     !eq(VT, nxv8f16): nxv2f16,
     !eq(VT, nxv8bf16): nxv2bf16,
     true : untyped);
+
+  // The 64-bit vector subreg of VT.
+  ValueType DSub = !cond(
+    !eq(VT, nxv16i8): v8i8,
+    !eq(VT, nxv8i16): v4i16,
+    !eq(VT, nxv4i32): v2i32,
+    !eq(VT, nxv2i64): v1i64,
+    !eq(VT, nxv2f16): v4f16,
+    !eq(VT, nxv4f16): v4f16,
+    !eq(VT, nxv8f16): v4f16,
+    !eq(VT, nxv2f32): v2f32,
+    !eq(VT, nxv4f32): v2f32,
+    !eq(VT, nxv2f64): v1f64,
+    !eq(VT, nxv2bf16): v4bf16,
+    !eq(VT, nxv4bf16): v4bf16,
+    !eq(VT, nxv8bf16): v4bf16,
+    true : untyped);
+
+    // The 128-bit vector subreg of VT.
+  ValueType ZSub = !cond(
+    !eq(VT, nxv16i8): v16i8,
+    !eq(VT, nxv8i16): v8i16,
+    !eq(VT, nxv4i32): v4i32,
+    !eq(VT, nxv2i64): v2i64,
+    !eq(VT, nxv2f16): v8f16,
+    !eq(VT, nxv4f16): v8f16,
+    !eq(VT, nxv8f16): v8f16,
+    !eq(VT, nxv2f32): v4f32,
+    !eq(VT, nxv4f32): v4f32,
+    !eq(VT, nxv2f64): v2f64,
+    !eq(VT, nxv2bf16): v8bf16,
+    !eq(VT, nxv4bf16): v8bf16,
+    !eq(VT, nxv8bf16): v8bf16,
+    true : untyped);
+
+  // The legal scalar used to hold a vector element.
+  ValueType EltAsScalar = !cond(
+    !eq(VT, nxv16i8): i32,
+    !eq(VT, nxv8i16): i32,
+    !eq(VT, nxv4i32): i32,
+    !eq(VT, nxv2i64): i64,
+    !eq(VT, nxv2f16): f16,
+    !eq(VT, nxv4f16): f16,
+    !eq(VT, nxv8f16): f16,
+    !eq(VT, nxv2f32): f32,
+    !eq(VT, nxv4f32): f32,
+    !eq(VT, nxv2f64): f64,
+    !eq(VT, nxv2bf16): bf16,
+    !eq(VT, nxv4bf16): bf16,
+    !eq(VT, nxv8bf16): bf16,
+    true : untyped);
 }
 
 def SDT_AArch64Setcc : SDTypeProfile<1, 4, [
@@ -1402,29 +1453,67 @@ multiclass sve_int_perm_dup_i<string asm> {
   def : InstAlias<"mov $Zd, $Qn",
                   (!cast<Instruction>(NAME # _Q) ZPR128:$Zd, FPR128asZPR:$Qn, 0), 2>;
 
-  // Duplicate extracted element of vector into all vector elements
+  // Duplicate an extracted vector element across a vector.
+
   def : Pat<(nxv16i8 (splat_vector (i32 (vector_extract (nxv16i8 ZPR:$vec), sve_elm_idx_extdup_b:$index)))),
             (!cast<Instruction>(NAME # _B) ZPR:$vec, sve_elm_idx_extdup_b:$index)>;
-  def : Pat<(nxv8i16 (splat_vector (i32 (vector_extract (nxv8i16 ZPR:$vec), sve_elm_idx_extdup_h:$index)))),
-            (!cast<Instruction>(NAME # _H) ZPR:$vec, sve_elm_idx_extdup_h:$index)>;
-  def : Pat<(nxv4i32 (splat_vector (i32 (vector_extract (nxv4i32 ZPR:$vec), sve_elm_idx_extdup_s:$index)))),
-            (!cast<Instruction>(NAME # _S) ZPR:$vec, sve_elm_idx_extdup_s:$index)>;
-  def : Pat<(nxv2i64 (splat_vector (i64 (vector_extract (nxv2i64 ZPR:$vec), sve_elm_idx_extdup_d:$index)))),
-            (!cast<Instruction>(NAME # _D) ZPR:$vec, sve_elm_idx_extdup_d:$index)>;
-  def : Pat<(nxv8f16 (splat_vector (f16 (vector_extract (nxv8f16 ZPR:$vec), sve_elm_idx_extdup_h:$index)))),
-            (!cast<Instruction>(NAME # _H) ZPR:$vec, sve_elm_idx_extdup_h:$index)>;
-  def : Pat<(nxv8bf16 (splat_vector (bf16 (vector_extract (nxv8bf16 ZPR:$vec), sve_elm_idx_extdup_h:$index)))),
-            (!cast<Instruction>(NAME # _H) ZPR:$vec, sve_elm_idx_extdup_h:$index)>;
-  def : Pat<(nxv4f16 (splat_vector (f16 (vector_extract (nxv4f16 ZPR:$vec), sve_elm_idx_extdup_s:$index)))),
-            (!cast<Instruction>(NAME # _S) ZPR:$vec, sve_elm_idx_extdup_s:$index)>;
-  def : Pat<(nxv2f16 (splat_vector (f16 (vector_extract (nxv2f16 ZPR:$vec), sve_elm_idx_extdup_d:$index)))),
-            (!cast<Instruction>(NAME # _D) ZPR:$vec, sve_elm_idx_extdup_d:$index)>;
-  def : Pat<(nxv4f32 (splat_vector (f32 (vector_extract (nxv4f32 ZPR:$vec), sve_elm_idx_extdup_s:$index)))),
-            (!cast<Instruction>(NAME # _S) ZPR:$vec, sve_elm_idx_extdup_s:$index)>;
-  def : Pat<(nxv2f32 (splat_vector (f32 (vector_extract (nxv2f32 ZPR:$vec), sve_elm_idx_extdup_d:$index)))),
-            (!cast<Instruction>(NAME # _D) ZPR:$vec, sve_elm_idx_extdup_d:$index)>;
-  def : Pat<(nxv2f64 (splat_vector (f64 (vector_extract (nxv2f64 ZPR:$vec), sve_elm_idx_extdup_d:$index)))),
+  def : Pat<(nxv16i8 (splat_vector (i32 (vector_extract (v16i8 V128:$vec), sve_elm_idx_extdup_b:$index)))),
+            (!cast<Instruction>(NAME # _B) (SUBREG_TO_REG (i64 0), $vec, zsub), sve_elm_idx_extdup_b:$index)>;
+  def : Pat<(nxv16i8 (splat_vector (i32 (vector_extract (v8i8 V64:$vec), sve_elm_idx_extdup_b:$index)))),
+            (!cast<Instruction>(NAME # _B) (SUBREG_TO_REG (i64 0), $vec, dsub), sve_elm_idx_extdup_b:$index)>;
+
+  foreach VT = [nxv8i16, nxv2f16, nxv4f16, nxv8f16, nxv2bf16, nxv4bf16, nxv8bf16] in {
+    def : Pat<(VT (splat_vector (SVEType<VT>.EltAsScalar (vector_extract (SVEType<VT>.Packed ZPR:$vec), sve_elm_idx_extdup_h:$index)))),
+              (!cast<Instruction>(NAME # _H) ZPR:$vec, sve_elm_idx_extdup_h:$index)>;
+    def : Pat<(VT (splat_vector (SVEType<VT>.EltAsScalar (vector_extract (SVEType<VT>.ZSub V128:$vec), sve_elm_idx_extdup_h:$index)))),
+              (!cast<Instruction>(NAME # _H) (SUBREG_TO_REG (i64 0), $vec, zsub), sve_elm_idx_extdup_h:$index)>;
+    def : Pat<(VT (splat_vector (SVEType<VT>.EltAsScalar (vector_extract (SVEType<VT>.DSub V64:$vec), sve_elm_idx_extdup_h:$index)))),
+              (!cast<Instruction>(NAME # _H) (SUBREG_TO_REG (i64 0), $vec, dsub), sve_elm_idx_extdup_h:$index)>;
+  }
+
+  foreach VT = [nxv4i32, nxv2f32, nxv4f32 ] in {
+    def : Pat<(VT (splat_vector (SVEType<VT>.EltAsScalar (vector_extract (SVEType<VT>.Packed ZPR:$vec), sve_elm_idx_extdup_s:$index)))),
+              (!cast<Instruction>(NAME # _S) ZPR:$vec, sve_elm_idx_extdup_s:$index)>;
+    def : Pat<(VT (splat_vector (SVEType<VT>.EltAsScalar (vector_extract (SVEType<VT>.ZSub V128:$vec), sve_elm_idx_extdup_s:$index)))),
+              (!cast<Instruction>(NAME # _S) (SUBREG_TO_REG (i64 0), $vec, zsub), sve_elm_idx_extdup_s:$index)>;
+    def : Pat<(VT (splat_vector (SVEType<VT>.EltAsScalar (vector_extract (SVEType<VT>.DSub V64:$vec), sve_elm_idx_extdup_s:$index)))),
+              (!cast<Instruction>(NAME # _S) (SUBREG_TO_REG (i64 0), $vec, dsub), sve_elm_idx_extdup_s:$index)>;
+  }
+
+  foreach VT = [nxv2i64, nxv2f64] in {
+    def : Pat<(VT (splat_vector (SVEType<VT>.EltAsScalar (vector_extract (VT ZPR:$vec), sve_elm_idx_extdup_d:$index)))),
+              (!cast<Instruction>(NAME # _D) ZPR:$vec, sve_elm_idx_extdup_d:$index)>;
+    def : Pat<(VT (splat_vector (SVEType<VT>.EltAsScalar (vector_extract (SVEType<VT>.ZSub V128:$vec), sve_elm_idx_extdup_d:$index)))),
+              (!cast<Instruction>(NAME # _D) (SUBREG_TO_REG (i64 0), $vec, zsub), sve_elm_idx_extdup_d:$index)>;
+    def : Pat<(VT (splat_vector (SVEType<VT>.EltAsScalar (vector_extract (SVEType<VT>.DSub V64:$vec), sve_elm_idx_extdup_d:$index)))),
+              (!cast<Instruction>(NAME # _D) (SUBREG_TO_REG (i64 0), $vec, dsub), sve_elm_idx_extdup_d:$index)>;
+  }
+
+  // When extracting from an unpacked vector the index must be scaled to account
+  // for the "holes" in the underlying packed vector type. We get the scaling
+  // for free by "promoting" the element type to one whose underlying vector type
+  // is packed.
+
+  foreach VT = [nxv2f16, nxv4f16, nxv8f16] in {
+    def : Pat<(VT (splat_vector (f16 (vector_extract (nxv4f16 ZPR:$vec), sve_elm_idx_extdup_s:$index)))),
+              (!cast<Instruction>(NAME # _S) ZPR:$vec, sve_elm_idx_extdup_s:$index)>;
+    def : Pat<(VT (splat_vector (f16 (vector_extract (nxv2f16 ZPR:$vec), sve_elm_idx_extdup_d:$index)))),
+              (!cast<Instruction>(NAME # _D) ZPR:$vec, sve_elm_idx_extdup_d:$index)>;
+  }
+
+  foreach VT = [nxv2bf16, nxv4bf16, nxv8bf16] in {
+    def : Pat<(VT (splat_vector (bf16 (vector_extract (nxv4bf16 ZPR:$vec), sve_elm_idx_extdup_s:$index)))),
+              (!cast<Instruction>(NAME # _S) ZPR:$vec, sve_elm_idx_extdup_s:$index)>;
+    def : Pat<(VT (splat_vector (bf16 (vector_extract (nxv2bf16 ZPR:$vec), sve_elm_idx_extdup_d:$index)))),
+              (!cast<Instruction>(NAME # _D) ZPR:$vec, sve_elm_idx_extdup_d:$index)>;
+  }
+
+  foreach VT = [nxv2f32, nxv4f32] in {
+    def : Pat<(VT (splat_vector (f32 (vector_extract (nxv2f32 ZPR:$vec), sve_elm_idx_extdup_d:$index)))),
             (!cast<Instruction>(NAME # _D) ZPR:$vec, sve_elm_idx_extdup_d:$index)>;
+  }
+
+  // Duplicate an indexed 128-bit segment across a vector.
 
   def : Pat<(nxv16i8 (AArch64duplane128 nxv16i8:$Op1, i64:$imm)),
             (!cast<Instruction>(NAME # _Q) $Op1, $imm)>;
diff --git a/llvm/test/CodeGen/AArch64/aarch64-dup-extract-scalable.ll b/llvm/test/CodeGen/AArch64/aarch64-dup-extract-scalable.ll
index 8c9661730f1f94..0cf8aec52fe258 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-dup-extract-scalable.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-dup-extract-scalable.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple aarch64-none-linux-gnu -mattr=+sve | FileCheck %s
 
-define <vscale x 16 x i8> @dup_extract_i8(<vscale x 16 x i8> %data) {
-; CHECK-LABEL: dup_extract_i8:
+define <vscale x 16 x i8> @dup_extract_nxv16i8_nxv16i8(<vscale x 16 x i8> %data) {
+; CHECK-LABEL: dup_extract_nxv16i8_nxv16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.b, z0.b[1]
 ; CHECK-NEXT:    ret
@@ -12,8 +12,32 @@ define <vscale x 16 x i8> @dup_extract_i8(<vscale x 16 x i8> %data) {
   ret <vscale x 16 x i8> %.splat
 }
 
-define <vscale x 8 x i16> @dup_extract_i16(<vscale x 8 x i16> %data) {
-; CHECK-LABEL: dup_extract_i16:
+define <vscale x 16 x i8> @dup_extract_nxv16i8_v16i8(<16 x i8> %data) {
+; CHECK-LABEL: dup_extract_nxv16i8_v16i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    mov z0.b, z0.b[1]
+; CHECK-NEXT:    ret
+  %1 = extractelement <16 x i8> %data, i8 1
+  %.splatinsert = insertelement <vscale x 16 x i8> poison, i8 %1, i32 0
+  %.splat = shufflevector <vscale x 16 x i8> %.splatinsert, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+  ret <vscale x 16 x i8> %.splat
+}
+
+define <vscale x 16 x i8> @dup_extract_nxv16i8_v8i8(<8 x i8> %data) {
+; CHECK-LABEL: dup_extract_nxv16i8_v8i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    mov z0.b, z0.b[1]
+; CHECK-NEXT:    ret
+  %1 = extractelement <8 x i8> %data, i8 1
+  %.splatinsert = insertelement <vscale x 16 x i8> poison, i8 %1, i32 0
+  %.splat = shufflevector <vscale x 16 x i8> %.splatinsert, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+  ret <vscale x 16 x i8> %.splat
+}
+
+define <vscale x 8 x i16> @dup_extract_nxv8i16_nxv8i16(<vscale x 8 x i16> %data) {
+; CHECK-LABEL: dup_extract_nxv8i16_nxv8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.h, z0.h[1]
 ; CHECK-NEXT:    ret
@@ -23,8 +47,32 @@ define <vscale x 8 x i16> @dup_extract_i16(<vscale x 8 x i16> %data) {
   ret <vscale x 8 x i16> %.splat
 }
 
-define <vscale x 4 x i32> @dup_extract_i32(<vscale x 4 x i32> %data) {
-; CHECK-LABEL: dup_extract_i32:
+define <vscale x 8 x i16> @dup_extract_nxv8i16_v8i16(<8 x i16> %data) {
+; CHECK-LABEL: dup_extract_nxv8i16_v8i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    mov z0.h, z0.h[1]
+; CHECK-NEXT:    ret
+  %1 = extractelement <8 x i16> %data, i16 1
+  %.splatinsert = insertelement <vscale x 8 x i16> poison, i16 %1, i32 0
+  %.splat = shufflevector <vscale x 8 x i16> %.splatinsert, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+  ret <vscale x 8 x i16> %.splat
+}
+
+define <vscale x 8 x i16> @dup_extract_nxv8i16_v4i16(<4 x i16> %data) {
+; CHECK-LABEL: dup_extract_nxv8i16_v4i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    mov z0.h, z0.h[1]
+; CHECK-NEXT:    ret
+  %1 = extractelement <4 x i16> %data, i16 1
+  %.splatinsert = insertelement <vscale x 8 x i16> poison, i16 %1, i32 0
+  %.splat = shufflevector <vscale x 8 x i16> %.splatinsert, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+  ret <vscale x 8 x i16> %.splat
+}
+
+define <vscale x 4 x i32> @dup_extract_nxv4i32_nxv4i32(<vscale x 4 x i32> %data) {
+; CHECK-LABEL: dup_extract_nxv4i32_nxv4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.s, z0.s[1]
 ; CHECK-NEXT:    ret
@@ -34,8 +82,32 @@ define <vscale x 4 x i32> @dup_extract_i32(<vscale x 4 x i32> %data) {
   ret <vscale x 4 x i32> %.splat
 }
 
-define <vscale x 2 x i64> @dup_extract_i64(<vscale x 2 x i64> %data) {
-; CHECK-LABEL: dup_extract_i64:
+define <vscale x 4 x i32> @dup_extract_nxv4i32_v4i32(<4 x i32> %data) {
+; CHECK-LABEL: dup_extract_nxv4i32_v4i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    mov z0.s, z0.s[1]
+; CHECK-NEXT:    ret
+  %1 = extractelement <4 x i32> %data, i32 1
+  %.splatinsert = insertelement <vscale x 4 x i32> poison, i32 %1, i32 0
+  %.splat = shufflevector <vscale x 4 x i32> %.splatinsert, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+  ret <vscale x 4 x i32> %.splat
+}
+
+define <vscale x 4 x i32> @dup_extract_nxv4i32_v2i32(<2 x i32> %data) {
+; CHECK-LABEL: dup_extract_nxv4i32_v2i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    mov z0.s, z0.s[1]
+; CHECK-NEXT:    ret
+  %1 = extractelement <2 x i32> %data, i32 1
+  %.splatinsert = insertelement <vscale x 4 x i32> poison, i32 %1, i32 0
+  %.splat = shufflevector <vscale x 4 x i32> %.splatinsert, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+  ret <vscale x 4 x i32> %.splat
+}
+
+define <vscale x 2 x i64> @dup_extract_nxv2i64_nxv2i64(<vscale x 2 x i64> %data) {
+; CHECK-LABEL: dup_extract_nxv2i64_nxv2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.d, z0.d[1]
 ; CHECK-NEXT:    ret
@@ -45,8 +117,31 @@ define <vscale x 2 x i64> @dup_extract_i64(<vscale x 2 x i64> %data) {
   ret <vscale x 2 x i64> %.splat
 }
 
-define <vscale x 8 x half> @dup_extract_f16(<vscale x 8 x half> %data) {
-; CHECK-LABEL: dup_extract_f16:
+define <vscale x 2 x i64> @dup_extract_nxv2i64_v2i64(<2 x i64> %data) {
+; CHECK-LABEL: dup_extract_nxv2i64_v2i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    mov z0.d, z0.d[1]
+; CHECK-NEXT:    ret
+  %1 = extractelement <2 x i64> %data, i64 1
+  %.splatinsert = insertelement <vscale x 2 x i64> poison, i64 %1, i32 0
+  %.splat = shufflevector <vscale x 2 x i64> %.splatinsert, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+  ret <vscale x 2 x i64> %.splat
+}
+
+define <vscale x 2 x i64> @dup_extract_nxv2i64_v1i64(<1 x i64> %data) {
+; CHECK-LABEL: dup_extract_nxv2i64_v1i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.d, x8
+; CHECK-NEXT:    ret
+  %1 = extractelement <1 x i64> %data, i64 1
+  %.splatinsert = insertelement <vscale x 2 x i64> poison, i64 %1, i32 0
+  %.splat = shufflevector <vscale x 2 x i64> %.splatinsert, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+  ret <vscale x 2 x i64> %.splat
+}
+
+define <vscale x 8 x half> @dup_extract_nxv8f16_nxv8f16(<vscale x 8 x half> %data) {
+; CHECK-LABEL: dup_extract_nxv8f16_nxv8f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.h, z0.h[1]
 ; CHECK-NEXT:    ret
@@ -56,19 +151,133 @@ define <vscale x 8 x half> @dup_extract_f16(<vscale x 8 x half> %data) {
   ret <vscale x 8 x half> %.splat
 }
 
-define <vscale x 4 x half> @dup_extract_f16_4(<vscale x 4 x half> %data) {
-; CHECK-LABEL: dup_extract_f16_4:
+define <vscale x 8 x half> @dup_extract_nxv8f16_nxv4f16(<vscale x 4 x half> %data) {
+; CHECK-LABEL: dup_extract_nxv8f16_nxv4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.s, z0.s[1]
 ; CHECK-NEXT:    ret
   %1 = extractelement <vscale x 4 x half> %data, i16 1
+  %.splatinsert = insertelement <vscale x 8 x half> poison, half %1, i32 0
+  %.splat = shufflevector <vscale x 8 x half> %.splatinsert, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
+  ret <vscale x 8 x half> %.splat
+}
+
+define <vscale x 8 x half> @dup_extract_nxv8f16_nxv2f16(<vscale x 2 x half> %data) {
+; CHECK-LABEL: dup_extract_nxv8f16_nxv2f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.d, z0.d[1]
+; CHECK-NEXT:    ret
+  %1 = extractelement <vscale x 2 x half> %data, i16 1
+  %.splatinsert = insertelement <vscale x 8 x half> poison, half %1, i32 0
+  %.splat = shufflevector <vscale x 8 x half> %.splatinsert, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
+  ret <vscale x 8 x half> %.splat
+}
+
+define <vscale x 8 x half> @dup_extract_nxv8f16_v8f16(<8 x half> %data) {
+; CHECK-LABEL: dup_extract_nxv8f16_v8f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    mov z0.h, z0.h[1]
+; CHECK-NEXT:    ret
+  %1 = extractelement <8 x half> %data, i16 1
+  %.splatinsert = insertelement <vscale x 8 x half> poison, half %1, i32 0
+  %.splat = shufflevector <vscale x 8 x half> %.splatinsert, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
+  ret <vscale x 8 x half> %.splat
+}
+
+define <vscale x 8 x half> @dup_extract_nxv8f16_v4f16(<4 x half> %data) {
+; CHECK-LABEL: dup_extract_nxv8f16_v4f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    mov z0.h, z0.h[1]
+; CHECK-NEXT:    ret
+  %1 = extractelement <4 x half> %data, i16 1
+  %.splatinsert = insertelement <vscale x 8 x half> poison, half %1, i32 0
+  %.splat = shufflevector <vscale x 8 x half> %.splatinsert, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
+  ret <vscale x 8 x half> %.splat
+}
+
+define <vscale x 4 x half> @dup_extract_nxv4f16_nxv8f16(<vscale x 8 x half> %data) {
+; CHECK-LABEL: dup_extract_nxv4f16_nxv8f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.h, z0.h[1]
+; CHECK-NEXT:    ret
+  %1 = extractelement <vscale x 8 x half> %data, i16 1
+  %.splatinsert = insertelement <vscale x 4 x half> poison, half %1, i32 0
+  %.splat = shufflevector <vscale x 4 x half> %.splatinsert, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
+  ret <vscale x 4 x half> %.splat
+}
+
+define <vscale x 4 x half> @dup_extract_nxv4f16_nxv4f16(<vscale x 4 x half> %data) {
+; CHECK-LABEL: dup_extract_nxv4f16_nxv4f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.s, z0.s[1]
+; CHECK-NEXT:    ret
+  %1 = extractelement <vscale x 4 x half> %data, i16 1
+  %.splatinsert = insertelement <vscale x 4 x half> poison, half %1, i32 0
+  %.splat = shufflevector <vscale x 4 x half> %.splatinsert, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
+  ret <vscale x 4 x half> %.splat
+}
+
+define <vscale x 4 x half> @dup_extract_nxv4f16_nxv2f16(<vscale x 2 x half> %data) {
+; CHECK-LABEL: dup_extract_nxv4f16_nxv2f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.d, z0.d[1]
+; CHECK-NEXT:    ret
+  %1 = extractelement <vscale x 2 x half> %data, i16 1
+  %.splatinsert = insertelement <vscale x 4 x half> poison, half %1, i32 0
+  %.splat = shufflevector <vscale x 4 x half> %.splatinsert, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
+  ret <vscale x 4 x half> %.splat
+}
+
+define <vscale x 4 x half> @dup_extract_nxv4f16_v8f16(<8 x half> %data) {
+; CHECK-LABEL: dup_extract_nxv4f16_v8f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    mov z0.h, z0.h[1]
+; CHECK-NEXT:    ret
+  %1 = extractelement <8 x half> %data, i16 1
+  %.splatinsert = insertelement <vscale x 4 x half> poison, half %1, i32 0
+  %.splat = shufflevector <vscale x 4 x half> %.splatinsert, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
+  ret <vscale x 4 x half> %.splat
+}
+
+define <vscale x 4 x half> @dup_extract_nxv4f16_v4f16(<4 x half> %data) {
+; CHECK-LABEL: dup_extract_nxv4f16_v4f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    mov z0.h, z0.h[1]
+; CHECK-NEXT:    ret
+  %1 = extractelement <4 x half> %data, i16 1
   %.splatinsert = insertelement <vscale x 4 x half> poison, half %1, i32 0
   %.splat = shufflevector <vscale x 4 x half> %.splatinsert, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
   ret <vscale x 4 x half> %.splat
 }
 
-define <vscale x 2 x half> @dup...
[truncated]

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

2 participants