diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h index 4b5336fac33ea4..5812295f73b5a2 100644 --- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h +++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h @@ -846,8 +846,10 @@ class MachineBasicBlock /// Return the first instruction in MBB after I that is not a PHI, label or /// debug. This is the correct point to insert copies at the beginning of a - /// basic block. - iterator SkipPHIsLabelsAndDebug(iterator I, bool SkipPseudoOp = true); + /// basic block. \p Reg is the register being used by a spill or defined for a + /// restore/split during register allocation. + iterator SkipPHIsLabelsAndDebug(iterator I, Register Reg = Register(), + bool SkipPseudoOp = true); /// Returns an iterator to the first terminator instruction of this basic /// block. If a terminator does not exist, it returns end(). diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h index 8e7499ac626a74..c83c11d4e776fa 100644 --- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h +++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h @@ -1988,8 +1988,10 @@ class TargetInstrInfo : public MCInstrInfo { /// True if the instruction is bound to the top of its basic block and no /// other instructions shall be inserted before it. This can be implemented - /// to prevent register allocator to insert spills before such instructions. - virtual bool isBasicBlockPrologue(const MachineInstr &MI) const { + /// to prevent the register allocator from inserting spills for \p Reg before + /// such instructions. + virtual bool isBasicBlockPrologue(const MachineInstr &MI, + Register Reg = Register()) const { return false; } diff --git a/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp b/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp index 75504ef32250c5..4d668c53f7156b 100644 --- a/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp +++ b/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp @@ -461,7 +461,8 @@ class StatepointState { if (EHPad && !RC.hasReload(Reg, RegToSlotIdx[Reg], EHPad)) { RC.recordReload(Reg, RegToSlotIdx[Reg], EHPad); - auto EHPadInsertPoint = EHPad->SkipPHIsLabelsAndDebug(EHPad->begin()); + auto EHPadInsertPoint = + EHPad->SkipPHIsLabelsAndDebug(EHPad->begin(), Reg); insertReloadBefore(Reg, EHPadInsertPoint, EHPad); LLVM_DEBUG(dbgs() << "...also reload at EHPad " << printMBBReference(*EHPad) << "\n"); diff --git a/llvm/lib/CodeGen/InlineSpiller.cpp b/llvm/lib/CodeGen/InlineSpiller.cpp index 71d58b2e9e18d7..2740265f75340b 100644 --- a/llvm/lib/CodeGen/InlineSpiller.cpp +++ b/llvm/lib/CodeGen/InlineSpiller.cpp @@ -469,7 +469,7 @@ bool InlineSpiller::hoistSpillInsideBB(LiveInterval &SpillLI, MachineBasicBlock *MBB = LIS.getMBBFromIndex(SrcVNI->def); MachineBasicBlock::iterator MII; if (SrcVNI->isPHIDef()) - MII = MBB->SkipPHIsLabelsAndDebug(MBB->begin()); + MII = MBB->SkipPHIsLabelsAndDebug(MBB->begin(), SrcReg); else { MachineInstr *DefMI = LIS.getInstructionFromIndex(SrcVNI->def); assert(DefMI && "Defining instruction disappeared"); diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp index d9e22685faf5f5..4410fb7ecd23b6 100644 --- a/llvm/lib/CodeGen/MachineBasicBlock.cpp +++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp @@ -223,13 +223,13 @@ MachineBasicBlock::SkipPHIsAndLabels(MachineBasicBlock::iterator I) { MachineBasicBlock::iterator MachineBasicBlock::SkipPHIsLabelsAndDebug(MachineBasicBlock::iterator I, - bool SkipPseudoOp) { + Register Reg, bool 
SkipPseudoOp) { const TargetInstrInfo *TII = getParent()->getSubtarget().getInstrInfo(); iterator E = end(); while (I != E && (I->isPHI() || I->isPosition() || I->isDebugInstr() || (SkipPseudoOp && I->isPseudoProbe()) || - TII->isBasicBlockPrologue(*I))) + TII->isBasicBlockPrologue(*I, Reg))) ++I; // FIXME: This needs to change if we wish to bundle labels / dbg_values // inside the bundle. diff --git a/llvm/lib/CodeGen/SplitKit.cpp b/llvm/lib/CodeGen/SplitKit.cpp index 1664c304f643c3..b1c862210932bc 100644 --- a/llvm/lib/CodeGen/SplitKit.cpp +++ b/llvm/lib/CodeGen/SplitKit.cpp @@ -795,8 +795,10 @@ SlotIndex SplitEditor::leaveIntvAtTop(MachineBasicBlock &MBB) { return Start; } - VNInfo *VNI = defFromParent(0, ParentVNI, Start, MBB, - MBB.SkipPHIsLabelsAndDebug(MBB.begin())); + unsigned RegIdx = 0; + Register Reg = LIS.getInterval(Edit->get(RegIdx)).reg(); + VNInfo *VNI = defFromParent(RegIdx, ParentVNI, Start, MBB, + MBB.SkipPHIsLabelsAndDebug(MBB.begin(), Reg)); RegAssign.insert(Start, VNI->def, OpenIdx); LLVM_DEBUG(dump()); return VNI->def; diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 5f78dfff1e9885..2751c6b4ea9987 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -8476,16 +8476,25 @@ unsigned SIInstrInfo::getLiveRangeSplitOpcode(Register SrcReg, return AMDGPU::COPY; } -bool SIInstrInfo::isBasicBlockPrologue(const MachineInstr &MI) const { +bool SIInstrInfo::isBasicBlockPrologue(const MachineInstr &MI, + Register Reg) const { // We need to handle instructions which may be inserted during register // allocation to handle the prolog. The initial prolog instruction may have // been separated from the start of the block by spills and copies inserted - // needed by the prolog. - uint16_t Opc = MI.getOpcode(); + // needed by the prolog. However, the insertions for scalar registers can + // always be placed at the BB top as they are independent of the exec mask + // value. + bool IsNullOrVectorRegister = true; + if (Reg) { + const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); + IsNullOrVectorRegister = !RI.isSGPRClass(RI.getRegClassForReg(MRI, Reg)); + } + uint16_t Opc = MI.getOpcode(); // FIXME: Copies inserted in the block prolog for live-range split should also // be included. 
- return (isSpillOpcode(Opc) || (!MI.isTerminator() && Opc != AMDGPU::COPY && + return IsNullOrVectorRegister && + (isSpillOpcode(Opc) || (!MI.isTerminator() && Opc != AMDGPU::COPY && MI.modifiesRegister(AMDGPU::EXEC, &RI))); } diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h index 29f549fc29a3ce..de2820e5c013ee 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -1179,7 +1179,8 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo { unsigned getLiveRangeSplitOpcode(Register Reg, const MachineFunction &MF) const override; - bool isBasicBlockPrologue(const MachineInstr &MI) const override; + bool isBasicBlockPrologue(const MachineInstr &MI, + Register Reg = Register()) const override; MachineInstr *createPHIDestinationCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, diff --git a/llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll b/llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll index 8098304d134229..ffbf00765adbe2 100644 --- a/llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll +++ b/llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll @@ -168,7 +168,6 @@ define void @main(i1 %arg) #0 { ; CHECK-NEXT: s_mov_b64 vcc, vcc ; CHECK-NEXT: s_cbranch_vccnz .LBB0_2 ; CHECK-NEXT: .LBB0_3: ; %Flow14 -; CHECK-NEXT: s_or_saveexec_b64 s[20:21], s[26:27] ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: v_readlane_b32 s12, v5, 32 ; CHECK-NEXT: v_readlane_b32 s13, v5, 33 @@ -178,39 +177,39 @@ define void @main(i1 %arg) #0 { ; CHECK-NEXT: v_readlane_b32 s17, v5, 37 ; CHECK-NEXT: v_readlane_b32 s18, v5, 38 ; CHECK-NEXT: v_readlane_b32 s19, v5, 39 -; CHECK-NEXT: v_writelane_b32 v5, s4, 56 -; CHECK-NEXT: v_writelane_b32 v5, s5, 57 -; CHECK-NEXT: v_writelane_b32 v5, s6, 58 -; CHECK-NEXT: v_writelane_b32 v5, s7, 59 -; CHECK-NEXT: v_writelane_b32 v5, s8, 60 -; CHECK-NEXT: v_writelane_b32 v5, s9, 61 -; CHECK-NEXT: v_writelane_b32 v5, s10, 62 -; CHECK-NEXT: v_writelane_b32 v5, s11, 63 -; CHECK-NEXT: v_writelane_b32 v5, s52, 40 -; CHECK-NEXT: v_writelane_b32 v5, s53, 41 -; CHECK-NEXT: v_writelane_b32 v5, s54, 42 -; CHECK-NEXT: v_writelane_b32 v5, s55, 43 -; CHECK-NEXT: v_writelane_b32 v5, s56, 44 -; CHECK-NEXT: v_writelane_b32 v5, s57, 45 -; CHECK-NEXT: v_writelane_b32 v5, s58, 46 -; CHECK-NEXT: v_writelane_b32 v5, s59, 47 -; CHECK-NEXT: v_writelane_b32 v4, s12, 0 -; CHECK-NEXT: v_writelane_b32 v5, s60, 48 -; CHECK-NEXT: v_writelane_b32 v4, s13, 1 -; CHECK-NEXT: v_writelane_b32 v5, s61, 49 -; CHECK-NEXT: v_writelane_b32 v4, s14, 2 -; CHECK-NEXT: v_writelane_b32 v5, s62, 50 -; CHECK-NEXT: v_writelane_b32 v4, s15, 3 -; CHECK-NEXT: v_writelane_b32 v5, s63, 51 -; CHECK-NEXT: v_writelane_b32 v4, s16, 4 -; CHECK-NEXT: v_writelane_b32 v5, s64, 52 -; CHECK-NEXT: v_writelane_b32 v4, s17, 5 -; CHECK-NEXT: v_writelane_b32 v5, s65, 53 -; CHECK-NEXT: v_writelane_b32 v4, s18, 6 -; CHECK-NEXT: v_writelane_b32 v5, s66, 54 -; CHECK-NEXT: v_writelane_b32 v4, s19, 7 -; CHECK-NEXT: v_writelane_b32 v5, s67, 55 -; CHECK-NEXT: s_xor_b64 exec, exec, s[20:21] +; CHECK-NEXT: v_writelane_b32 v5, s4, 40 +; CHECK-NEXT: v_writelane_b32 v5, s5, 41 +; CHECK-NEXT: v_writelane_b32 v5, s6, 42 +; CHECK-NEXT: v_writelane_b32 v5, s7, 43 +; CHECK-NEXT: v_writelane_b32 v5, s8, 44 +; CHECK-NEXT: v_writelane_b32 v5, s9, 45 +; CHECK-NEXT: v_writelane_b32 v5, s10, 46 +; CHECK-NEXT: v_writelane_b32 v5, s11, 47 +; CHECK-NEXT: v_writelane_b32 v5, s12, 48 +; CHECK-NEXT: v_writelane_b32 v5, s13, 49 +; CHECK-NEXT: 
v_writelane_b32 v5, s14, 50 +; CHECK-NEXT: v_writelane_b32 v5, s15, 51 +; CHECK-NEXT: v_writelane_b32 v5, s16, 52 +; CHECK-NEXT: v_writelane_b32 v5, s17, 53 +; CHECK-NEXT: v_writelane_b32 v5, s18, 54 +; CHECK-NEXT: v_writelane_b32 v5, s19, 55 +; CHECK-NEXT: v_writelane_b32 v5, s52, 56 +; CHECK-NEXT: v_writelane_b32 v4, s60, 0 +; CHECK-NEXT: v_writelane_b32 v5, s53, 57 +; CHECK-NEXT: v_writelane_b32 v4, s61, 1 +; CHECK-NEXT: v_writelane_b32 v5, s54, 58 +; CHECK-NEXT: v_writelane_b32 v4, s62, 2 +; CHECK-NEXT: v_writelane_b32 v5, s55, 59 +; CHECK-NEXT: v_writelane_b32 v4, s63, 3 +; CHECK-NEXT: v_writelane_b32 v5, s56, 60 +; CHECK-NEXT: v_writelane_b32 v4, s64, 4 +; CHECK-NEXT: v_writelane_b32 v5, s57, 61 +; CHECK-NEXT: v_writelane_b32 v4, s65, 5 +; CHECK-NEXT: v_writelane_b32 v5, s58, 62 +; CHECK-NEXT: v_writelane_b32 v4, s66, 6 +; CHECK-NEXT: v_writelane_b32 v5, s59, 63 +; CHECK-NEXT: v_writelane_b32 v4, s67, 7 +; CHECK-NEXT: s_andn2_saveexec_b64 s[20:21], s[26:27] ; CHECK-NEXT: s_cbranch_execz .LBB0_10 ; CHECK-NEXT: ; %bb.4: ; %bb32 ; CHECK-NEXT: s_and_saveexec_b64 s[8:9], s[24:25] @@ -265,39 +264,35 @@ define void @main(i1 %arg) #0 { ; CHECK-NEXT: s_waitcnt vmcnt(1) ; CHECK-NEXT: buffer_store_dwordx4 v[2:5], off, s[8:11], 0 ; CHECK-NEXT: .LBB0_6: ; %Flow12 -; CHECK-NEXT: s_andn2_saveexec_b64 s[4:5], s[22:23] +; CHECK-NEXT: s_or_saveexec_b64 s[4:5], s[22:23] +; CHECK-NEXT: v_readlane_b32 s52, v5, 40 +; CHECK-NEXT: v_readlane_b32 s53, v5, 41 +; CHECK-NEXT: v_readlane_b32 s54, v5, 42 +; CHECK-NEXT: v_readlane_b32 s55, v5, 43 +; CHECK-NEXT: v_readlane_b32 s56, v5, 44 +; CHECK-NEXT: v_readlane_b32 s57, v5, 45 +; CHECK-NEXT: v_readlane_b32 s58, v5, 46 +; CHECK-NEXT: v_readlane_b32 s59, v5, 47 +; CHECK-NEXT: v_readlane_b32 s60, v5, 48 +; CHECK-NEXT: v_readlane_b32 s61, v5, 49 +; CHECK-NEXT: v_readlane_b32 s62, v5, 50 +; CHECK-NEXT: v_readlane_b32 s63, v5, 51 +; CHECK-NEXT: v_readlane_b32 s64, v5, 52 +; CHECK-NEXT: v_readlane_b32 s65, v5, 53 +; CHECK-NEXT: v_readlane_b32 s66, v5, 54 +; CHECK-NEXT: v_readlane_b32 s67, v5, 55 +; CHECK-NEXT: s_xor_b64 exec, exec, s[4:5] ; CHECK-NEXT: s_cbranch_execz .LBB0_9 ; CHECK-NEXT: ; %bb.7: ; %bb33.preheader ; CHECK-NEXT: s_mov_b32 s8, 0 ; CHECK-NEXT: s_mov_b32 s6, s8 -; CHECK-NEXT: v_readlane_b32 s36, v5, 40 ; CHECK-NEXT: s_mov_b32 s7, s8 ; CHECK-NEXT: v_mov_b32_e32 v2, s6 -; CHECK-NEXT: v_readlane_b32 s37, v5, 41 +; CHECK-NEXT: v_readlane_b32 s36, v5, 56 ; CHECK-NEXT: s_mov_b32 s9, s8 ; CHECK-NEXT: s_mov_b32 s10, s8 ; CHECK-NEXT: s_mov_b32 s11, s8 ; CHECK-NEXT: v_mov_b32_e32 v3, s7 -; CHECK-NEXT: v_readlane_b32 s38, v5, 42 -; CHECK-NEXT: v_readlane_b32 s39, v5, 43 -; CHECK-NEXT: v_readlane_b32 s40, v5, 44 -; CHECK-NEXT: v_readlane_b32 s41, v5, 45 -; CHECK-NEXT: v_readlane_b32 s42, v5, 46 -; CHECK-NEXT: v_readlane_b32 s43, v5, 47 -; CHECK-NEXT: v_readlane_b32 s44, v5, 48 -; CHECK-NEXT: v_readlane_b32 s45, v5, 49 -; CHECK-NEXT: v_readlane_b32 s46, v5, 50 -; CHECK-NEXT: v_readlane_b32 s47, v5, 51 -; CHECK-NEXT: v_readlane_b32 s48, v5, 52 -; CHECK-NEXT: v_readlane_b32 s49, v5, 53 -; CHECK-NEXT: v_readlane_b32 s50, v5, 54 -; CHECK-NEXT: v_readlane_b32 s51, v5, 55 -; CHECK-NEXT: s_mov_b64 s[12:13], s[36:37] -; CHECK-NEXT: s_mov_b64 s[14:15], s[38:39] -; CHECK-NEXT: s_mov_b64 s[16:17], s[40:41] -; CHECK-NEXT: s_mov_b64 s[18:19], s[42:43] -; CHECK-NEXT: image_sample_lz v6, v[2:3], s[36:43], s[8:11] dmask:0x1 -; CHECK-NEXT: v_readlane_b32 s36, v5, 56 ; CHECK-NEXT: v_readlane_b32 s37, v5, 57 ; CHECK-NEXT: v_readlane_b32 s38, v5, 58 ; CHECK-NEXT: v_readlane_b32 s39, v5, 
59 @@ -305,19 +300,25 @@ define void @main(i1 %arg) #0 { ; CHECK-NEXT: v_readlane_b32 s41, v5, 61 ; CHECK-NEXT: v_readlane_b32 s42, v5, 62 ; CHECK-NEXT: v_readlane_b32 s43, v5, 63 +; CHECK-NEXT: s_nop 4 +; CHECK-NEXT: image_sample_lz v6, v[2:3], s[36:43], s[8:11] dmask:0x1 +; CHECK-NEXT: image_sample_lz v7, v[2:3], s[52:59], s[8:11] dmask:0x1 ; CHECK-NEXT: ; kill: killed $vgpr2_vgpr3 +; CHECK-NEXT: s_mov_b64 s[12:13], s[36:37] ; CHECK-NEXT: s_and_b64 vcc, exec, 0 ; CHECK-NEXT: v_readlane_b32 s44, v4, 0 ; CHECK-NEXT: v_readlane_b32 s45, v4, 1 ; CHECK-NEXT: v_readlane_b32 s46, v4, 2 ; CHECK-NEXT: v_readlane_b32 s47, v4, 3 -; CHECK-NEXT: image_sample_lz v7, v[2:3], s[36:43], s[8:11] dmask:0x1 ; CHECK-NEXT: v_readlane_b32 s48, v4, 4 ; CHECK-NEXT: v_readlane_b32 s49, v4, 5 ; CHECK-NEXT: v_readlane_b32 s50, v4, 6 ; CHECK-NEXT: v_readlane_b32 s51, v4, 7 +; CHECK-NEXT: s_mov_b64 s[14:15], s[38:39] +; CHECK-NEXT: s_mov_b64 s[16:17], s[40:41] +; CHECK-NEXT: s_mov_b64 s[18:19], s[42:43] ; CHECK-NEXT: ; kill: killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 -; CHECK-NEXT: ; kill: killed $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43 +; CHECK-NEXT: ; kill: killed $sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59 ; CHECK-NEXT: ; kill: killed $sgpr8_sgpr9_sgpr10 killed $sgpr11 ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: v_sub_f32_e32 v2, v7, v6 diff --git a/llvm/test/CodeGen/AMDGPU/ra-inserted-scalar-instructions.mir b/llvm/test/CodeGen/AMDGPU/ra-inserted-scalar-instructions.mir index a5cceb622d3a4e..dca9ffad7e800c 100644 --- a/llvm/test/CodeGen/AMDGPU/ra-inserted-scalar-instructions.mir +++ b/llvm/test/CodeGen/AMDGPU/ra-inserted-scalar-instructions.mir @@ -1,6 +1,8 @@ -# RUN: not llc --crash -mtriple=amdgcn-amd-amdpal -mcpu=gfx1030 -run-pass=greedy -filetype=null %s +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4 +# RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1030 -run-pass=greedy --stress-regalloc=6 --verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s -; This test would crash while trying to split a liverange during register allocator. +# The spills/copies during RA for scalar register block LiveIns should be inserted at the beginning of the block. +# The COPY inserted in bb.9 during liverange split should precede the SPILL that was inserted earlier in the flow. 
--- name: test_kernel @@ -129,14 +131,297 @@ machineFunctionInfo: stackPtrOffsetReg: '$sgpr32' sgprForEXECCopy: '$sgpr105' body: | - bb.0: + ; GCN-LABEL: name: test_kernel + ; GCN: bb.0: + ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; GCN-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: dead [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: dead undef [[DEF1:%[0-9]+]].sub1:vreg_64 = IMPLICIT_DEF + ; GCN-NEXT: SI_SPILL_S32_SAVE $sgpr1, %stack.15, implicit $exec, implicit $sgpr32 :: (store (s32) into %stack.15, addrspace 5) + ; GCN-NEXT: undef [[COPY:%[0-9]+]].sub1:sgpr_64 = COPY $sgpr0 + ; GCN-NEXT: SI_SPILL_S64_SAVE [[COPY]], %stack.2, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.2, align 4, addrspace 5) + ; GCN-NEXT: undef [[V_READFIRSTLANE_B32_:%[0-9]+]].sub0:sgpr_64 = V_READFIRSTLANE_B32 undef [[DEF]], implicit $exec + ; GCN-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]].sub1:sgpr_64 = V_READFIRSTLANE_B32 undef [[DEF]], implicit $exec + ; GCN-NEXT: undef [[V_READFIRSTLANE_B32_1:%[0-9]+]].sub0:sgpr_64 = V_READFIRSTLANE_B32 undef [[DEF]], implicit $exec + ; GCN-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]].sub1:sgpr_64 = IMPLICIT_DEF + ; GCN-NEXT: SI_SPILL_S64_SAVE [[V_READFIRSTLANE_B32_1]], %stack.19, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.19, align 4, addrspace 5) + ; GCN-NEXT: undef [[V_READFIRSTLANE_B32_2:%[0-9]+]].sub0:sgpr_64 = V_READFIRSTLANE_B32 undef [[DEF]], implicit $exec + ; GCN-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]].sub1:sgpr_64 = V_READFIRSTLANE_B32 undef [[DEF]], implicit $exec + ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 0 + ; GCN-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 0 + ; GCN-NEXT: SI_SPILL_S32_SAVE [[S_MOV_B32_1]], %stack.17, implicit $exec, implicit $sgpr32 :: (store (s32) into %stack.17, addrspace 5) + ; GCN-NEXT: S_CBRANCH_SCC1 %bb.2, implicit undef $scc + ; GCN-NEXT: S_BRANCH %bb.1 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.1: + ; GCN-NEXT: successors: %bb.2(0x80000000) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[DEF2:%[0-9]+]]:sreg_32 = IMPLICIT_DEF + ; GCN-NEXT: KILL [[DEF2]] + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.2: + ; GCN-NEXT: successors: %bb.3(0x40000000), %bb.4(0x40000000) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM undef [[V_READFIRSTLANE_B32_2]], 132, 0 :: ("amdgpu-noclobber" load (s128), align 8, addrspace 1) + ; GCN-NEXT: SI_SPILL_S128_SAVE [[S_LOAD_DWORDX4_IMM]], %stack.14, implicit $exec, implicit $sgpr32 :: (store (s128) into %stack.14, align 4, addrspace 5) + ; GCN-NEXT: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM undef [[V_READFIRSTLANE_B32_2]], 188, 0 :: ("amdgpu-noclobber" load (s256), align 8, addrspace 1) + ; GCN-NEXT: S_CBRANCH_SCC1 %bb.4, implicit undef $scc + ; GCN-NEXT: S_BRANCH %bb.3 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.3: + ; GCN-NEXT: successors: %bb.4(0x80000000) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 -1 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.4: + ; GCN-NEXT: successors: %bb.5(0x40000000), %bb.6(0x40000000) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: SI_SPILL_S32_SAVE [[S_MOV_B32_]], %stack.9, implicit $exec, implicit $sgpr32 :: (store (s32) into %stack.9, addrspace 5) + ; GCN-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM undef [[V_READFIRSTLANE_B32_2]], 120, 0 :: ("amdgpu-noclobber" load (s64), 
align 16, addrspace 1) + ; GCN-NEXT: SI_SPILL_S64_SAVE [[S_LOAD_DWORDX2_IMM]], %stack.18, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.18, align 4, addrspace 5) + ; GCN-NEXT: [[S_LOAD_DWORDX8_IMM1:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM undef [[V_READFIRSTLANE_B32_2]], 352, 0 :: ("amdgpu-noclobber" load (s256), align 16, addrspace 1) + ; GCN-NEXT: SI_SPILL_S256_SAVE [[S_LOAD_DWORDX8_IMM1]], %stack.10, implicit $exec, implicit $sgpr32 :: (store (s256) into %stack.10, align 4, addrspace 5) + ; GCN-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef %97:sreg_64, 0, 0 + ; GCN-NEXT: SI_SPILL_S32_SAVE [[S_LOAD_DWORD_IMM]], %stack.11, implicit $exec, implicit $sgpr32 :: (store (s32) into %stack.11, addrspace 5) + ; GCN-NEXT: [[S_LOAD_DWORDX8_IMM2:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM undef [[V_READFIRSTLANE_B32_2]], 652, 0 :: ("amdgpu-noclobber" load (s256), align 8, addrspace 1) + ; GCN-NEXT: SI_SPILL_S256_SAVE [[S_LOAD_DWORDX8_IMM2]], %stack.6, implicit $exec, implicit $sgpr32 :: (store (s256) into %stack.6, align 4, addrspace 5) + ; GCN-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 + ; GCN-NEXT: [[S_LOAD_DWORD_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[S_MOV_B64_]], 0, 0 :: ("amdgpu-noclobber" load (s32), align 8, addrspace 1) + ; GCN-NEXT: SI_SPILL_S32_SAVE [[S_LOAD_DWORD_IMM1]], %stack.3, implicit $exec, implicit $sgpr32 :: (store (s32) into %stack.3, addrspace 5) + ; GCN-NEXT: SI_SPILL_S64_SAVE [[V_READFIRSTLANE_B32_2]], %stack.1, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.1, align 4, addrspace 5) + ; GCN-NEXT: [[S_LOAD_DWORDX8_IMM3:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM [[V_READFIRSTLANE_B32_2]], 688, 0 :: ("amdgpu-noclobber" load (s256), align 16, addrspace 1) + ; GCN-NEXT: SI_SPILL_S256_SAVE [[S_LOAD_DWORDX8_IMM3]], %stack.4, implicit $exec, implicit $sgpr32 :: (store (s256) into %stack.4, align 4, addrspace 5) + ; GCN-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 0 + ; GCN-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 0 + ; GCN-NEXT: S_CBRANCH_SCC1 %bb.6, implicit undef $scc + ; GCN-NEXT: S_BRANCH %bb.5 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.5: + ; GCN-NEXT: successors: %bb.6(0x80000000) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 -1 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.6: + ; GCN-NEXT: successors: %bb.7(0x40000000), %bb.10(0x40000000) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: SI_SPILL_S32_SAVE [[S_MOV_B32_3]], %stack.5, implicit $exec, implicit $sgpr32 :: (store (s32) into %stack.5, addrspace 5) + ; GCN-NEXT: [[S_LOAD_DWORD_IMM2:%[0-9]+]]:sgpr_32 = S_LOAD_DWORD_IMM undef %123:sgpr_64, 0, 0 :: ("amdgpu-noclobber" load (s32), align 16, addrspace 1) + ; GCN-NEXT: [[S_LOAD_DWORDX8_IMM4:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM undef %124:sgpr_64, 152, 0 :: ("amdgpu-noclobber" load (s256), align 4, addrspace 1) + ; GCN-NEXT: SI_SPILL_S256_SAVE [[S_LOAD_DWORDX8_IMM4]], %stack.20, implicit $exec, implicit $sgpr32 :: (store (s256) into %stack.20, align 4, addrspace 5) + ; GCN-NEXT: [[S_LOAD_DWORDX8_IMM5:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM undef %125:sgpr_64, 220, 0 :: ("amdgpu-noclobber" load (s256), align 4, addrspace 1) + ; GCN-NEXT: SI_SPILL_S256_SAVE [[S_LOAD_DWORDX8_IMM5]], %stack.16, implicit $exec, implicit $sgpr32 :: (store (s256) into %stack.16, align 4, addrspace 5) + ; GCN-NEXT: [[S_LOAD_DWORDX8_IMM6:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM undef %126:sgpr_64, 384, 0 :: ("amdgpu-noclobber" load (s256), align 4, addrspace 1) + ; GCN-NEXT: 
SI_SPILL_S256_SAVE [[S_LOAD_DWORDX8_IMM6]], %stack.13, implicit $exec, implicit $sgpr32 :: (store (s256) into %stack.13, align 4, addrspace 5) + ; GCN-NEXT: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM undef %127:sgpr_64, 440, 0 :: ("amdgpu-noclobber" load (s512), align 8, addrspace 1) + ; GCN-NEXT: [[S_LOAD_DWORDX16_IMM1:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM undef %128:sgpr_64, 584, 0 :: ("amdgpu-noclobber" load (s512), align 16, addrspace 1) + ; GCN-NEXT: SI_SPILL_S512_SAVE [[S_LOAD_DWORDX16_IMM1]], %stack.12, implicit $exec, implicit $sgpr32 :: (store (s512) into %stack.12, align 4, addrspace 5) + ; GCN-NEXT: [[S_LOAD_DWORDX8_IMM7:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM [[V_READFIRSTLANE_B32_]], 156, 0 :: ("amdgpu-noclobber" load (s256), align 8, addrspace 1) + ; GCN-NEXT: SI_SPILL_S256_SAVE [[S_LOAD_DWORDX8_IMM7]], %stack.8, implicit $exec, implicit $sgpr32 :: (store (s256) into %stack.8, align 4, addrspace 5) + ; GCN-NEXT: [[SI_SPILL_S64_RESTORE:%[0-9]+]]:sgpr_64 = SI_SPILL_S64_RESTORE %stack.19, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.19, align 4, addrspace 5) + ; GCN-NEXT: [[S_LOAD_DWORD_IMM3:%[0-9]+]]:sgpr_32 = S_LOAD_DWORD_IMM [[SI_SPILL_S64_RESTORE]], 0, 0 :: ("amdgpu-noclobber" load (s32), align 8, addrspace 1) + ; GCN-NEXT: SI_SPILL_S32_SAVE [[S_LOAD_DWORD_IMM3]], %stack.7, implicit $exec, implicit $sgpr32 :: (store (s32) into %stack.7, addrspace 5) + ; GCN-NEXT: SI_SPILL_S64_SAVE [[V_READFIRSTLANE_B32_]], %stack.0, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.0, align 4, addrspace 5) + ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY [[V_READFIRSTLANE_B32_]] + ; GCN-NEXT: dead [[S_LOAD_DWORD_IMM4:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY1]], 0, 0 :: ("amdgpu-noclobber" load (s32), addrspace 1) + ; GCN-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 0 + ; GCN-NEXT: [[S_LOAD_DWORD_IMM5:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[S_MOV_B64_1]], 0, 0 :: ("amdgpu-noclobber" load (s32), addrspace 1) + ; GCN-NEXT: [[SI_SPILL_S64_RESTORE1:%[0-9]+]]:sgpr_64 = SI_SPILL_S64_RESTORE %stack.2, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.2, align 4, addrspace 5) + ; GCN-NEXT: undef [[COPY2:%[0-9]+]].sub1:sgpr_64 = COPY [[SI_SPILL_S64_RESTORE1]].sub1 + ; GCN-NEXT: [[COPY2:%[0-9]+]].sub0:sgpr_64 = S_MOV_B32 1 + ; GCN-NEXT: S_CBRANCH_SCC1 %bb.10, implicit undef $scc + ; GCN-NEXT: S_BRANCH %bb.7 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.7: + ; GCN-NEXT: successors: %bb.8(0x40000000), %bb.9(0x40000000) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: SI_SPILL_S64_SAVE [[COPY2]], %stack.2, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.2, align 4, addrspace 5) + ; GCN-NEXT: undef [[V_READFIRSTLANE_B32_3:%[0-9]+]].sub0:sgpr_64 = V_READFIRSTLANE_B32 undef [[DEF1]].sub0, implicit $exec + ; GCN-NEXT: dead [[V_READFIRSTLANE_B32_3:%[0-9]+]].sub1:sgpr_64 = V_READFIRSTLANE_B32 undef [[DEF1]].sub1, implicit $exec + ; GCN-NEXT: [[DEF3:%[0-9]+]]:sreg_32 = IMPLICIT_DEF + ; GCN-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sgpr_32 = S_MOV_B32 0 + ; GCN-NEXT: $vcc = COPY [[DEF3]] + ; GCN-NEXT: S_CBRANCH_VCCNZ %bb.9, implicit $vcc + ; GCN-NEXT: S_BRANCH %bb.8 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.8: + ; GCN-NEXT: successors: %bb.9(0x80000000) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sgpr_32 = S_MOV_B32 -1 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.9: + ; GCN-NEXT: successors: %bb.10(0x80000000) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = COPY [[S_MOV_B32_4]] + ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = 
SI_SPILL_S64_RESTORE %stack.2, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.2, align 4, addrspace 5) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.10: + ; GCN-NEXT: successors: %bb.11(0x40000000), %bb.12(0x40000000) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: dead [[V_CMP_GT_F32_e64_:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, [[S_LOAD_DWORD_IMM2]], 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[SI_SPILL_S32_RESTORE:%[0-9]+]]:sreg_32_xm0_xexec = SI_SPILL_S32_RESTORE %stack.17, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.17, addrspace 5) + ; GCN-NEXT: dead [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 undef [[V_CMP_GT_F32_e64_]], [[SI_SPILL_S32_RESTORE]], implicit-def dead $scc + ; GCN-NEXT: [[SI_SPILL_S32_RESTORE1:%[0-9]+]]:sgpr_32 = SI_SPILL_S32_RESTORE %stack.15, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.15, addrspace 5) + ; GCN-NEXT: S_CMP_EQ_U32 [[SI_SPILL_S32_RESTORE1]], 0, implicit-def $scc + ; GCN-NEXT: dead [[DEF4:%[0-9]+]]:sreg_32_xm0_xexec = IMPLICIT_DEF + ; GCN-NEXT: [[SI_SPILL_S64_RESTORE2:%[0-9]+]]:sreg_64_xexec = SI_SPILL_S64_RESTORE %stack.18, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.18, align 4, addrspace 5) + ; GCN-NEXT: S_CMP_EQ_U32 [[SI_SPILL_S64_RESTORE2]].sub1, 0, implicit-def $scc + ; GCN-NEXT: dead [[DEF5:%[0-9]+]]:sreg_32_xm0_xexec = IMPLICIT_DEF + ; GCN-NEXT: [[SI_SPILL_S256_RESTORE:%[0-9]+]]:sgpr_256 = SI_SPILL_S256_RESTORE %stack.20, implicit $exec, implicit $sgpr32 :: (load (s256) from %stack.20, align 4, addrspace 5) + ; GCN-NEXT: undef [[COPY3:%[0-9]+]].sub0:sgpr_256 = COPY [[SI_SPILL_S256_RESTORE]].sub0 { + ; GCN-NEXT: internal [[COPY3]].sub2:sgpr_256 = COPY [[SI_SPILL_S256_RESTORE]].sub2 + ; GCN-NEXT: internal [[COPY3]].sub4:sgpr_256 = COPY [[SI_SPILL_S256_RESTORE]].sub4 + ; GCN-NEXT: internal [[COPY3]].sub7:sgpr_256 = COPY [[SI_SPILL_S256_RESTORE]].sub7 + ; GCN-NEXT: } + ; GCN-NEXT: dead [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY3]].sub7, [[S_LOAD_DWORD_IMM5]], implicit-def dead $scc + ; GCN-NEXT: dead [[V_CMP_GT_F32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, [[COPY3]].sub0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: dead [[V_CMP_GT_F32_e64_2:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, [[COPY3]].sub2, 0, implicit $mode, implicit $exec + ; GCN-NEXT: dead [[V_CMP_GT_F32_e64_3:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, [[COPY3]].sub4, 0, implicit $mode, implicit $exec + ; GCN-NEXT: dead [[S_OR_B32_1:%[0-9]+]]:sreg_32 = S_OR_B32 [[S_LOAD_DWORDX8_IMM]].sub0, undef [[S_OR_B32_]], implicit-def dead $scc + ; GCN-NEXT: dead [[V_CMP_GT_F32_e64_4:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, [[S_LOAD_DWORDX8_IMM]].sub1, 0, implicit $mode, implicit $exec + ; GCN-NEXT: dead [[V_CMP_GT_F32_e64_5:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, [[S_LOAD_DWORDX8_IMM]].sub2, 0, implicit $mode, implicit $exec + ; GCN-NEXT: dead [[V_CMP_GT_F32_e64_6:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, [[S_LOAD_DWORDX8_IMM]].sub3, 0, implicit $mode, implicit $exec + ; GCN-NEXT: dead [[V_CMP_GT_F32_e64_7:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, [[S_LOAD_DWORDX8_IMM]].sub4, 0, implicit $mode, implicit $exec + ; GCN-NEXT: dead [[V_CMP_GT_F32_e64_8:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, [[S_LOAD_DWORDX8_IMM]].sub5, 0, implicit $mode, implicit $exec + ; GCN-NEXT: dead [[V_CMP_GT_F32_e64_9:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, [[S_LOAD_DWORDX8_IMM]].sub6, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[SI_SPILL_S128_RESTORE:%[0-9]+]]:sgpr_128 = SI_SPILL_S128_RESTORE %stack.14, implicit $exec, 
implicit $sgpr32 :: (load (s128) from %stack.14, align 4, addrspace 5) + ; GCN-NEXT: undef [[COPY4:%[0-9]+]].sub0_sub1_sub2:sgpr_128 = COPY [[SI_SPILL_S128_RESTORE]].sub0_sub1_sub2 + ; GCN-NEXT: dead [[V_CMP_GT_F32_e64_10:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, [[COPY4]].sub0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: dead [[V_CMP_GT_F32_e64_11:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, [[COPY4]].sub1, 0, implicit $mode, implicit $exec + ; GCN-NEXT: dead [[V_CMP_GT_F32_e64_12:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, [[COPY4]].sub2, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[DEF6:%[0-9]+]]:sreg_32 = IMPLICIT_DEF + ; GCN-NEXT: dead [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 undef [[DEF5]], [[DEF6]], implicit-def dead $scc + ; GCN-NEXT: dead [[DEF7:%[0-9]+]]:sreg_32 = IMPLICIT_DEF + ; GCN-NEXT: dead [[DEF8:%[0-9]+]]:sreg_32 = IMPLICIT_DEF + ; GCN-NEXT: dead [[DEF9:%[0-9]+]]:sreg_32 = IMPLICIT_DEF + ; GCN-NEXT: dead [[DEF10:%[0-9]+]]:sreg_32 = IMPLICIT_DEF + ; GCN-NEXT: dead [[DEF11:%[0-9]+]]:sreg_32 = IMPLICIT_DEF + ; GCN-NEXT: dead [[S_AND_B32_2:%[0-9]+]]:sreg_32 = S_AND_B32 undef [[DEF11]], undef [[DEF11]], implicit-def dead $scc + ; GCN-NEXT: [[SI_SPILL_S256_RESTORE1:%[0-9]+]]:sgpr_256 = SI_SPILL_S256_RESTORE %stack.16, implicit $exec, implicit $sgpr32 :: (load (s256) from %stack.16, align 4, addrspace 5) + ; GCN-NEXT: undef [[COPY5:%[0-9]+]].sub0:sgpr_256 = COPY [[SI_SPILL_S256_RESTORE1]].sub0 { + ; GCN-NEXT: internal [[COPY5]].sub2:sgpr_256 = COPY [[SI_SPILL_S256_RESTORE1]].sub2 + ; GCN-NEXT: internal [[COPY5]].sub5:sgpr_256 = COPY [[SI_SPILL_S256_RESTORE1]].sub5 + ; GCN-NEXT: internal [[COPY5]].sub7:sgpr_256 = COPY [[SI_SPILL_S256_RESTORE1]].sub7 + ; GCN-NEXT: } + ; GCN-NEXT: dead [[V_CMP_GT_F32_e64_13:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, [[COPY5]].sub0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: dead [[S_AND_B32_3:%[0-9]+]]:sreg_32 = S_AND_B32 undef [[V_CMP_GT_F32_e64_8]], undef [[V_CMP_GT_F32_e64_9]], implicit-def dead $scc + ; GCN-NEXT: dead [[V_CMP_GT_F32_e64_14:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, [[COPY5]].sub2, 0, implicit $mode, implicit $exec + ; GCN-NEXT: dead [[S_OR_B32_2:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY5]].sub5, [[COPY5]].sub7, implicit-def dead $scc + ; GCN-NEXT: [[SI_SPILL_S256_RESTORE2:%[0-9]+]]:sgpr_256 = SI_SPILL_S256_RESTORE %stack.10, implicit $exec, implicit $sgpr32 :: (load (s256) from %stack.10, align 4, addrspace 5) + ; GCN-NEXT: undef [[COPY6:%[0-9]+]].lo16_hi16_sub1_lo16_sub1_hi16_sub2_lo16_sub2_hi16_sub3_lo16_sub3_hi16_sub4_lo16_sub4_hi16_sub5_lo16_sub5_hi16_sub6_lo16_sub6_hi16:sgpr_256 = COPY [[SI_SPILL_S256_RESTORE2]].lo16_hi16_sub1_lo16_sub1_hi16_sub2_lo16_sub2_hi16_sub3_lo16_sub3_hi16_sub4_lo16_sub4_hi16_sub5_lo16_sub5_hi16_sub6_lo16_sub6_hi16 + ; GCN-NEXT: dead [[S_OR_B32_3:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY6]].sub0, [[COPY6]].sub1, implicit-def dead $scc + ; GCN-NEXT: dead [[S_OR_B32_4:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY6]].sub2, undef [[S_OR_B32_3]], implicit-def dead $scc + ; GCN-NEXT: [[SI_SPILL_S32_RESTORE2:%[0-9]+]]:sreg_32_xm0_xexec = SI_SPILL_S32_RESTORE %stack.9, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.9, addrspace 5) + ; GCN-NEXT: dead [[S_AND_B32_4:%[0-9]+]]:sreg_32 = S_AND_B32 undef [[S_OR_B32_3]], [[SI_SPILL_S32_RESTORE2]], implicit-def dead $scc + ; GCN-NEXT: dead [[V_CMP_GT_F32_e64_15:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, [[COPY6]].sub3, 0, implicit $mode, implicit $exec + ; GCN-NEXT: dead [[V_CMP_GT_F32_e64_16:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, 
[[COPY6]].sub4, 0, implicit $mode, implicit $exec + ; GCN-NEXT: dead [[V_CMP_GT_F32_e64_17:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, [[COPY6]].sub5, 0, implicit $mode, implicit $exec + ; GCN-NEXT: dead [[V_CMP_GT_F32_e64_18:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, [[COPY6]].sub6, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[SI_SPILL_S32_RESTORE3:%[0-9]+]]:sreg_32_xm0_xexec = SI_SPILL_S32_RESTORE %stack.11, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.11, addrspace 5) + ; GCN-NEXT: dead [[V_CMP_GT_F32_e64_19:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, [[SI_SPILL_S32_RESTORE3]], 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[SI_SPILL_S256_RESTORE3:%[0-9]+]]:sgpr_256 = SI_SPILL_S256_RESTORE %stack.13, implicit $exec, implicit $sgpr32 :: (load (s256) from %stack.13, align 4, addrspace 5) + ; GCN-NEXT: undef [[COPY7:%[0-9]+]].sub0:sgpr_256 = COPY [[SI_SPILL_S256_RESTORE3]].sub0 { + ; GCN-NEXT: internal [[COPY7]].sub2:sgpr_256 = COPY [[SI_SPILL_S256_RESTORE3]].sub2 + ; GCN-NEXT: internal [[COPY7]].sub4:sgpr_256 = COPY [[SI_SPILL_S256_RESTORE3]].sub4 + ; GCN-NEXT: internal [[COPY7]].sub7:sgpr_256 = COPY [[SI_SPILL_S256_RESTORE3]].sub7 + ; GCN-NEXT: } + ; GCN-NEXT: dead [[V_CMP_GT_F32_e64_20:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, [[COPY7]].sub0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: dead [[V_CMP_GT_F32_e64_21:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, [[COPY7]].sub2, 0, implicit $mode, implicit $exec + ; GCN-NEXT: dead [[DEF12:%[0-9]+]]:sreg_32 = IMPLICIT_DEF + ; GCN-NEXT: dead [[V_CMP_GT_F32_e64_22:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, [[COPY7]].sub4, 0, implicit $mode, implicit $exec + ; GCN-NEXT: dead [[S_AND_B32_5:%[0-9]+]]:sreg_32 = S_AND_B32 undef [[DEF12]], undef [[V_CMP_GT_F32_e64_20]], implicit-def dead $scc + ; GCN-NEXT: S_CMP_EQ_U32 [[COPY7]].sub7, 0, implicit-def $scc + ; GCN-NEXT: undef [[COPY8:%[0-9]+]].sub0:sgpr_512 = COPY [[S_LOAD_DWORDX16_IMM]].sub0 { + ; GCN-NEXT: internal [[COPY8]].sub2:sgpr_512 = COPY [[S_LOAD_DWORDX16_IMM]].sub2 + ; GCN-NEXT: internal [[COPY8]].sub4:sgpr_512 = COPY [[S_LOAD_DWORDX16_IMM]].sub4 + ; GCN-NEXT: internal [[COPY8]].sub6:sgpr_512 = COPY [[S_LOAD_DWORDX16_IMM]].sub6 + ; GCN-NEXT: internal [[COPY8]].sub9:sgpr_512 = COPY [[S_LOAD_DWORDX16_IMM]].sub9 + ; GCN-NEXT: internal [[COPY8]].sub10:sgpr_512 = COPY [[S_LOAD_DWORDX16_IMM]].sub10 + ; GCN-NEXT: internal [[COPY8]].sub13:sgpr_512 = COPY [[S_LOAD_DWORDX16_IMM]].sub13 + ; GCN-NEXT: internal [[COPY8]].sub14:sgpr_512 = COPY [[S_LOAD_DWORDX16_IMM]].sub14 + ; GCN-NEXT: } + ; GCN-NEXT: dead [[V_CMP_GT_F32_e64_23:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, [[COPY8]].sub0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: dead [[V_CMP_GT_F32_e64_24:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, [[COPY8]].sub2, 0, implicit $mode, implicit $exec + ; GCN-NEXT: dead [[V_CMP_GT_F32_e64_25:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, [[COPY8]].sub4, 0, implicit $mode, implicit $exec + ; GCN-NEXT: dead [[V_CMP_GT_F32_e64_26:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, [[COPY8]].sub6, 0, implicit $mode, implicit $exec + ; GCN-NEXT: dead [[S_AND_B32_6:%[0-9]+]]:sreg_32 = S_AND_B32 undef [[V_CMP_GT_F32_e64_23]], undef [[V_CMP_GT_F32_e64_23]], implicit-def dead $scc + ; GCN-NEXT: dead [[S_OR_B32_5:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY8]].sub10, [[COPY8]].sub9, implicit-def dead $scc + ; GCN-NEXT: dead [[V_CMP_GT_F32_e64_27:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, [[COPY8]].sub13, 0, implicit $mode, implicit $exec + ; GCN-NEXT: dead 
[[V_CMP_GT_F32_e64_28:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, [[COPY8]].sub14, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[SI_SPILL_S512_RESTORE:%[0-9]+]]:sgpr_512 = SI_SPILL_S512_RESTORE %stack.12, implicit $exec, implicit $sgpr32 :: (load (s512) from %stack.12, align 4, addrspace 5) + ; GCN-NEXT: undef [[COPY9:%[0-9]+]].sub1:sgpr_512 = COPY [[SI_SPILL_S512_RESTORE]].sub1 { + ; GCN-NEXT: internal [[COPY9]].sub5:sgpr_512 = COPY [[SI_SPILL_S512_RESTORE]].sub5 + ; GCN-NEXT: internal [[COPY9]].sub6:sgpr_512 = COPY [[SI_SPILL_S512_RESTORE]].sub6 + ; GCN-NEXT: internal [[COPY9]].sub9:sgpr_512 = COPY [[SI_SPILL_S512_RESTORE]].sub9 + ; GCN-NEXT: internal [[COPY9]].sub10:sgpr_512 = COPY [[SI_SPILL_S512_RESTORE]].sub10 + ; GCN-NEXT: internal [[COPY9]].sub12:sgpr_512 = COPY [[SI_SPILL_S512_RESTORE]].sub12 + ; GCN-NEXT: internal [[COPY9]].sub15:sgpr_512 = COPY [[SI_SPILL_S512_RESTORE]].sub15 + ; GCN-NEXT: } + ; GCN-NEXT: S_CMP_EQ_U32 [[COPY9]].sub1, 0, implicit-def $scc + ; GCN-NEXT: dead [[DEF13:%[0-9]+]]:sreg_32_xm0_xexec = IMPLICIT_DEF + ; GCN-NEXT: dead [[V_CMP_GT_F32_e64_29:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, [[COPY9]].sub5, 0, implicit $mode, implicit $exec + ; GCN-NEXT: dead [[V_CMP_GT_F32_e64_30:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, [[COPY9]].sub6, 0, implicit $mode, implicit $exec + ; GCN-NEXT: dead [[DEF14:%[0-9]+]]:sreg_32 = IMPLICIT_DEF + ; GCN-NEXT: dead [[V_CMP_GT_F32_e64_31:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, [[COPY9]].sub9, 0, implicit $mode, implicit $exec + ; GCN-NEXT: dead [[V_CMP_GT_F32_e64_32:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, [[COPY9]].sub10, 0, implicit $mode, implicit $exec + ; GCN-NEXT: dead [[DEF15:%[0-9]+]]:sreg_32 = IMPLICIT_DEF + ; GCN-NEXT: dead [[S_AND_B32_7:%[0-9]+]]:sreg_32 = S_AND_B32 undef [[DEF15]], undef [[DEF14]], implicit-def dead $scc + ; GCN-NEXT: dead [[V_CMP_GT_F32_e64_33:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, [[COPY9]].sub12, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[SI_SPILL_S256_RESTORE4:%[0-9]+]]:sgpr_256 = SI_SPILL_S256_RESTORE %stack.6, implicit $exec, implicit $sgpr32 :: (load (s256) from %stack.6, align 4, addrspace 5) + ; GCN-NEXT: undef [[COPY10:%[0-9]+]].lo16_hi16_sub1_lo16_sub1_hi16_sub2_lo16_sub2_hi16_sub3_lo16_sub3_hi16_sub4_lo16_sub4_hi16_sub5_lo16_sub5_hi16_sub6_lo16_sub6_hi16:sgpr_256 = COPY [[SI_SPILL_S256_RESTORE4]].lo16_hi16_sub1_lo16_sub1_hi16_sub2_lo16_sub2_hi16_sub3_lo16_sub3_hi16_sub4_lo16_sub4_hi16_sub5_lo16_sub5_hi16_sub6_lo16_sub6_hi16 + ; GCN-NEXT: dead [[S_OR_B32_6:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY10]].sub0, [[COPY9]].sub15, implicit-def dead $scc + ; GCN-NEXT: dead [[DEF16:%[0-9]+]]:sreg_32 = IMPLICIT_DEF + ; GCN-NEXT: dead [[V_CMP_GT_F32_e64_34:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, [[COPY10]].sub1, 0, implicit $mode, implicit $exec + ; GCN-NEXT: dead [[V_CMP_GT_F32_e64_35:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, [[COPY10]].sub2, 0, implicit $mode, implicit $exec + ; GCN-NEXT: dead [[DEF17:%[0-9]+]]:sreg_32 = IMPLICIT_DEF + ; GCN-NEXT: dead [[V_CMP_GT_F32_e64_36:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, [[COPY10]].sub3, 0, implicit $mode, implicit $exec + ; GCN-NEXT: dead [[V_CMP_GT_F32_e64_37:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, [[COPY10]].sub4, 0, implicit $mode, implicit $exec + ; GCN-NEXT: dead [[DEF18:%[0-9]+]]:sreg_32 = IMPLICIT_DEF + ; GCN-NEXT: dead [[V_CMP_GT_F32_e64_38:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, [[COPY10]].sub5, 0, implicit $mode, implicit $exec + ; GCN-NEXT: dead [[V_CMP_GT_F32_e64_39:%[0-9]+]]:sreg_32 = 
V_CMP_GT_F32_e64 0, 0, 0, [[COPY10]].sub6, 0, implicit $mode, implicit $exec + ; GCN-NEXT: dead [[S_AND_B32_8:%[0-9]+]]:sreg_32 = S_AND_B32 undef [[DEF18]], undef [[DEF17]], implicit-def dead $scc + ; GCN-NEXT: [[SI_SPILL_S256_RESTORE5:%[0-9]+]]:sgpr_256 = SI_SPILL_S256_RESTORE %stack.4, implicit $exec, implicit $sgpr32 :: (load (s256) from %stack.4, align 4, addrspace 5) + ; GCN-NEXT: undef [[COPY11:%[0-9]+]].sub0_sub1_sub2_sub3_sub4_sub5:sgpr_256 = COPY [[SI_SPILL_S256_RESTORE5]].sub0_sub1_sub2_sub3_sub4_sub5 + ; GCN-NEXT: dead [[V_CMP_GT_F32_e64_40:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, [[COPY11]].sub0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: dead [[V_CMP_GT_F32_e64_41:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, [[COPY11]].sub1, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[SI_SPILL_S32_RESTORE4:%[0-9]+]]:sreg_32_xm0_xexec = SI_SPILL_S32_RESTORE %stack.3, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.3, addrspace 5) + ; GCN-NEXT: dead [[V_CMP_GT_F32_e64_42:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, [[SI_SPILL_S32_RESTORE4]], 0, implicit $mode, implicit $exec + ; GCN-NEXT: dead [[V_CMP_GT_F32_e64_43:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, [[COPY11]].sub2, 0, implicit $mode, implicit $exec + ; GCN-NEXT: dead [[V_CMP_GT_F32_e64_44:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, [[COPY11]].sub3, 0, implicit $mode, implicit $exec + ; GCN-NEXT: dead [[S_OR_B32_7:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY11]].sub4, [[COPY11]].sub5, implicit-def dead $scc + ; GCN-NEXT: S_CMP_EQ_U32 [[SI_SPILL_S32_RESTORE4]], 0, implicit-def $scc + ; GCN-NEXT: [[SI_SPILL_S32_RESTORE5:%[0-9]+]]:sreg_32_xm0_xexec = SI_SPILL_S32_RESTORE %stack.5, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.5, addrspace 5) + ; GCN-NEXT: dead [[S_AND_B32_9:%[0-9]+]]:sreg_32 = S_AND_B32 undef [[S_OR_B32_7]], [[SI_SPILL_S32_RESTORE5]], implicit-def dead $scc + ; GCN-NEXT: dead [[S_LOAD_DWORD_IMM6:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY2]], 0, 0 :: ("amdgpu-noclobber" load (s32), align 8, addrspace 1) + ; GCN-NEXT: [[SI_SPILL_S256_RESTORE6:%[0-9]+]]:sgpr_256 = SI_SPILL_S256_RESTORE %stack.8, implicit $exec, implicit $sgpr32 :: (load (s256) from %stack.8, align 4, addrspace 5) + ; GCN-NEXT: S_CMP_EQ_U32 [[SI_SPILL_S256_RESTORE6]].sub7, 0, implicit-def $scc + ; GCN-NEXT: [[SI_SPILL_S32_RESTORE6:%[0-9]+]]:sgpr_32 = SI_SPILL_S32_RESTORE %stack.7, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.7, addrspace 5) + ; GCN-NEXT: dead [[V_CMP_GT_F32_e64_45:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, [[SI_SPILL_S32_RESTORE6]], 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[DEF19:%[0-9]+]]:sreg_32 = IMPLICIT_DEF + ; GCN-NEXT: dead [[S_AND_B32_10:%[0-9]+]]:sreg_32 = S_AND_B32 [[DEF19]], undef [[S_LOAD_DWORD_IMM6]], implicit-def dead $scc + ; GCN-NEXT: dead [[S_AND_B32_11:%[0-9]+]]:sreg_32 = S_AND_B32 undef [[S_AND_B32_10]], [[S_MOV_B32_2]], implicit-def dead $scc + ; GCN-NEXT: $vcc = COPY undef [[S_AND_B32_11]] + ; GCN-NEXT: S_CBRANCH_VCCNZ %bb.12, implicit $vcc + ; GCN-NEXT: S_BRANCH %bb.11 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.11: + ; GCN-NEXT: successors: %bb.12(0x80000000) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.12: + ; GCN-NEXT: [[SI_SPILL_S64_RESTORE3:%[0-9]+]]:sgpr_64 = SI_SPILL_S64_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.1, align 4, addrspace 5) + ; GCN-NEXT: GLOBAL_STORE_DWORD_SADDR undef [[DEF]], undef [[DEF]], [[SI_SPILL_S64_RESTORE3]], 0, 0, implicit $exec :: (store (s32), addrspace 1) + ; GCN-NEXT: 
[[SI_SPILL_S64_RESTORE4:%[0-9]+]]:sgpr_64 = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.0, align 4, addrspace 5) + ; GCN-NEXT: GLOBAL_STORE_DWORD_SADDR undef [[DEF]], undef [[DEF]], [[SI_SPILL_S64_RESTORE4]], 0, 0, implicit $exec :: (store (s32), addrspace 1) + ; GCN-NEXT: S_ENDPGM 0 + bb.0: successors: %bb.1, %bb.2 liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13 %0:vgpr_32 = IMPLICIT_DEF undef %1.sub1:vreg_64 = IMPLICIT_DEF - %109:sgpr_32 = COPY undef $sgpr1 - undef %93.sub1:sgpr_64 = COPY undef $sgpr0 + %109:sgpr_32 = COPY $sgpr1 + undef %93.sub1:sgpr_64 = COPY $sgpr0 undef %106.sub0:sgpr_64 = V_READFIRSTLANE_B32 undef %0, implicit $exec %106.sub1:sgpr_64 = V_READFIRSTLANE_B32 undef %0, implicit $exec undef %105.sub0:sgpr_64 = V_READFIRSTLANE_B32 undef %0, implicit $exec