-
Notifications
You must be signed in to change notification settings - Fork 116
/
Copy pathSubgroupClusteredReduction.lgc
51 lines (47 loc) · 3.28 KB
/
SubgroupClusteredReduction.lgc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
; NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py UTC_ARGS: --tool lgc
; RUN: lgc -o - --mcpu=gfx1100 --emit-llvm %s | FileCheck -check-prefixes=CHECK %s
; Test entry point: performs two clustered subgroup reductions on i32 inputs
; and returns the sum of the two results.
;
; Both calls pass 11 as the first operand; the expected output below combines
; lanes with `or` for both reductions. The calls differ only in the last
; operand (64 vs. 32) -- presumably the cluster size, TODO confirm against the
; lgc builder. In the expected output the first reduction gets an additional
; permlane64 + readfirstlane step, while the second stops after permlanex16.
define dllexport spir_func i32 @fn(i32 %value1, i32 %value2) !lgc.shaderstage !0 {
.entry:
; Reduction of %value1 with last operand 64.
%r1 = call i32 (...) @lgc.create.subgroup.clustered.reduction.i32(i32 11, i32 %value1, i32 64)
; Reduction of %value2 with last operand 32.
%r2 = call i32 (...) @lgc.create.subgroup.clustered.reduction.i32(i32 11, i32 %value2, i32 32)
; Combine both results so neither call is dead and both lowerings are checked.
%r = add i32 %r1, %r2
ret i32 %r
}
; Variadic declaration of the reduction call used by @fn above.
declare i32 @lgc.create.subgroup.clustered.reduction.i32(...)
; Shader stage metadata attached to @fn via !lgc.shaderstage: ShaderStage::Compute.
!0 = !{i32 7}
; Compute mode metadata: sets the thread group dimensions to 64 x 1 x 1.
!llpc.compute.mode = !{!1}
!1 = !{i32 64, i32 1, i32 1}
; CHECK-LABEL: @_amdgpu_cs_main(
; CHECK-NEXT: .entry:
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.set.inactive.i32(i32 [[VALUE1:%.*]], i32 0)
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.update.dpp.i32(i32 undef, i32 [[TMP0]], i32 177, i32 15, i32 15, i1 true)
; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], [[TMP0]]
; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.update.dpp.i32(i32 undef, i32 [[TMP2]], i32 78, i32 15, i32 15, i1 true)
; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP2]], [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.amdgcn.update.dpp.i32(i32 undef, i32 [[TMP4]], i32 321, i32 15, i32 15, i1 true)
; CHECK-NEXT: [[TMP6:%.*]] = or i32 [[TMP4]], [[TMP5]]
; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.update.dpp.i32(i32 undef, i32 [[TMP6]], i32 320, i32 15, i32 15, i1 true)
; CHECK-NEXT: [[TMP8:%.*]] = or i32 [[TMP6]], [[TMP7]]
; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.amdgcn.permlanex16{{(.i32)?}}(i32 undef, i32 [[TMP8]], i32 -1, i32 -1, i1 true, i1 false)
; CHECK-NEXT: [[TMP10:%.*]] = or i32 [[TMP8]], [[TMP9]]
; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.amdgcn.permlane64{{(.i32)?}}(i32 [[TMP10]])
; CHECK-NEXT: [[TMP12:%.*]] = or i32 [[TMP10]], [[TMP11]]
; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.amdgcn.readfirstlane{{(.i32)?}}(i32 [[TMP12]])
; CHECK-NEXT: [[R1:%.*]] = call i32 @llvm.amdgcn.wwm.i32(i32 [[TMP13]])
; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.amdgcn.set.inactive.i32(i32 [[VALUE2:%.*]], i32 0)
; CHECK-NEXT: [[TMP15:%.*]] = call i32 @llvm.amdgcn.update.dpp.i32(i32 undef, i32 [[TMP14]], i32 177, i32 15, i32 15, i1 true)
; CHECK-NEXT: [[TMP16:%.*]] = or i32 [[TMP15]], [[TMP14]]
; CHECK-NEXT: [[TMP17:%.*]] = call i32 @llvm.amdgcn.update.dpp.i32(i32 undef, i32 [[TMP16]], i32 78, i32 15, i32 15, i1 true)
; CHECK-NEXT: [[TMP18:%.*]] = or i32 [[TMP16]], [[TMP17]]
; CHECK-NEXT: [[TMP19:%.*]] = call i32 @llvm.amdgcn.update.dpp.i32(i32 undef, i32 [[TMP18]], i32 321, i32 15, i32 15, i1 true)
; CHECK-NEXT: [[TMP20:%.*]] = or i32 [[TMP18]], [[TMP19]]
; CHECK-NEXT: [[TMP21:%.*]] = call i32 @llvm.amdgcn.update.dpp.i32(i32 undef, i32 [[TMP20]], i32 320, i32 15, i32 15, i1 true)
; CHECK-NEXT: [[TMP22:%.*]] = or i32 [[TMP20]], [[TMP21]]
; CHECK-NEXT: [[TMP23:%.*]] = call i32 @llvm.amdgcn.permlanex16{{(.i32)?}}(i32 undef, i32 [[TMP22]], i32 -1, i32 -1, i1 true, i1 false)
; CHECK-NEXT: [[TMP24:%.*]] = or i32 [[TMP22]], [[TMP23]]
; CHECK-NEXT: [[R2:%.*]] = call i32 @llvm.amdgcn.wwm.i32(i32 [[TMP24]])
; CHECK-NEXT: [[R:%.*]] = add i32 [[R2]], [[R1]]
; CHECK-NEXT: ret i32 [[R]]
;