; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool lgc --function lgc.shader.FS.main
; Check that when a generic input has an extractelement user whose index is dynamic, the input is scalarized into one import per component.
; RUN: lgc -mcpu=gfx1010 -stop-after=lgc-collect-resource-usage %s -o=- | FileCheck %s
; ModuleID = 'lgcPipeline'
source_filename = "lgcPipeline"
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
target triple = "amdgcn--amdpal"
; Function Attrs: nounwind
; Vertex-shader entry point of the test pipeline (tagged with !lgc.shaderstage !15).
; It loads three <4 x float> vertex inputs and forwards each one unchanged to a
; generic output. Presumably this exists only so the fragment shader has generic
; inputs to read -- TODO confirm. No FileCheck assertions are attached to this
; function (UTC_ARGS restricts them to lgc.shader.FS.main).
define dllexport spir_func void @lgc.shader.VS.main() local_unnamed_addr #0 !lgc.shaderstage !15 {
.entry:
; Load the three vertex inputs. The second operand (2, 1, 0) differs per call and is
; presumably the input location -- TODO confirm against the lgc.load.vertex.input definition.
%0 = call <4 x float> (...) @lgc.load.vertex.input__v4f32(i1 false, i32 2, i32 0, i32 0, i32 poison, i32 poison, i32 poison)
%1 = call <4 x float> (...) @lgc.load.vertex.input__v4f32(i1 false, i32 1, i32 0, i32 0, i32 poison, i32 poison, i32 poison)
%2 = call <4 x float> (...) @lgc.load.vertex.input__v4f32(i1 false, i32 0, i32 0, i32 0, i32 poison, i32 poison, i32 poison)
; Write each loaded value to a generic output whose second operand matches the one used
; for its load: %2 (loaded with 0) -> 0, %1 (loaded with 1) -> 1, %0 (loaded with 2) -> 2.
call void (...) @lgc.create.write.generic.output(<4 x float> %2, i32 0, i32 0, i32 0, i32 0, i32 0, i32 undef)
call void (...) @lgc.create.write.generic.output(<4 x float> %1, i32 1, i32 0, i32 0, i32 0, i32 0, i32 undef)
call void (...) @lgc.create.write.generic.output(<4 x float> %0, i32 2, i32 0, i32 0, i32 0, i32 0, i32 undef)
ret void
}
; Function Attrs: nounwind readonly willreturn
declare <4 x float> @lgc.load.vertex.input__v4f32(...) local_unnamed_addr #1
; Function Attrs: nounwind
declare void @lgc.create.write.generic.output(...) local_unnamed_addr #0
; Function Attrs: nounwind
; Fragment-shader entry point (tagged with !lgc.shaderstage !16) and the function under test.
; Input IR: one <4 x float> generic-input read whose only user is an extractelement
; with a non-constant index (%idx0, the result of a call).
; Expected IR after the pipeline stops at lgc-collect-resource-usage (see the assertions
; below): the vector read is replaced by four scalar lgc.input.import.interpolated__f32
; calls whose fourth operand runs 0..3, the results are reassembled with insertelement,
; and the original extractelement then indexes the rebuilt vector.
define dllexport spir_func void @lgc.shader.FS.main() local_unnamed_addr #0 !lgc.shaderstage !16 {
; CHECK-LABEL: @lgc.shader.FS.main(
; CHECK-NEXT: .entry:
; CHECK-NEXT: [[INTERPPERSPCENTER:%.*]] = call <2 x float> @lgc.input.import.builtin.InterpPerspCenter.v2f32.i32(i32 268435457) #[[ATTR1:[0-9]+]]
; CHECK-NEXT: [[TMP0:%.*]] = call float (...) @lgc.input.import.interpolated__f32(i1 false, i32 2, i32 0, i32 0, i32 poison, i32 0, <2 x float> [[INTERPPERSPCENTER]])
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i64 0
; CHECK-NEXT: [[TMP2:%.*]] = call float (...) @lgc.input.import.interpolated__f32(i1 false, i32 2, i32 0, i32 1, i32 poison, i32 0, <2 x float> [[INTERPPERSPCENTER]])
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x float> [[TMP1]], float [[TMP2]], i64 1
; CHECK-NEXT: [[TMP4:%.*]] = call float (...) @lgc.input.import.interpolated__f32(i1 false, i32 2, i32 0, i32 2, i32 poison, i32 0, <2 x float> [[INTERPPERSPCENTER]])
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x float> [[TMP3]], float [[TMP4]], i64 2
; CHECK-NEXT: [[TMP6:%.*]] = call float (...) @lgc.input.import.interpolated__f32(i1 false, i32 2, i32 0, i32 3, i32 poison, i32 0, <2 x float> [[INTERPPERSPCENTER]])
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> [[TMP5]], float [[TMP6]], i64 3
; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.ubfe.i32(i32 12816, i32 0, i32 4)
; CHECK-NEXT: [[TMP9:%.*]] = select i1 false, i32 12816, i32 [[TMP8]]
; CHECK-NEXT: [[IDX0:%.*]] = select i1 false, i32 0, i32 [[TMP9]]
; CHECK-NEXT: [[INPUT1:%.*]] = extractelement <4 x float> [[TMP7]], i32 [[IDX0]]
; CHECK-NEXT: [[OUTPUT:%.*]] = insertelement <4 x float> undef, float [[INPUT1]], i32 0
; CHECK-NEXT: call void @lgc.output.export.generic.i32.i32.v4f32(i32 0, i32 0, <4 x float> [[OUTPUT]]) #[[ATTR0:[0-9]+]]
; CHECK-NEXT: ret void
;
.entry:
; Read a whole <4 x float> generic input. The first operand (2) is presumably the input
; location, matching the VS output written with 2 -- TODO confirm.
%input0 = call <4 x float> (...) @lgc.create.read.generic.input__v4f32(i32 2, i32 0, i32 0, i32 0, i32 16, i32 poison)
; Produce the element index through a call rather than a literal, so the extractelement
; below has a non-constant index operand. The expected output shows this call lowered to
; llvm.amdgcn.ubfe.i32 plus two selects.
%idx0 = call i32 (...) @lgc.create.extract.bit.field__i32(i32 12816, i32 0, i32 4, i1 false)
; The dynamically indexed user of %input0 that the test is about.
%input1 = extractelement <4 x float> %input0, i32 %idx0
; Place the extracted scalar in lane 0 of an otherwise-undef vector and export it.
%output = insertelement <4 x float> undef, float %input1, i32 0
call void (...) @lgc.create.write.generic.output(<4 x float> %output, i32 0, i32 0, i32 0, i32 0, i32 0, i32 undef)
ret void
}
; Function Attrs: nounwind readnone
declare i32 @lgc.create.extract.bit.field__i32(...) local_unnamed_addr #3
; Function Attrs: nounwind readonly willreturn
declare <4 x float> @lgc.create.read.generic.input__v4f32(...) local_unnamed_addr #5
; Attribute groups used by the definitions and declarations in this file.
; NOTE(review): #2 and #4 are not referenced by anything in this file as shown, and #5
; repeats the contents of #1; they are left in place so the test input stays unchanged.
attributes #0 = { nounwind }
attributes #1 = { nounwind readonly willreturn }
attributes #2 = { argmemonly nofree nosync nounwind willreturn }
attributes #3 = { nounwind readnone }
attributes #4 = { nofree nosync nounwind readnone speculatable willreturn }
attributes #5 = { nounwind readonly willreturn }
!llpc.shader.mode.VS = !{!0}
!llpc.shader.mode.FS = !{!0}
!lgc.client = !{!1}
!lgc.options = !{!2}
!lgc.options.VS = !{!3}
!lgc.options.FS = !{!4}
!lgc.user.data.nodes = !{!5, !6, !7}
!lgc.vertex.inputs = !{!8, !9, !10}
!lgc.color.export.formats = !{!11}
!lgc.input.assembly.state = !{!12}
!lgc.rasterizer.state = !{!13}
!amdgpu.pal.metadata.msgpack = !{!14}
!0 = !{i32 0, i32 0, i32 0, i32 1}
!1 = !{!"Vulkan"}
!2 = !{i32 -1861977700, i32 -726079217, i32 -248869534, i32 2063972137, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 1}
!3 = !{i32 1581426283, i32 -161330761, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 64, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 20, i32 1800}
!4 = !{i32 -1618560552, i32 -939433038, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 64, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 20, i32 1800}
!5 = !{!"StreamOutTableVaPtr", i32 0, i32 0, i32 0, i32 1, i32 0}
!6 = !{!"DescriptorBuffer", i32 6, i32 0, i32 1, i32 4, i32 0, i32 0, i32 4}
!7 = !{!"IndirectUserDataVaPtr", i32 0, i32 0, i32 5, i32 1, i32 12}
!8 = !{i32 0, i32 0, i32 0, i32 12, i32 11, i32 7, i32 -1}
!9 = !{i32 1, i32 1, i32 0, i32 12, i32 10, i32 0, i32 -1}
!10 = !{i32 2, i32 2, i32 0, i32 4, i32 4, i32 7, i32 -1}
!11 = !{i32 10, i32 0, i32 1, i32 1}
!12 = !{i32 2, i32 3}
!13 = !{i32 0, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0, i32 1}
!14 = !{!"\82\B0amdpal.pipelines\91\84\AA.registers\80\B0.spill_threshold\CE\FF\FF\FF\FF\B0.user_data_limit\00\AF.xgl_cache_info\82\B3.128_bit_cache_hash\92\CF\D2=9\F1\99\1BM\8E\CF\A8y\98tI\99\D7'\AD.llpc_version\A446.1\AEamdpal.version\92\02\03"}
; Shader-stage tags attached via !lgc.shaderstage: !15 is on @lgc.shader.VS.main and
; !16 is on @lgc.shader.FS.main. The numeric values are presumably LGC shader-stage
; enumerators -- TODO confirm against the LGC headers.
!15 = !{i32 1}
!16 = !{i32 6}