forked from llvm/llvm-project
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[LLVM] Add 'ExpandVariadicsPass' to LTO default pipeline (llvm#100479)
Summary: This pass expands variadic functions into non-variadic function calls according to the target ABI. Currently, this is used as the lowering for the NVPTX and AMDGPU targets. This pass is currently only run late in the target's backend. However, during LTO we want to run it before the inliner pass so that the expanded functions can be inlined using standard heuristics. This pass is a no-op for unsupported targets, so this won't apply to any code that isn't already using it.
- Loading branch information
Showing
3 changed files
with
46 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 | ||
; RUN: opt -mtriple=amdgcn-- -S -passes='lto<O2>' < %s | FileCheck %s | ||
target triple = "amdgcn-amd-amdhsa" | ||
|
||
; We use the ExpandVariadics pass to lower variadic functions so they can be | ||
; inlined. | ||
|
||
; Caller under test: makes a single call to the variadic function @vararg, | ||
; passing three variadic i32 operands (1, 2 and 3). The autogenerated | ||
; assertions inside the body expect the lto<O2> pipeline to reduce this | ||
; function to a bare `ret i32 6`, i.e. no call to @vararg is left behind. | ||
define i32 @foo() { | ||
; CHECK-LABEL: define i32 @foo( | ||
; CHECK-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] { | ||
; CHECK-NEXT: [[ENTRY:.*:]] | ||
; CHECK-NEXT: ret i32 6 | ||
; | ||
entry: | ||
; The fixed parameter is passed as poison; the body of @vararg never uses %first. | ||
%call = tail call i32 (i32, ...) @vararg(i32 poison, i32 noundef 1, i32 noundef 2, i32 noundef 3) | ||
ret i32 %call | ||
} | ||
|
||
; Variadic callee: returns the sum of three i32 values read through its | ||
; va_list. The code treats the va_list as one pointer to consecutive 4-byte | ||
; slots (reads at byte offsets 0, 4 and 8 from the initial cursor) and writes | ||
; the advanced cursor back to the va_list after each step. %first is unused. | ||
define internal i32 @vararg(i32 %first, ...) { | ||
start: | ||
  ; Stack slot holding the va_list cursor, initialised by va_start. | ||
  %ap = alloca ptr, align 8 | ||
  call void @llvm.va_start.p0(ptr %ap) | ||
  %ap.base = load ptr, ptr %ap, align 8 | ||
  ; First value: advance the cursor by 4 bytes, then read the slot at offset 0. | ||
  %ap.after1 = getelementptr inbounds i8, ptr %ap.base, i64 4 | ||
  store ptr %ap.after1, ptr %ap, align 8 | ||
  %arg1 = load i32, ptr %ap.base, align 4 | ||
  ; Second value: advance the cursor to offset 8, then read the slot at offset 4. | ||
  %ap.after2 = getelementptr inbounds i8, ptr %ap.base, i64 8 | ||
  store ptr %ap.after2, ptr %ap, align 8 | ||
  %arg2 = load i32, ptr %ap.after1, align 4 | ||
  %partial = add nsw i32 %arg2, %arg1 | ||
  ; Third value: advance the cursor to offset 12, then read the slot at offset 8. | ||
  %ap.after3 = getelementptr inbounds i8, ptr %ap.base, i64 12 | ||
  store ptr %ap.after3, ptr %ap, align 8 | ||
  %arg3 = load i32, ptr %ap.after2, align 4 | ||
  %total = add nsw i32 %arg3, %partial | ||
  call void @llvm.va_end.p0(ptr %ap) | ||
  ret i32 %total | ||
} | ||
|
||
; Declarations of the va_start / va_end intrinsics that @vararg calls on its va_list slot. | ||
declare void @llvm.va_start.p0(ptr) | ||
|
||
declare void @llvm.va_end.p0(ptr) |