[LLVM] Add 'ExpandVariadicsPass' to LTO default pipeline (llvm#100479)

Summary: This pass lowers variadic functions, and the calls to them, into non-variadic form according to the target ABI. It currently serves as the variadic lowering for the NVPTX and AMDGPU targets, where it runs only late in the target's backend. During LTO, however, we want to run it before the inliner pass so that the expanded functions can be inlined using the standard heuristics. The pass is a no-op for unsupported targets, so this change does not affect any target that isn't already using it.
jsjodin · Jul 25, 2024 · 8758091 · 8758091
1 parent a0c5907
commit 8758091
Show file tree

Hide file tree

Showing 3 changed files with 46 additions and 0 deletions.
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -48,6 +48,7 @@
 #include "llvm/Transforms/IPO/DeadArgumentElimination.h"
 #include "llvm/Transforms/IPO/ElimAvailExtern.h"
 #include "llvm/Transforms/IPO/EmbedBitcodePass.h"
+#include "llvm/Transforms/IPO/ExpandVariadics.h"
 #include "llvm/Transforms/IPO/ForceFunctionAttrs.h"
 #include "llvm/Transforms/IPO/FunctionAttrs.h"
 #include "llvm/Transforms/IPO/GlobalDCE.h"
@@ -1874,6 +1875,9 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
   MPM.addPass(createModuleToFunctionPassAdaptor(std::move(PeepholeFPM),
                                                 PTO.EagerlyInvalidateAnalyses));
 
+  // Lower variadic functions for supported targets prior to inlining.
+  MPM.addPass(ExpandVariadicsPass(ExpandVariadicsMode::Optimize));
+
   // Note: historically, the PruneEH pass was run first to deduce nounwind and
   // generally clean up exception handling overhead. It isn't clear this is
   // valuable as the inliner doesn't currently care whether it is inlining an

diff --git a/llvm/test/Other/new-pm-lto-defaults.ll b/llvm/test/Other/new-pm-lto-defaults.ll
@@ -69,6 +69,7 @@
 ; CHECK-O23SZ-NEXT: Running pass: InstCombinePass
 ; CHECK-O23SZ-NEXT: Running pass: AggressiveInstCombinePass
 ; CHECK-EP-Peephole-NEXT: Running pass: NoOpFunctionPass
+; CHECK-O23SZ-NEXT: Running pass: ExpandVariadicsPass
 ; CHECK-O23SZ-NEXT: Running pass: ModuleInlinerWrapperPass
 ; CHECK-O23SZ-NEXT: Running analysis: InlineAdvisorAnalysis
 ; CHECK-O23SZ-NEXT: Running pass: InlinerPass

diff --git a/llvm/test/Transforms/PhaseOrdering/varargs.ll b/llvm/test/Transforms/PhaseOrdering/varargs.ll
@@ -0,0 +1,41 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -mtriple=amdgcn-- -S -passes='lto<O2>' < %s | FileCheck %s
+target triple = "amdgcn-amd-amdhsa"
+
+; We use the ExpandVariadics pass to lower variadic functions so they can be
+; inlined.
+
+define i32 @foo() {
+; CHECK-LABEL: define i32 @foo(
+; CHECK-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret i32 6
+;
+entry:
+  %call = tail call i32 (i32, ...) @vararg(i32 poison, i32 noundef 1, i32 noundef 2, i32 noundef 3) ; the fixed argument is poison (the callee never reads it); variadic arguments 1 + 2 + 3 give the expected 6
+  ret i32 %call
+}
+
+define internal i32 @vararg(i32 %first, ...) { ; returns the sum of three i32 values read through the va_list; %first is never read
+entry:
+  %vlist = alloca ptr, align 8 ; stack slot that holds the va_list cursor
+  call void @llvm.va_start.p0(ptr %vlist)
+  %vlist.promoted = load ptr, ptr %vlist, align 8 ; cursor value written by va_start
+  %argp.a = getelementptr inbounds i8, ptr %vlist.promoted, i64 4 ; cursor advanced past the 1st i32 (byte offset 4)
+  store ptr %argp.a, ptr %vlist, align 8
+  %a = load i32, ptr %vlist.promoted, align 4 ; 1st value, at byte offset 0
+  %argp.b = getelementptr inbounds i8, ptr %vlist.promoted, i64 8 ; cursor advanced past the 2nd i32 (byte offset 8)
+  store ptr %argp.b, ptr %vlist, align 8
+  %b = load i32, ptr %argp.a, align 4 ; 2nd value, at byte offset 4
+  %sum = add nsw i32 %b, %a
+  %argp.c = getelementptr inbounds i8, ptr %vlist.promoted, i64 12 ; cursor advanced past the 3rd i32 (byte offset 12)
+  store ptr %argp.c, ptr %vlist, align 8
+  %c = load i32, ptr %argp.b, align 4 ; 3rd value, at byte offset 8
+  %ret = add nsw i32 %c, %sum ; total of the three values read above
+  call void @llvm.va_end.p0(ptr %vlist)
+  ret i32 %ret
+}
+
+declare void @llvm.va_start.p0(ptr)
+
+declare void @llvm.va_end.p0(ptr)