From 9bfd70d5f6e906588a067029b2c4b51c34e69efa Mon Sep 17 00:00:00 2001 From: Shivarama Rao Date: Fri, 26 Jul 2024 05:53:35 +0000 Subject: [PATCH] Fix for Issue #1413 (cherry-pick #179 to release_18x) PGMATH has AVX512 runtime functions that can be executed only when the application is compiled in AVX512 mode. VecFuncs.def has no information about the TargetOptions, so AVX512 functions are selected even in AVX2 mode. This issue is fixed by creating a separate table for the AVX512 functions and using it only when AVX512 mode is specified. --- clang/lib/CodeGen/BackendUtil.cpp | 14 +- .../include/llvm/Analysis/TargetLibraryInfo.h | 1 + llvm/include/llvm/Analysis/VecFuncs.def | 226 ++++++++++-------- .../llvm/Frontend/Driver/CodeGenOptions.h | 3 +- llvm/lib/Analysis/TargetLibraryInfo.cpp | 10 + llvm/lib/Frontend/Driver/CodeGenOptions.cpp | 6 +- 6 files changed, 151 insertions(+), 109 deletions(-) diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index 7877e20d77f7..db43a6246ad2 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -557,8 +557,11 @@ bool EmitAssemblyHelper::AddEmitPasses(legacy::PassManager &CodeGenPasses, raw_pwrite_stream &OS, raw_pwrite_stream *DwoOS) { // Add LibraryInfo. - std::unique_ptr<TargetLibraryInfoImpl> TLII( - llvm::driver::createTLII(TargetTriple, CodeGenOpts.getVecLib())); + bool TargetHasAVX512 = + std::find(TargetOpts.Features.begin(), TargetOpts.Features.end(), + "+avx512f") != TargetOpts.Features.end(); + std::unique_ptr<TargetLibraryInfoImpl> TLII(llvm::driver::createTLII( + TargetTriple, CodeGenOpts.getVecLib(), TargetHasAVX512)); CodeGenPasses.add(new TargetLibraryInfoWrapperPass(*TLII)); // Normal mode, emit a .s or .o file by running the code generator. Note, @@ -890,8 +893,11 @@ void EmitAssemblyHelper::RunOptimizationPipeline( // Register the target library analysis directly and give it a customized // preset TLI. 
- std::unique_ptr<TargetLibraryInfoImpl> TLII( - llvm::driver::createTLII(TargetTriple, CodeGenOpts.getVecLib())); + bool TargetHasAVX512 = + std::find(TargetOpts.Features.begin(), TargetOpts.Features.end(), + "+avx512f") != TargetOpts.Features.end(); + std::unique_ptr<TargetLibraryInfoImpl> TLII(llvm::driver::createTLII( + TargetTriple, CodeGenOpts.getVecLib(), TargetHasAVX512)); FAM.registerPass([&] { return TargetLibraryAnalysis(*TLII); }); // Register all the basic analyses with the managers. diff --git a/llvm/include/llvm/Analysis/TargetLibraryInfo.h b/llvm/include/llvm/Analysis/TargetLibraryInfo.h index 0a03c7cb955d..2c986b7750f2 100644 --- a/llvm/include/llvm/Analysis/TargetLibraryInfo.h +++ b/llvm/include/llvm/Analysis/TargetLibraryInfo.h @@ -130,6 +130,7 @@ class TargetLibraryInfoImpl { MASSV, // IBM MASS vector library. #ifdef ENABLE_CLASSIC_FLANG PGMATH, // PGI math library. + PGMATH_AVX512, // PGI math library (AVX512 subset). #endif SVML, // Intel short vector math library. SLEEFGNUABI, // SLEEF - SIMD Library for Evaluating Elementary Functions. 
diff --git a/llvm/include/llvm/Analysis/VecFuncs.def b/llvm/include/llvm/Analysis/VecFuncs.def index 1cabefc95e32..a41bb1398c5d 100644 --- a/llvm/include/llvm/Analysis/VecFuncs.def +++ b/llvm/include/llvm/Analysis/VecFuncs.def @@ -1243,418 +1243,437 @@ TLI_DEFINE_VECFUNC("__rs_exp_1", "__rs_exp_4", FIXED(4), "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("__fd_sin_1", "__fd_sin_2", FIXED(2), "_ZGV_LLVM_N2v") TLI_DEFINE_VECFUNC("__fd_sin_1", "__fd_sin_4", FIXED(4), "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC("__fd_sin_1", "__fd_sin_8", FIXED(8), "_ZGV_LLVM_N8v") TLI_DEFINE_VECFUNC("__fs_sin_1", "__fs_sin_4", FIXED(4), "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("__fs_sin_1", "__fs_sin_8", FIXED(8), "_ZGV_LLVM_N8v") -TLI_DEFINE_VECFUNC("__fs_sin_1", "__fs_sin_16", FIXED(16), "_ZGV_LLVM_N16v") TLI_DEFINE_VECFUNC("__pd_sin_1", "__pd_sin_2", FIXED(2), "_ZGV_LLVM_N2v") TLI_DEFINE_VECFUNC("__pd_sin_1", "__pd_sin_4", FIXED(4), "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC("__pd_sin_1", "__pd_sin_8", FIXED(8), "_ZGV_LLVM_N8v") TLI_DEFINE_VECFUNC("__ps_sin_1", "__ps_sin_4", FIXED(4), "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("__ps_sin_1", "__ps_sin_8", FIXED(8), "_ZGV_LLVM_N8v") -TLI_DEFINE_VECFUNC("__ps_sin_1", "__ps_sin_16", FIXED(16), "_ZGV_LLVM_N16v") TLI_DEFINE_VECFUNC("__rd_sin_1", "__rd_sin_2", FIXED(2), "_ZGV_LLVM_N2v") TLI_DEFINE_VECFUNC("__rd_sin_1", "__rd_sin_4", FIXED(4), "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC("__rd_sin_1", "__rd_sin_8", FIXED(8), "_ZGV_LLVM_N8v") TLI_DEFINE_VECFUNC("__rs_sin_1", "__rs_sin_4", FIXED(4), "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("__rs_sin_1", "__rs_sin_8", FIXED(8), "_ZGV_LLVM_N8v") -TLI_DEFINE_VECFUNC("__rs_sin_1", "__rs_sin_16", FIXED(16), "_ZGV_LLVM_N16v") TLI_DEFINE_VECFUNC("__fd_cos_1", "__fd_cos_2", FIXED(2), "_ZGV_LLVM_N2v") TLI_DEFINE_VECFUNC("__fd_cos_1", "__fd_cos_4", FIXED(4), "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC("__fd_cos_1", "__fd_cos_8", FIXED(8), "_ZGV_LLVM_N8v") TLI_DEFINE_VECFUNC("__fs_cos_1", "__fs_cos_4", FIXED(4), "_ZGV_LLVM_N4v") 
TLI_DEFINE_VECFUNC("__fs_cos_1", "__fs_cos_8", FIXED(8), "_ZGV_LLVM_N8v") -TLI_DEFINE_VECFUNC("__fs_cos_1", "__fs_cos_16", FIXED(16), "_ZGV_LLVM_N16v") TLI_DEFINE_VECFUNC("__pd_cos_1", "__pd_cos_2", FIXED(2), "_ZGV_LLVM_N2v") TLI_DEFINE_VECFUNC("__pd_cos_1", "__pd_cos_4", FIXED(4), "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC("__pd_cos_1", "__pd_cos_8", FIXED(8), "_ZGV_LLVM_N8v") TLI_DEFINE_VECFUNC("__ps_cos_1", "__ps_cos_4", FIXED(4), "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("__ps_cos_1", "__ps_cos_8", FIXED(8), "_ZGV_LLVM_N8v") -TLI_DEFINE_VECFUNC("__ps_cos_1", "__ps_cos_16", FIXED(16), "_ZGV_LLVM_N16v") TLI_DEFINE_VECFUNC("__rd_cos_1", "__rd_cos_2", FIXED(2), "_ZGV_LLVM_N2v") TLI_DEFINE_VECFUNC("__rd_cos_1", "__rd_cos_4", FIXED(4), "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC("__rd_cos_1", "__rd_cos_8", FIXED(8), "_ZGV_LLVM_N8v") TLI_DEFINE_VECFUNC("__rs_cos_1", "__rs_cos_4", FIXED(4), "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("__rs_cos_1", "__rs_cos_8", FIXED(8), "_ZGV_LLVM_N8v") -TLI_DEFINE_VECFUNC("__rs_cos_1", "__rs_cos_16", FIXED(16), "_ZGV_LLVM_N16v") TLI_DEFINE_VECFUNC("__fd_tan_1", "__fd_tan_2", FIXED(2), "_ZGV_LLVM_N2v") TLI_DEFINE_VECFUNC("__fd_tan_1", "__fd_tan_4", FIXED(4), "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC("__fd_tan_1", "__fd_tan_8", FIXED(8), "_ZGV_LLVM_N8v") TLI_DEFINE_VECFUNC("__fs_tan_1", "__fs_tan_4", FIXED(4), "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("__fs_tan_1", "__fs_tan_8", FIXED(8), "_ZGV_LLVM_N8v") -TLI_DEFINE_VECFUNC("__fs_tan_1", "__fs_tan_16", FIXED(16), "_ZGV_LLVM_N16v") TLI_DEFINE_VECFUNC("__pd_tan_1", "__pd_tan_2", FIXED(2), "_ZGV_LLVM_N2v") TLI_DEFINE_VECFUNC("__pd_tan_1", "__pd_tan_4", FIXED(4), "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC("__pd_tan_1", "__pd_tan_8", FIXED(8), "_ZGV_LLVM_N8v") TLI_DEFINE_VECFUNC("__ps_tan_1", "__ps_tan_4", FIXED(4), "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("__ps_tan_1", "__ps_tan_8", FIXED(8), "_ZGV_LLVM_N8v") -TLI_DEFINE_VECFUNC("__ps_tan_1", "__ps_tan_16", FIXED(16), "_ZGV_LLVM_N16v") TLI_DEFINE_VECFUNC("__rd_tan_1", "__rd_tan_2", 
FIXED(2), "_ZGV_LLVM_N2v") TLI_DEFINE_VECFUNC("__rd_tan_1", "__rd_tan_4", FIXED(4), "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC("__rd_tan_1", "__rd_tan_8", FIXED(8), "_ZGV_LLVM_N8v") TLI_DEFINE_VECFUNC("__rs_tan_1", "__rs_tan_4", FIXED(4), "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("__rs_tan_1", "__rs_tan_8", FIXED(8), "_ZGV_LLVM_N8v") -TLI_DEFINE_VECFUNC("__rs_tan_1", "__rs_tan_16", FIXED(16), "_ZGV_LLVM_N16v") TLI_DEFINE_VECFUNC("__fd_sinh_1", "__fd_sinh_2", FIXED(2), "_ZGV_LLVM_N2v") TLI_DEFINE_VECFUNC("__fd_sinh_1", "__fd_sinh_4", FIXED(4), "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC("__fd_sinh_1", "__fd_sinh_8", FIXED(8), "_ZGV_LLVM_N8v") TLI_DEFINE_VECFUNC("__fs_sinh_1", "__fs_sinh_4", FIXED(4), "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("__fs_sinh_1", "__fs_sinh_8", FIXED(8), "_ZGV_LLVM_N8v") -TLI_DEFINE_VECFUNC("__fs_sinh_1", "__fs_sinh_16", FIXED(16), "_ZGV_LLVM_N16v") TLI_DEFINE_VECFUNC("__pd_sinh_1", "__pd_sinh_2", FIXED(2), "_ZGV_LLVM_N2v") TLI_DEFINE_VECFUNC("__pd_sinh_1", "__pd_sinh_4", FIXED(4), "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC("__pd_sinh_1", "__pd_sinh_8", FIXED(8), "_ZGV_LLVM_N8v") TLI_DEFINE_VECFUNC("__ps_sinh_1", "__ps_sinh_4", FIXED(4), "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("__ps_sinh_1", "__ps_sinh_8", FIXED(8), "_ZGV_LLVM_N8v") -TLI_DEFINE_VECFUNC("__ps_sinh_1", "__ps_sinh_16", FIXED(16), "_ZGV_LLVM_N16v") TLI_DEFINE_VECFUNC("__rd_sinh_1", "__rd_sinh_2", FIXED(2), "_ZGV_LLVM_N2v") TLI_DEFINE_VECFUNC("__rd_sinh_1", "__rd_sinh_4", FIXED(4), "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC("__rd_sinh_1", "__rd_sinh_8", FIXED(8), "_ZGV_LLVM_N8v") TLI_DEFINE_VECFUNC("__rs_sinh_1", "__rs_sinh_4", FIXED(4), "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("__rs_sinh_1", "__rs_sinh_8", FIXED(8), "_ZGV_LLVM_N8v") -TLI_DEFINE_VECFUNC("__rs_sinh_1", "__rs_sinh_16", FIXED(16), "_ZGV_LLVM_N16v") TLI_DEFINE_VECFUNC("__fd_cosh_1", "__fd_cosh_2", FIXED(2), "_ZGV_LLVM_N2v") TLI_DEFINE_VECFUNC("__fd_cosh_1", "__fd_cosh_4", FIXED(4), "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC("__fd_cosh_1", "__fd_cosh_8", FIXED(8), 
"_ZGV_LLVM_N8v") TLI_DEFINE_VECFUNC("__fs_cosh_1", "__fs_cosh_4", FIXED(4), "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("__fs_cosh_1", "__fs_cosh_8", FIXED(8), "_ZGV_LLVM_N8v") -TLI_DEFINE_VECFUNC("__fs_cosh_1", "__fs_cosh_16", FIXED(16), "_ZGV_LLVM_N16v") TLI_DEFINE_VECFUNC("__pd_cosh_1", "__pd_cosh_2", FIXED(2), "_ZGV_LLVM_N2v") TLI_DEFINE_VECFUNC("__pd_cosh_1", "__pd_cosh_4", FIXED(4), "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC("__pd_cosh_1", "__pd_cosh_8", FIXED(8), "_ZGV_LLVM_N8v") TLI_DEFINE_VECFUNC("__ps_cosh_1", "__ps_cosh_4", FIXED(4), "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("__ps_cosh_1", "__ps_cosh_8", FIXED(8), "_ZGV_LLVM_N8v") -TLI_DEFINE_VECFUNC("__ps_cosh_1", "__ps_cosh_16", FIXED(16), "_ZGV_LLVM_N16v") TLI_DEFINE_VECFUNC("__rd_cosh_1", "__rd_cosh_2", FIXED(2), "_ZGV_LLVM_N2v") TLI_DEFINE_VECFUNC("__rd_cosh_1", "__rd_cosh_4", FIXED(4), "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC("__rd_cosh_1", "__rd_cosh_8", FIXED(8), "_ZGV_LLVM_N8v") TLI_DEFINE_VECFUNC("__rs_cosh_1", "__rs_cosh_4", FIXED(4), "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("__rs_cosh_1", "__rs_cosh_8", FIXED(8), "_ZGV_LLVM_N8v") -TLI_DEFINE_VECFUNC("__rs_cosh_1", "__rs_cosh_16", FIXED(16), "_ZGV_LLVM_N16v") TLI_DEFINE_VECFUNC("__fd_tanh_1", "__fd_tanh_2", FIXED(2), "_ZGV_LLVM_N2v") TLI_DEFINE_VECFUNC("__fd_tanh_1", "__fd_tanh_4", FIXED(4), "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC("__fd_tanh_1", "__fd_tanh_8", FIXED(8), "_ZGV_LLVM_N8v") TLI_DEFINE_VECFUNC("__fs_tanh_1", "__fs_tanh_4", FIXED(4), "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("__fs_tanh_1", "__fs_tanh_8", FIXED(8), "_ZGV_LLVM_N8v") -TLI_DEFINE_VECFUNC("__fs_tanh_1", "__fs_tanh_16", FIXED(16), "_ZGV_LLVM_N16v") TLI_DEFINE_VECFUNC("__pd_tanh_1", "__pd_tanh_2", FIXED(2), "_ZGV_LLVM_N2v") TLI_DEFINE_VECFUNC("__pd_tanh_1", "__pd_tanh_4", FIXED(4), "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC("__pd_tanh_1", "__pd_tanh_8", FIXED(8), "_ZGV_LLVM_N8v") TLI_DEFINE_VECFUNC("__ps_tanh_1", "__ps_tanh_4", FIXED(4), "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("__ps_tanh_1", "__ps_tanh_8", FIXED(8), 
"_ZGV_LLVM_N8v") -TLI_DEFINE_VECFUNC("__ps_tanh_1", "__ps_tanh_16", FIXED(16), "_ZGV_LLVM_N16v") TLI_DEFINE_VECFUNC("__rd_tanh_1", "__rd_tanh_2", FIXED(2), "_ZGV_LLVM_N2v") TLI_DEFINE_VECFUNC("__rd_tanh_1", "__rd_tanh_4", FIXED(4), "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC("__rd_tanh_1", "__rd_tanh_8", FIXED(8), "_ZGV_LLVM_N8v") TLI_DEFINE_VECFUNC("__rs_tanh_1", "__rs_tanh_4", FIXED(4), "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("__rs_tanh_1", "__rs_tanh_8", FIXED(8), "_ZGV_LLVM_N8v") -TLI_DEFINE_VECFUNC("__rs_tanh_1", "__rs_tanh_16", FIXED(16), "_ZGV_LLVM_N16v") TLI_DEFINE_VECFUNC("__fd_asin_1", "__fd_asin_2", FIXED(2), "_ZGV_LLVM_N2v") TLI_DEFINE_VECFUNC("__fd_asin_1", "__fd_asin_4", FIXED(4), "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC("__fd_asin_1", "__fd_asin_8", FIXED(8), "_ZGV_LLVM_N8v") TLI_DEFINE_VECFUNC("__fs_asin_1", "__fs_asin_4", FIXED(4), "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("__fs_asin_1", "__fs_asin_8", FIXED(8), "_ZGV_LLVM_N8v") -TLI_DEFINE_VECFUNC("__fs_asin_1", "__fs_asin_16", FIXED(16), "_ZGV_LLVM_N16v") TLI_DEFINE_VECFUNC("__pd_asin_1", "__pd_asin_2", FIXED(2), "_ZGV_LLVM_N2v") TLI_DEFINE_VECFUNC("__pd_asin_1", "__pd_asin_4", FIXED(4), "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC("__pd_asin_1", "__pd_asin_8", FIXED(8), "_ZGV_LLVM_N8v") TLI_DEFINE_VECFUNC("__ps_asin_1", "__ps_asin_4", FIXED(4), "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("__ps_asin_1", "__ps_asin_8", FIXED(8), "_ZGV_LLVM_N8v") -TLI_DEFINE_VECFUNC("__ps_asin_1", "__ps_asin_16", FIXED(16), "_ZGV_LLVM_N16v") TLI_DEFINE_VECFUNC("__rd_asin_1", "__rd_asin_2", FIXED(2), "_ZGV_LLVM_N2v") TLI_DEFINE_VECFUNC("__rd_asin_1", "__rd_asin_4", FIXED(4), "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC("__rd_asin_1", "__rd_asin_8", FIXED(8), "_ZGV_LLVM_N8v") TLI_DEFINE_VECFUNC("__rs_asin_1", "__rs_asin_4", FIXED(4), "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("__rs_asin_1", "__rs_asin_8", FIXED(8), "_ZGV_LLVM_N8v") -TLI_DEFINE_VECFUNC("__rs_asin_1", "__rs_asin_16", FIXED(16), "_ZGV_LLVM_N16v") TLI_DEFINE_VECFUNC("__fd_acos_1", "__fd_acos_2", FIXED(2), 
"_ZGV_LLVM_N2v") TLI_DEFINE_VECFUNC("__fd_acos_1", "__fd_acos_4", FIXED(4), "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC("__fd_acos_1", "__fd_acos_8", FIXED(8), "_ZGV_LLVM_N8v") TLI_DEFINE_VECFUNC("__fs_acos_1", "__fs_acos_4", FIXED(4), "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("__fs_acos_1", "__fs_acos_8", FIXED(8), "_ZGV_LLVM_N8v") -TLI_DEFINE_VECFUNC("__fs_acos_1", "__fs_acos_16", FIXED(16), "_ZGV_LLVM_N16v") TLI_DEFINE_VECFUNC("__pd_acos_1", "__pd_acos_2", FIXED(2), "_ZGV_LLVM_N2v") TLI_DEFINE_VECFUNC("__pd_acos_1", "__pd_acos_4", FIXED(4), "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC("__pd_acos_1", "__pd_acos_8", FIXED(8), "_ZGV_LLVM_N8v") TLI_DEFINE_VECFUNC("__ps_acos_1", "__ps_acos_4", FIXED(4), "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("__ps_acos_1", "__ps_acos_8", FIXED(8), "_ZGV_LLVM_N8v") -TLI_DEFINE_VECFUNC("__ps_acos_1", "__ps_acos_16", FIXED(16), "_ZGV_LLVM_N16v") TLI_DEFINE_VECFUNC("__rd_acos_1", "__rd_acos_2", FIXED(2), "_ZGV_LLVM_N2v") TLI_DEFINE_VECFUNC("__rd_acos_1", "__rd_acos_4", FIXED(4), "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC("__rd_acos_1", "__rd_acos_8", FIXED(8), "_ZGV_LLVM_N8v") TLI_DEFINE_VECFUNC("__rs_acos_1", "__rs_acos_4", FIXED(4), "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("__rs_acos_1", "__rs_acos_8", FIXED(8), "_ZGV_LLVM_N8v") -TLI_DEFINE_VECFUNC("__rs_acos_1", "__rs_acos_16", FIXED(16), "_ZGV_LLVM_N16v") TLI_DEFINE_VECFUNC("__fd_atan_1", "__fd_atan_2", FIXED(2), "_ZGV_LLVM_N2v") TLI_DEFINE_VECFUNC("__fd_atan_1", "__fd_atan_4", FIXED(4), "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC("__fd_atan_1", "__fd_atan_8", FIXED(8), "_ZGV_LLVM_N8v") TLI_DEFINE_VECFUNC("__fs_atan_1", "__fs_atan_4", FIXED(4), "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("__fs_atan_1", "__fs_atan_8", FIXED(8), "_ZGV_LLVM_N8v") -TLI_DEFINE_VECFUNC("__fs_atan_1", "__fs_atan_16", FIXED(16), "_ZGV_LLVM_N16v") TLI_DEFINE_VECFUNC("__pd_atan_1", "__pd_atan_2", FIXED(2), "_ZGV_LLVM_N2v") TLI_DEFINE_VECFUNC("__pd_atan_1", "__pd_atan_4", FIXED(4), "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC("__pd_atan_1", "__pd_atan_8", FIXED(8), 
"_ZGV_LLVM_N8v") TLI_DEFINE_VECFUNC("__ps_atan_1", "__ps_atan_4", FIXED(4), "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("__ps_atan_1", "__ps_atan_8", FIXED(8), "_ZGV_LLVM_N8v") -TLI_DEFINE_VECFUNC("__ps_atan_1", "__ps_atan_16", FIXED(16), "_ZGV_LLVM_N16v") TLI_DEFINE_VECFUNC("__rd_atan_1", "__rd_atan_2", FIXED(2), "_ZGV_LLVM_N2v") TLI_DEFINE_VECFUNC("__rd_atan_1", "__rd_atan_4", FIXED(4), "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC("__rd_atan_1", "__rd_atan_8", FIXED(8), "_ZGV_LLVM_N8v") TLI_DEFINE_VECFUNC("__rs_atan_1", "__rs_atan_4", FIXED(4), "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("__rs_atan_1", "__rs_atan_8", FIXED(8), "_ZGV_LLVM_N8v") -TLI_DEFINE_VECFUNC("__rs_atan_1", "__rs_atan_16", FIXED(16), "_ZGV_LLVM_N16v") TLI_DEFINE_VECFUNC("__fd_atan2_1", "__fd_atan2_2", FIXED(2), "_ZGV_LLVM_N2vv") TLI_DEFINE_VECFUNC("__fd_atan2_1", "__fd_atan2_4", FIXED(4), "_ZGV_LLVM_N4vv") -TLI_DEFINE_VECFUNC("__fd_atan2_1", "__fd_atan2_8", FIXED(8), "_ZGV_LLVM_N8vv") TLI_DEFINE_VECFUNC("__fs_atan2_1", "__fs_atan2_4", FIXED(4), "_ZGV_LLVM_N4vv") TLI_DEFINE_VECFUNC("__fs_atan2_1", "__fs_atan2_8", FIXED(8), "_ZGV_LLVM_N8vv") -TLI_DEFINE_VECFUNC("__fs_atan2_1", "__fs_atan2_16", FIXED(16), "_ZGV_LLVM_N16vv") TLI_DEFINE_VECFUNC("__pd_atan2_1", "__pd_atan2_2", FIXED(2), "_ZGV_LLVM_N2vv") TLI_DEFINE_VECFUNC("__pd_atan2_1", "__pd_atan2_4", FIXED(4), "_ZGV_LLVM_N4vv") -TLI_DEFINE_VECFUNC("__pd_atan2_1", "__pd_atan2_8", FIXED(8), "_ZGV_LLVM_N8vv") TLI_DEFINE_VECFUNC("__ps_atan2_1", "__ps_atan2_4", FIXED(4), "_ZGV_LLVM_N4vv") TLI_DEFINE_VECFUNC("__ps_atan2_1", "__ps_atan2_8", FIXED(8), "_ZGV_LLVM_N8vv") -TLI_DEFINE_VECFUNC("__ps_atan2_1", "__ps_atan2_16", FIXED(16), "_ZGV_LLVM_N16vv") TLI_DEFINE_VECFUNC("__rd_atan2_1", "__rd_atan2_2", FIXED(2), "_ZGV_LLVM_N2vv") TLI_DEFINE_VECFUNC("__rd_atan2_1", "__rd_atan2_4", FIXED(4), "_ZGV_LLVM_N4vv") -TLI_DEFINE_VECFUNC("__rd_atan2_1", "__rd_atan2_8", FIXED(8), "_ZGV_LLVM_N8vv") TLI_DEFINE_VECFUNC("__rs_atan2_1", "__rs_atan2_4", FIXED(4), "_ZGV_LLVM_N4vv") 
TLI_DEFINE_VECFUNC("__rs_atan2_1", "__rs_atan2_8", FIXED(8), "_ZGV_LLVM_N8vv") -TLI_DEFINE_VECFUNC("__rs_atan2_1", "__rs_atan2_16", FIXED(16), "_ZGV_LLVM_N16vv") TLI_DEFINE_VECFUNC("__fd_pow_1", "__fd_pow_2", FIXED(2), "_ZGV_LLVM_N2vv") TLI_DEFINE_VECFUNC("__fd_pow_1", "__fd_pow_4", FIXED(4), "_ZGV_LLVM_N4vv") -TLI_DEFINE_VECFUNC("__fd_pow_1", "__fd_pow_8", FIXED(8), "_ZGV_LLVM_N8vv") TLI_DEFINE_VECFUNC("__fs_pow_1", "__fs_pow_4", FIXED(4), "_ZGV_LLVM_N4vv") TLI_DEFINE_VECFUNC("__fs_pow_1", "__fs_pow_8", FIXED(8), "_ZGV_LLVM_N8vv") -TLI_DEFINE_VECFUNC("__fs_pow_1", "__fs_pow_16", FIXED(16), "_ZGV_LLVM_N16vv") TLI_DEFINE_VECFUNC("__pd_pow_1", "__pd_pow_2", FIXED(2), "_ZGV_LLVM_N2vv") TLI_DEFINE_VECFUNC("__pd_pow_1", "__pd_pow_4", FIXED(4), "_ZGV_LLVM_N4vv") -TLI_DEFINE_VECFUNC("__pd_pow_1", "__pd_pow_8", FIXED(8), "_ZGV_LLVM_N8vv") TLI_DEFINE_VECFUNC("__ps_pow_1", "__ps_pow_4", FIXED(4), "_ZGV_LLVM_N4vv") TLI_DEFINE_VECFUNC("__ps_pow_1", "__ps_pow_8", FIXED(8), "_ZGV_LLVM_N8vv") -TLI_DEFINE_VECFUNC("__ps_pow_1", "__ps_pow_16", FIXED(16), "_ZGV_LLVM_N16vv") TLI_DEFINE_VECFUNC("__rd_pow_1", "__rd_pow_2", FIXED(2), "_ZGV_LLVM_N2vv") TLI_DEFINE_VECFUNC("__rd_pow_1", "__rd_pow_4", FIXED(4), "_ZGV_LLVM_N4vv") -TLI_DEFINE_VECFUNC("__rd_pow_1", "__rd_pow_8", FIXED(8), "_ZGV_LLVM_N8vv") TLI_DEFINE_VECFUNC("__rs_pow_1", "__rs_pow_4", FIXED(4), "_ZGV_LLVM_N4vv") TLI_DEFINE_VECFUNC("__rs_pow_1", "__rs_pow_8", FIXED(8), "_ZGV_LLVM_N8vv") -TLI_DEFINE_VECFUNC("__rs_pow_1", "__rs_pow_16", FIXED(16), "_ZGV_LLVM_N16vv") TLI_DEFINE_VECFUNC("__fs_powi_1", "__fs_powi_4", FIXED(4), "_ZGV_LLVM_N4vv") TLI_DEFINE_VECFUNC("__fs_powi_1", "__fs_powi_8", FIXED(8), "_ZGV_LLVM_N8vv") -TLI_DEFINE_VECFUNC("__fs_powi_1", "__fs_powi_16", FIXED(16), "_ZGV_LLVM_N16vv") TLI_DEFINE_VECFUNC("__ps_powi_1", "__ps_powi_4", FIXED(4), "_ZGV_LLVM_N4vv") TLI_DEFINE_VECFUNC("__ps_powi_1", "__ps_powi_8", FIXED(8), "_ZGV_LLVM_N8vv") -TLI_DEFINE_VECFUNC("__ps_powi_1", "__ps_powi_16", FIXED(16), "_ZGV_LLVM_N16vv") 
TLI_DEFINE_VECFUNC("__rs_powi_1", "__rs_powi_4", FIXED(4), "_ZGV_LLVM_N4vv") TLI_DEFINE_VECFUNC("__rs_powi_1", "__rs_powi_8", FIXED(8), "_ZGV_LLVM_N8vv") -TLI_DEFINE_VECFUNC("__rs_powi_1", "__rs_powi_16", FIXED(16), "_ZGV_LLVM_N16vv") TLI_DEFINE_VECFUNC("__fd_powi1_1", "__fd_powi1_2", FIXED(2), "_ZGV_LLVM_N2vv") TLI_DEFINE_VECFUNC("__fd_powi1_1", "__fd_powi1_4", FIXED(4), "_ZGV_LLVM_N4vv") -TLI_DEFINE_VECFUNC("__fd_powi1_1", "__fd_powi1_8", FIXED(8), "_ZGV_LLVM_N8vv") TLI_DEFINE_VECFUNC("__fs_powi1_1", "__fs_powi1_4", FIXED(4), "_ZGV_LLVM_N4vv") TLI_DEFINE_VECFUNC("__fs_powi1_1", "__fs_powi1_8", FIXED(8), "_ZGV_LLVM_N8vv") -TLI_DEFINE_VECFUNC("__fs_powi1_1", "__fs_powi1_16", FIXED(16), "_ZGV_LLVM_N16vv") TLI_DEFINE_VECFUNC("__pd_powi1_1", "__pd_powi1_2", FIXED(2), "_ZGV_LLVM_N2vv") TLI_DEFINE_VECFUNC("__pd_powi1_1", "__pd_powi1_4", FIXED(4), "_ZGV_LLVM_N4vv") -TLI_DEFINE_VECFUNC("__pd_powi1_1", "__pd_powi1_8", FIXED(8), "_ZGV_LLVM_N8vv") TLI_DEFINE_VECFUNC("__ps_powi1_1", "__ps_powi1_4", FIXED(4), "_ZGV_LLVM_N4vv") TLI_DEFINE_VECFUNC("__ps_powi1_1", "__ps_powi1_8", FIXED(8), "_ZGV_LLVM_N8vv") -TLI_DEFINE_VECFUNC("__ps_powi1_1", "__ps_powi1_16", FIXED(16), "_ZGV_LLVM_N16vv") TLI_DEFINE_VECFUNC("__rd_powi1_1", "__rd_powi1_2", FIXED(2), "_ZGV_LLVM_N2vv") TLI_DEFINE_VECFUNC("__rd_powi1_1", "__rd_powi1_4", FIXED(4), "_ZGV_LLVM_N4vv") -TLI_DEFINE_VECFUNC("__rd_powi1_1", "__rd_powi1_8", FIXED(8), "_ZGV_LLVM_N8vv") TLI_DEFINE_VECFUNC("__rs_powi1_1", "__rs_powi1_4", FIXED(4), "_ZGV_LLVM_N4vv") TLI_DEFINE_VECFUNC("__rs_powi1_1", "__rs_powi1_8", FIXED(8), "_ZGV_LLVM_N8vv") -TLI_DEFINE_VECFUNC("__rs_powi1_1", "__rs_powi1_16", FIXED(16), "_ZGV_LLVM_N16vv") TLI_DEFINE_VECFUNC("__fd_powk_1", "__fd_powk_2", FIXED(2), "_ZGV_LLVM_N2vv") TLI_DEFINE_VECFUNC("__fd_powk_1", "__fd_powk_4", FIXED(4), "_ZGV_LLVM_N4vv") -TLI_DEFINE_VECFUNC("__fd_powk_1", "__fd_powk_8", FIXED(8), "_ZGV_LLVM_N8vv") TLI_DEFINE_VECFUNC("__fs_powk_1", "__fs_powk_4", FIXED(4), "_ZGV_LLVM_N4vv") 
TLI_DEFINE_VECFUNC("__fs_powk_1", "__fs_powk_8", FIXED(8), "_ZGV_LLVM_N8vv") -TLI_DEFINE_VECFUNC("__fs_powk_1", "__fs_powk_16", FIXED(16), "_ZGV_LLVM_N16vv") TLI_DEFINE_VECFUNC("__pd_powk_1", "__pd_powk_2", FIXED(2), "_ZGV_LLVM_N2vv") TLI_DEFINE_VECFUNC("__pd_powk_1", "__pd_powk_4", FIXED(4), "_ZGV_LLVM_N4vv") -TLI_DEFINE_VECFUNC("__pd_powk_1", "__pd_powk_8", FIXED(8), "_ZGV_LLVM_N8vv") TLI_DEFINE_VECFUNC("__ps_powk_1", "__ps_powk_4", FIXED(4), "_ZGV_LLVM_N4vv") TLI_DEFINE_VECFUNC("__ps_powk_1", "__ps_powk_8", FIXED(8), "_ZGV_LLVM_N8vv") -TLI_DEFINE_VECFUNC("__ps_powk_1", "__ps_powk_16", FIXED(16), "_ZGV_LLVM_N16vv") TLI_DEFINE_VECFUNC("__rd_powk_1", "__rd_powk_2", FIXED(2), "_ZGV_LLVM_N2vv") TLI_DEFINE_VECFUNC("__rd_powk_1", "__rd_powk_4", FIXED(4), "_ZGV_LLVM_N4vv") -TLI_DEFINE_VECFUNC("__rd_powk_1", "__rd_powk_8", FIXED(8), "_ZGV_LLVM_N8vv") TLI_DEFINE_VECFUNC("__rs_powk_1", "__rs_powk_4", FIXED(4), "_ZGV_LLVM_N4vv") TLI_DEFINE_VECFUNC("__rs_powk_1", "__rs_powk_8", FIXED(8), "_ZGV_LLVM_N8vv") -TLI_DEFINE_VECFUNC("__rs_powk_1", "__rs_powk_16", FIXED(16), "_ZGV_LLVM_N16vv") TLI_DEFINE_VECFUNC("__fd_powk1_1", "__fd_powk1_2", FIXED(2), "_ZGV_LLVM_N2vv") TLI_DEFINE_VECFUNC("__fd_powk1_1", "__fd_powk1_4", FIXED(4), "_ZGV_LLVM_N4vv") -TLI_DEFINE_VECFUNC("__fd_powk1_1", "__fd_powk1_8", FIXED(8), "_ZGV_LLVM_N8vv") TLI_DEFINE_VECFUNC("__fs_powk1_1", "__fs_powk1_4", FIXED(4), "_ZGV_LLVM_N4vv") TLI_DEFINE_VECFUNC("__fs_powk1_1", "__fs_powk1_8", FIXED(8), "_ZGV_LLVM_N8vv") -TLI_DEFINE_VECFUNC("__fs_powk1_1", "__fs_powk1_16", FIXED(16), "_ZGV_LLVM_N16vv") TLI_DEFINE_VECFUNC("__pd_powk1_1", "__pd_powk1_2", FIXED(2), "_ZGV_LLVM_N2vv") TLI_DEFINE_VECFUNC("__pd_powk1_1", "__pd_powk1_4", FIXED(4), "_ZGV_LLVM_N4vv") -TLI_DEFINE_VECFUNC("__pd_powk1_1", "__pd_powk1_8", FIXED(8), "_ZGV_LLVM_N8vv") TLI_DEFINE_VECFUNC("__ps_powk1_1", "__ps_powk1_4", FIXED(4), "_ZGV_LLVM_N4vv") TLI_DEFINE_VECFUNC("__ps_powk1_1", "__ps_powk1_8", FIXED(8), "_ZGV_LLVM_N8vv") 
-TLI_DEFINE_VECFUNC("__ps_powk1_1", "__ps_powk1_16", FIXED(16), "_ZGV_LLVM_N16vv") TLI_DEFINE_VECFUNC("__rd_powk1_1", "__rd_powk1_2", FIXED(2), "_ZGV_LLVM_N2vv") TLI_DEFINE_VECFUNC("__rd_powk1_1", "__rd_powk1_4", FIXED(4), "_ZGV_LLVM_N4vv") -TLI_DEFINE_VECFUNC("__rd_powk1_1", "__rd_powk1_8", FIXED(8), "_ZGV_LLVM_N8vv") TLI_DEFINE_VECFUNC("__rs_powk1_1", "__rs_powk1_4", FIXED(4), "_ZGV_LLVM_N4vv") TLI_DEFINE_VECFUNC("__rs_powk1_1", "__rs_powk1_8", FIXED(8), "_ZGV_LLVM_N8vv") -TLI_DEFINE_VECFUNC("__rs_powk1_1", "__rs_powk1_16", FIXED(16), "_ZGV_LLVM_N16vv") TLI_DEFINE_VECFUNC("__fd_log10_1", "__fd_log10_2", FIXED(2), "_ZGV_LLVM_N2v") TLI_DEFINE_VECFUNC("__fd_log10_1", "__fd_log10_4", FIXED(4), "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC("__fd_log10_1", "__fd_log10_8", FIXED(8), "_ZGV_LLVM_N8v") TLI_DEFINE_VECFUNC("__fs_log10_1", "__fs_log10_4", FIXED(4), "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("__fs_log10_1", "__fs_log10_8", FIXED(8), "_ZGV_LLVM_N8v") -TLI_DEFINE_VECFUNC("__fs_log10_1", "__fs_log10_16", FIXED(16), "_ZGV_LLVM_N16v") TLI_DEFINE_VECFUNC("__pd_log10_1", "__pd_log10_2", FIXED(2), "_ZGV_LLVM_N2v") TLI_DEFINE_VECFUNC("__pd_log10_1", "__pd_log10_4", FIXED(4), "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC("__pd_log10_1", "__pd_log10_8", FIXED(8), "_ZGV_LLVM_N8v") TLI_DEFINE_VECFUNC("__ps_log10_1", "__ps_log10_4", FIXED(4), "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("__ps_log10_1", "__ps_log10_8", FIXED(8), "_ZGV_LLVM_N8v") -TLI_DEFINE_VECFUNC("__ps_log10_1", "__ps_log10_16", FIXED(16), "_ZGV_LLVM_N16v") TLI_DEFINE_VECFUNC("__rd_log10_1", "__rd_log10_2", FIXED(2), "_ZGV_LLVM_N2v") TLI_DEFINE_VECFUNC("__rd_log10_1", "__rd_log10_4", FIXED(4), "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC("__rd_log10_1", "__rd_log10_8", FIXED(8), "_ZGV_LLVM_N8v") TLI_DEFINE_VECFUNC("__rs_log10_1", "__rs_log10_4", FIXED(4), "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("__rs_log10_1", "__rs_log10_8", FIXED(8), "_ZGV_LLVM_N8v") -TLI_DEFINE_VECFUNC("__rs_log10_1", "__rs_log10_16", FIXED(16), "_ZGV_LLVM_N16v") 
TLI_DEFINE_VECFUNC("__fd_log_1", "__fd_log_2", FIXED(2), "_ZGV_LLVM_N2v") TLI_DEFINE_VECFUNC("__fd_log_1", "__fd_log_4", FIXED(4), "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC("__fd_log_1", "__fd_log_8", FIXED(8), "_ZGV_LLVM_N8v") TLI_DEFINE_VECFUNC("__fs_log_1", "__fs_log_4", FIXED(4), "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("__fs_log_1", "__fs_log_8", FIXED(8), "_ZGV_LLVM_N8v") -TLI_DEFINE_VECFUNC("__fs_log_1", "__fs_log_16", FIXED(16), "_ZGV_LLVM_N16v") TLI_DEFINE_VECFUNC("__pd_log_1", "__pd_log_2", FIXED(2), "_ZGV_LLVM_N2v") TLI_DEFINE_VECFUNC("__pd_log_1", "__pd_log_4", FIXED(4), "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC("__pd_log_1", "__pd_log_8", FIXED(8), "_ZGV_LLVM_N8v") TLI_DEFINE_VECFUNC("__ps_log_1", "__ps_log_4", FIXED(4), "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("__ps_log_1", "__ps_log_8", FIXED(8), "_ZGV_LLVM_N8v") -TLI_DEFINE_VECFUNC("__ps_log_1", "__ps_log_16", FIXED(16), "_ZGV_LLVM_N16v") TLI_DEFINE_VECFUNC("__rd_log_1", "__rd_log_2", FIXED(2), "_ZGV_LLVM_N2v") TLI_DEFINE_VECFUNC("__rd_log_1", "__rd_log_4", FIXED(4), "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC("__rd_log_1", "__rd_log_8", FIXED(8), "_ZGV_LLVM_N8v") TLI_DEFINE_VECFUNC("__rs_log_1", "__rs_log_4", FIXED(4), "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("__rs_log_1", "__rs_log_8", FIXED(8), "_ZGV_LLVM_N8v") -TLI_DEFINE_VECFUNC("__rs_log_1", "__rs_log_16", FIXED(16), "_ZGV_LLVM_N16v") TLI_DEFINE_VECFUNC("__fs_exp_1", "__fs_exp_4", FIXED(4), "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("__fs_exp_1", "__fs_exp_8", FIXED(8), "_ZGV_LLVM_N8v") -TLI_DEFINE_VECFUNC("__fs_exp_1", "__fs_exp_16", FIXED(16), "_ZGV_LLVM_N16v") TLI_DEFINE_VECFUNC("__pd_exp_1", "__pd_exp_2", FIXED(2), "_ZGV_LLVM_N2v") TLI_DEFINE_VECFUNC("__pd_exp_1", "__pd_exp_4", FIXED(4), "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC("__pd_exp_1", "__pd_exp_8", FIXED(8), "_ZGV_LLVM_N8v") TLI_DEFINE_VECFUNC("__ps_exp_1", "__ps_exp_4", FIXED(4), "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("__ps_exp_1", "__ps_exp_8", FIXED(8), "_ZGV_LLVM_N8v") -TLI_DEFINE_VECFUNC("__ps_exp_1", "__ps_exp_16", 
FIXED(16), "_ZGV_LLVM_N16v") TLI_DEFINE_VECFUNC("__rd_exp_1", "__rd_exp_2", FIXED(2), "_ZGV_LLVM_N2v") TLI_DEFINE_VECFUNC("__rd_exp_1", "__rd_exp_4", FIXED(4), "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC("__rd_exp_1", "__rd_exp_8", FIXED(8), "_ZGV_LLVM_N8v") TLI_DEFINE_VECFUNC("__rs_exp_1", "__rs_exp_4", FIXED(4), "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("__rs_exp_1", "__rs_exp_8", FIXED(8), "_ZGV_LLVM_N8v") + +#elif defined(TLI_DEFINE_PGMATH_X86_AVX512_VECFUNCS) +TLI_DEFINE_VECFUNC("__fd_sin_1", "__fd_sin_8", FIXED(8), "_ZGV_LLVM_N8v") +TLI_DEFINE_VECFUNC("__fs_sin_1", "__fs_sin_16", FIXED(16), "_ZGV_LLVM_N16v") +TLI_DEFINE_VECFUNC("__pd_sin_1", "__pd_sin_8", FIXED(8), "_ZGV_LLVM_N8v") +TLI_DEFINE_VECFUNC("__ps_sin_1", "__ps_sin_16", FIXED(16), "_ZGV_LLVM_N16v") +TLI_DEFINE_VECFUNC("__rd_sin_1", "__rd_sin_8", FIXED(8), "_ZGV_LLVM_N8v") +TLI_DEFINE_VECFUNC("__rs_sin_1", "__rs_sin_16", FIXED(16), "_ZGV_LLVM_N16v") + +TLI_DEFINE_VECFUNC("__fd_cos_1", "__fd_cos_8", FIXED(8), "_ZGV_LLVM_N8v") +TLI_DEFINE_VECFUNC("__fs_cos_1", "__fs_cos_16", FIXED(16), "_ZGV_LLVM_N16v") +TLI_DEFINE_VECFUNC("__pd_cos_1", "__pd_cos_8", FIXED(8), "_ZGV_LLVM_N8v") +TLI_DEFINE_VECFUNC("__ps_cos_1", "__ps_cos_16", FIXED(16), "_ZGV_LLVM_N16v") +TLI_DEFINE_VECFUNC("__rd_cos_1", "__rd_cos_8", FIXED(8), "_ZGV_LLVM_N8v") +TLI_DEFINE_VECFUNC("__rs_cos_1", "__rs_cos_16", FIXED(16), "_ZGV_LLVM_N16v") + +TLI_DEFINE_VECFUNC("__fd_tan_1", "__fd_tan_8", FIXED(8), "_ZGV_LLVM_N8v") +TLI_DEFINE_VECFUNC("__fs_tan_1", "__fs_tan_16", FIXED(16), "_ZGV_LLVM_N16v") +TLI_DEFINE_VECFUNC("__pd_tan_1", "__pd_tan_8", FIXED(8), "_ZGV_LLVM_N8v") +TLI_DEFINE_VECFUNC("__ps_tan_1", "__ps_tan_16", FIXED(16), "_ZGV_LLVM_N16v") +TLI_DEFINE_VECFUNC("__rd_tan_1", "__rd_tan_8", FIXED(8), "_ZGV_LLVM_N8v") +TLI_DEFINE_VECFUNC("__rs_tan_1", "__rs_tan_16", FIXED(16), "_ZGV_LLVM_N16v") + +TLI_DEFINE_VECFUNC("__fd_sinh_1", "__fd_sinh_8", FIXED(8), "_ZGV_LLVM_N8v") +TLI_DEFINE_VECFUNC("__fs_sinh_1", "__fs_sinh_16", FIXED(16), "_ZGV_LLVM_N16v") 
+TLI_DEFINE_VECFUNC("__pd_sinh_1", "__pd_sinh_8", FIXED(8), "_ZGV_LLVM_N8v") +TLI_DEFINE_VECFUNC("__ps_sinh_1", "__ps_sinh_16", FIXED(16), "_ZGV_LLVM_N16v") +TLI_DEFINE_VECFUNC("__rd_sinh_1", "__rd_sinh_8", FIXED(8), "_ZGV_LLVM_N8v") +TLI_DEFINE_VECFUNC("__rs_sinh_1", "__rs_sinh_16", FIXED(16), "_ZGV_LLVM_N16v") + +TLI_DEFINE_VECFUNC("__fd_cosh_1", "__fd_cosh_8", FIXED(8), "_ZGV_LLVM_N8v") +TLI_DEFINE_VECFUNC("__fs_cosh_1", "__fs_cosh_16", FIXED(16), "_ZGV_LLVM_N16v") +TLI_DEFINE_VECFUNC("__pd_cosh_1", "__pd_cosh_8", FIXED(8), "_ZGV_LLVM_N8v") +TLI_DEFINE_VECFUNC("__ps_cosh_1", "__ps_cosh_16", FIXED(16), "_ZGV_LLVM_N16v") +TLI_DEFINE_VECFUNC("__rd_cosh_1", "__rd_cosh_8", FIXED(8), "_ZGV_LLVM_N8v") +TLI_DEFINE_VECFUNC("__rs_cosh_1", "__rs_cosh_16", FIXED(16), "_ZGV_LLVM_N16v") + +TLI_DEFINE_VECFUNC("__fd_tanh_1", "__fd_tanh_8", FIXED(8), "_ZGV_LLVM_N8v") +TLI_DEFINE_VECFUNC("__fs_tanh_1", "__fs_tanh_16", FIXED(16), "_ZGV_LLVM_N16v") +TLI_DEFINE_VECFUNC("__pd_tanh_1", "__pd_tanh_8", FIXED(8), "_ZGV_LLVM_N8v") +TLI_DEFINE_VECFUNC("__ps_tanh_1", "__ps_tanh_16", FIXED(16), "_ZGV_LLVM_N16v") +TLI_DEFINE_VECFUNC("__rd_tanh_1", "__rd_tanh_8", FIXED(8), "_ZGV_LLVM_N8v") +TLI_DEFINE_VECFUNC("__rs_tanh_1", "__rs_tanh_16", FIXED(16), "_ZGV_LLVM_N16v") + +TLI_DEFINE_VECFUNC("__fd_asin_1", "__fd_asin_8", FIXED(8), "_ZGV_LLVM_N8v") +TLI_DEFINE_VECFUNC("__fs_asin_1", "__fs_asin_16", FIXED(16), "_ZGV_LLVM_N16v") +TLI_DEFINE_VECFUNC("__pd_asin_1", "__pd_asin_8", FIXED(8), "_ZGV_LLVM_N8v") +TLI_DEFINE_VECFUNC("__ps_asin_1", "__ps_asin_16", FIXED(16), "_ZGV_LLVM_N16v") +TLI_DEFINE_VECFUNC("__rd_asin_1", "__rd_asin_8", FIXED(8), "_ZGV_LLVM_N8v") +TLI_DEFINE_VECFUNC("__rs_asin_1", "__rs_asin_16", FIXED(16), "_ZGV_LLVM_N16v") + +TLI_DEFINE_VECFUNC("__fd_acos_1", "__fd_acos_8", FIXED(8), "_ZGV_LLVM_N8v") +TLI_DEFINE_VECFUNC("__fs_acos_1", "__fs_acos_16", FIXED(16), "_ZGV_LLVM_N16v") +TLI_DEFINE_VECFUNC("__pd_acos_1", "__pd_acos_8", FIXED(8), "_ZGV_LLVM_N8v") 
+TLI_DEFINE_VECFUNC("__ps_acos_1", "__ps_acos_16", FIXED(16), "_ZGV_LLVM_N16v") +TLI_DEFINE_VECFUNC("__rd_acos_1", "__rd_acos_8", FIXED(8), "_ZGV_LLVM_N8v") +TLI_DEFINE_VECFUNC("__rs_acos_1", "__rs_acos_16", FIXED(16), "_ZGV_LLVM_N16v") + +TLI_DEFINE_VECFUNC("__fd_atan_1", "__fd_atan_8", FIXED(8), "_ZGV_LLVM_N8v") +TLI_DEFINE_VECFUNC("__fs_atan_1", "__fs_atan_16", FIXED(16), "_ZGV_LLVM_N16v") +TLI_DEFINE_VECFUNC("__pd_atan_1", "__pd_atan_8", FIXED(8), "_ZGV_LLVM_N8v") +TLI_DEFINE_VECFUNC("__ps_atan_1", "__ps_atan_16", FIXED(16), "_ZGV_LLVM_N16v") +TLI_DEFINE_VECFUNC("__rd_atan_1", "__rd_atan_8", FIXED(8), "_ZGV_LLVM_N8v") +TLI_DEFINE_VECFUNC("__rs_atan_1", "__rs_atan_16", FIXED(16), "_ZGV_LLVM_N16v") + +TLI_DEFINE_VECFUNC("__fd_atan2_1", "__fd_atan2_8", FIXED(8), "_ZGV_LLVM_N8vv") +TLI_DEFINE_VECFUNC("__fs_atan2_1", "__fs_atan2_16", FIXED(16), "_ZGV_LLVM_N16vv") +TLI_DEFINE_VECFUNC("__pd_atan2_1", "__pd_atan2_8", FIXED(8), "_ZGV_LLVM_N8vv") +TLI_DEFINE_VECFUNC("__ps_atan2_1", "__ps_atan2_16", FIXED(16), "_ZGV_LLVM_N16vv") +TLI_DEFINE_VECFUNC("__rd_atan2_1", "__rd_atan2_8", FIXED(8), "_ZGV_LLVM_N8vv") +TLI_DEFINE_VECFUNC("__rs_atan2_1", "__rs_atan2_16", FIXED(16), "_ZGV_LLVM_N16vv") + +TLI_DEFINE_VECFUNC("__fd_pow_1", "__fd_pow_8", FIXED(8), "_ZGV_LLVM_N8vv") +TLI_DEFINE_VECFUNC("__fs_pow_1", "__fs_pow_16", FIXED(16), "_ZGV_LLVM_N16vv") +TLI_DEFINE_VECFUNC("__pd_pow_1", "__pd_pow_8", FIXED(8), "_ZGV_LLVM_N8vv") +TLI_DEFINE_VECFUNC("__ps_pow_1", "__ps_pow_16", FIXED(16), "_ZGV_LLVM_N16vv") +TLI_DEFINE_VECFUNC("__rd_pow_1", "__rd_pow_8", FIXED(8), "_ZGV_LLVM_N8vv") +TLI_DEFINE_VECFUNC("__rs_pow_1", "__rs_pow_16", FIXED(16), "_ZGV_LLVM_N16vv") + +TLI_DEFINE_VECFUNC("__fs_powi_1", "__fs_powi_16", FIXED(16), "_ZGV_LLVM_N16vv") +TLI_DEFINE_VECFUNC("__ps_powi_1", "__ps_powi_16", FIXED(16), "_ZGV_LLVM_N16vv") +TLI_DEFINE_VECFUNC("__rs_powi_1", "__rs_powi_16", FIXED(16), "_ZGV_LLVM_N16vv") + +TLI_DEFINE_VECFUNC("__fd_powi1_1", "__fd_powi1_8", FIXED(8), "_ZGV_LLVM_N8vv") 
+TLI_DEFINE_VECFUNC("__fs_powi1_1", "__fs_powi1_16", FIXED(16), "_ZGV_LLVM_N16vv") +TLI_DEFINE_VECFUNC("__pd_powi1_1", "__pd_powi1_8", FIXED(8), "_ZGV_LLVM_N8vv") +TLI_DEFINE_VECFUNC("__ps_powi1_1", "__ps_powi1_16", FIXED(16), "_ZGV_LLVM_N16vv") +TLI_DEFINE_VECFUNC("__rd_powi1_1", "__rd_powi1_8", FIXED(8), "_ZGV_LLVM_N8vv") +TLI_DEFINE_VECFUNC("__rs_powi1_1", "__rs_powi1_16", FIXED(16), "_ZGV_LLVM_N16vv") + +TLI_DEFINE_VECFUNC("__fd_powk_1", "__fd_powk_8", FIXED(8), "_ZGV_LLVM_N8vv") +TLI_DEFINE_VECFUNC("__fs_powk_1", "__fs_powk_16", FIXED(16), "_ZGV_LLVM_N16vv") +TLI_DEFINE_VECFUNC("__pd_powk_1", "__pd_powk_8", FIXED(8), "_ZGV_LLVM_N8vv") +TLI_DEFINE_VECFUNC("__ps_powk_1", "__ps_powk_16", FIXED(16), "_ZGV_LLVM_N16vv") +TLI_DEFINE_VECFUNC("__rd_powk_1", "__rd_powk_8", FIXED(8), "_ZGV_LLVM_N8vv") +TLI_DEFINE_VECFUNC("__rs_powk_1", "__rs_powk_16", FIXED(16), "_ZGV_LLVM_N16vv") + +TLI_DEFINE_VECFUNC("__fd_powk1_1", "__fd_powk1_8", FIXED(8), "_ZGV_LLVM_N8vv") +TLI_DEFINE_VECFUNC("__fs_powk1_1", "__fs_powk1_16", FIXED(16), "_ZGV_LLVM_N16vv") +TLI_DEFINE_VECFUNC("__pd_powk1_1", "__pd_powk1_8", FIXED(8), "_ZGV_LLVM_N8vv") +TLI_DEFINE_VECFUNC("__ps_powk1_1", "__ps_powk1_16", FIXED(16), "_ZGV_LLVM_N16vv") +TLI_DEFINE_VECFUNC("__rd_powk1_1", "__rd_powk1_8", FIXED(8), "_ZGV_LLVM_N8vv") +TLI_DEFINE_VECFUNC("__rs_powk1_1", "__rs_powk1_16", FIXED(16), "_ZGV_LLVM_N16vv") + +TLI_DEFINE_VECFUNC("__fd_log10_1", "__fd_log10_8", FIXED(8), "_ZGV_LLVM_N8v") +TLI_DEFINE_VECFUNC("__fs_log10_1", "__fs_log10_16", FIXED(16), "_ZGV_LLVM_N16v") +TLI_DEFINE_VECFUNC("__pd_log10_1", "__pd_log10_8", FIXED(8), "_ZGV_LLVM_N8v") +TLI_DEFINE_VECFUNC("__ps_log10_1", "__ps_log10_16", FIXED(16), "_ZGV_LLVM_N16v") +TLI_DEFINE_VECFUNC("__rd_log10_1", "__rd_log10_8", FIXED(8), "_ZGV_LLVM_N8v") +TLI_DEFINE_VECFUNC("__rs_log10_1", "__rs_log10_16", FIXED(16), "_ZGV_LLVM_N16v") + +TLI_DEFINE_VECFUNC("__fd_log_1", "__fd_log_8", FIXED(8), "_ZGV_LLVM_N8v") +TLI_DEFINE_VECFUNC("__fs_log_1", "__fs_log_16", 
FIXED(16), "_ZGV_LLVM_N16v") +TLI_DEFINE_VECFUNC("__pd_log_1", "__pd_log_8", FIXED(8), "_ZGV_LLVM_N8v") +TLI_DEFINE_VECFUNC("__ps_log_1", "__ps_log_16", FIXED(16), "_ZGV_LLVM_N16v") +TLI_DEFINE_VECFUNC("__rd_log_1", "__rd_log_8", FIXED(8), "_ZGV_LLVM_N8v") +TLI_DEFINE_VECFUNC("__rs_log_1", "__rs_log_16", FIXED(16), "_ZGV_LLVM_N16v") + +TLI_DEFINE_VECFUNC("__fs_exp_1", "__fs_exp_16", FIXED(16), "_ZGV_LLVM_N16v") +TLI_DEFINE_VECFUNC("__pd_exp_1", "__pd_exp_8", FIXED(8), "_ZGV_LLVM_N8v") +TLI_DEFINE_VECFUNC("__ps_exp_1", "__ps_exp_16", FIXED(16), "_ZGV_LLVM_N16v") +TLI_DEFINE_VECFUNC("__rd_exp_1", "__rd_exp_8", FIXED(8), "_ZGV_LLVM_N8v") TLI_DEFINE_VECFUNC("__rs_exp_1", "__rs_exp_16", FIXED(16), "_ZGV_LLVM_N16v") #else @@ -1679,3 +1698,4 @@ TLI_DEFINE_VECFUNC("__rs_exp_1", "__rs_exp_16", FIXED(16), "_ZGV_LLVM_N16v") #undef TLI_DEFINE_ARMPL_VECFUNCS #undef TLI_DEFINE_PGMATH_AARCH64_VECFUNCS #undef TLI_DEFINE_PGMATH_X86_VECFUNCS +#undef TLI_DEFINE_PGMATH_X86_AVX512_VECFUNCS diff --git a/llvm/include/llvm/Frontend/Driver/CodeGenOptions.h b/llvm/include/llvm/Frontend/Driver/CodeGenOptions.h index 1af8d2c69ab4..f00880bfb762 100644 --- a/llvm/include/llvm/Frontend/Driver/CodeGenOptions.h +++ b/llvm/include/llvm/Frontend/Driver/CodeGenOptions.h @@ -36,7 +36,8 @@ enum class VectorLibrary { }; TargetLibraryInfoImpl *createTLII(llvm::Triple &TargetTriple, - VectorLibrary Veclib); + VectorLibrary Veclib, + bool TargetHasAVX512); } // end namespace llvm::driver diff --git a/llvm/lib/Analysis/TargetLibraryInfo.cpp b/llvm/lib/Analysis/TargetLibraryInfo.cpp index d6db8c3e41ef..ca7fd5bebd42 100644 --- a/llvm/lib/Analysis/TargetLibraryInfo.cpp +++ b/llvm/lib/Analysis/TargetLibraryInfo.cpp @@ -35,6 +35,8 @@ static cl::opt ClVectorLibrary( #ifdef ENABLE_CLASSIC_FLANG clEnumValN(TargetLibraryInfoImpl::PGMATH, "PGMATH", "PGI math library"), + clEnumValN(TargetLibraryInfoImpl::PGMATH_AVX512, "PGMATH_AVX512", + "PGI math library (AVX512)"), #endif clEnumValN(TargetLibraryInfoImpl::SVML,
"SVML", "Intel SVML library"), @@ -1299,6 +1301,14 @@ void TargetLibraryInfoImpl::addVectorizableFunctionsFromVecLib( } break; } + case PGMATH_AVX512: { + const VecDesc VecFuncs[] = { + #define TLI_DEFINE_PGMATH_X86_AVX512_VECFUNCS + #include "llvm/Analysis/VecFuncs.def" + }; + addVectorizableFunctions(VecFuncs); + break; + } #endif case NoLibrary: diff --git a/llvm/lib/Frontend/Driver/CodeGenOptions.cpp b/llvm/lib/Frontend/Driver/CodeGenOptions.cpp index 694450bcbc1f..00f852ae0348 100644 --- a/llvm/lib/Frontend/Driver/CodeGenOptions.cpp +++ b/llvm/lib/Frontend/Driver/CodeGenOptions.cpp @@ -13,7 +13,8 @@ namespace llvm::driver { TargetLibraryInfoImpl *createTLII(llvm::Triple &TargetTriple, - driver::VectorLibrary Veclib) { + driver::VectorLibrary Veclib, + bool TargetHasAVX512) { TargetLibraryInfoImpl *TLII = new TargetLibraryInfoImpl(TargetTriple); using VectorLibrary = llvm::driver::VectorLibrary; @@ -34,6 +35,9 @@ TargetLibraryInfoImpl *createTLII(llvm::Triple &TargetTriple, case VectorLibrary::PGMATH: TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::PGMATH, TargetTriple); + if (TargetHasAVX512) + TLII->addVectorizableFunctionsFromVecLib( + TargetLibraryInfoImpl::PGMATH_AVX512, TargetTriple); break; #endif case VectorLibrary::SVML: