From 2f34923549bb1c491156adc7691430ccc1427f04 Mon Sep 17 00:00:00 2001 From: Logan Riggs Date: Wed, 20 Mar 2024 10:18:22 -0700 Subject: [PATCH] Revert "GH-37848: [C++][Gandiva] Migrate LLVM JIT engine from MCJIT to ORC v2/LLJIT (#39098)" This reverts commit 83cba25017a5c3a03e47f1851f242fa284f93533. --- cpp/cmake_modules/FindLLVMAlt.cmake | 2 +- cpp/src/gandiva/configuration.h | 17 +- cpp/src/gandiva/engine.cc | 357 ++++++++-------------- cpp/src/gandiva/engine.h | 46 +-- cpp/src/gandiva/engine_llvm_test.cc | 26 +- cpp/src/gandiva/filter.cc | 8 +- cpp/src/gandiva/filter.h | 2 +- cpp/src/gandiva/llvm_generator.cc | 23 +- cpp/src/gandiva/llvm_generator.h | 12 +- cpp/src/gandiva/llvm_generator_test.cc | 21 +- cpp/src/gandiva/projector.cc | 8 +- cpp/src/gandiva/projector.h | 2 +- cpp/src/gandiva/tests/micro_benchmarks.cc | 31 -- cpp/src/gandiva/tests/test_util.cc | 4 - cpp/src/gandiva/tests/test_util.h | 2 - python/pyarrow/gandiva.pyx | 59 +--- python/pyarrow/includes/libgandiva.pxd | 14 +- python/pyarrow/tests/test_gandiva.py | 6 +- 18 files changed, 199 insertions(+), 441 deletions(-) diff --git a/cpp/cmake_modules/FindLLVMAlt.cmake b/cpp/cmake_modules/FindLLVMAlt.cmake index 2730f829817f6..69f680824b082 100644 --- a/cpp/cmake_modules/FindLLVMAlt.cmake +++ b/cpp/cmake_modules/FindLLVMAlt.cmake @@ -93,8 +93,8 @@ if(LLVM_FOUND) debuginfodwarf ipo linker + mcjit native - orcjit target) if(LLVM_VERSION_MAJOR GREATER_EQUAL 14) list(APPEND LLVM_TARGET_COMPONENTS passes) diff --git a/cpp/src/gandiva/configuration.h b/cpp/src/gandiva/configuration.h index 620c58537f963..f43a2b190731f 100644 --- a/cpp/src/gandiva/configuration.h +++ b/cpp/src/gandiva/configuration.h @@ -37,12 +37,10 @@ class GANDIVA_EXPORT Configuration { explicit Configuration(bool optimize, std::shared_ptr function_registry = - gandiva::default_function_registry(), - bool dump_ir = false) + gandiva::default_function_registry()) : optimize_(optimize), target_host_cpu_(true), - function_registry_(std::move(function_registry)), - dump_ir_(dump_ir) {} + function_registry_(function_registry) {} Configuration() : Configuration(true) {} @@ -52,13 +50,11 @@ class GANDIVA_EXPORT Configuration { bool optimize() const { return optimize_; } bool target_host_cpu() const { return target_host_cpu_; } - bool dump_ir() const { return dump_ir_; } std::shared_ptr function_registry() const { return function_registry_; } void set_optimize(bool optimize) { optimize_ = optimize; } - void set_dump_ir(bool dump_ir) { dump_ir_ = dump_ir; } void target_host_cpu(bool target_host_cpu) { target_host_cpu_ = target_host_cpu; } void set_function_registry(std::shared_ptr function_registry) { function_registry_ = std::move(function_registry); @@ -69,9 +65,6 @@ class GANDIVA_EXPORT Configuration { bool target_host_cpu_; /* set the mcpu flag to host cpu while compiling llvm ir */ std::shared_ptr function_registry_; /* function registry that may contain external functions */ - // flag indicating if IR dumping is needed, defaults to false, and turning it on will - // negatively affect performance - bool dump_ir_ = false; }; /// \brief configuration builder for gandiva @@ -90,12 +83,6 @@ class GANDIVA_EXPORT ConfigurationBuilder { return configuration; } - std::shared_ptr build_with_ir_dumping(bool dump_ir) { - std::shared_ptr configuration( - new Configuration(true, gandiva::default_function_registry(), dump_ir)); - return configuration; - } - std::shared_ptr build( std::shared_ptr function_registry) { std::shared_ptr configuration( diff --git a/cpp/src/gandiva/engine.cc b/cpp/src/gandiva/engine.cc index fc047f2ac0763..1cea1fd2cbf30 100644 --- a/cpp/src/gandiva/engine.cc +++ b/cpp/src/gandiva/engine.cc @@ -31,8 +31,7 @@ #include #include -#include -#include +#include "arrow/util/logging.h" #if defined(_MSC_VER) #pragma warning(push) @@ -47,14 +46,13 @@ #include #include #include -#include +#include #include #include #include #include #include #include -#include #if LLVM_VERSION_MAJOR >= 17 #include #else @@ -88,13 +86,6 @@ #include #include -// JITLink is available in LLVM 9+ -// but the `InProcessMemoryManager::Create` API was added since LLVM 14 -#if LLVM_VERSION_MAJOR >= 14 && !defined(_WIN32) -#define JIT_LINK_SUPPORTED -#include -#endif - #if defined(_MSC_VER) #pragma warning(pop) #endif @@ -112,136 +103,9 @@ extern const size_t kPrecompiledBitcodeSize; std::once_flag llvm_init_once_flag; static bool llvm_init = false; static llvm::StringRef cpu_name; -static std::vector cpu_attrs; +static llvm::SmallVector cpu_attrs; std::once_flag register_exported_funcs_flag; -template -arrow::Result AsArrowResult(llvm::Expected& expected, - const std::string& error_context) { - if (!expected) { - return Status::CodeGenError(error_context, llvm::toString(expected.takeError())); - } - return std::move(expected.get()); -} - -Result MakeTargetMachineBuilder( - const Configuration& conf) { - llvm::orc::JITTargetMachineBuilder jtmb( - (llvm::Triple(llvm::sys::getDefaultTargetTriple()))); - if (conf.target_host_cpu()) { - jtmb.setCPU(cpu_name.str()); - jtmb.addFeatures(cpu_attrs); - } - auto const opt_level = - conf.optimize() ? llvm::CodeGenOpt::Aggressive : llvm::CodeGenOpt::None; - jtmb.setCodeGenOptLevel(opt_level); - return jtmb; -} - -std::string DumpModuleIR(const llvm::Module& module) { - std::string ir; - llvm::raw_string_ostream stream(ir); - module.print(stream, nullptr); - return ir; -} - -void AddAbsoluteSymbol(llvm::orc::LLJIT& lljit, const std::string& name, - void* function_ptr) { - llvm::orc::MangleAndInterner mangle(lljit.getExecutionSession(), lljit.getDataLayout()); - - // https://github.com/llvm/llvm-project/commit/8b1771bd9f304be39d4dcbdcccedb6d3bcd18200#diff-77984a824d9182e5c67a481740f3bc5da78d5bd4cf6e1716a083ddb30a4a4931 - // LLVM 17 introduced ExecutorSymbolDef and move most of ORC APIs to ExecutorAddr -#if LLVM_VERSION_MAJOR >= 17 - llvm::orc::ExecutorSymbolDef symbol( - llvm::orc::ExecutorAddr(reinterpret_cast(function_ptr)), - llvm::JITSymbolFlags::Exported); -#else - llvm::JITEvaluatedSymbol symbol(reinterpret_cast(function_ptr), - llvm::JITSymbolFlags::Exported); -#endif - - auto error = lljit.getMainJITDylib().define( - llvm::orc::absoluteSymbols({{mangle(name), symbol}})); - llvm::cantFail(std::move(error)); -} - -// add current process symbol to dylib -// LLVM >= 18 does this automatically -void AddProcessSymbol(llvm::orc::LLJIT& lljit) { - lljit.getMainJITDylib().addGenerator( - llvm::cantFail(llvm::orc::DynamicLibrarySearchGenerator::GetForCurrentProcess( - lljit.getDataLayout().getGlobalPrefix()))); - // the `atexit` symbol cannot be found for ASAN -#ifdef ADDRESS_SANITIZER - if (!lljit.lookup("atexit")) { - AddAbsoluteSymbol(lljit, "atexit", reinterpret_cast(atexit)); - } -#endif -} - -#ifdef JIT_LINK_SUPPORTED -Result> CreateMemmoryManager() { - auto maybe_mem_manager = llvm::jitlink::InProcessMemoryManager::Create(); - return AsArrowResult(maybe_mem_manager, "Could not create memory manager: "); -} - -Status UseJITLinkIfEnabled(llvm::orc::LLJITBuilder& jit_builder) { - static auto maybe_use_jit_link = ::arrow::internal::GetEnvVar("GANDIVA_USE_JIT_LINK"); - if (maybe_use_jit_link.ok()) { - ARROW_ASSIGN_OR_RAISE(static auto memory_manager, CreateMemmoryManager()); - jit_builder.setObjectLinkingLayerCreator( - [&](llvm::orc::ExecutionSession& ES, const llvm::Triple& TT) { - return std::make_unique(ES, *memory_manager); - }); - } - return Status::OK(); -} -#endif - -Result> BuildJIT( - llvm::orc::JITTargetMachineBuilder jtmb, - std::optional>& object_cache) { - llvm::orc::LLJITBuilder jit_builder; - -#ifdef JIT_LINK_SUPPORTED - ARROW_RETURN_NOT_OK(UseJITLinkIfEnabled(jit_builder)); -#endif - - jit_builder.setJITTargetMachineBuilder(std::move(jtmb)); - if (object_cache.has_value()) { - jit_builder.setCompileFunctionCreator( - [&object_cache](llvm::orc::JITTargetMachineBuilder JTMB) - -> llvm::Expected> { - auto target_machine = JTMB.createTargetMachine(); - if (!target_machine) { - return target_machine.takeError(); - } - // after compilation, the object code will be stored into the given object - // cache - return std::make_unique( - std::move(*target_machine), &object_cache.value().get()); - }); - } - auto maybe_jit = jit_builder.create(); - ARROW_ASSIGN_OR_RAISE(auto jit, - AsArrowResult(maybe_jit, "Could not create LLJIT instance: ")); - - AddProcessSymbol(*jit); - return jit; -} - -Status Engine::SetLLVMObjectCache(GandivaObjectCache& object_cache) { - auto cached_buffer = object_cache.getObject(nullptr); - if (cached_buffer) { - auto error = lljit_->addObjectFile(std::move(cached_buffer)); - if (error) { - return Status::CodeGenError("Failed to add cached object file to LLJIT: ", - llvm::toString(std::move(error))); - } - } - return Status::OK(); -} - void Engine::InitOnce() { DCHECK_EQ(llvm_init, false); @@ -263,34 +127,28 @@ void Engine::InitOnce() { } } ARROW_LOG(INFO) << "Detected CPU Name : " << cpu_name.str(); - ARROW_LOG(INFO) << "Detected CPU Features: [" << cpu_attrs_str << "]"; + ARROW_LOG(INFO) << "Detected CPU Features:" << cpu_attrs_str; llvm_init = true; } Engine::Engine(const std::shared_ptr& conf, - std::unique_ptr lljit, - std::unique_ptr target_machine, bool cached) - : context_(std::make_unique()), - lljit_(std::move(lljit)), + std::unique_ptr ctx, + std::unique_ptr engine, llvm::Module* module, + bool cached) + : context_(std::move(ctx)), + execution_engine_(std::move(engine)), ir_builder_(std::make_unique>(*context_)), + module_(module), types_(*context_), optimize_(conf->optimize()), cached_(cached), - function_registry_(conf->function_registry()), - target_machine_(std::move(target_machine)), - conf_(conf) { - // LLVM 10 doesn't like the expr function name to be the same as the module name - auto module_id = "gdv_module_" + std::to_string(reinterpret_cast(this)); - module_ = std::make_unique(module_id, *context_); -} - -Engine::~Engine() {} + function_registry_(conf->function_registry()) {} Status Engine::Init() { std::call_once(register_exported_funcs_flag, gandiva::RegisterExportedFuncs); - // Add mappings for global functions that can be accessed from LLVM/IR module. ARROW_RETURN_NOT_OK(AddGlobalMappings()); + return Status::OK(); } @@ -305,32 +163,101 @@ Status Engine::LoadFunctionIRs() { } /// factory method to construct the engine. -Result> Engine::Make( - const std::shared_ptr& conf, bool cached, - std::optional> object_cache) { +Status Engine::Make(const std::shared_ptr& conf, bool cached, + std::unique_ptr* out) { std::call_once(llvm_init_once_flag, InitOnce); - ARROW_ASSIGN_OR_RAISE(auto jtmb, MakeTargetMachineBuilder(*conf)); - ARROW_ASSIGN_OR_RAISE(auto jit, BuildJIT(jtmb, object_cache)); - auto maybe_tm = jtmb.createTargetMachine(); - ARROW_ASSIGN_OR_RAISE(auto target_machine, - AsArrowResult(maybe_tm, "Could not create target machine: ")); + auto ctx = std::make_unique(); + auto module = std::make_unique("codegen", *ctx); - std::unique_ptr engine{ - new Engine(conf, std::move(jit), std::move(target_machine), cached)}; + // Capture before moving, ExecutionEngine does not allow retrieving the + // original Module. + auto module_ptr = module.get(); + + auto opt_level = + conf->optimize() ? llvm::CodeGenOpt::Aggressive : llvm::CodeGenOpt::None; + + // Note that the lifetime of the error string is not captured by the + // ExecutionEngine but only for the lifetime of the builder. Found by + // inspecting LLVM sources. + std::string builder_error; + + llvm::EngineBuilder engine_builder(std::move(module)); + + engine_builder.setEngineKind(llvm::EngineKind::JIT) + .setOptLevel(opt_level) + .setErrorStr(&builder_error); + + if (conf->target_host_cpu()) { + engine_builder.setMCPU(cpu_name); + engine_builder.setMAttrs(cpu_attrs); + } + std::unique_ptr exec_engine{engine_builder.create()}; + + if (exec_engine == nullptr) { + return Status::CodeGenError("Could not instantiate llvm::ExecutionEngine: ", + builder_error); + } + std::unique_ptr engine{ + new Engine(conf, std::move(ctx), std::move(exec_engine), module_ptr, cached)}; ARROW_RETURN_NOT_OK(engine->Init()); - return engine; + *out = std::move(engine); + return Status::OK(); +} + +// This method was modified from its original version for a part of MLIR +// Original source from +// https://github.com/llvm/llvm-project/blob/9f2ce5b915a505a5488a5cf91bb0a8efa9ddfff7/mlir/lib/ExecutionEngine/ExecutionEngine.cpp +// The original copyright notice follows. + +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +static void SetDataLayout(llvm::Module* module) { + auto target_triple = llvm::sys::getDefaultTargetTriple(); + std::string error_message; + auto target = llvm::TargetRegistry::lookupTarget(target_triple, error_message); + if (!target) { + return; + } + + std::string cpu(llvm::sys::getHostCPUName()); + llvm::SubtargetFeatures features; + llvm::StringMap host_features; + + if (llvm::sys::getHostCPUFeatures(host_features)) { + for (auto& f : host_features) { + features.AddFeature(f.first(), f.second); + } + } + + std::unique_ptr machine( + target->createTargetMachine(target_triple, cpu, features.getString(), {}, {})); + + module->setDataLayout(machine->createDataLayout()); +} +// end of the modified method from MLIR + +template +static arrow::Result AsArrowResult(llvm::Expected& expected) { + if (!expected) { + std::string str; + llvm::raw_string_ostream stream(str); + stream << expected.takeError(); + return Status::CodeGenError(stream.str()); + } + return std::move(expected.get()); } static arrow::Status VerifyAndLinkModule( - llvm::Module& dest_module, + llvm::Module* dest_module, llvm::Expected> src_module_or_error) { - ARROW_ASSIGN_OR_RAISE( - auto src_ir_module, - AsArrowResult(src_module_or_error, "Failed to verify and link module: ")); + ARROW_ASSIGN_OR_RAISE(auto src_ir_module, AsArrowResult(src_module_or_error)); - src_ir_module->setDataLayout(dest_module.getDataLayout()); + // set dataLayout + SetDataLayout(src_ir_module.get()); std::string error_info; llvm::raw_string_ostream error_stream(error_info); @@ -338,21 +265,16 @@ static arrow::Status VerifyAndLinkModule( llvm::verifyModule(*src_ir_module, &error_stream), Status::CodeGenError("verify of IR Module failed: " + error_stream.str())); - ARROW_RETURN_IF(llvm::Linker::linkModules(dest_module, std::move(src_ir_module)), + ARROW_RETURN_IF(llvm::Linker::linkModules(*dest_module, std::move(src_ir_module)), Status::CodeGenError("failed to link IR Modules")); return Status::OK(); } -llvm::Module* Engine::module() { - DCHECK(!module_finalized_) << "module cannot be accessed after finalized"; - return module_.get(); -} - // Handling for pre-compiled IR libraries. Status Engine::LoadPreCompiledIR() { - auto const bitcode = llvm::StringRef(reinterpret_cast(kPrecompiledBitcode), - kPrecompiledBitcodeSize); + auto bitcode = llvm::StringRef(reinterpret_cast(kPrecompiledBitcode), + kPrecompiledBitcodeSize); /// Read from file into memory buffer. llvm::ErrorOr> buffer_or_error = @@ -369,14 +291,14 @@ Status Engine::LoadPreCompiledIR() { llvm::getOwningLazyBitcodeModule(std::move(buffer), *context()); // NOTE: llvm::handleAllErrors() fails linking with RTTI-disabled LLVM builds // (ARROW-5148) - ARROW_RETURN_NOT_OK(VerifyAndLinkModule(*module_, std::move(module_or_error))); + ARROW_RETURN_NOT_OK(VerifyAndLinkModule(module_, std::move(module_or_error))); return Status::OK(); } static llvm::MemoryBufferRef AsLLVMMemoryBuffer(const arrow::Buffer& arrow_buffer) { - auto const data = reinterpret_cast(arrow_buffer.data()); - auto const size = arrow_buffer.size(); - return {llvm::StringRef(data, size), "external_bitcode"}; + auto data = reinterpret_cast(arrow_buffer.data()); + auto size = arrow_buffer.size(); + return llvm::MemoryBufferRef(llvm::StringRef(data, size), "external_bitcode"); } Status Engine::LoadExternalPreCompiledIR() { @@ -384,7 +306,7 @@ Status Engine::LoadExternalPreCompiledIR() { for (auto const& buffer : buffers) { auto llvm_memory_buffer_ref = AsLLVMMemoryBuffer(*buffer); auto module_or_error = llvm::parseBitcodeFile(llvm_memory_buffer_ref, *context()); - ARROW_RETURN_NOT_OK(VerifyAndLinkModule(*module_, std::move(module_or_error))); + ARROW_RETURN_NOT_OK(VerifyAndLinkModule(module_, std::move(module_or_error))); } return Status::OK(); @@ -464,8 +386,7 @@ static void OptimizeModuleWithLegacyPassManager(llvm::Module& module, std::unique_ptr pass_manager( new llvm::legacy::PassManager()); - pass_manager->add( - llvm::createTargetTransformInfoWrapperPass(std::move(target_analysis))); + pass_manager->add(llvm::createTargetTransformInfoWrapperPass(target_analysis)); pass_manager->add(llvm::createFunctionInliningPass()); pass_manager->add(llvm::createInstructionCombiningPass()); pass_manager->add(llvm::createPromoteMemoryToRegisterPass()); @@ -490,64 +411,40 @@ Status Engine::FinalizeModule() { ARROW_RETURN_NOT_OK(RemoveUnusedFunctions()); if (optimize_) { - auto target_analysis = target_machine_->getTargetIRAnalysis(); + auto target_analysis = execution_engine_->getTargetMachine()->getTargetIRAnalysis(); + // misc passes to allow for inlining, vectorization, .. #if LLVM_VERSION_MAJOR >= 14 - OptimizeModuleWithNewPassManager(*module_, std::move(target_analysis)); + OptimizeModuleWithNewPassManager(*module_, target_analysis); #else - OptimizeModuleWithLegacyPassManager(*module_, std::move(target_analysis)); + OptimizeModuleWithLegacyPassManager(*module_, target_analysis); #endif } ARROW_RETURN_IF(llvm::verifyModule(*module_, &llvm::errs()), Status::CodeGenError("Module verification failed after optimizer")); - - // print the module IR and save it for later use if IR dumping is needed - // since the module will be moved to construct LLJIT instance, and it is not - // available after LLJIT instance is constructed - if (conf_->dump_ir()) { - module_ir_ = DumpModuleIR(*module_); - } - - llvm::orc::ThreadSafeModule tsm(std::move(module_), std::move(context_)); - auto error = lljit_->addIRModule(std::move(tsm)); - if (error) { - return Status::CodeGenError("Failed to add IR module to LLJIT: ", - llvm::toString(std::move(error))); - } } + + // do the compilation + execution_engine_->finalizeObject(); module_finalized_ = true; return Status::OK(); } -Result Engine::CompiledFunction(const std::string& function) { - DCHECK(module_finalized_) - << "module must be finalized before getting compiled function"; - auto sym = lljit_->lookup(function); - if (!sym) { - return Status::CodeGenError("Failed to look up function: " + function + - " error: " + llvm::toString(sym.takeError())); - } - // Since LLVM 15, `LLJIT::lookup` returns ExecutorAddrs rather than - // JITEvaluatedSymbols -#if LLVM_VERSION_MAJOR >= 15 - auto fn_addr = sym->getValue(); -#else - auto fn_addr = sym->getAddress(); -#endif - auto fn_ptr = reinterpret_cast(fn_addr); - if (fn_ptr == nullptr) { - return Status::CodeGenError("Failed to get address for function: " + function); - } - return fn_ptr; +void* Engine::CompiledFunction(std::string& function) { + DCHECK(module_finalized_); + return reinterpret_cast(execution_engine_->getFunctionAddress(function)); } void Engine::AddGlobalMappingForFunc(const std::string& name, llvm::Type* ret_type, - const std::vector& args, void* func) { - auto const prototype = llvm::FunctionType::get(ret_type, args, /*is_var_arg*/ false); - llvm::Function::Create(prototype, llvm::GlobalValue::ExternalLinkage, name, module()); - AddAbsoluteSymbol(*lljit_, name, func); + const std::vector& args, + void* function_ptr) { + constexpr bool is_var_arg = false; + auto prototype = llvm::FunctionType::get(ret_type, args, is_var_arg); + constexpr auto linkage = llvm::GlobalValue::ExternalLinkage; + auto fn = llvm::Function::Create(prototype, linkage, name, module()); + execution_engine_->addGlobalMapping(fn, function_ptr); } arrow::Status Engine::AddGlobalMappings() { @@ -556,9 +453,11 @@ arrow::Status Engine::AddGlobalMappings() { return c_funcs.AddMappings(this); } -const std::string& Engine::ir() { - DCHECK(!module_ir_.empty()) << "dump_ir in Configuration must be set for dumping IR"; - return module_ir_; +std::string Engine::DumpIR() { + std::string ir; + llvm::raw_string_ostream stream(ir); + module_->print(stream, nullptr); + return ir; } } // namespace gandiva diff --git a/cpp/src/gandiva/engine.h b/cpp/src/gandiva/engine.h index 565c3f142502d..df2d8b36d9260 100644 --- a/cpp/src/gandiva/engine.h +++ b/cpp/src/gandiva/engine.h @@ -17,16 +17,11 @@ #pragma once -#include -#include #include -#include #include #include #include -#include - #include "arrow/util/logging.h" #include "arrow/util/macros.h" #include "gandiva/configuration.h" @@ -35,34 +30,23 @@ #include "gandiva/llvm_types.h" #include "gandiva/visibility.h" -namespace llvm::orc { -class LLJIT; -} // namespace llvm::orc - namespace gandiva { /// \brief LLVM Execution engine wrapper. class GANDIVA_EXPORT Engine { public: - ~Engine(); llvm::LLVMContext* context() { return context_.get(); } llvm::IRBuilder<>* ir_builder() { return ir_builder_.get(); } LLVMTypes* types() { return &types_; } - - /// Retrieve LLVM module in the engine. - /// This should only be called before `FinalizeModule` is called - llvm::Module* module(); + llvm::Module* module() { return module_; } /// Factory method to create and initialize the engine object. /// /// \param[in] config the engine configuration /// \param[in] cached flag to mark if the module is already compiled and cached - /// \param[in] object_cache an optional object_cache used for building the module - /// \return arrow::Result containing the created engine - static Result> Make( - const std::shared_ptr& config, bool cached, - std::optional> object_cache = - std::nullopt); + /// \param[out] engine the created engine + static Status Make(const std::shared_ptr& config, bool cached, + std::unique_ptr* engine); /// Add the function to the list of IR functions that need to be compiled. /// Compiling only the functions that are used by the module saves time. @@ -75,31 +59,36 @@ class GANDIVA_EXPORT Engine { Status FinalizeModule(); /// Set LLVM ObjectCache. - Status SetLLVMObjectCache(GandivaObjectCache& object_cache); + void SetLLVMObjectCache(GandivaObjectCache& object_cache) { + execution_engine_->setObjectCache(&object_cache); + } /// Get the compiled function corresponding to the irfunction. - Result CompiledFunction(const std::string& function); + void* CompiledFunction(std::string& function); // Create and add a mapping for the cpp function to make it accessible from LLVM. void AddGlobalMappingForFunc(const std::string& name, llvm::Type* ret_type, const std::vector& args, void* func); /// Return the generated IR for the module. - const std::string& ir(); + std::string DumpIR(); /// Load the function IRs that can be accessed in the module. Status LoadFunctionIRs(); private: Engine(const std::shared_ptr& conf, - std::unique_ptr lljit, - std::unique_ptr target_machine, bool cached); + std::unique_ptr ctx, + std::unique_ptr engine, llvm::Module* module, + bool cached); // Post construction init. This _must_ be called after the constructor. Status Init(); static void InitOnce(); + llvm::ExecutionEngine& execution_engine() { return *execution_engine_; } + /// load pre-compiled IR modules from precompiled_bitcode.cc and merge them into /// the main module. Status LoadPreCompiledIR(); @@ -114,9 +103,9 @@ class GANDIVA_EXPORT Engine { Status RemoveUnusedFunctions(); std::unique_ptr context_; - std::unique_ptr lljit_; + std::unique_ptr execution_engine_; std::unique_ptr> ir_builder_; - std::unique_ptr module_; + llvm::Module* module_; LLVMTypes types_; std::vector functions_to_compile_; @@ -126,9 +115,6 @@ class GANDIVA_EXPORT Engine { bool cached_; bool functions_loaded_ = false; std::shared_ptr function_registry_; - std::string module_ir_; - std::unique_ptr target_machine_; - const std::shared_ptr conf_; }; } // namespace gandiva diff --git a/cpp/src/gandiva/engine_llvm_test.cc b/cpp/src/gandiva/engine_llvm_test.cc index 78f468d13fa1f..9baaa82d2e0d3 100644 --- a/cpp/src/gandiva/engine_llvm_test.cc +++ b/cpp/src/gandiva/engine_llvm_test.cc @@ -24,14 +24,14 @@ namespace gandiva { -using add_vector_func_t = int64_t (*)(int64_t*, int); +typedef int64_t (*add_vector_func_t)(int64_t* data, int n); class TestEngine : public ::testing::Test { protected: - std::string BuildVecAdd(Engine* gdv_engine) { - auto types = gdv_engine->types(); - llvm::IRBuilder<>* builder = gdv_engine->ir_builder(); - llvm::LLVMContext* context = gdv_engine->context(); + std::string BuildVecAdd(Engine* engine) { + auto types = engine->types(); + llvm::IRBuilder<>* builder = engine->ir_builder(); + llvm::LLVMContext* context = engine->context(); // Create fn prototype : // int64_t add_longs(int64_t *elements, int32_t nelements) @@ -42,10 +42,10 @@ class TestEngine : public ::testing::Test { llvm::FunctionType::get(types->i64_type(), arguments, false /*isVarArg*/); // Create fn - std::string func_name = "add_longs_test_expr"; - gdv_engine->AddFunctionToCompile(func_name); + std::string func_name = "add_longs"; + engine->AddFunctionToCompile(func_name); llvm::Function* fn = llvm::Function::Create( - prototype, llvm::GlobalValue::ExternalLinkage, func_name, gdv_engine->module()); + prototype, llvm::GlobalValue::ExternalLinkage, func_name, engine->module()); assert(fn != nullptr); // Name the arguments @@ -99,9 +99,7 @@ class TestEngine : public ::testing::Test { return func_name; } - void BuildEngine() { - ASSERT_OK_AND_ASSIGN(engine, Engine::Make(TestConfiguration(), false)); - } + void BuildEngine() { ASSERT_OK(Engine::Make(TestConfiguration(), false, &engine)); } std::unique_ptr engine; std::shared_ptr configuration = TestConfiguration(); @@ -113,8 +111,7 @@ TEST_F(TestEngine, TestAddUnoptimised) { std::string fn_name = BuildVecAdd(engine.get()); ASSERT_OK(engine->FinalizeModule()); - ASSERT_OK_AND_ASSIGN(auto fn_ptr, engine->CompiledFunction(fn_name)); - auto add_func = reinterpret_cast(fn_ptr); + auto add_func = reinterpret_cast(engine->CompiledFunction(fn_name)); int64_t my_array[] = {1, 3, -5, 8, 10}; EXPECT_EQ(add_func(my_array, 5), 17); @@ -126,8 +123,7 @@ TEST_F(TestEngine, TestAddOptimised) { std::string fn_name = BuildVecAdd(engine.get()); ASSERT_OK(engine->FinalizeModule()); - EXPECT_OK_AND_ASSIGN(auto fn_ptr, engine->CompiledFunction(fn_name)); - auto add_func = reinterpret_cast(fn_ptr); + auto add_func = reinterpret_cast(engine->CompiledFunction(fn_name)); int64_t my_array[] = {1, 3, -5, 8, 10}; EXPECT_EQ(add_func(my_array, 5), 17); diff --git a/cpp/src/gandiva/filter.cc b/cpp/src/gandiva/filter.cc index 8a270cfdc06f2..416d97b5dbd1d 100644 --- a/cpp/src/gandiva/filter.cc +++ b/cpp/src/gandiva/filter.cc @@ -65,8 +65,8 @@ Status Filter::Make(SchemaPtr schema, ConditionPtr condition, GandivaObjectCache obj_cache(cache, cache_key); // Build LLVM generator, and generate code for the specified expression - ARROW_ASSIGN_OR_RAISE(auto llvm_gen, - LLVMGenerator::Make(configuration, is_cached, obj_cache)); + std::unique_ptr llvm_gen; + ARROW_RETURN_NOT_OK(LLVMGenerator::Make(configuration, is_cached, &llvm_gen)); if (!is_cached) { // Run the validation on the expression. @@ -77,7 +77,7 @@ Status Filter::Make(SchemaPtr schema, ConditionPtr condition, } // Set the object cache for LLVM - ARROW_RETURN_NOT_OK(llvm_gen->SetLLVMObjectCache(obj_cache)); + llvm_gen->SetLLVMObjectCache(obj_cache); ARROW_RETURN_NOT_OK(llvm_gen->Build({condition}, SelectionVector::Mode::MODE_NONE)); @@ -119,7 +119,7 @@ Status Filter::Evaluate(const arrow::RecordBatch& batch, return out_selection->PopulateFromBitMap(result, bitmap_size, num_rows - 1); } -const std::string& Filter::DumpIR() { return llvm_generator_->ir(); } +std::string Filter::DumpIR() { return llvm_generator_->DumpIR(); } void Filter::SetBuiltFromCache(bool flag) { built_from_cache_ = flag; } diff --git a/cpp/src/gandiva/filter.h b/cpp/src/gandiva/filter.h index b4043d93c857a..cc536bca1bb3d 100644 --- a/cpp/src/gandiva/filter.h +++ b/cpp/src/gandiva/filter.h @@ -76,7 +76,7 @@ class GANDIVA_EXPORT Filter { Status Evaluate(const arrow::RecordBatch& batch, std::shared_ptr out_selection); - const std::string& DumpIR(); + std::string DumpIR(); void SetBuiltFromCache(bool flag); diff --git a/cpp/src/gandiva/llvm_generator.cc b/cpp/src/gandiva/llvm_generator.cc index 480ea668faeb7..feaae33616628 100644 --- a/cpp/src/gandiva/llvm_generator.cc +++ b/cpp/src/gandiva/llvm_generator.cc @@ -41,15 +41,15 @@ LLVMGenerator::LLVMGenerator(bool cached, function_registry_(std::move(function_registry)), enable_ir_traces_(false) {} -Result> LLVMGenerator::Make( - const std::shared_ptr& config, bool cached, - std::optional> object_cache) { - std::unique_ptr llvm_generator( +Status LLVMGenerator::Make(const std::shared_ptr& config, bool cached, + std::unique_ptr* llvm_generator) { + std::unique_ptr llvmgen_obj( new LLVMGenerator(cached, config->function_registry())); - ARROW_ASSIGN_OR_RAISE(llvm_generator->engine_, - Engine::Make(config, cached, object_cache)); - return llvm_generator; + ARROW_RETURN_NOT_OK(Engine::Make(config, cached, &(llvmgen_obj->engine_))); + *llvm_generator = std::move(llvmgen_obj); + + return Status::OK(); } std::shared_ptr>> @@ -61,8 +61,8 @@ LLVMGenerator::GetCache() { return shared_cache; } -Status LLVMGenerator::SetLLVMObjectCache(GandivaObjectCache& object_cache) { - return engine_->SetLLVMObjectCache(object_cache); +void LLVMGenerator::SetLLVMObjectCache(GandivaObjectCache& object_cache) { + engine_->SetLLVMObjectCache(object_cache); } Status LLVMGenerator::Add(const ExpressionPtr expr, const FieldDescriptorPtr output) { @@ -72,7 +72,7 @@ Status LLVMGenerator::Add(const ExpressionPtr expr, const FieldDescriptorPtr out ValueValidityPairPtr value_validity; ARROW_RETURN_NOT_OK(decomposer.Decompose(*expr->root(), &value_validity)); // Generate the IR function for the decomposed expression. - auto compiled_expr = std::make_unique(value_validity, output); + std::unique_ptr compiled_expr(new CompiledExpr(value_validity, output)); std::string fn_name = "expr_" + std::to_string(idx) + "_" + std::to_string(static_cast(selection_vector_mode_)); if (!cached_) { @@ -102,8 +102,7 @@ Status LLVMGenerator::Build(const ExpressionVector& exprs, SelectionVector::Mode // setup the jit functions for each expression. for (auto& compiled_expr : compiled_exprs_) { auto fn_name = compiled_expr->GetFunctionName(mode); - ARROW_ASSIGN_OR_RAISE(auto fn_ptr, engine_->CompiledFunction(fn_name)); - auto jit_fn = reinterpret_cast(fn_ptr); + auto jit_fn = reinterpret_cast(engine_->CompiledFunction(fn_name)); compiled_expr->SetJITFunction(selection_vector_mode_, jit_fn); } diff --git a/cpp/src/gandiva/llvm_generator.h b/cpp/src/gandiva/llvm_generator.h index 53ea1c0e48cb2..351753bfcb800 100644 --- a/cpp/src/gandiva/llvm_generator.h +++ b/cpp/src/gandiva/llvm_generator.h @@ -18,9 +18,7 @@ #pragma once #include -#include #include -#include #include #include @@ -49,17 +47,15 @@ class FunctionHolder; class GANDIVA_EXPORT LLVMGenerator { public: /// \brief Factory method to initialize the generator. - static Result> Make( - const std::shared_ptr& config, bool cached, - std::optional> object_cache = - std::nullopt); + static Status Make(const std::shared_ptr& config, bool cached, + std::unique_ptr* llvm_generator); /// \brief Get the cache to be used for LLVM ObjectCache. static std::shared_ptr>> GetCache(); /// \brief Set LLVM ObjectCache. - Status SetLLVMObjectCache(GandivaObjectCache& object_cache); + void SetLLVMObjectCache(GandivaObjectCache& object_cache); /// \brief Build the code for the expression trees for default mode with a LLVM /// ObjectCache. Each element in the vector represents an expression tree @@ -83,7 +79,7 @@ class GANDIVA_EXPORT LLVMGenerator { SelectionVector::Mode selection_vector_mode() { return selection_vector_mode_; } LLVMTypes* types() { return engine_->types(); } llvm::Module* module() { return engine_->module(); } - const std::string& ir() { return engine_->ir(); } + std::string DumpIR() { return engine_->DumpIR(); } private: explicit LLVMGenerator(bool cached, diff --git a/cpp/src/gandiva/llvm_generator_test.cc b/cpp/src/gandiva/llvm_generator_test.cc index 79654e7b78c7e..853d8ae6c3b8d 100644 --- a/cpp/src/gandiva/llvm_generator_test.cc +++ b/cpp/src/gandiva/llvm_generator_test.cc @@ -47,7 +47,8 @@ class TestLLVMGenerator : public ::testing::Test { auto external_registry = std::make_shared(); auto config = config_factory(std::move(external_registry)); - ASSERT_OK_AND_ASSIGN(auto generator, LLVMGenerator::Make(config, false)); + std::unique_ptr generator; + ASSERT_OK(LLVMGenerator::Make(config, false, &generator)); auto module = generator->module(); ASSERT_OK(generator->engine_->LoadFunctionIRs()); @@ -57,7 +58,8 @@ class TestLLVMGenerator : public ::testing::Test { // Verify that a valid pc function exists for every function in the registry. TEST_F(TestLLVMGenerator, VerifyPCFunctions) { - ASSERT_OK_AND_ASSIGN(auto generator, LLVMGenerator::Make(TestConfiguration(), false)); + std::unique_ptr generator; + ASSERT_OK(LLVMGenerator::Make(TestConfiguration(), false, &generator)); llvm::Module* module = generator->module(); ASSERT_OK(generator->engine_->LoadFunctionIRs()); @@ -68,8 +70,8 @@ TEST_F(TestLLVMGenerator, VerifyPCFunctions) { TEST_F(TestLLVMGenerator, TestAdd) { // Setup LLVM generator to do an arithmetic add of two vectors - ASSERT_OK_AND_ASSIGN(auto generator, - LLVMGenerator::Make(TestConfigWithIrDumping(), false)); + std::unique_ptr generator; + ASSERT_OK(LLVMGenerator::Make(TestConfiguration(), false, &generator)); Annotator annotator; auto field0 = std::make_shared("f0", arrow::int32()); @@ -98,22 +100,18 @@ TEST_F(TestLLVMGenerator, TestAdd) { auto field_sum = std::make_shared("out", arrow::int32()); auto desc_sum = annotator.CheckAndAddInputFieldDescriptor(field_sum); - // LLVM 10 doesn't like the expr function name to be the same as the module name when - // LLJIT is used - std::string fn_name = "llvm_gen_test_add_expr"; + std::string fn_name = "codegen"; ASSERT_OK(generator->engine_->LoadFunctionIRs()); ASSERT_OK(generator->CodeGenExprValue(func_dex, 4, desc_sum, 0, fn_name, SelectionVector::MODE_NONE)); ASSERT_OK(generator->engine_->FinalizeModule()); - auto const& ir = generator->engine_->ir(); + auto ir = generator->engine_->DumpIR(); EXPECT_THAT(ir, testing::HasSubstr("vector.body")); - ASSERT_OK_AND_ASSIGN(auto fn_ptr, generator->engine_->CompiledFunction(fn_name)); - ASSERT_TRUE(fn_ptr); + EvalFunc eval_func = (EvalFunc)generator->engine_->CompiledFunction(fn_name); - auto eval_func = reinterpret_cast(fn_ptr); constexpr size_t kNumRecords = 4; std::array a0{1, 2, 3, 4}; std::array a1{5, 6, 7, 8}; @@ -128,7 +126,6 @@ TEST_F(TestLLVMGenerator, TestAdd) { reinterpret_cast(out.data()), reinterpret_cast(&out_bitmap), }; std::array addr_offsets{0, 0, 0, 0, 0, 0}; - eval_func(addrs.data(), addr_offsets.data(), nullptr, nullptr, nullptr, 0 /* dummy context ptr */, kNumRecords); diff --git a/cpp/src/gandiva/projector.cc b/cpp/src/gandiva/projector.cc index 520683e5ef1ce..64c283e9fe03e 100644 --- a/cpp/src/gandiva/projector.cc +++ b/cpp/src/gandiva/projector.cc @@ -80,8 +80,8 @@ Status Projector::Make(SchemaPtr schema, const ExpressionVector& exprs, GandivaObjectCache obj_cache(cache, cache_key); // Build LLVM generator, and generate code for the specified expressions - ARROW_ASSIGN_OR_RAISE(auto llvm_gen, - LLVMGenerator::Make(configuration, is_cached, obj_cache)); + std::unique_ptr llvm_gen; + ARROW_RETURN_NOT_OK(LLVMGenerator::Make(configuration, is_cached, &llvm_gen)); // Run the validation on the expressions. // Return if any of the expression is invalid since @@ -95,7 +95,7 @@ Status Projector::Make(SchemaPtr schema, const ExpressionVector& exprs, } // Set the object cache for LLVM - ARROW_RETURN_NOT_OK(llvm_gen->SetLLVMObjectCache(obj_cache)); + llvm_gen->SetLLVMObjectCache(obj_cache); ARROW_RETURN_NOT_OK(llvm_gen->Build(exprs, selection_vector_mode)); @@ -357,7 +357,7 @@ Status Projector::ValidateArrayDataCapacity(const arrow::ArrayData& array_data, return Status::OK(); } -const std::string& Projector::DumpIR() { return llvm_generator_->ir(); } +std::string Projector::DumpIR() { return llvm_generator_->DumpIR(); } void Projector::SetBuiltFromCache(bool flag) { built_from_cache_ = flag; } diff --git a/cpp/src/gandiva/projector.h b/cpp/src/gandiva/projector.h index 8f36c836f4f8d..da81e79e535c4 100644 --- a/cpp/src/gandiva/projector.h +++ b/cpp/src/gandiva/projector.h @@ -118,7 +118,7 @@ class GANDIVA_EXPORT Projector { const SelectionVector* selection_vector, const ArrayDataVector& output) const; - const std::string& DumpIR(); + std::string DumpIR(); void SetBuiltFromCache(bool flag); diff --git a/cpp/src/gandiva/tests/micro_benchmarks.cc b/cpp/src/gandiva/tests/micro_benchmarks.cc index 450e691323cae..f126b769b2010 100644 --- a/cpp/src/gandiva/tests/micro_benchmarks.cc +++ b/cpp/src/gandiva/tests/micro_benchmarks.cc @@ -16,7 +16,6 @@ // under the License. #include - #include "arrow/memory_pool.h" #include "arrow/status.h" #include "arrow/testing/gtest_util.h" @@ -421,35 +420,6 @@ static void DoDecimalAdd2(benchmark::State& state, int32_t precision, int32_t sc ASSERT_OK(status); } -static void TimedTestExprCompilation(benchmark::State& state) { - int64_t iteration = 0; - for (auto _ : state) { - // schema for input fields - auto field0 = field("f0", int64()); - auto field1 = field("f1", int64()); - auto literal = TreeExprBuilder::MakeLiteral(iteration); - auto schema = arrow::schema({field0, field1}); - - // output field - auto field_add = field("c1", int64()); - auto field_less_than = field("c2", boolean()); - - // Build expression - auto add_func = TreeExprBuilder::MakeFunction( - "add", {TreeExprBuilder::MakeField(field0), literal}, int64()); - auto less_than_func = TreeExprBuilder::MakeFunction( - "less_than", {TreeExprBuilder::MakeField(field1), literal}, boolean()); - - auto expr_0 = TreeExprBuilder::MakeExpression(add_func, field_add); - auto expr_1 = TreeExprBuilder::MakeExpression(less_than_func, field_less_than); - - std::shared_ptr projector; - ASSERT_OK(Projector::Make(schema, {expr_0, expr_1}, TestConfiguration(), &projector)); - - ++iteration; - } -} - static void DecimalAdd2Fast(benchmark::State& state) { // use lesser precision to test the fast-path DoDecimalAdd2(state, DecimalTypeUtil::kMaxPrecision - 6, 18); @@ -490,7 +460,6 @@ static void DecimalAdd3Large(benchmark::State& state) { DoDecimalAdd3(state, DecimalTypeUtil::kMaxPrecision, 18, true); } -BENCHMARK(TimedTestExprCompilation)->Unit(benchmark::kMicrosecond); BENCHMARK(TimedTestAdd3)->Unit(benchmark::kMicrosecond); BENCHMARK(TimedTestBigNested)->Unit(benchmark::kMicrosecond); BENCHMARK(TimedTestExtractYear)->Unit(benchmark::kMicrosecond); diff --git a/cpp/src/gandiva/tests/test_util.cc b/cpp/src/gandiva/tests/test_util.cc index 2ee49ffae0ed6..959ea3cd7a446 100644 --- a/cpp/src/gandiva/tests/test_util.cc +++ b/cpp/src/gandiva/tests/test_util.cc @@ -30,10 +30,6 @@ std::shared_ptr TestConfiguration() { return ConfigurationBuilder::DefaultConfiguration(); } -std::shared_ptr TestConfigWithIrDumping() { - return ConfigurationBuilder().build_with_ir_dumping(true); -} - #ifndef GANDIVA_EXTENSION_TEST_DIR #define GANDIVA_EXTENSION_TEST_DIR "." #endif diff --git a/cpp/src/gandiva/tests/test_util.h b/cpp/src/gandiva/tests/test_util.h index d8181fe67516c..69d63732aeeaa 100644 --- a/cpp/src/gandiva/tests/test_util.h +++ b/cpp/src/gandiva/tests/test_util.h @@ -98,8 +98,6 @@ static inline ArrayPtr MakeArrowTypeArray(const std::shared_ptr std::shared_ptr TestConfiguration(); -std::shared_ptr TestConfigWithIrDumping(); - // helper function to create a Configuration with an external function registered to the // given function registry std::shared_ptr TestConfigWithFunctionRegistry( diff --git a/python/pyarrow/gandiva.pyx b/python/pyarrow/gandiva.pyx index 2202ec64f2962..35bbf5018f08a 100644 --- a/python/pyarrow/gandiva.pyx +++ b/python/pyarrow/gandiva.pyx @@ -36,7 +36,6 @@ from pyarrow.includes.libgandiva cimport ( CNode, CProjector, CFilter, CSelectionVector, _ensure_selection_mode, - CConfiguration, CConfigurationBuilder, TreeExprBuilder_MakeExpression, TreeExprBuilder_MakeFunction, @@ -584,47 +583,9 @@ cdef class TreeExprBuilder(_Weakrefable): condition.node) return Condition.create(r) -cdef class Configuration(_Weakrefable): - cdef: - shared_ptr[CConfiguration] configuration - - def __cinit__(self, bint optimize=True, bint dump_ir=False): - """ - Initialize the configuration with specified options. - - Parameters - ---------- - optimize : bool, default True - Whether to enable optimizations. - dump_ir : bool, default False - Whether to dump LLVM IR. - """ - self.configuration = CConfigurationBuilder().build() - self.configuration.get().set_optimize(optimize) - self.configuration.get().set_dump_ir(dump_ir) - - @staticmethod - cdef create(shared_ptr[CConfiguration] configuration): - """ - Create a Configuration instance from an existing CConfiguration pointer. - - Parameters - ---------- - configuration : shared_ptr[CConfiguration] - Existing CConfiguration pointer. - - Returns - ------- - Configuration instance - """ - cdef Configuration self = Configuration.__new__(Configuration) - self.configuration = configuration - return self - cpdef make_projector(Schema schema, children, MemoryPool pool, - str selection_mode="NONE", - Configuration configuration=None): + str selection_mode="NONE"): """ Construct a projection using expressions. @@ -641,8 +602,6 @@ cpdef make_projector(Schema schema, children, MemoryPool pool, Memory pool used to allocate output arrays. selection_mode : str, default "NONE" Possible values are NONE, UINT16, UINT32, UINT64. - configuration : pyarrow.gandiva.Configuration, default None - Configuration for the projector. Returns ------- @@ -653,9 +612,6 @@ cpdef make_projector(Schema schema, children, MemoryPool pool, c_vector[shared_ptr[CGandivaExpression]] c_children shared_ptr[CProjector] result - if configuration is None: - configuration = Configuration() - for child in children: if child is None: raise TypeError("Expressions must not be None") @@ -664,13 +620,12 @@ cpdef make_projector(Schema schema, children, MemoryPool pool, check_status( Projector_Make(schema.sp_schema, c_children, _ensure_selection_mode(selection_mode), - configuration.configuration, + CConfigurationBuilder.DefaultConfiguration(), &result)) return Projector.create(result, pool) -cpdef make_filter(Schema schema, Condition condition, - Configuration configuration=None): +cpdef make_filter(Schema schema, Condition condition): """ Construct a filter based on a condition. @@ -683,8 +638,6 @@ cpdef make_filter(Schema schema, Condition condition, Schema for the record batches, and the condition. condition : pyarrow.gandiva.Condition Filter condition. - configuration : pyarrow.gandiva.Configuration, default None - Configuration for the filter. Returns ------- @@ -693,12 +646,8 @@ cpdef make_filter(Schema schema, Condition condition, cdef shared_ptr[CFilter] result if condition is None: raise TypeError("Condition must not be None") - - if configuration is None: - configuration = Configuration() - check_status( - Filter_Make(schema.sp_schema, condition.condition, configuration.configuration, &result)) + Filter_Make(schema.sp_schema, condition.condition, &result)) return Filter.create(result) diff --git a/python/pyarrow/includes/libgandiva.pxd b/python/pyarrow/includes/libgandiva.pxd index 7d76576bef2b9..fa3b72bad61be 100644 --- a/python/pyarrow/includes/libgandiva.pxd +++ b/python/pyarrow/includes/libgandiva.pxd @@ -252,7 +252,6 @@ cdef extern from "gandiva/filter.h" namespace "gandiva" nogil: cdef CStatus Filter_Make \ "gandiva::Filter::Make"( shared_ptr[CSchema] schema, shared_ptr[CCondition] condition, - shared_ptr[CConfiguration] configuration, shared_ptr[CFilter]* filter) cdef extern from "gandiva/function_signature.h" namespace "gandiva" nogil: @@ -279,20 +278,9 @@ cdef extern from "gandiva/expression_registry.h" namespace "gandiva" nogil: cdef extern from "gandiva/configuration.h" namespace "gandiva" nogil: cdef cppclass CConfiguration" gandiva::Configuration": - - CConfiguration() - - CConfiguration(bint optimize, bint dump_ir) - - void set_optimize(bint optimize) - - void set_dump_ir(bint dump_ir) + pass cdef cppclass CConfigurationBuilder \ " gandiva::ConfigurationBuilder": @staticmethod shared_ptr[CConfiguration] DefaultConfiguration() - - CConfigurationBuilder() - - shared_ptr[CConfiguration] build() diff --git a/python/pyarrow/tests/test_gandiva.py b/python/pyarrow/tests/test_gandiva.py index 80d119a48530d..241cac4d83db4 100644 --- a/python/pyarrow/tests/test_gandiva.py +++ b/python/pyarrow/tests/test_gandiva.py @@ -47,9 +47,8 @@ def test_tree_exp_builder(): assert expr.result().type == pa.int32() - config = gandiva.Configuration(dump_ir=True) projector = gandiva.make_projector( - schema, [expr], pa.default_memory_pool(), "NONE", config) + schema, [expr], pa.default_memory_pool()) # Gandiva generates compute kernel function named `@expr_X` assert projector.llvm_ir.find("@expr_") != -1 @@ -105,8 +104,7 @@ def test_filter(): assert condition.result().type == pa.bool_() - config = gandiva.Configuration(dump_ir=True) - filter = gandiva.make_filter(table.schema, condition, config) + filter = gandiva.make_filter(table.schema, condition) # Gandiva generates compute kernel function named `@expr_X` assert filter.llvm_ir.find("@expr_") != -1