Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GH-42149: [C++] Use FetchContent for bundled ORC #43011

Merged
merged 24 commits into from
Jul 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,14 @@ if(POLICY CMP0135)
cmake_policy(SET CMP0135 NEW)
endif()

# https://cmake.org/cmake/help/latest/policy/CMP0170.html
#
# CMP0170 is for enforcing dependency populations by users with
# FETCHCONTENT_FULLY_DISCONNECTED=ON.
if(POLICY CMP0170)
cmake_policy(SET CMP0170 NEW)
endif()

set(ARROW_VERSION "17.0.0-SNAPSHOT")

string(REGEX MATCH "^[0-9]+\\.[0-9]+\\.[0-9]+" ARROW_BASE_VERSION "${ARROW_VERSION}")
Expand Down
279 changes: 190 additions & 89 deletions cpp/cmake_modules/ThirdpartyToolchain.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -2532,6 +2532,7 @@ macro(build_zlib)
set_property(TARGET ZLIB::ZLIB
PROPERTY IMPORTED_LOCATION
"${EMSCRIPTEN_SYSROOT}/lib/wasm32-emscripten/pic/libz.a")
target_include_directories(ZLIB::ZLIB INTERFACE "${EMSCRIPTEN_SYSROOT}/include")
list(APPEND ARROW_BUNDLED_STATIC_LIBS ZLIB::ZLIB)
else()
set(ZLIB_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/zlib_ep/src/zlib_ep-install")
Expand Down Expand Up @@ -4490,116 +4491,216 @@ target_include_directories(arrow::hadoop INTERFACE "${HADOOP_HOME}/include")
# ----------------------------------------------------------------------
# Apache ORC

macro(build_orc)
function(build_orc)
message(STATUS "Building Apache ORC from source")

set(ORC_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/orc_ep-install")
set(ORC_HOME "${ORC_PREFIX}")
set(ORC_INCLUDE_DIR "${ORC_PREFIX}/include")
set(ORC_STATIC_LIB
"${ORC_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}orc${CMAKE_STATIC_LIBRARY_SUFFIX}")
if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.29)
fetchcontent_declare(orc
${FC_DECLARE_COMMON_OPTIONS}
URL ${ORC_SOURCE_URL}
URL_HASH "SHA256=${ARROW_ORC_BUILD_SHA256_CHECKSUM}")
prepare_fetchcontent()

set(CMAKE_UNITY_BUILD FALSE)

set(ORC_PREFER_STATIC_LZ4
OFF
CACHE BOOL "" FORCE)
get_target_property(LZ4_INCLUDE_DIR LZ4::lz4 INTERFACE_INCLUDE_DIRECTORIES)
get_filename_component(LZ4_ROOT "${LZ4_INCLUDE_DIR}" DIRECTORY)
set(LZ4_HOME
${LZ4_ROOT}
CACHE STRING "" FORCE)
set(LZ4_LIBRARY
LZ4::lz4
CACHE STRING "" FORCE)

set(ORC_PREFER_STATIC_PROTOBUF
OFF
CACHE BOOL "" FORCE)
get_target_property(PROTOBUF_INCLUDE_DIR ${ARROW_PROTOBUF_LIBPROTOBUF}
INTERFACE_INCLUDE_DIRECTORIES)
get_filename_component(Protobuf_ROOT "${PROTOBUF_INCLUDE_DIR}" DIRECTORY)
set(PROTOBUF_HOME
${Protobuf_ROOT}
CACHE STRING "" FORCE)
# ORC uses this.
target_include_directories(${ARROW_PROTOBUF_LIBPROTOC}
INTERFACE "${PROTOBUF_INCLUDE_DIR}")
set(PROTOBUF_EXECUTABLE ${ARROW_PROTOBUF_PROTOC})
set(PROTOBUF_LIBRARY ${ARROW_PROTOBUF_LIBPROTOBUF})
set(PROTOC_LIBRARY ${ARROW_PROTOBUF_LIBPROTOC})

set(ORC_PREFER_STATIC_SNAPPY
OFF
CACHE BOOL "" FORCE)
get_target_property(SNAPPY_INCLUDE_DIR ${Snappy_TARGET} INTERFACE_INCLUDE_DIRECTORIES)
get_filename_component(Snappy_ROOT "${SNAPPY_INCLUDE_DIR}" DIRECTORY)
set(SNAPPY_HOME
${Snappy_ROOT}
CACHE STRING "" FORCE)
set(SNAPPY_LIBRARY
${Snappy_TARGET}
CACHE STRING "" FORCE)

set(ORC_PREFER_STATIC_ZLIB
OFF
CACHE BOOL "" FORCE)
get_target_property(ZLIB_INCLUDE_DIR ZLIB::ZLIB INTERFACE_INCLUDE_DIRECTORIES)
get_filename_component(ZLIB_ROOT "${ZLIB_INCLUDE_DIR}" DIRECTORY)
set(ZLIB_HOME
${ZLIB_ROOT}
CACHE STRING "" FORCE)
set(ZLIB_LIBRARY
ZLIB::ZLIB
CACHE STRING "" FORCE)

set(ORC_PREFER_STATIC_ZSTD
OFF
CACHE BOOL "" FORCE)
get_target_property(ZSTD_INCLUDE_DIR ${ARROW_ZSTD_LIBZSTD}
INTERFACE_INCLUDE_DIRECTORIES)
get_filename_component(ZSTD_ROOT "${ZSTD_INCLUDE_DIR}" DIRECTORY)
set(ZSTD_HOME
${ZSTD_ROOT}
CACHE STRING "" FORCE)
set(ZSTD_LIBRARY ${ARROW_ZSTD_LIBZSTD})

set(BUILD_CPP_TESTS
OFF
CACHE BOOL "" FORCE)
set(BUILD_JAVA
OFF
CACHE BOOL "" FORCE)
set(BUILD_LIBHDFSPP
OFF
CACHE BOOL "" FORCE)
set(BUILD_TOOLS
OFF
CACHE BOOL "" FORCE)
set(INSTALL_VENDORED_LIBS
OFF
CACHE BOOL "" FORCE)
set(STOP_BUILD_ON_WARNING
OFF
CACHE BOOL "" FORCE)

# We can remove this with ORC 2.0.2 or later.
list(PREPEND CMAKE_MODULE_PATH
${CMAKE_CURRENT_BINARY_DIR}/_deps/orc-src/cmake_modules)

fetchcontent_makeavailable(orc)

add_library(orc::orc INTERFACE IMPORTED)
target_link_libraries(orc::orc INTERFACE orc)
target_include_directories(orc::orc INTERFACE "${orc_BINARY_DIR}/c++/include"
"${orc_SOURCE_DIR}/c++/include")

list(APPEND ARROW_BUNDLED_STATIC_LIBS orc)
else()
set(ORC_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/orc_ep-install")
set(ORC_HOME "${ORC_PREFIX}")
set(ORC_INCLUDE_DIR "${ORC_PREFIX}/include")
set(ORC_STATIC_LIB
"${ORC_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}orc${CMAKE_STATIC_LIBRARY_SUFFIX}"
)

get_target_property(ORC_PROTOBUF_ROOT ${ARROW_PROTOBUF_LIBPROTOBUF}
INTERFACE_INCLUDE_DIRECTORIES)
get_filename_component(ORC_PROTOBUF_ROOT "${ORC_PROTOBUF_ROOT}" DIRECTORY)
get_target_property(ORC_PROTOBUF_ROOT ${ARROW_PROTOBUF_LIBPROTOBUF}
INTERFACE_INCLUDE_DIRECTORIES)
get_filename_component(ORC_PROTOBUF_ROOT "${ORC_PROTOBUF_ROOT}" DIRECTORY)

get_target_property(ORC_SNAPPY_INCLUDE_DIR ${Snappy_TARGET}
INTERFACE_INCLUDE_DIRECTORIES)
get_filename_component(ORC_SNAPPY_ROOT "${ORC_SNAPPY_INCLUDE_DIR}" DIRECTORY)
get_target_property(ORC_SNAPPY_INCLUDE_DIR ${Snappy_TARGET}
INTERFACE_INCLUDE_DIRECTORIES)
get_filename_component(ORC_SNAPPY_ROOT "${ORC_SNAPPY_INCLUDE_DIR}" DIRECTORY)

get_target_property(ORC_LZ4_ROOT LZ4::lz4 INTERFACE_INCLUDE_DIRECTORIES)
get_filename_component(ORC_LZ4_ROOT "${ORC_LZ4_ROOT}" DIRECTORY)
get_target_property(ORC_LZ4_ROOT LZ4::lz4 INTERFACE_INCLUDE_DIRECTORIES)
get_filename_component(ORC_LZ4_ROOT "${ORC_LZ4_ROOT}" DIRECTORY)

get_target_property(ORC_ZSTD_ROOT ${ARROW_ZSTD_LIBZSTD} INTERFACE_INCLUDE_DIRECTORIES)
get_filename_component(ORC_ZSTD_ROOT "${ORC_ZSTD_ROOT}" DIRECTORY)
get_target_property(ORC_ZSTD_ROOT ${ARROW_ZSTD_LIBZSTD} INTERFACE_INCLUDE_DIRECTORIES)
get_filename_component(ORC_ZSTD_ROOT "${ORC_ZSTD_ROOT}" DIRECTORY)

set(ORC_CMAKE_ARGS
${EP_COMMON_CMAKE_ARGS}
"-DCMAKE_INSTALL_PREFIX=${ORC_PREFIX}"
-DSTOP_BUILD_ON_WARNING=OFF
-DBUILD_LIBHDFSPP=OFF
-DBUILD_JAVA=OFF
-DBUILD_TOOLS=OFF
-DBUILD_CPP_TESTS=OFF
-DINSTALL_VENDORED_LIBS=OFF
"-DLZ4_HOME=${ORC_LZ4_ROOT}"
"-DPROTOBUF_EXECUTABLE=$<TARGET_FILE:${ARROW_PROTOBUF_PROTOC}>"
"-DPROTOBUF_HOME=${ORC_PROTOBUF_ROOT}"
"-DPROTOBUF_INCLUDE_DIR=$<TARGET_PROPERTY:${ARROW_PROTOBUF_LIBPROTOBUF},INTERFACE_INCLUDE_DIRECTORIES>"
"-DPROTOBUF_LIBRARY=$<TARGET_FILE:${ARROW_PROTOBUF_LIBPROTOBUF}>"
"-DPROTOC_LIBRARY=$<TARGET_FILE:${ARROW_PROTOBUF_LIBPROTOC}>"
"-DSNAPPY_HOME=${ORC_SNAPPY_ROOT}"
"-DSNAPPY_LIBRARY=$<TARGET_FILE:${Snappy_TARGET}>"
"-DLZ4_LIBRARY=$<TARGET_FILE:LZ4::lz4>"
"-DLZ4_STATIC_LIB=$<TARGET_FILE:LZ4::lz4>"
"-DLZ4_INCLUDE_DIR=${ORC_LZ4_ROOT}/include"
"-DSNAPPY_INCLUDE_DIR=${ORC_SNAPPY_INCLUDE_DIR}"
"-DZSTD_HOME=${ORC_ZSTD_ROOT}"
"-DZSTD_INCLUDE_DIR=$<TARGET_PROPERTY:${ARROW_ZSTD_LIBZSTD},INTERFACE_INCLUDE_DIRECTORIES>"
"-DZSTD_LIBRARY=$<TARGET_FILE:${ARROW_ZSTD_LIBZSTD}>")
if(ZLIB_ROOT)
set(ORC_CMAKE_ARGS ${ORC_CMAKE_ARGS} "-DZLIB_HOME=${ZLIB_ROOT}")
endif()
set(ORC_CMAKE_ARGS
${EP_COMMON_CMAKE_ARGS}
"-DCMAKE_INSTALL_PREFIX=${ORC_PREFIX}"
-DSTOP_BUILD_ON_WARNING=OFF
-DBUILD_LIBHDFSPP=OFF
-DBUILD_JAVA=OFF
-DBUILD_TOOLS=OFF
-DBUILD_CPP_TESTS=OFF
-DINSTALL_VENDORED_LIBS=OFF
"-DLZ4_HOME=${ORC_LZ4_ROOT}"
"-DPROTOBUF_EXECUTABLE=$<TARGET_FILE:${ARROW_PROTOBUF_PROTOC}>"
"-DPROTOBUF_HOME=${ORC_PROTOBUF_ROOT}"
"-DPROTOBUF_INCLUDE_DIR=$<TARGET_PROPERTY:${ARROW_PROTOBUF_LIBPROTOBUF},INTERFACE_INCLUDE_DIRECTORIES>"
"-DPROTOBUF_LIBRARY=$<TARGET_FILE:${ARROW_PROTOBUF_LIBPROTOBUF}>"
"-DPROTOC_LIBRARY=$<TARGET_FILE:${ARROW_PROTOBUF_LIBPROTOC}>"
"-DSNAPPY_HOME=${ORC_SNAPPY_ROOT}"
"-DSNAPPY_LIBRARY=$<TARGET_FILE:${Snappy_TARGET}>"
"-DLZ4_LIBRARY=$<TARGET_FILE:LZ4::lz4>"
"-DLZ4_STATIC_LIB=$<TARGET_FILE:LZ4::lz4>"
"-DLZ4_INCLUDE_DIR=${ORC_LZ4_ROOT}/include"
"-DSNAPPY_INCLUDE_DIR=${ORC_SNAPPY_INCLUDE_DIR}"
"-DZSTD_HOME=${ORC_ZSTD_ROOT}"
"-DZSTD_INCLUDE_DIR=$<TARGET_PROPERTY:${ARROW_ZSTD_LIBZSTD},INTERFACE_INCLUDE_DIRECTORIES>"
"-DZSTD_LIBRARY=$<TARGET_FILE:${ARROW_ZSTD_LIBZSTD}>")
if(ZLIB_ROOT)
set(ORC_CMAKE_ARGS ${ORC_CMAKE_ARGS} "-DZLIB_HOME=${ZLIB_ROOT}")
endif()

# Work around CMake bug
file(MAKE_DIRECTORY ${ORC_INCLUDE_DIR})
# Work around CMake bug
file(MAKE_DIRECTORY ${ORC_INCLUDE_DIR})

externalproject_add(orc_ep
${EP_COMMON_OPTIONS}
URL ${ORC_SOURCE_URL}
URL_HASH "SHA256=${ARROW_ORC_BUILD_SHA256_CHECKSUM}"
BUILD_BYPRODUCTS ${ORC_STATIC_LIB}
CMAKE_ARGS ${ORC_CMAKE_ARGS}
DEPENDS ${ARROW_PROTOBUF_LIBPROTOBUF}
${ARROW_PROTOBUF_PROTOC}
${ARROW_ZSTD_LIBZSTD}
${Snappy_TARGET}
LZ4::lz4
ZLIB::ZLIB)

set(ORC_VENDORED 1)

add_library(orc::orc STATIC IMPORTED)
set_target_properties(orc::orc PROPERTIES IMPORTED_LOCATION "${ORC_STATIC_LIB}")
target_include_directories(orc::orc BEFORE INTERFACE "${ORC_INCLUDE_DIR}")
target_link_libraries(orc::orc INTERFACE LZ4::lz4 ZLIB::ZLIB ${ARROW_ZSTD_LIBZSTD}
${Snappy_TARGET})
# Protobuf generated files may use ABSL_DCHECK*() and
# absl::log_internal_check_op is needed for them.
if(TARGET absl::log_internal_check_op)
target_link_libraries(orc::orc INTERFACE absl::log_internal_check_op)
endif()
if(NOT MSVC)
if(NOT APPLE AND ARROW_ENABLE_THREADING)
target_link_libraries(orc::orc INTERFACE Threads::Threads)
endif()
target_link_libraries(orc::orc INTERFACE ${CMAKE_DL_LIBS})
endif()
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS "9")
target_link_libraries(orc::orc INTERFACE stdc++fs)
externalproject_add(orc_ep
${EP_COMMON_OPTIONS}
URL ${ORC_SOURCE_URL}
URL_HASH "SHA256=${ARROW_ORC_BUILD_SHA256_CHECKSUM}"
BUILD_BYPRODUCTS ${ORC_STATIC_LIB}
CMAKE_ARGS ${ORC_CMAKE_ARGS}
DEPENDS ${ARROW_PROTOBUF_LIBPROTOBUF}
${ARROW_PROTOBUF_PROTOC}
${ARROW_ZSTD_LIBZSTD}
${Snappy_TARGET}
LZ4::lz4
ZLIB::ZLIB)
add_library(orc::orc STATIC IMPORTED)
set_target_properties(orc::orc PROPERTIES IMPORTED_LOCATION "${ORC_STATIC_LIB}")
target_include_directories(orc::orc BEFORE INTERFACE "${ORC_INCLUDE_DIR}")
target_link_libraries(orc::orc INTERFACE LZ4::lz4 ZLIB::ZLIB ${ARROW_ZSTD_LIBZSTD}
${Snappy_TARGET})
# Protobuf generated files may use ABSL_DCHECK*() and
# absl::log_internal_check_op is needed for them.
if(TARGET absl::log_internal_check_op)
target_link_libraries(orc::orc INTERFACE absl::log_internal_check_op)
endif()
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does it mean we no longer support these legacy versions?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch. But it seems that this is not needed with ORC 2.0.1...?
https://github.com/ursacomputing/crossbow/actions/runs/9786486604/job/27021493582 passed

Did you fix this in ORC 2.0.1? #41023 (comment)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS "8")
target_link_libraries(orc::orc INTERFACE c++fs)
if(NOT MSVC)
if(NOT APPLE AND ARROW_ENABLE_THREADING)
target_link_libraries(orc::orc INTERFACE Threads::Threads)
endif()
target_link_libraries(orc::orc INTERFACE ${CMAKE_DL_LIBS})
endif()
target_link_libraries(orc::orc INTERFACE ${ARROW_PROTOBUF_LIBPROTOBUF})
add_dependencies(orc::orc orc_ep)
list(APPEND ARROW_BUNDLED_STATIC_LIBS orc::orc)
endif()

add_dependencies(orc::orc orc_ep)

list(APPEND ARROW_BUNDLED_STATIC_LIBS orc::orc)
endmacro()
set(ORC_VENDORED
TRUE
PARENT_SCOPE)
set(ARROW_BUNDLED_STATIC_LIBS
${ARROW_BUNDLED_STATIC_LIBS}
PARENT_SCOPE)
endfunction()

if(ARROW_ORC)
resolve_dependency(orc HAVE_ALT TRUE)
target_link_libraries(orc::orc INTERFACE ${ARROW_PROTOBUF_LIBPROTOBUF})
if(ORC_VENDORED)
set(ARROW_ORC_VERSION ${ARROW_ORC_BUILD_VERSION})
else()
target_link_libraries(orc::orc INTERFACE ${ARROW_PROTOBUF_LIBPROTOBUF})
set(ARROW_ORC_VERSION ${orcAlt_VERSION})
message(STATUS "Found ORC static library: ${ORC_STATIC_LIB}")
message(STATUS "Found ORC headers: ${ORC_INCLUDE_DIR}")
endif()
message(STATUS "Found ORC static library: ${ORC_STATIC_LIB}")
message(STATUS "Found ORC headers: ${ORC_INCLUDE_DIR}")
endif()

# ----------------------------------------------------------------------
Expand Down
4 changes: 2 additions & 2 deletions cpp/thirdparty/versions.txt
Original file line number Diff line number Diff line change
Expand Up @@ -90,8 +90,8 @@ ARROW_OPENTELEMETRY_BUILD_VERSION=v1.13.0
ARROW_OPENTELEMETRY_BUILD_SHA256_CHECKSUM=7735cc56507149686e6019e06f588317099d4522480be5f38a2a09ec69af1706
ARROW_OPENTELEMETRY_PROTO_BUILD_VERSION=v0.17.0
ARROW_OPENTELEMETRY_PROTO_BUILD_SHA256_CHECKSUM=f269fbcb30e17b03caa1decd231ce826e59d7651c0f71c3b28eb5140b4bb5412
ARROW_ORC_BUILD_VERSION=2.0.0
ARROW_ORC_BUILD_SHA256_CHECKSUM=9107730919c29eb39efaff1b9e36166634d1d4d9477e5fee76bfd6a8fec317df
ARROW_ORC_BUILD_VERSION=2.0.1
ARROW_ORC_BUILD_SHA256_CHECKSUM=1ffac0228aa83f04a1b1cf2788a3af5953e82587ae3a77c41900e99f2557132d
ARROW_PROTOBUF_BUILD_VERSION=v21.3
ARROW_PROTOBUF_BUILD_SHA256_CHECKSUM=2f723218f6cb709ae4cdc4fb5ed56a5951fc5d466f0128ce4c946b8c78c8c49f
# Because of https://github.com/Tencent/rapidjson/pull/1323, we require
Expand Down
1 change: 1 addition & 0 deletions dev/tasks/linux-packages/apache-arrow/debian/rules
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ override_dh_auto_configure:
-DARROW_WITH_ZSTD=ON \
-DCMAKE_BUILD_TYPE=$(BUILD_TYPE) \
-DCUDAToolkit_ROOT=/usr \
-DFETCHCONTENT_FULLY_DISCONNECTED=OFF \
-DPARQUET_BUILD_EXECUTABLES=ON \
-DPARQUET_REQUIRE_ENCRYPTION=ON

Expand Down
Loading