Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ORC-1841: [C++][CI] Add UBSAN support to Cmake #2117

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
Open
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
fix ub case
luffy-zh committed Jan 23, 2025
commit 1416159dbc4b71861debfdc8aaad3d3179adffe9
4 changes: 2 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -178,8 +178,8 @@ endif()
# Configure Undefined Behavior Sanitizer if enabled
if (ENABLE_UBSAN)
if (CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=undefined -fno-sanitize=alignment,vptr,function -fno-sanitize-recover=all")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=undefined -fno-sanitize=alignment,vptr,function -fno-sanitize-recover=all")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=undefined -fno-sanitize=alignment,vptr,function -fno-sanitize-recover=all")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=undefined -fno-sanitize=alignment,vptr,function -fno-sanitize-recover=all")
message(STATUS "Undefined Behavior Sanitizer enabled")
else()
message(WARNING "Undefined Behavior Sanitizer is only supported for GCC and Clang compilers")
2 changes: 2 additions & 0 deletions c++/include/orc/Int128.hh
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
/**

Check notice on line 1 in c++/include/orc/Int128.hh

GitHub Actions / cpp-linter

Run clang-format on c++/include/orc/Int128.hh

File c++/include/orc/Int128.hh does not conform to Custom style guidelines. (lines 196, 218)
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@@ -193,6 +193,7 @@
* Shift left by the given number of bits.
* Values larger than 2**127 will shift into the sign bit.
*/
__attribute__((no_sanitize("shift")))
Int128& operator<<=(uint32_t bits) {
if (bits != 0) {
if (bits < 64) {
@@ -214,6 +215,7 @@
* Shift right by the given number of bits. Negative values will
* sign extend and fill with one bits.
*/
__attribute__((no_sanitize("shift")))
Int128& operator>>=(uint32_t bits) {
if (bits != 0) {
if (bits < 64) {
2 changes: 2 additions & 0 deletions c++/src/BloomFilter.cc
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
/**

Check notice on line 1 in c++/src/BloomFilter.cc

GitHub Actions / cpp-linter

Run clang-format on c++/src/BloomFilter.cc

File c++/src/BloomFilter.cc does not conform to Custom style guidelines. (lines 212, 230)
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@@ -209,6 +209,7 @@

DIAGNOSTIC_POP

__attribute__((no_sanitize("signed-integer-overflow","shift")))
void BloomFilterImpl::addHash(int64_t hash64) {
int32_t hash1 = static_cast<int32_t>(hash64 & 0xffffffff);
// In Java codes, we use "hash64 >>> 32" which is an unsigned shift op.
@@ -226,6 +227,7 @@
}
}

__attribute__((no_sanitize("signed-integer-overflow","shift")))
bool BloomFilterImpl::testHash(int64_t hash64) const {
int32_t hash1 = static_cast<int32_t>(hash64 & 0xffffffff);
// In Java codes, we use "hash64 >>> 32" which is an unsigned shift op.
1 change: 1 addition & 0 deletions c++/src/BloomFilter.hh
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
/**

Check notice on line 1 in c++/src/BloomFilter.hh

GitHub Actions / cpp-linter

Run clang-format on c++/src/BloomFilter.hh

File c++/src/BloomFilter.hh does not conform to Custom style guidelines. (lines 197)
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@@ -194,6 +194,7 @@
// Thomas Wang's integer hash function
// http://web.archive.org/web/20071223173210/http://www.concentric.net/~Ttwang/tech/inthash.htm
// Put this in header file so tests can use it as well.
__attribute__((no_sanitize("signed-integer-overflow","shift")))
inline int64_t getLongHash(int64_t key) {
key = (~key) + (key << 21); // key = (key << 21) - key - 1;
key = key ^ (key >> 24);
7 changes: 6 additions & 1 deletion c++/src/ColumnReader.cc
Original file line number Diff line number Diff line change
@@ -726,6 +726,9 @@ namespace orc {
if (totalBytes <= lastBufferLength_) {
// subtract the needed bytes from the ones left over
lastBufferLength_ -= totalBytes;
if (lastBuffer_ == nullptr) {
throw ParseError("StringDirectColumnReader::skip: lastBuffer_ is null");
}
lastBuffer_ += totalBytes;
} else {
// move the stream forward after accounting for the buffered bytes
@@ -780,7 +783,9 @@ namespace orc {
byteBatch.blob.resize(totalLength);
char* ptr = byteBatch.blob.data();
while (bytesBuffered + lastBufferLength_ < totalLength) {
memcpy(ptr + bytesBuffered, lastBuffer_, lastBufferLength_);
if (lastBuffer_ != nullptr) {
memcpy(ptr + bytesBuffered, lastBuffer_, lastBufferLength_);
}
bytesBuffered += lastBufferLength_;
const void* readBuffer;
int readLength;
4 changes: 2 additions & 2 deletions c++/src/ConvertColumnReader.cc
Original file line number Diff line number Diff line change
@@ -126,13 +126,13 @@ namespace orc {
bool shouldThrow) {
constexpr bool isFileTypeFloatingPoint(std::is_floating_point<FileType>::value);
constexpr bool isReadTypeFloatingPoint(std::is_floating_point<ReadType>::value);
int64_t longValue = static_cast<int64_t>(srcValue);

if (isFileTypeFloatingPoint) {
if (isReadTypeFloatingPoint) {
destValue = static_cast<ReadType>(srcValue);
} else {
if (!canFitInLong(static_cast<double>(srcValue)) ||
!downCastToInteger(destValue, longValue)) {
!downCastToInteger(destValue, static_cast<int64_t>(srcValue))) {
handleOverflow<FileType, ReadType>(destBatch, idx, shouldThrow);
}
}
1 change: 1 addition & 0 deletions c++/src/RLE.cc
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
/**

Check notice on line 1 in c++/src/RLE.cc

GitHub Actions / cpp-linter

Run clang-format on c++/src/RLE.cc

File c++/src/RLE.cc does not conform to Custom style guidelines. (lines 80)
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@@ -77,6 +77,7 @@
add<int16_t>(data, numValues, notNull);
}

__attribute__((no_sanitize("shift")))
void RleEncoder::writeVslong(int64_t val) {
writeVulong((val << 1) ^ (val >> 63));
}
1 change: 1 addition & 0 deletions c++/src/RLE.hh
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
/**

Check notice on line 1 in c++/src/RLE.hh

GitHub Actions / cpp-linter

Run clang-format on c++/src/RLE.hh

File c++/src/RLE.hh does not conform to Custom style guidelines. (lines 29)
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@@ -26,6 +26,7 @@

namespace orc {

__attribute__((no_sanitize("shift")))
inline int64_t zigZag(int64_t value) {
return (value << 1) ^ (value >> 63);
}