From e16aaf7e64fef3447e8a01d8c9b37fc8392672b6 Mon Sep 17 00:00:00 2001 From: Shakyan Kushwaha Date: Wed, 29 Jan 2025 14:00:33 +0530 Subject: [PATCH 1/2] fix: Bitwise operations on signed integers (Part-2) --- velox/common/hyperloglog/DenseHll.cpp | 44 +++++++++++++-------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/velox/common/hyperloglog/DenseHll.cpp b/velox/common/hyperloglog/DenseHll.cpp index f3a3f54b2d2d..30a0f9006e49 100644 --- a/velox/common/hyperloglog/DenseHll.cpp +++ b/velox/common/hyperloglog/DenseHll.cpp @@ -24,8 +24,8 @@ namespace facebook::velox::common::hll { namespace { const int kBitsPerBucket = 4; -const int8_t kMaxDelta = (1 << kBitsPerBucket) - 1; -const int8_t kBucketMask = (1 << kBitsPerBucket) - 1; +const int8_t kMaxDelta = (1u << kBitsPerBucket) - 1; +const int8_t kBucketMask = (1u << kBitsPerBucket) - 1; constexpr double kLinearCountingMinEmptyBuckets = 0.4; /// Buckets are stored in a byte array. Each byte stored 2 buckets, 4 bits each. @@ -34,7 +34,7 @@ constexpr double kLinearCountingMinEmptyBuckets = 0.4; /// the byte for a given bucket, e.g. 0 for even and 4 for odd buckets. int8_t shiftForBucket(int32_t index) { // ((1 - bucket) % 2) * kBitsPerBucket - return ((~index) & 1) << 2; + return ((~static_cast(index)) & 1) << 2; } /// Returns the value of alpha constant. See "Practical considerations" section @@ -48,7 +48,7 @@ double alpha(int32_t indexBitLength) { case 6: return 0.709; default: - return (0.7213 / (1 + 1.079 / (1 << indexBitLength))); + return (0.7213 / (1 + 1.079 / (1u << indexBitLength))); } } @@ -132,7 +132,7 @@ void DenseHll::initialize(int8_t indexBitLength) { indexBitLength_ = indexBitLength; - auto numBuckets = 1 << indexBitLength; + auto numBuckets = 1u << indexBitLength; baselineCount_ = numBuckets; deltas_.resize(numBuckets * kBitsPerBucket / 8); } @@ -186,8 +186,8 @@ struct DenseHllView { const int8_t* overflowValues; int8_t getDelta(int32_t index) const { - int slot = index >> 1; - return (deltas[slot] >> shiftForBucket(index)) & kBucketMask; + unsigned int slot = static_cast(index) >> 1; + return (static_cast(deltas[slot]) >> shiftForBucket(index)) & kBucketMask; } int8_t getValue(int32_t index) const { @@ -203,7 +203,7 @@ struct DenseHllView { }; int64_t cardinalityImpl(const DenseHllView& hll) { - auto numBuckets = 1 << hll.indexBitLength; + auto numBuckets = 1u << hll.indexBitLength; int32_t baselineCount = 0; for (int i = 0; i < numBuckets; i++) { @@ -222,7 +222,7 @@ int64_t cardinalityImpl(const DenseHllView& hll) { double sum = 0; for (int i = 0; i < numBuckets; i++) { int value = hll.getValue(i); - sum += 1.0 / (1L << value); + sum += 1.0 / (1UL << value); } double estimate = (alpha(hll.indexBitLength) * numBuckets * numBuckets) / sum; @@ -240,7 +240,7 @@ DenseHllView deserialize(const char* serialized) { auto indexBitLength = stream.read(); auto baseline = stream.read(); - auto numBuckets = 1 << indexBitLength; + auto numBuckets = 1u << indexBitLength; // next numBuckets / 2 bytes are deltas const int8_t* deltas = stream.read(numBuckets / 2); @@ -279,20 +279,20 @@ int64_t DenseHll::cardinality(const char* serialized) { } int8_t DenseHll::getDelta(int32_t index) const { - int slot = index >> 1; + unsigned int slot = static_cast(index) >> 1; return (deltas_[slot] >> shiftForBucket(index)) & kBucketMask; } void DenseHll::setDelta(int32_t index, int8_t value) { - int slot = index >> 1; + unsigned int slot = static_cast(index) >> 1; // Clear the old value. - int8_t clearMask = static_cast(kBucketMask << shiftForBucket(index)); - deltas_[slot] &= ~clearMask; + int8_t clearMask = static_cast(static_cast(kBucketMask) << shiftForBucket(index)); + deltas_[slot] = static_cast(deltas_[slot]) & ~static_cast(clearMask); // Set the new value. - int8_t setMask = static_cast(value << shiftForBucket(index)); - deltas_[slot] |= setMask; + int8_t setMask = static_cast(static_cast(value) << shiftForBucket(index)); + deltas_[slot] |= static_cast(deltas_[slot]) | static_cast(setMask); } int8_t DenseHll::getOverflow(int32_t index) const { @@ -310,7 +310,7 @@ int DenseHll::findOverflowEntry(int32_t index) const { } void DenseHll::adjustBaselineIfNeeded() { - auto numBuckets = 1 << indexBitLength_; + auto numBuckets = 1u << indexBitLength_; while (baselineCount_ == 0) { baseline_++; @@ -389,7 +389,7 @@ int32_t DenseHll::serializedSize() const { return 1 /* type + version */ + 1 /* indexBitLength */ + 1 /* baseline */ - + (1 << indexBitLength_) / 2 /* buckets */ + + (1u << indexBitLength_) / 2 /* buckets */ + 2 /* overflow bucket count */ + 2 * overflows_ /* overflow bucket indexes */ + overflows_ /* overflow bucket values */; @@ -426,7 +426,7 @@ bool DenseHll::canDeserialize(const char* input, int size) { return false; } - auto numBuckets = 1 << indexBitLength; + auto numBuckets = 1u << indexBitLength; const int8_t* deltas = stream.read(numBuckets / 2); auto overflows = stream.read(); @@ -471,7 +471,7 @@ int32_t DenseHll::estimateInMemorySize(int8_t indexBitLength) { // Note: we don't take into account overflow entries since their number can // vary. return sizeof(indexBitLength_) + sizeof(baseline_) + sizeof(baselineCount_) + - (1 << indexBitLength) / 2; + (1u << indexBitLength) / 2; } void DenseHll::serialize(char* output) { @@ -500,7 +500,7 @@ DenseHll::DenseHll(const char* serialized, HashStringAllocator* allocator) initialize(hll.indexBitLength); baseline_ = hll.baseline; - auto numBuckets = 1 << indexBitLength_; + auto numBuckets = 1u << indexBitLength_; std::copy(hll.deltas, hll.deltas + numBuckets / 2, deltas_.data()); overflows_ = hll.overflows; @@ -553,7 +553,7 @@ void DenseHll::mergeWith(const char* serialized) { auto baseline = stream.read(); - auto numBuckets = 1 << indexBitLength_; + auto numBuckets = 1u << indexBitLength_; auto deltas = stream.read(numBuckets / 2); auto overflows = stream.read(); auto overflowBuckets = overflows ? stream.read(overflows) : nullptr; From c7c63420f516e6fcc0b86ce434be801f122612b7 Mon Sep 17 00:00:00 2001 From: Shakyan Kushwaha Date: Wed, 29 Jan 2025 19:19:30 +0530 Subject: [PATCH 2/2] fix: Bitwise operations on signed integers (Part-2) --- velox/common/hyperloglog/DenseHll.cpp | 22 +++++++++++++++------- velox/common/hyperloglog/SparseHll.cpp | 6 +++--- 2 files changed, 18 insertions(+), 10 deletions(-) diff --git a/velox/common/hyperloglog/DenseHll.cpp b/velox/common/hyperloglog/DenseHll.cpp index 30a0f9006e49..0daad7f5bc87 100644 --- a/velox/common/hyperloglog/DenseHll.cpp +++ b/velox/common/hyperloglog/DenseHll.cpp @@ -15,6 +15,8 @@ */ #include "velox/common/hyperloglog/DenseHll.h" +#include + #include #include #include "velox/common/base/IOUtils.h" @@ -720,7 +722,9 @@ int32_t DenseHll::mergeWithSimd(const HllView& other, int8_t newBaseline) { // Store newDelta in deltas_[deltaIndex]. auto slot1 = deltas_[deltaIndex]; - deltas_[deltaIndex] = (newDelta << 4) | (slot1 & kBucketMask); + uint8_t highNibble = static_cast(newDelta) << 4; + uint8_t lowNibble = static_cast(slot1) & static_cast(kBucketMask); + deltas_[deltaIndex] = deltas_[deltaIndex] = highNibble | lowNibble; }); } @@ -742,8 +746,10 @@ int32_t DenseHll::mergeWithSimd(const HllView& other, int8_t newBaseline) { } // Store newDelta. - auto slot1 = deltas_[deltaIndex]; - deltas_[deltaIndex] = (((slot1 >> 4) & kBucketMask) << 4) | newDelta; + auto slot1 = static_cast(deltas_[deltaIndex]); + uint8_t upperBits = (slot1 >> 4) & static_cast(kBucketMask); + uint8_t shiftedUpperBits = upperBits << 4; + deltas_[deltaIndex] = shiftedUpperBits | static_cast(newDelta); }); } } @@ -762,8 +768,10 @@ int32_t DenseHll::mergeWithScalar(const HllView& other, int8_t newBaseline) { int8_t slot2 = other.deltas[i]; for (int shift = 4; shift >= 0; shift -= 4) { - int8_t delta1 = (slot1 >> shift) & kBucketMask; - int8_t delta2 = (slot2 >> shift) & kBucketMask; + uint8_t maskedSlot1 = static_cast(slot1) >> shift; + uint8_t maskedSlot2 = static_cast(slot2) >> shift; + int8_t delta1 = maskedSlot1 & static_cast(kBucketMask); + int8_t delta2 = maskedSlot2 & static_cast(kBucketMask); auto [newValue, overflowEntry] = computeNewValue(delta1, delta2, bucket, other); @@ -776,8 +784,8 @@ int32_t DenseHll::mergeWithScalar(const HllView& other, int8_t newBaseline) { newDelta = updateOverflow(bucket, overflowEntry, newDelta); - newSlot <<= 4; - newSlot |= newDelta; + newSlot = static_cast(newSlot) << 4; + newSlot = static_cast(newSlot) | static_cast(newDelta); bucket++; } diff --git a/velox/common/hyperloglog/SparseHll.cpp b/velox/common/hyperloglog/SparseHll.cpp index 27d290dd10ef..925ca5412574 100644 --- a/velox/common/hyperloglog/SparseHll.cpp +++ b/velox/common/hyperloglog/SparseHll.cpp @@ -31,7 +31,7 @@ inline uint32_t decodeIndex(uint32_t entry) { } inline uint32_t decodeValue(uint32_t entry) { - return entry & ((1 << kValueBitLength) - 1); + return entry & ((1u << kValueBitLength) - 1); } int searchIndex( @@ -41,7 +41,7 @@ int searchIndex( int high = entries.size() - 1; while (low <= high) { - int middle = (low + high) >> 1; + int middle = static_cast(low + high) >> 1; auto middleIndex = decodeIndex(entries[middle]); @@ -93,7 +93,7 @@ int64_t SparseHll::cardinality() const { // 2^kIndexBitLength buckets available due to the fact that we're // recording the raw leading kIndexBitLength of the hash. This produces // much better precision while in the sparse regime. - static const int kTotalBuckets = 1 << kIndexBitLength; + static const int kTotalBuckets = 1u << kIndexBitLength; int zeroBuckets = kTotalBuckets - entries_.size(); return std::round(linearCounting(zeroBuckets, kTotalBuckets));