From dd0173cbf976bcc4e00b6a1692163b9527100152 Mon Sep 17 00:00:00 2001 From: yoruet <1559650411@qq.com> Date: Fri, 15 Nov 2024 19:07:24 +0800 Subject: [PATCH 01/41] add comments for block.h --- be/src/vec/core/block.h | 69 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 66 insertions(+), 3 deletions(-) diff --git a/be/src/vec/core/block.h b/be/src/vec/core/block.h index 2242db3f9058c2..e6ca43723c52e0 100644 --- a/be/src/vec/core/block.h +++ b/be/src/vec/core/block.h @@ -95,8 +95,9 @@ class Block { Block(Block&& block) = default; Block& operator=(Block&& other) = default; + /// Reserve memory for internal containers void reserve(size_t count); - // Make sure the nammes is useless when use block + /// Make sure the names is useless when use block void clear_names(); /// insert the column at the specified position @@ -123,6 +124,7 @@ class Block { std::swap(data, new_data); } + /// Initialize the index by name map void initialize_index_by_name(); /// References are invalidated after calling functions above. @@ -133,28 +135,34 @@ class Block { } const ColumnWithTypeAndName& get_by_position(size_t position) const { return data[position]; } + /// Replace column at position with rvalue column pointer void replace_by_position(size_t position, ColumnPtr&& res) { this->get_by_position(position).column = std::move(res); } + /// Replace column at position with lvalue column pointer void replace_by_position(size_t position, const ColumnPtr& res) { this->get_by_position(position).column = res; } + /// Convert const column at position to full column if it is const void replace_by_position_if_const(size_t position) { auto& element = this->get_by_position(position); element.column = element.column->convert_to_full_column_if_const(); } + /// Convert all columns to new columns if they overflow void replace_if_overflow() { for (auto& ele : data) { ele.column = std::move(*ele.column).mutate()->convert_column_if_overflow(); } } + // get column by position, throw exception when position is invalid ColumnWithTypeAndName& safe_get_by_position(size_t position); const ColumnWithTypeAndName& safe_get_by_position(size_t position) const; + // get column by name, throw exception when no such column name ColumnWithTypeAndName& get_by_name(const std::string& name); const ColumnWithTypeAndName& get_by_name(const std::string& name) const; @@ -162,22 +170,33 @@ class Block { ColumnWithTypeAndName* try_get_by_name(const std::string& name); const ColumnWithTypeAndName* try_get_by_name(const std::string& name) const; + /// Get an iterator to the beginning of the data container Container::iterator begin() { return data.begin(); } + /// Get an iterator to the end of the data container Container::iterator end() { return data.end(); } + /// Get a constant iterator to the beginning of the data container Container::const_iterator begin() const { return data.begin(); } + /// Get a constant iterator to the end of the data container Container::const_iterator end() const { return data.end(); } + /// Get a constant iterator to the beginning of the data container Container::const_iterator cbegin() const { return data.cbegin(); } + /// Get a constant iterator to the end of the data container Container::const_iterator cend() const { return data.cend(); } + // check if the column name exists bool has(const std::string& name) const; + // get the position of the column by name size_t get_position_by_name(const std::string& name) const; + // get the columns with type and name const ColumnsWithTypeAndName& get_columns_with_type_and_name() const; + // get the names of the columns std::vector get_names() const; DataTypes get_data_types() const; + // get the data type of the column by index DataTypePtr get_data_type(size_t index) const { CHECK(index < data.size()); return data[index].type; @@ -186,6 +205,7 @@ class Block { /// Returns number of rows from first column in block, not equal to nullptr. If no columns, returns 0. size_t rows() const; + // Get a string with the size of each column in bytes. std::string each_col_size() const; // Cut the rows in block, use in LIMIT operation @@ -204,6 +224,7 @@ class Block { /// Approximate number of bytes in memory - for profiling and limits. size_t bytes() const; + /// Get a string with the size of each column in bytes. std::string columns_bytes() const; /// Approximate number of allocated bytes in memory - for profiling and limits. @@ -212,6 +233,7 @@ class Block { /** Get a list of column names separated by commas. */ std::string dump_names() const; + /** Get a list of column types separated by commas. */ std::string dump_types() const; /** List of names, types and lengths of columns. Designed for debugging. */ @@ -220,11 +242,16 @@ class Block { /** Get the same block, but empty. */ Block clone_empty() const; + /// Get a list of columns. Columns get_columns() const; + /// Get a list of columns and convert them to full columns. Columns get_columns_and_convert(); + /// Set the columns of the block. void set_columns(const Columns& columns); + /// Clone the block with the specified columns. Block clone_with_columns(const Columns& columns) const; + /// Clone the block without the specified columns. Block clone_without_columns(const std::vector* column_offset = nullptr) const; /** Get empty columns with the same types as in block. */ @@ -251,10 +278,14 @@ class Block { // Else clear column [0, column_size) delete column [column_size, data.size) void clear_column_data(int64_t column_size = -1) noexcept; + // Check if the block is not empty. bool mem_reuse() { return !data.empty(); } + // Check if the block has no columns bool is_empty_column() { return data.empty(); } + // Check if the block has no rows (i.e. all columns have 0 rows) + // This is different from is_empty_column() which checks for absence of columns bool empty() const { return rows() == 0; } /** @@ -284,6 +315,7 @@ class Block { // copy a new block by the offset column Block copy_block(const std::vector& column_offset) const; + // append to block by selector Status append_to_block_by_selector(MutableBlock* dst, const IColumn::Selector& selector) const; // need exception safety @@ -295,11 +327,14 @@ class Block { // need exception safety static void filter_block_internal(Block* block, const IColumn::Filter& filter); + // Filter block by specified columns using filter column static Status filter_block(Block* block, const std::vector& columns_to_filter, size_t filter_column_id, size_t column_to_keep); + // Filter block using filter column static Status filter_block(Block* block, size_t filter_column_id, size_t column_to_keep); + // Remove columns after column_to_keep static void erase_useless_column(Block* block, size_t column_to_keep) { block->erase_tail(column_to_keep); } @@ -309,8 +344,10 @@ class Block { size_t* compressed_bytes, segment_v2::CompressionTypePB compression_type, bool allow_transfer_large_data = false) const; + // Deserialize from PBlock format Status deserialize(const PBlock& pblock); + // Create empty block with same schema std::unique_ptr create_same_struct_block(size_t size, bool is_reserve = false) const; /** Compares (*this) n-th row and rhs m-th row. @@ -329,6 +366,7 @@ class Block { return compare_at(n, m, columns(), rhs, nan_direction_hint); } + // Compare rows by first num_columns columns in sequential order (from index 0 to num_columns - 1) int compare_at(size_t n, size_t m, size_t num_columns, const Block& rhs, int nan_direction_hint) const { DCHECK_GE(columns(), num_columns); @@ -347,6 +385,7 @@ class Block { return 0; } + // Compare rows by specified columns in compare_columns int compare_at(size_t n, size_t m, const std::vector* compare_columns, const Block& rhs, int nan_direction_hint) const { DCHECK_GE(columns(), compare_columns->size()); @@ -377,10 +416,14 @@ class Block { // for String type or Array type void shrink_char_type_column_suffix_zero(const std::vector& char_type_idx); + // Get time spent on decompression in nanoseconds int64_t get_decompress_time() const { return _decompress_time_ns; } + // Get total bytes after decompression int64_t get_decompressed_bytes() const { return _decompressed_bytes; } + // Get time spent on compression in nanoseconds int64_t get_compress_time() const { return _compress_time_ns; } + // Set same bit flags for rows in block void set_same_bit(std::vector::const_iterator begin, std::vector::const_iterator end) { row_same_bit.insert(row_same_bit.end(), begin, end); @@ -388,6 +431,7 @@ class Block { DCHECK_EQ(row_same_bit.size(), rows()); } + // Get same bit flag for specified row position bool get_same_bit(size_t position) { if (position >= row_same_bit.size()) { return false; @@ -395,6 +439,7 @@ class Block { return row_same_bit[position]; } + // Clear all same bit flags void clear_same_bit() { row_same_bit.clear(); } // return string contains use_count() of each columns @@ -406,6 +451,7 @@ class Block { // we built some temporary columns into block void erase_tmp_columns() noexcept; + // Clear columns not marked for keeping void clear_column_mem_not_keep(const std::vector& column_keep_flags, bool need_keep_first); @@ -480,6 +526,7 @@ class MutableBlock { return _data_types[position]; } + // Compare rows by specified column int compare_one_column(size_t n, size_t m, size_t column_id, int nan_direction_hint) const { DCHECK_LE(column_id, columns()); DCHECK_LE(n, rows()); @@ -488,6 +535,7 @@ class MutableBlock { return column->compare_at(n, m, *column, nan_direction_hint); } + // Compare rows by first num_columns columns in sequential order (from index 0 to num_columns - 1) int compare_at(size_t n, size_t m, size_t num_columns, const MutableBlock& rhs, int nan_direction_hint) const { DCHECK_GE(columns(), num_columns); @@ -506,6 +554,7 @@ class MutableBlock { return 0; } + // Compare rows by specified columns in compare_columns int compare_at(size_t n, size_t m, const std::vector* compare_columns, const MutableBlock& rhs, int nan_direction_hint) const { DCHECK_GE(columns(), compare_columns->size()); @@ -524,6 +573,7 @@ class MutableBlock { return 0; } + // Get a string representation of the block's data types std::string dump_types() const { std::string res; for (auto type : _data_types) { @@ -565,6 +615,7 @@ class MutableBlock { return Status::OK(); } + // Merge another block into current block with strict type check and overflow handling. template [[nodiscard]] Status merge_impl(T&& block) { // merge is not supported in dynamic block @@ -613,12 +664,14 @@ class MutableBlock { return Status::OK(); } - // move to columns' data to a Block. this will invalidate + // Move the data of columns to a block. This will invalidate the MutableBlock. Block to_block(int start_column = 0); Block to_block(int start_column, int end_column); + // Swap the contents of two MutableBlocks void swap(MutableBlock& other) noexcept; + // Move-swap the contents of two MutableBlocks void swap(MutableBlock&& other) noexcept; void add_row(const Block* block, int row); @@ -628,11 +681,13 @@ class MutableBlock { Status add_rows(const Block* block, size_t row_begin, size_t length); Status add_rows(const Block* block, const std::vector& rows); - /// remove the column with the specified name + /// Remove the column with the specified name void erase(const String& name); + // Get a string representation of the block's data, limited to the specified number of rows std::string dump_data(size_t row_limit = 100) const; + // Clear the block's data void clear() { _columns.clear(); _data_types.clear(); @@ -644,8 +699,10 @@ class MutableBlock { // reset columns by types and names. void reset_column_data() noexcept; + // Returns the total number of bytes allocated by all columns in the block size_t allocated_bytes() const; + // Returns the approximate number of bytes in memory used by the block size_t bytes() const { size_t res = 0; for (const auto& elem : _columns) { @@ -655,16 +712,20 @@ class MutableBlock { return res; } + // Get the names of the columns in the block std::vector& get_names() { return _names; } + // Check if the block contains a column with the specified name bool has(const std::string& name) const; + // Get the position of the column with the specified name size_t get_position_by_name(const std::string& name) const; /** Get a list of column names separated by commas. */ std::string dump_names() const; private: + // Initialize the index by name map void initialize_index_by_name(); }; @@ -673,11 +734,13 @@ struct IteratorRowRef { int row_pos; bool is_same; + // Compare rows by specified arguments template int compare(const IteratorRowRef& rhs, const T& compare_arguments) const { return block->compare_at(row_pos, rhs.row_pos, compare_arguments, *rhs.block, -1); } + // Reset the IteratorRowRef to default values void reset() { block = nullptr; row_pos = -1; From e961aafcf8955bc65a9ee2ace5cf19375100633a Mon Sep 17 00:00:00 2001 From: yoruet <1559650411@qq.com> Date: Sun, 17 Nov 2024 13:38:40 +0800 Subject: [PATCH 02/41] add more comments to block.h and add some unit tests for block in block_test.cpp and add some initialization for beta_rowset_test.cpp and agg_linear_histogram_test.cpp --- be/src/vec/core/block.h | 21 +- be/test/olap/rowset/beta_rowset_test.cpp | 2 + .../agg_linear_histogram_test.cpp | 3 +- be/test/vec/core/block_test.cpp | 1572 +++++++++++++++++ 4 files changed, 1588 insertions(+), 10 deletions(-) diff --git a/be/src/vec/core/block.h b/be/src/vec/core/block.h index e6ca43723c52e0..697af729771d18 100644 --- a/be/src/vec/core/block.h +++ b/be/src/vec/core/block.h @@ -97,7 +97,7 @@ class Block { /// Reserve memory for internal containers void reserve(size_t count); - /// Make sure the names is useless when use block + /// Clear all column names and name index mappings in the block void clear_names(); /// insert the column at the specified position @@ -192,11 +192,12 @@ class Block { // get the columns with type and name const ColumnsWithTypeAndName& get_columns_with_type_and_name() const; - // get the names of the columns + // Returns a vector containing all column names in the block std::vector get_names() const; + // Returns a vector containing all column data types in the block DataTypes get_data_types() const; - // get the data type of the column by index + // Returns the data type of the column at the specified index DataTypePtr get_data_type(size_t index) const { CHECK(index < data.size()); return data[index].type; @@ -205,7 +206,8 @@ class Block { /// Returns number of rows from first column in block, not equal to nullptr. If no columns, returns 0. size_t rows() const; - // Get a string with the size of each column in bytes. + // Returns a string showing the size of each column, separated by ' | ' + // Returns -1 for null columns std::string each_col_size() const; // Cut the rows in block, use in LIMIT operation @@ -242,16 +244,16 @@ class Block { /** Get the same block, but empty. */ Block clone_empty() const; - /// Get a list of columns. + /// Returns a copy of all columns, converting const columns to full columns Columns get_columns() const; - /// Get a list of columns and convert them to full columns. + /// Returns all columns and converts const columns to full columns in place Columns get_columns_and_convert(); /// Set the columns of the block. void set_columns(const Columns& columns); /// Clone the block with the specified columns. Block clone_with_columns(const Columns& columns) const; - /// Clone the block without the specified columns. + /// Clone the block with the specified column offset but without data. Block clone_without_columns(const std::vector* column_offset = nullptr) const; /** Get empty columns with the same types as in block. */ @@ -315,7 +317,8 @@ class Block { // copy a new block by the offset column Block copy_block(const std::vector& column_offset) const; - // append to block by selector + // appends selected rows from this block to destination block based on selector + // skips const columns during append operation Status append_to_block_by_selector(MutableBlock* dst, const IColumn::Selector& selector) const; // need exception safety @@ -344,7 +347,7 @@ class Block { size_t* compressed_bytes, segment_v2::CompressionTypePB compression_type, bool allow_transfer_large_data = false) const; - // Deserialize from PBlock format + // deserialize block from PBlock Status deserialize(const PBlock& pblock); // Create empty block with same schema diff --git a/be/test/olap/rowset/beta_rowset_test.cpp b/be/test/olap/rowset/beta_rowset_test.cpp index 1ed3a9ed04b2c7..b791a9901aeb85 100644 --- a/be/test/olap/rowset/beta_rowset_test.cpp +++ b/be/test/olap/rowset/beta_rowset_test.cpp @@ -236,6 +236,8 @@ TEST_F(BetaRowsetTest, ReadTest) { .region = "region", .ak = "ak", .sk = "sk", + .token = "token", + .bucket = "bucket", }}; std::string resource_id = "10000"; auto res = io::S3FileSystem::create(std::move(s3_conf), io::FileSystem::TMP_FS_ID); diff --git a/be/test/vec/aggregate_functions/agg_linear_histogram_test.cpp b/be/test/vec/aggregate_functions/agg_linear_histogram_test.cpp index 3dbf34a4dcb30c..b13ae5868bb282 100644 --- a/be/test/vec/aggregate_functions/agg_linear_histogram_test.cpp +++ b/be/test/vec/aggregate_functions/agg_linear_histogram_test.cpp @@ -205,7 +205,8 @@ class AggLinearHistogramTest : public testing::Test { AggregateFunctionSimpleFactory factory = AggregateFunctionSimpleFactory::instance(); auto agg_function = - factory.get("linear_histogram", data_types, false, -1, {.enable_decimal256 = true}); + factory.get("linear_histogram", data_types, false, -1, + {.enable_decimal256 = true, .column_infos = {}}); EXPECT_NE(agg_function, nullptr); std::unique_ptr memory(new char[agg_function->size_of_data()]); diff --git a/be/test/vec/core/block_test.cpp b/be/test/vec/core/block_test.cpp index 54cb15f8d486d4..271bde12651870 100644 --- a/be/test/vec/core/block_test.cpp +++ b/be/test/vec/core/block_test.cpp @@ -693,6 +693,1578 @@ void serialize_and_deserialize_test_array() { } } +TEST(BlockTest, Constructor) { + // default constructor + { + vectorized::Block block; + EXPECT_EQ(0, block.columns()); + EXPECT_EQ(0, block.rows()); + } + + // constructor with initializer_list + { + auto col = vectorized::ColumnVector::create(); + vectorized::DataTypePtr type(std::make_shared()); + vectorized::Block block({ + {col->get_ptr(), type, "col1"}, + {col->get_ptr(), type, "col2"} + }); + EXPECT_EQ(2, block.columns()); + } + + // constructor with ColumnsWithTypeAndName + { + vectorized::ColumnsWithTypeAndName columns; + auto col = vectorized::ColumnVector::create(); + vectorized::DataTypePtr type(std::make_shared()); + columns.emplace_back(col->get_ptr(), type, "col1"); + vectorized::Block block(columns); + EXPECT_EQ(1, block.columns()); + } + +} + +TEST(BlockTest, BasicOperations) { + vectorized::Block block; + auto col1 = vectorized::ColumnVector::create(); + auto col2 = vectorized::ColumnVector::create(); + auto col3 = vectorized::ColumnVector::create(); + vectorized::DataTypePtr type(std::make_shared()); + + // test reserve + block.reserve(3); + + // test insert at end + block.insert({col1->get_ptr(), type, "col1"}); + EXPECT_EQ(1, block.columns()); + EXPECT_EQ("col1", block.get_by_position(0).name); + + block.insert({col3->get_ptr(), type, "col3"}); + EXPECT_EQ(2, block.columns()); + EXPECT_EQ("col3", block.get_by_position(1).name); + + // test insert at position + block.insert(1, {col2->get_ptr(), type, "col2"}); + EXPECT_EQ(3, block.columns()); + EXPECT_EQ("col2", block.get_by_position(1).name); + + // test erase by position + block.erase(1); // Remove col2 + EXPECT_EQ(2, block.columns()); + EXPECT_EQ("col1", block.get_by_position(0).name); + EXPECT_EQ("col3", block.get_by_position(1).name); + + // test erase_tail + block.insert(1, {col2->get_ptr(), type, "col2"}); + block.erase_tail(1); // Remove col2 and col3 + EXPECT_EQ(1, block.columns()); + EXPECT_EQ("col1", block.get_by_position(0).name); + + // test erase by set of positions + block.insert({col2->get_ptr(), type, "col2"}); + block.insert({col3->get_ptr(), type, "col3"}); + std::set positions_to_remove = {0, 2}; // Remove col1 and col3 + block.erase(positions_to_remove); + EXPECT_EQ(1, block.columns()); + EXPECT_EQ("col2", block.get_by_position(0).name); + + // test erase by name + block.erase("col2"); + EXPECT_EQ(0, block.columns()); + + // test erase_not_in + block.insert({col1->get_ptr(), type, "col1"}); + block.insert({col2->get_ptr(), type, "col2"}); + block.insert({col3->get_ptr(), type, "col3"}); + std::vector columns_to_keep = {0, 2}; // Keep col1 and col3 + block.erase_not_in(columns_to_keep); + EXPECT_EQ(2, block.columns()); + EXPECT_EQ("col1", block.get_by_position(0).name); + EXPECT_EQ("col3", block.get_by_position(1).name); + + // test clear_names + block.clear_names(); + EXPECT_EQ("", block.get_by_position(0).name); + EXPECT_EQ("", block.get_by_position(1).name); + + // test clear + block.clear(); + EXPECT_EQ(0, block.columns()); +} + +TEST(BlockTest, ColumnOperations) { + vectorized::Block block; + auto col1 = vectorized::ColumnVector::create(); + auto col2 = vectorized::ColumnVector::create(); + auto col3 = vectorized::ColumnVector::create(); + vectorized::DataTypePtr type(std::make_shared()); + + // Setup test data + block.insert({col1->get_ptr(), type, "col1"}); + block.insert({col2->get_ptr(), type, "col2"}); + block.insert({col3->get_ptr(), type, "col3"}); + + // Test get_by_position + EXPECT_EQ("col1", block.get_by_position(0).name); + EXPECT_EQ("col2", block.get_by_position(1).name); + EXPECT_EQ("col3", block.get_by_position(2).name); + + // Test safe_get_by_position + EXPECT_EQ("col1", block.safe_get_by_position(0).name); + EXPECT_THROW(block.safe_get_by_position(10), Exception); + + // Test get_by_name + EXPECT_EQ("col1", block.get_by_name("col1").name); + EXPECT_THROW(block.get_by_name("non_existent"), Exception); + + // Test try_get_by_name + EXPECT_NE(nullptr, block.try_get_by_name("col1")); + EXPECT_EQ(nullptr, block.try_get_by_name("non_existent")); + + // Test has + EXPECT_TRUE(block.has("col1")); + EXPECT_FALSE(block.has("non_existent")); + + // Test get_position_by_name + EXPECT_EQ(0, block.get_position_by_name("col1")); + EXPECT_EQ(1, block.get_position_by_name("col2")); + EXPECT_THROW(block.get_position_by_name("non_existent"), Exception); + + // Test get_names + auto names = block.get_names(); + EXPECT_EQ(3, names.size()); + EXPECT_EQ("col1", names[0]); + EXPECT_EQ("col2", names[1]); + EXPECT_EQ("col3", names[2]); + + // Test get_data_type + EXPECT_EQ(type, block.get_data_type(0)); + EXPECT_EQ(type, block.get_data_type(1)); + EXPECT_EQ(type, block.get_data_type(2)); + + // Test get_data_types + auto types = block.get_data_types(); + EXPECT_EQ(3, types.size()); + for (const auto& t : types) { + EXPECT_EQ(type, t); + } + + // Test replace_by_position + auto new_col = vectorized::ColumnVector::create(); + block.replace_by_position(0, new_col->get_ptr()); + EXPECT_EQ(0, block.get_by_position(0).column->size()); + + // Test replace_by_position with rvalue + auto another_col = vectorized::ColumnVector::create(); + block.replace_by_position(1, another_col->get_ptr()); + EXPECT_EQ(0, block.get_by_position(1).column->size()); + + // Test replace_by_position_if_const + auto const_col = vectorized::ColumnVector::create(); + const_col->insert_value(1); + auto const_column = vectorized::ColumnConst::create(const_col->get_ptr(), 1); + block.replace_by_position(2, const_column->get_ptr()); + + // Verify it's const column before replacement + EXPECT_NE(nullptr, typeid_cast( + block.get_by_position(2).column.get())); + + // Replace const column with full column + block.replace_by_position_if_const(2); + + // Verify it's no longer const column after replacement + EXPECT_EQ(nullptr, typeid_cast( + block.get_by_position(2).column.get())); + + // Test iterator functionality + size_t count = 0; + for (const auto& col : block) { + EXPECT_EQ(type, col.type); + count++; + } + EXPECT_EQ(3, count); + + // Test const iterator functionality + const auto& const_block = block; + count = 0; + for (const auto& col : const_block) { + EXPECT_EQ(type, col.type); + count++; + } + EXPECT_EQ(3, count); + + // Test get_columns_with_type_and_name + const auto& columns = block.get_columns_with_type_and_name(); + EXPECT_EQ(3, columns.size()); + EXPECT_EQ("col1", columns[0].name); + EXPECT_EQ("col2", columns[1].name); + EXPECT_EQ("col3", columns[2].name); + + // Test sort_columns + { + vectorized::Block unsorted_block; + auto type = std::make_shared(); + + // Insert columns in random order + { + auto col_c = vectorized::ColumnVector::create(); + unsorted_block.insert({std::move(col_c), type, "c"}); + } + { + auto col_a = vectorized::ColumnVector::create(); + unsorted_block.insert({std::move(col_a), type, "a"}); + } + { + auto col_b = vectorized::ColumnVector::create(); + unsorted_block.insert({std::move(col_b), type, "b"}); + } + + // Verify original order + auto original_names = unsorted_block.get_names(); + EXPECT_EQ("c", original_names[0]); + EXPECT_EQ("a", original_names[1]); + EXPECT_EQ("b", original_names[2]); + + // Sort columns and verify + auto sorted_block = unsorted_block.sort_columns(); + auto sorted_names = sorted_block.get_names(); + + // Verify alphabetical order + EXPECT_EQ("c", sorted_names[0]); + EXPECT_EQ("b", sorted_names[1]); + EXPECT_EQ("a", sorted_names[2]); + + // Verify original block remains unchanged + original_names = unsorted_block.get_names(); + EXPECT_EQ("c", original_names[0]); + EXPECT_EQ("a", original_names[1]); + EXPECT_EQ("b", original_names[2]); + + // Verify column count remains the same + EXPECT_EQ(unsorted_block.columns(), sorted_block.columns()); + + // Verify column types are preserved + EXPECT_EQ(type, sorted_block.get_data_type(0)); + EXPECT_EQ(type, sorted_block.get_data_type(1)); + EXPECT_EQ(type, sorted_block.get_data_type(2)); + } +} + +TEST(BlockTest, RowOperations) { + vectorized::Block block; + + // Test empty block + EXPECT_EQ(0, block.rows()); + EXPECT_EQ(0, block.columns()); + EXPECT_TRUE(block.empty()); + EXPECT_TRUE(block.is_empty_column()); + + // Add columns with data + auto col1 = vectorized::ColumnVector::create(); + auto col2 = vectorized::ColumnString::create(); + vectorized::DataTypePtr type1(std::make_shared()); + vectorized::DataTypePtr type2(std::make_shared()); + + for (int i = 0; i < 100; ++i) { + col1->insert_value(i); + col2->insert_data(std::to_string(i).c_str(), std::to_string(i).length()); + } + + block.insert({col1->get_ptr(), type1, "col1"}); + block.insert({col2->get_ptr(), type2, "col2"}); + + // Test basic properties + EXPECT_EQ(100, block.rows()); + EXPECT_EQ(2, block.columns()); + EXPECT_FALSE(block.empty()); + EXPECT_FALSE(block.is_empty_column()); + + // Test row operations + block.set_num_rows(50); // LIMIT + EXPECT_EQ(50, block.rows()); + + int64_t offset = 20; + block.skip_num_rows(offset); // OFFSET + EXPECT_EQ(30, block.rows()); +} + +// 内存和大小相关测试 +TEST(BlockTest, MemoryAndSize) { + vectorized::Block block; + + // Test empty block (no columns) + EXPECT_EQ(0, block.bytes()); + EXPECT_EQ(0, block.allocated_bytes()); + EXPECT_EQ("column bytes: []", block.columns_bytes()); + + // Add first column (Int32) + auto col1 = vectorized::ColumnVector::create(); + vectorized::DataTypePtr type1(std::make_shared()); + for (int i = 0; i < 1000; ++i) { + col1->insert_value(i); + } + block.insert({col1->get_ptr(), type1, "col1"}); + + // Test with valid column + size_t bytes_one_col = block.bytes(); + size_t allocated_bytes_one_col = block.allocated_bytes(); + EXPECT_GT(bytes_one_col, 0); + EXPECT_GT(allocated_bytes_one_col, 0); + EXPECT_GE(allocated_bytes_one_col, bytes_one_col); + + // Test with nullptr column (should throw exception) + vectorized::Block block_with_null; + block_with_null.insert({nullptr, type1, "null_col"}); + + // bytes() should throw exception when there is a nullptr column + EXPECT_THROW(block_with_null.bytes(), Exception); + + // columns_bytes() should throw exception when there is a nullptr column + EXPECT_THROW(block_with_null.columns_bytes(), Exception); + + // allocated_bytes() should return 0 when there is a nullptr column + EXPECT_EQ(0, block_with_null.allocated_bytes()); + + // Add second valid column (String) + auto col2 = vectorized::ColumnString::create(); + vectorized::DataTypePtr type2(std::make_shared()); + for (int i = 0; i < 1000; ++i) { + std::string val = "test" + std::to_string(i); + col2->insert_data(val.c_str(), val.length()); + } + block.insert({col2->get_ptr(), type2, "col2"}); + + // Test with two valid columns + size_t bytes_two_cols = block.bytes(); + EXPECT_GT(bytes_two_cols, bytes_one_col); + + // Test after erasing first column + block.erase(0); + EXPECT_EQ(block.bytes(), col2->byte_size()); + + // Test after clearing all columns + block.clear(); + EXPECT_EQ(0, block.bytes()); + EXPECT_EQ(0, block.allocated_bytes()); + EXPECT_EQ("column bytes: []", block.columns_bytes()); + + // Test with multiple nullptr columns + vectorized::Block multi_null_block; + multi_null_block.insert({nullptr, type1, "null_col1"}); + multi_null_block.insert({nullptr, type2, "null_col2"}); + EXPECT_THROW(multi_null_block.bytes(), Exception); +} + +TEST(BlockTest, DumpMethods) { + vectorized::Block block; + + // Test empty block + EXPECT_EQ("", block.dump_names()); + EXPECT_EQ("", block.dump_types()); + EXPECT_TRUE(block.dump_structure().empty()); + + // Add first column (Int32) + auto col1 = vectorized::ColumnVector::create(); + vectorized::DataTypePtr type1(std::make_shared()); + col1->insert_value(123); + col1->insert_value(456); + block.insert({col1->get_ptr(), type1, "col1"}); + + // Test single column + EXPECT_EQ("col1", block.dump_names()); + EXPECT_EQ("Int32", block.dump_types()); + + // Add second column (String) + auto col2 = vectorized::ColumnString::create(); + vectorized::DataTypePtr type2(std::make_shared()); + col2->insert_data("hello", 5); + col2->insert_data("world", 5); + block.insert({col2->get_ptr(), type2, "col2"}); + + // Test multiple columns + EXPECT_EQ("col1, col2", block.dump_names()); + EXPECT_EQ("Int32, String", block.dump_types()); + + // Test dump_data with different parameters + { + // Default parameters + std::string data = block.dump_data(); + EXPECT_FALSE(data.empty()); + EXPECT_TRUE(data.find("col1(Int32)") != std::string::npos); + EXPECT_TRUE(data.find("col2(String)") != std::string::npos); + EXPECT_TRUE(data.find("123") != std::string::npos); + EXPECT_TRUE(data.find("hello") != std::string::npos); + } + + { + // Test with begin offset + std::string data = block.dump_data(1); + EXPECT_TRUE(data.find("456") != std::string::npos); + EXPECT_TRUE(data.find("world") != std::string::npos); + EXPECT_FALSE(data.find("123") != std::string::npos); + } + + { + // Test with row limit + std::string data = block.dump_data(0, 1); + LOG(INFO) << "dump_data with limit:\n" << data; + EXPECT_TRUE(data.find("123") != std::string::npos); + EXPECT_FALSE(data.find("456") != std::string::npos); + } + + // Test dump_one_line + { + std::string line = block.dump_one_line(0, 2); + EXPECT_EQ("123 hello", line); + + line = block.dump_one_line(1, 2); + EXPECT_EQ("456 world", line); + + line = block.dump_one_line(0, 1); + EXPECT_EQ("123", line); + } + + // Test dump_structure + { + std::string structure = block.dump_structure(); + LOG(INFO) << "Structure:\n" << structure; + EXPECT_TRUE(structure.find("col1") != std::string::npos); + EXPECT_TRUE(structure.find("Int32") != std::string::npos); + EXPECT_TRUE(structure.find("col2") != std::string::npos); + EXPECT_TRUE(structure.find("String") != std::string::npos); + } + + // Test with nullable column + auto nullable_type = std::make_shared(type1); + auto null_map = vectorized::ColumnUInt8::create(); + auto nested_col = col1->clone(); + auto nullable_col = vectorized::ColumnNullable::create(nested_col->get_ptr(), null_map->get_ptr()); + block.insert({nullable_col->get_ptr(), nullable_type, "nullable_col"}); + + { + std::string data = block.dump_data(0, 100, true); + LOG(INFO) << "dump_data with nullable:\n" << data; + EXPECT_TRUE(data.find("nullable_col") != std::string::npos); + EXPECT_TRUE(data.find("Nullable(Int32)") != std::string::npos); + } + + // Test dump_column static method + { + // Test Int32 column + std::string int_dump = vectorized::Block::dump_column(col1->get_ptr(), type1); + EXPECT_FALSE(int_dump.empty()); + EXPECT_TRUE(int_dump.find("123") != std::string::npos); + EXPECT_TRUE(int_dump.find("456") != std::string::npos); + + // Test String column + std::string str_dump = vectorized::Block::dump_column(col2->get_ptr(), type2); + LOG(INFO) << "String column dump:\n" << str_dump; + EXPECT_FALSE(str_dump.empty()); + EXPECT_TRUE(str_dump.find("hello") != std::string::npos); + EXPECT_TRUE(str_dump.find("world") != std::string::npos); + + // Test Nullable column + std::string nullable_dump = vectorized::Block::dump_column(nullable_col->get_ptr(), nullable_type); + LOG(INFO) << "Nullable column dump:\n" << nullable_dump; + EXPECT_FALSE(nullable_dump.empty()); + EXPECT_FALSE(nullable_dump.find("123") != std::string::npos); + + // Test empty column + auto empty_col = vectorized::ColumnVector::create(); + auto empty_dump = vectorized::Block::dump_column(empty_col->get_ptr(), type1); + LOG(INFO) << "Empty column dump:\n" << empty_dump; + EXPECT_FALSE(empty_dump.empty()); // Should still return formatted empty table + } +} + +// 克隆和列操作测试 +TEST(BlockTest, CloneOperations) { + vectorized::Block block; + auto col1 = vectorized::ColumnVector::create(); + auto col2 = vectorized::ColumnVector::create(); + vectorized::DataTypePtr type(std::make_shared()); + + col1->insert_value(1); + col2->insert_value(2); + + block.insert({col1->get_ptr(), type, "col1"}); + block.insert({col2->get_ptr(), type, "col2"}); + + // Test clone_empty + auto empty_block = block.clone_empty(); + EXPECT_EQ(block.columns(), empty_block.columns()); + EXPECT_EQ(0, empty_block.rows()); + + // Test get_columns and get_columns_and_convert + auto columns = block.get_columns(); + auto converted_columns = block.get_columns_and_convert(); + EXPECT_EQ(2, columns.size()); + EXPECT_EQ(2, converted_columns.size()); + + // Test clone_empty_columns + auto empty_columns = block.clone_empty_columns(); + EXPECT_EQ(2, empty_columns.size()); + EXPECT_EQ(0, empty_columns[0]->size()); + EXPECT_EQ(0, empty_columns[1]->size()); + + // Test mutate_columns + auto mutable_cols = block.mutate_columns(); + EXPECT_EQ(2, mutable_cols.size()); + + // Test set_columns with const columns + vectorized::Block new_block = block.clone_empty(); + new_block.set_columns(columns); + EXPECT_EQ(block.rows(), new_block.rows()); + EXPECT_EQ(block.columns(), new_block.columns()); + EXPECT_EQ("col1", new_block.get_by_position(0).name); + EXPECT_EQ("col2", new_block.get_by_position(1).name); + EXPECT_EQ(type, new_block.get_by_position(0).type); + EXPECT_EQ(type, new_block.get_by_position(1).type); + EXPECT_EQ(1, assert_cast*>( + new_block.get_by_position(0).column.get())->get_data()[0]); + EXPECT_EQ(2, assert_cast*>( + new_block.get_by_position(1).column.get())->get_data()[0]); + + // Test clone_with_columns + auto cloned_with_cols = block.clone_with_columns(columns); + EXPECT_EQ(block.rows(), cloned_with_cols.rows()); + EXPECT_EQ(block.columns(), cloned_with_cols.columns()); + EXPECT_EQ("col1", cloned_with_cols.get_by_position(0).name); + EXPECT_EQ("col2", cloned_with_cols.get_by_position(1).name); + EXPECT_EQ(type, cloned_with_cols.get_by_position(0).type); + EXPECT_EQ(type, cloned_with_cols.get_by_position(1).type); + EXPECT_EQ(1, assert_cast*>( + cloned_with_cols.get_by_position(0).column.get())->get_data()[0]); + EXPECT_EQ(2, assert_cast*>( + cloned_with_cols.get_by_position(1).column.get())->get_data()[0]); + + // Test clone_without_columns + std::vector column_offset = {0}; + auto partial_block = block.clone_without_columns(&column_offset); + EXPECT_EQ(1, partial_block.columns()); + EXPECT_EQ("col1", partial_block.get_by_position(0).name); + EXPECT_EQ(nullptr, partial_block.get_by_position(0).column.get()); + + // Test set_columns with mutable columns + { + auto mutable_columns = block.clone_empty_columns(); + auto* tmp_col0 = assert_cast*>(mutable_columns[0].get()); + auto* tmp_col1 = assert_cast*>(mutable_columns[1].get()); + tmp_col0->insert_value(3); + tmp_col1->insert_value(4); + block.set_columns(std::move(mutable_columns)); + EXPECT_EQ(1, block.rows()); + EXPECT_EQ(3, assert_cast*>( + block.get_by_position(0).column.get())->get_data()[0]); + EXPECT_EQ(4, assert_cast*>( + block.get_by_position(1).column.get())->get_data()[0]); + } + // Test clone_with_columns with mutable columns + { + auto new_mutable_columns = block.clone_empty_columns(); + auto* tmp_col0 = assert_cast*>(new_mutable_columns[0].get()); + auto* tmp_col1 = assert_cast*>(new_mutable_columns[1].get()); + tmp_col0->insert_value(5); + tmp_col1->insert_value(6); + auto cloned_with_mutable = block.clone_with_columns(std::move(new_mutable_columns)); + EXPECT_EQ(1, cloned_with_mutable.rows()); + EXPECT_EQ(5, assert_cast*>( + cloned_with_mutable.get_by_position(0).column.get())->get_data()[0]); + EXPECT_EQ(6, assert_cast*>( + cloned_with_mutable.get_by_position(1).column.get())->get_data()[0]); + } + + // Test copy_block + { + // Test copying single column + std::vector single_column = {0}; + auto single_copy = block.copy_block(single_column); + EXPECT_EQ(1, single_copy.columns()); + EXPECT_EQ("col1", single_copy.get_by_position(0).name); + EXPECT_EQ(type, single_copy.get_by_position(0).type); + EXPECT_EQ(3, assert_cast*>( + single_copy.get_by_position(0).column.get())->get_data()[0]); + + // Test copying multiple columns + std::vector multiple_columns = {0, 1}; + auto multi_copy = block.copy_block(multiple_columns); + EXPECT_EQ(2, multi_copy.columns()); + EXPECT_EQ("col1", multi_copy.get_by_position(0).name); + EXPECT_EQ("col2", multi_copy.get_by_position(1).name); + EXPECT_EQ(type, multi_copy.get_by_position(0).type); + EXPECT_EQ(type, multi_copy.get_by_position(1).type); + EXPECT_EQ(3, assert_cast*>( + multi_copy.get_by_position(0).column.get())->get_data()[0]); + EXPECT_EQ(4, assert_cast*>( + multi_copy.get_by_position(1).column.get())->get_data()[0]); + + // Test copying columns in different order + std::vector reordered_columns = {1, 0}; + auto reordered_copy = block.copy_block(reordered_columns); + EXPECT_EQ(2, reordered_copy.columns()); + EXPECT_EQ("col2", reordered_copy.get_by_position(0).name); + EXPECT_EQ("col1", reordered_copy.get_by_position(1).name); + EXPECT_EQ(4, assert_cast*>( + reordered_copy.get_by_position(0).column.get())->get_data()[0]); + EXPECT_EQ(3, assert_cast*>( + reordered_copy.get_by_position(1).column.get())->get_data()[0]); + + // Test copying same column multiple times + std::vector duplicate_columns = {0, 0}; + auto duplicate_copy = block.copy_block(duplicate_columns); + EXPECT_EQ(2, duplicate_copy.columns()); + EXPECT_EQ("col1", duplicate_copy.get_by_position(0).name); + EXPECT_EQ("col1", duplicate_copy.get_by_position(1).name); + EXPECT_EQ(3, assert_cast*>( + duplicate_copy.get_by_position(0).column.get())->get_data()[0]); + EXPECT_EQ(3, assert_cast*>( + duplicate_copy.get_by_position(1).column.get())->get_data()[0]); + } +} + +TEST(BlockTest, FilterAndSelector) { + auto create_test_block = [](int size) { + vectorized::Block test_block; + auto test_col1 = vectorized::ColumnVector::create(); + auto test_col2 = vectorized::ColumnVector::create(); + auto type = std::make_shared(); + + for (int i = 0; i < size; ++i) { + test_col1->insert_value(i); + test_col2->insert_value(i * 2); + } + + test_block.insert({test_col1->get_ptr(), type, "col1"}); + test_block.insert({test_col2->get_ptr(), type, "col2"}); + return test_block; + }; + + // Create original block + auto block = create_test_block(10); + + // Test filter_block_internal with filter only + { + auto test_block = create_test_block(10); + vectorized::IColumn::Filter filter(10, 1); // Initialize with all 1s (keep all rows) + filter[0] = 0; // Filter out first row + filter[5] = 0; // Filter out sixth row + + vectorized::Block::filter_block_internal(&test_block, filter); + EXPECT_EQ(8, test_block.rows()); + + // Verify filtered data for both columns + const auto* filtered_col1 = assert_cast*>( + test_block.get_by_position(0).column.get()); + const auto* filtered_col2 = assert_cast*>( + test_block.get_by_position(1).column.get()); + + // Expected values after filtering + std::vector expected_col1 = {1,2,3,4,6,7,8,9}; + std::vector expected_col2 = {2,4,6,8,12,14,16,18}; + + for (size_t i = 0; i < expected_col1.size(); ++i) { + EXPECT_EQ(expected_col1[i], filtered_col1->get_data()[i]); + EXPECT_EQ(expected_col2[i], filtered_col2->get_data()[i]); + } + } + + // Test filter_block_internal with specific columns + { + auto test_block = create_test_block(10); + vectorized::IColumn::Filter filter(10, 1); + filter[0] = 0; + std::vector columns_to_filter = {0}; // Only filter first column + + vectorized::Block::filter_block_internal(&test_block, columns_to_filter, filter); + EXPECT_EQ(9, test_block.rows()); + + const auto* filtered_col1 = assert_cast*>( + test_block.get_by_position(0).column.get()); + const auto* filtered_col2 = assert_cast*>( + test_block.get_by_position(1).column.get()); + EXPECT_EQ(1, filtered_col1->get_data()[0]); // First column filtered + EXPECT_EQ(0, filtered_col2->get_data()[0]); // Second column unchanged + } + + // Test filter_block_internal with column_to_keep + { + auto test_block = create_test_block(10); + vectorized::IColumn::Filter filter(10, 1); + filter[0] = 0; // Filter out first row + filter[5] = 0; // Filter out sixth row + uint32_t column_to_keep = 1; // Only filter first column, keep the rest columns + + vectorized::Block::filter_block_internal(&test_block, filter, column_to_keep); + + // Verify row count after filtering + EXPECT_EQ(8, test_block.rows()); + EXPECT_EQ(2, test_block.columns()); + + // Verify filtered data for both columns + const auto* filtered_col1 = assert_cast*>( + test_block.get_by_position(0).column.get()); + const auto* filtered_col2 = assert_cast*>( + test_block.get_by_position(1).column.get()); + + // Expected values after filtering + std::vector expected_col1 = {1,2,3,4,6,7,8,9}; + std::vector expected_col2 = {0,2,4,6,8,10,12,14,16,18}; + + // Verify each value in filtered columns + for (size_t i = 0; i < expected_col1.size(); ++i) { + EXPECT_EQ(expected_col1[i], filtered_col1->get_data()[i]); + } + for (size_t i = 0; i < expected_col2.size(); ++i) { + EXPECT_EQ(expected_col2[i], filtered_col2->get_data()[i]); + } + } + + // Test filter_block with nullable filter column + { + auto test_block = create_test_block(10); + + // Create nullable filter column + auto nullable_filter = vectorized::ColumnNullable::create( + vectorized::ColumnVector::create(10, 1), // all true + vectorized::ColumnVector::create(10, 0) // no nulls + ); + auto filter_type = std::make_shared( + std::make_shared() + ); + + // Add filter column to block + test_block.insert({nullable_filter->get_ptr(), filter_type, "filter"}); + + // Test four-parameter version + std::vector columns_to_filter = {0, 1}; + EXPECT_TRUE(vectorized::Block::filter_block(&test_block, columns_to_filter, 2, 2).ok()); + EXPECT_EQ(10, test_block.rows()); // All rows kept + + // Test three-parameter version + auto test_block2 = create_test_block(10); + test_block2.insert({nullable_filter->get_ptr(), filter_type, "filter"}); + EXPECT_TRUE(vectorized::Block::filter_block(&test_block2, 2, 2).ok()); + EXPECT_EQ(10, test_block2.rows()); // All rows kept + } + + // Test filter_block with const filter column + { + auto test_block = create_test_block(10); + + // Create const filter column (false) + auto const_filter = vectorized::ColumnConst::create( + vectorized::ColumnVector::create(1, 0), // false + 10 + ); + auto filter_type = std::make_shared(); + + // Add filter column to block + test_block.insert({const_filter->get_ptr(), filter_type, "filter"}); + + // Test four-parameter version + std::vector columns_to_filter = {0, 1}; + EXPECT_TRUE(vectorized::Block::filter_block(&test_block, columns_to_filter, 2, 2).ok()); + EXPECT_EQ(0, test_block.rows()); // All rows filtered out + + // Test three-parameter version + auto test_block2 = create_test_block(10); + test_block2.insert({const_filter->get_ptr(), filter_type, "filter"}); + EXPECT_TRUE(vectorized::Block::filter_block(&test_block2, 2, 2).ok()); + EXPECT_EQ(0, test_block2.rows()); // All rows filtered out + } + + // Test filter_block with regular filter column + { + auto test_block = create_test_block(10); + + // Create regular filter column + auto filter_column = vectorized::ColumnVector::create(); + for (size_t i = 0; i < 10; ++i) { + filter_column->insert_value(i % 2); // Keep odd-indexed rows + } + auto filter_type = std::make_shared(); + + // Add filter column to block + test_block.insert({filter_column->get_ptr(), filter_type, "filter"}); + + // Test four-parameter version + std::vector columns_to_filter = {0, 1}; + EXPECT_TRUE(vectorized::Block::filter_block(&test_block, columns_to_filter, 2, 2).ok()); + EXPECT_EQ(5, test_block.rows()); // Half rows kept + + // Verify filtered data + const auto* filtered_col1 = assert_cast*>( + test_block.get_by_position(0).column.get()); + const auto* filtered_col2 = assert_cast*>( + test_block.get_by_position(1).column.get()); + + std::vector expected_col1 = {1,3,5,7,9}; + std::vector expected_col2 = {2,6,10,14,18}; + + for (size_t i = 0; i < expected_col1.size(); ++i) { + EXPECT_EQ(expected_col1[i], filtered_col1->get_data()[i]); + EXPECT_EQ(expected_col2[i], filtered_col2->get_data()[i]); + } + + // Test three-parameter version + auto test_block2 = create_test_block(10); + test_block2.insert({filter_column->get_ptr(), filter_type, "filter"}); + EXPECT_TRUE(vectorized::Block::filter_block(&test_block2, 2, 2).ok()); + EXPECT_EQ(5, test_block2.rows()); // Half rows kept + + // Verify filtered data + filtered_col1 = assert_cast*>( + test_block2.get_by_position(0).column.get()); + filtered_col2 = assert_cast*>( + test_block2.get_by_position(1).column.get()); + + for (size_t i = 0; i < expected_col1.size(); ++i) { + EXPECT_EQ(expected_col1[i], filtered_col1->get_data()[i]); + EXPECT_EQ(expected_col2[i], filtered_col2->get_data()[i]); + } + } + + // Test append_to_block_by_selector + { + // Create destination block with proper columns + auto type = std::make_shared(); + vectorized::Block dst_block; + dst_block.insert({type->create_column(), type, "col1"}); + dst_block.insert({type->create_column(), type, "col2"}); + vectorized::MutableBlock dst(&dst_block); + + // Create selector to select every other row + vectorized::IColumn::Selector selector(5, 0); + for (size_t i = 0; i < 5; ++i) { + selector[i] = i * 2; // Select rows 0,2,4,6,8 + } + + // Perform selection + EXPECT_TRUE(block.append_to_block_by_selector(&dst, selector).ok()); + EXPECT_EQ(5, dst.rows()); + + // Verify selected data + const vectorized::Block& result_block = dst.to_block(); + + const auto* selected_col1 = assert_cast*>( + result_block.get_by_position(0).column.get()); + const auto* selected_col2 = assert_cast*>( + result_block.get_by_position(1).column.get()); + + // Expected values after selection + std::vector expected_col1 = {0,2,4,6,8}; + std::vector expected_col2 = {0,4,8,12,16}; + + for (size_t i = 0; i < expected_col1.size(); ++i) { + EXPECT_EQ(expected_col1[i], selected_col1->get_data()[i]); + EXPECT_EQ(expected_col2[i], selected_col2->get_data()[i]); + } + } +} +TEST(BlockTest, RowCheck) { + vectorized::Block block; + auto type = std::make_shared(); + + // Add columns with same number of rows + { + auto col1 = vectorized::ColumnVector::create(); + col1->insert_value(1); + block.insert({std::move(col1), type, "col1"}); + } + + { + auto col2 = vectorized::ColumnVector::create(); + block.insert({std::move(col2), type, "col2"}); + } + + // Test row number check + EXPECT_THROW(block.check_number_of_rows(), Exception); + + // Test clear operations + block.clear_column_data(1); // Clear first column and delete the rest columns + EXPECT_EQ(1, block.columns()); + + + block.clear(); + EXPECT_EQ(0, block.columns()); + + // Test swap operations + vectorized::Block other_block; + { + auto col1 = vectorized::ColumnVector::create(); + col1->insert_value(1); + other_block.insert({std::move(col1), type, "col1"}); + } + + block.swap(other_block); + EXPECT_EQ(1, block.columns()); + EXPECT_EQ(0, other_block.columns()); +} + +TEST(BlockTest, ClearColumnData) { + auto type = std::make_shared(); + + // Test case 1: Clear with column_size == -1 (clear all data but keep columns) + { + vectorized::Block block; + + // Insert two columns with data + { + auto col1 = vectorized::ColumnVector::create(); + col1->insert_value(1); + col1->insert_value(2); + block.insert({std::move(col1), type, "col1"}); + } + { + auto col2 = vectorized::ColumnVector::create(); + col2->insert_value(3); + col2->insert_value(4); + block.insert({std::move(col2), type, "col2"}); + } + + EXPECT_EQ(2, block.rows()); + EXPECT_EQ(2, block.columns()); + + // Clear data with column_size = -1 + block.clear_column_data(-1); + + // Verify columns are kept but data is cleared + EXPECT_EQ(0, block.rows()); + EXPECT_EQ(2, block.columns()); + EXPECT_EQ(0, block.get_by_position(0).column->size()); + EXPECT_EQ(0, block.get_by_position(1).column->size()); + } + + // Test case 2: Clear with specific column_size (remove extra columns) + { + vectorized::Block block; + + // Insert three columns + { + auto col1 = vectorized::ColumnVector::create(); + col1->insert_value(1); + block.insert({std::move(col1), type, "col1"}); + } + { + auto col2 = vectorized::ColumnVector::create(); + col2->insert_value(2); + block.insert({std::move(col2), type, "col2"}); + } + { + auto col3 = vectorized::ColumnVector::create(); + col3->insert_value(3); + block.insert({std::move(col3), type, "col3"}); + } + + EXPECT_EQ(3, block.columns()); + + // Clear data and keep only 2 columns + block.clear_column_data(2); + + // Verify extra columns are removed and remaining data is cleared + EXPECT_EQ(2, block.columns()); + EXPECT_EQ(0, block.rows()); + EXPECT_EQ(0, block.get_by_position(0).column->size()); + EXPECT_EQ(0, block.get_by_position(1).column->size()); + } + + // Test case 3: Clear with column_size larger than actual size + { + vectorized::Block block; + + // Insert one column + { + auto col1 = vectorized::ColumnVector::create(); + col1->insert_value(1); + block.insert({std::move(col1), type, "col1"}); + } + + EXPECT_EQ(1, block.columns()); + + // Clear data with column_size > actual size + block.clear_column_data(2); + + // Verify column is kept but data is cleared + EXPECT_EQ(1, block.columns()); + EXPECT_EQ(0, block.rows()); + EXPECT_EQ(0, block.get_by_position(0).column->size()); + } + + // Test case 4: Clear empty block + { + vectorized::Block block; + EXPECT_EQ(0, block.columns()); + + // Should not crash + block.clear_column_data(-1); + block.clear_column_data(0); + block.clear_column_data(1); + + EXPECT_EQ(0, block.columns()); + } + + // Test case 5: Verify row_same_bit is cleared + { + vectorized::Block block; + + // Insert column with data + { + auto col1 = vectorized::ColumnVector::create(); + col1->insert_value(1); + block.insert({std::move(col1), type, "col1"}); + } + + // Set some row_same_bit data (if possible) + // Note: This might need adjustment based on how row_same_bit is actually used + + block.clear_column_data(-1); + + // Verify everything is cleared + EXPECT_EQ(0, block.rows()); + EXPECT_EQ(1, block.columns()); + // Could add verification for row_same_bit if there's a way to check it + } +} + +TEST(BlockTest, IndexByName) { + vectorized::Block block; + auto col = vectorized::ColumnVector::create(); + vectorized::DataTypePtr type(std::make_shared()); + + // Add columns with duplicate names + block.insert({col->get_ptr(), type, "col1"}); + block.insert({col->get_ptr(), type, "col2"}); + block.insert({col->get_ptr(), type, "col1"}); // Duplicate name + + // Test get_position_by_name returns first occurrence + EXPECT_EQ(0, block.get_position_by_name("col1")); + EXPECT_EQ(1, block.get_position_by_name("col2")); + + // Initialize index + block.initialize_index_by_name(); + + // Test get_position_by_name returns last occurrence + EXPECT_EQ(2, block.get_position_by_name("col1")); + EXPECT_EQ(1, block.get_position_by_name("col2")); + + // Test has with duplicate names + EXPECT_TRUE(block.has("col1")); + EXPECT_TRUE(block.has("col2")); + EXPECT_FALSE(block.has("col3")); + + // Test get_by_name with duplicate names + EXPECT_EQ(0, block.get_by_name("col1").column->size()); + EXPECT_THROW(block.get_by_name("col3"), Exception); + + // Test try_get_by_name with duplicate names + EXPECT_NE(nullptr, block.try_get_by_name("col1")); + EXPECT_EQ(nullptr, block.try_get_by_name("non_existent")); + + // Test after modifying block structure + block.erase(2); // Remove last "col1" + block.initialize_index_by_name(); // Re-initialize index + + // Now the first "col1" should be found + EXPECT_EQ(0, block.get_position_by_name("col1")); + + // Test with empty block + block.clear(); + block.initialize_index_by_name(); + EXPECT_FALSE(block.has("col1")); + EXPECT_THROW(block.get_position_by_name("col1"), Exception); +} + +TEST(BlockTest, ReplaceIfOverflow) { + vectorized::Block block; + auto col = vectorized::ColumnVector::create(); + vectorized::DataTypePtr type(std::make_shared()); + + // Add some data to the column + auto& data = col->get_data(); + for (int i = 0; i < 100; ++i) { + data.push_back(i); + } + + block.insert({col->get_ptr(), type, "col1"}); + + // Test replace_if_overflow + block.replace_if_overflow(); + + // Verify column is still intact + EXPECT_EQ(100, block.get_by_position(0).column->size()); +} + +TEST(BlockTest, ColumnTransformations) { + vectorized::Block block; + auto type = std::make_shared(); + + // Insert columns with unique data + { + auto col1 = vectorized::ColumnVector::create(); + col1->insert_value(1); + block.insert({std::move(col1), type, "col1"}); + } + { + auto col2 = vectorized::ColumnVector::create(); + col2->insert_value(2); + block.insert({std::move(col2), type, "col2"}); + } + + // Verify initial order + EXPECT_EQ("col1", block.get_by_position(0).name); + EXPECT_EQ("col2", block.get_by_position(1).name); + + // Test shuffle_columns + std::vector positions = {1, 0}; // change the order of columns + block.shuffle_columns(positions); + + // Verify shuffled order + EXPECT_EQ("col2", block.get_by_position(0).name); // col2 is now in the first position + EXPECT_EQ("col1", block.get_by_position(1).name); // col1 is now in the second position + + // Verify column data is also correctly shuffled + const auto* col1 = assert_cast*>( + block.get_by_position(1).column.get()); // col1 is now in position 1 + const auto* col2 = assert_cast*>( + block.get_by_position(0).column.get()); // col2 is now in position 0 + + EXPECT_EQ(1, col1->get_data()[0]); // the value of col1 should be 1 + EXPECT_EQ(2, col2->get_data()[0]); // the value of col2 should be 2 +} + +TEST(BlockTest, HashUpdate) { + // Test case 1: Single column with single value + { + vectorized::Block block; + auto col = vectorized::ColumnVector::create(); + col->insert_value(42); + auto type = std::make_shared(); + block.insert({std::move(col), type, "col1"}); + + SipHash hash1; + block.update_hash(hash1); + uint64_t hash1_value = hash1.get64(); + + // Same data should produce same hash + SipHash hash2; + block.update_hash(hash2); + EXPECT_EQ(hash1_value, hash2.get64()); + } + + // Test case 2: Multiple columns + { + vectorized::Block block; + auto type = std::make_shared(); + + // First column + { + auto col1 = vectorized::ColumnVector::create(); + col1->insert_value(1); + block.insert({std::move(col1), type, "col1"}); + } + + // Second column + { + auto col2 = vectorized::ColumnVector::create(); + col2->insert_value(2); + block.insert({std::move(col2), type, "col2"}); + } + + SipHash hash1; + block.update_hash(hash1); + uint64_t hash1_value = hash1.get64(); + + // Different order of same values should produce different hash + vectorized::Block block2; + { + auto col1 = vectorized::ColumnVector::create(); + col1->insert_value(2); + block2.insert({std::move(col1), type, "col1"}); + } + { + auto col2 = vectorized::ColumnVector::create(); + col2->insert_value(1); + block2.insert({std::move(col2), type, "col2"}); + } + + SipHash hash2; + block2.update_hash(hash2); + EXPECT_NE(hash1_value, hash2.get64()); + } + + // Test case 3: Multiple rows + { + vectorized::Block block; + auto col = vectorized::ColumnVector::create(); + for (int i = 0; i < 5; ++i) { + col->insert_value(i); + } + auto type = std::make_shared(); + block.insert({std::move(col), type, "col1"}); + + SipHash hash1; + block.update_hash(hash1); + uint64_t hash1_value = hash1.get64(); + + // Different order of same values should produce different hash + auto col2 = vectorized::ColumnVector::create(); + for (int i = 4; i >= 0; --i) { + col2->insert_value(i); + } + vectorized::Block block2; + block2.insert({std::move(col2), type, "col1"}); + + SipHash hash2; + block2.update_hash(hash2); + EXPECT_NE(hash1_value, hash2.get64()); + } + + // Test case 4: Empty block + { + vectorized::Block empty_block; + SipHash hash; + empty_block.update_hash(hash); + // Should not crash + } + + // Test case 5: Nullable column + { + vectorized::Block block; + auto col = vectorized::ColumnVector::create(); + col->insert_value(1); + auto nullable_col = vectorized::make_nullable(std::move(col)); + auto type = vectorized::make_nullable(std::make_shared()); + block.insert({std::move(nullable_col), type, "nullable_col"}); + + SipHash hash1; + block.update_hash(hash1); + uint64_t hash1_value = hash1.get64(); + + // Same nullable column should produce same hash + SipHash hash2; + block.update_hash(hash2); + EXPECT_EQ(hash1_value, hash2.get64()); + } +} + +TEST(BlockTest, BlockOperations) { + // Test erase_useless_column + { + vectorized::Block block; + auto type = std::make_shared(); + + // Insert three columns + { + auto col1 = vectorized::ColumnVector::create(); + col1->insert_value(1); + block.insert({std::move(col1), type, "col1"}); + } + { + auto col2 = vectorized::ColumnVector::create(); + col2->insert_value(2); + block.insert({std::move(col2), type, "col2"}); + } + { + auto col3 = vectorized::ColumnVector::create(); + col3->insert_value(3); + block.insert({std::move(col3), type, "col3"}); + } + + EXPECT_EQ(3, block.columns()); + vectorized::Block::erase_useless_column(&block, 2); + EXPECT_EQ(2, block.columns()); + EXPECT_EQ("col1", block.get_by_position(0).name); + EXPECT_EQ("col2", block.get_by_position(1).name); + } + + // Test create_same_struct_block + { + vectorized::Block original_block; + auto type = std::make_shared(); + + // Create original block with data + { + auto col = vectorized::ColumnVector::create(); + col->insert_value(1); + original_block.insert({std::move(col), type, "col1"}); + } + + // Test case 1: with default values (is_reserve = false) + { + auto new_block = original_block.create_same_struct_block(5, false); + EXPECT_EQ(original_block.columns(), new_block->columns()); + EXPECT_EQ(5, new_block->rows()); // Should have 5 default values + EXPECT_EQ("col1", new_block->get_by_position(0).name); + EXPECT_TRUE(new_block->get_by_position(0).type->equals(*type)); + + // Verify default values are inserted + const auto* col = assert_cast*>( + new_block->get_by_position(0).column.get()); + for (size_t i = 0; i < 5; ++i) { + EXPECT_EQ(0, col->get_data()[i]); // Default value for Int32 is 0 + } + } + + // Test case 2: with reserved space (is_reserve = true) + { + auto new_block = original_block.create_same_struct_block(5, true); + EXPECT_EQ(original_block.columns(), new_block->columns()); + EXPECT_EQ(0, new_block->rows()); // Should be empty but with reserved space + EXPECT_EQ("col1", new_block->get_by_position(0).name); + EXPECT_TRUE(new_block->get_by_position(0).type->equals(*type)); + } + } + + // Test compare_at methods + { + vectorized::Block block1; + vectorized::Block block2; + auto type = std::make_shared(); + + // Prepare two blocks with test data + { + auto col1 = vectorized::ColumnVector::create(); + col1->insert_value(1); + col1->insert_value(2); + block1.insert({std::move(col1), type, "col1"}); + + auto col2 = vectorized::ColumnVector::create(); + col2->insert_value(3); + col2->insert_value(4); + block1.insert({std::move(col2), type, "col2"}); + } + + { + auto col1 = vectorized::ColumnVector::create(); + col1->insert_value(1); + col1->insert_value(3); + block2.insert({std::move(col1), type, "col1"}); + + auto col2 = vectorized::ColumnVector::create(); + col2->insert_value(3); + col2->insert_value(4); + block2.insert({std::move(col2), type, "col2"}); + } + + // Test basic compare_at + EXPECT_EQ(0, block1.compare_at(0, 0, block2, 1)); // First rows are equal + EXPECT_LT(block1.compare_at(0, 1, block2, 1), 0); // 1 < 3 + + // Test compare_at with num_columns + EXPECT_EQ(0, block1.compare_at(0, 0, 1, block2, 1)); // Compare only first column + + // Test compare_at with specific columns + std::vector compare_cols = {1}; // Compare only second column + EXPECT_EQ(0, block1.compare_at(0, 0, &compare_cols, block2, 1)); + + // Test compare_column_at + EXPECT_EQ(0, block1.compare_column_at(0, 0, 0, block2, 1)); // Compare first column + EXPECT_LT(block1.compare_column_at(0, 1, 0, block2, 1), 0); // 1 < 3 + } + + // Test same_bit operations + { + vectorized::Block block; + auto type = std::make_shared(); + + // Create block with data + auto col = vectorized::ColumnVector::create(); + for (int i = 0; i < 3; ++i) { + col->insert_value(i); + } + block.insert({std::move(col), type, "col1"}); + + // Test set_same_bit + std::vector same_bits = {true, false, true}; + block.set_same_bit(same_bits.begin(), same_bits.end()); + + // Test get_same_bit + EXPECT_TRUE(block.get_same_bit(0)); + EXPECT_FALSE(block.get_same_bit(1)); + EXPECT_TRUE(block.get_same_bit(2)); + EXPECT_FALSE(block.get_same_bit(3)); // Out of range + + // Test clear_same_bit + block.clear_same_bit(); + EXPECT_FALSE(block.get_same_bit(0)); // After clear, all bits should be false + } + + // Test erase_tmp_columns + { + vectorized::Block block; + auto type = std::make_shared(); + + // Add regular column + { + auto col1 = vectorized::ColumnVector::create(); + col1->insert_value(1); + block.insert({std::move(col1), type, "normal_col"}); + } + + // Add temporary column with correct prefix + { + auto col2 = vectorized::ColumnVector::create(); + col2->insert_value(2); + block.insert({std::move(col2), type, + std::string(BeConsts::BLOCK_TEMP_COLUMN_PREFIX) + "col"}); + } + + // Add another temporary column + { + auto col3 = vectorized::ColumnVector::create(); + col3->insert_value(3); + block.insert({std::move(col3), type, + std::string(BeConsts::BLOCK_TEMP_COLUMN_PREFIX) + "another_col"}); + } + + EXPECT_EQ(3, block.columns()); + block.erase_tmp_columns(); + EXPECT_EQ(1, block.columns()); + EXPECT_EQ("normal_col", block.get_by_position(0).name); + + // Verify temporary columns are removed + EXPECT_FALSE(block.has(std::string(BeConsts::BLOCK_TEMP_COLUMN_PREFIX) + "col")); + EXPECT_FALSE(block.has(std::string(BeConsts::BLOCK_TEMP_COLUMN_PREFIX) + "another_col")); + } + + // Test clear_column_mem_not_keep + { + vectorized::Block block; + auto type = std::make_shared(); + + // Add three columns + for (int i = 0; i < 3; ++i) { + auto col = vectorized::ColumnVector::create(); + col->insert_value(i); + block.insert({std::move(col), type, "col" + std::to_string(i)}); + } + + std::vector keep_flags = {true, false, true}; + block.clear_column_mem_not_keep(keep_flags, true); + + // Verify columns are kept but data is cleared for non-kept columns + EXPECT_EQ(3, block.columns()); + EXPECT_EQ(1, block.get_by_position(0).column->size()); // Kept + EXPECT_EQ(0, block.get_by_position(1).column->size()); // Cleared + EXPECT_EQ(1, block.get_by_position(2).column->size()); // Kept + } +} + +TEST(BlockTest, StringAndCompressionOperations) { + using namespace std::string_literals; + // Test shrink_char_type_column_suffix_zero + { + vectorized::Block block; + + // Add a string column with padding zeros + { + auto col = vectorized::ColumnString::create(); + // Add string with trailing zeros + std::string str1 = "hello\0\0\0"s; // 8bytes, contains 3 trailing zeros + std::string str2 = "world\0\0"s; // 7bytes, contains 2 trailing zeros + col->insert_data(str1.c_str(), str1.size()); + col->insert_data(str2.c_str(), str2.size()); + + auto type = std::make_shared(); + block.insert({std::move(col), type, "str_col"}); + } + + // Add a non-string column + { + auto col = vectorized::ColumnVector::create(); + col->insert_value(1); + col->insert_value(2); + auto type = std::make_shared(); + block.insert({std::move(col), type, "int_col"}); + } + + // Test shrinking string column + std::vector char_type_idx = {0}; // Index of string column + block.shrink_char_type_column_suffix_zero(char_type_idx); + + // Verify string column is shrunk + const auto* str_col = assert_cast( + block.get_by_position(0).column.get()); + + // Verify first string + StringRef ref1 = str_col->get_data_at(0); + EXPECT_EQ(5, ref1.size); // "hello" without zeros + EXPECT_EQ(0, memcmp(ref1.data, "hello", 5)); + + // Verify second string + StringRef ref2 = str_col->get_data_at(1); + EXPECT_EQ(5, ref2.size); // "world" without zeros + EXPECT_EQ(0, memcmp(ref2.data, "world", 5)); + + // Verify non-string column remains unchanged + const auto* int_col = assert_cast*>( + block.get_by_position(1).column.get()); + EXPECT_EQ(1, int_col->get_data()[0]); + EXPECT_EQ(2, int_col->get_data()[1]); + } + + // Test compression time and bytes tracking + { + vectorized::Block block; + + // Initially all metrics should be zero + EXPECT_EQ(0, block.get_decompress_time()); + EXPECT_EQ(0, block.get_decompressed_bytes()); + EXPECT_EQ(0, block.get_compress_time()); + + // Note: Actual compression/decompression operations would be tested + // in integration tests or specific compression-related test files + // Here we're just verifying the getters work + } + + // Test with Array + { + vectorized::Block block; + + // Create Array column with padding zeros + auto string_type = std::make_shared(); + auto array_type = std::make_shared(string_type); + + // Add two strings with trailing zeros + auto string_col = vectorized::ColumnString::create(); + std::string str1 = "hello\0\0"s; + std::string str2 = "world\0"s; + string_col->insert_data(str1.c_str(), str1.size()); + string_col->insert_data(str2.c_str(), str2.size()); + + // Create array offsets column + auto array_offsets = vectorized::ColumnArray::ColumnOffsets::create(); + array_offsets->get_data().push_back(2); // First array has 2 elements + + // Create array column + auto array_col = vectorized::ColumnArray::create( + std::move(string_col), std::move(array_offsets)); + + // Insert array column into block + block.insert({std::move(array_col), array_type, "array_str_col"}); + + // Shrink array column + std::vector char_type_idx = {0}; + block.shrink_char_type_column_suffix_zero(char_type_idx); + + // Verify strings in array are shrunk + const auto* array_col_result = assert_cast( + block.get_by_position(0).column.get()); + const auto* string_col_result = assert_cast( + array_col_result->get_data_ptr().get()); + + // Verify first string in array + StringRef ref1 = string_col_result->get_data_at(0); + EXPECT_EQ(5, ref1.size); // "hello" without zeros + EXPECT_EQ(0, memcmp(ref1.data, "hello", 5)); + + // Verify second string in array + StringRef ref2 = string_col_result->get_data_at(1); + EXPECT_EQ(5, ref2.size); // "world" without zeros + EXPECT_EQ(0, memcmp(ref2.data, "world", 5)); + } +} + TEST(BlockTest, SerializeAndDeserializeBlock) { serialize_and_deserialize_test(segment_v2::CompressionTypePB::SNAPPY); serialize_and_deserialize_test(segment_v2::CompressionTypePB::LZ4); From 6a045161ccfbff659a6d20362d17f3814d957953 Mon Sep 17 00:00:00 2001 From: yoruet <1559650411@qq.com> Date: Sun, 17 Nov 2024 17:05:10 +0800 Subject: [PATCH 03/41] code format --- be/src/vec/core/block.h | 4 +- .../agg_linear_histogram_test.cpp | 5 +- be/test/vec/core/block_test.cpp | 366 +++++++++--------- 3 files changed, 177 insertions(+), 198 deletions(-) diff --git a/be/src/vec/core/block.h b/be/src/vec/core/block.h index 697af729771d18..703fa55f3c49be 100644 --- a/be/src/vec/core/block.h +++ b/be/src/vec/core/block.h @@ -140,7 +140,7 @@ class Block { this->get_by_position(position).column = std::move(res); } - /// Replace column at position with lvalue column pointer + /// Replace column at position with lvalue column pointer void replace_by_position(size_t position, const ColumnPtr& res) { this->get_by_position(position).column = res; } @@ -423,7 +423,7 @@ class Block { int64_t get_decompress_time() const { return _decompress_time_ns; } // Get total bytes after decompression int64_t get_decompressed_bytes() const { return _decompressed_bytes; } - // Get time spent on compression in nanoseconds + // Get time spent on compression in nanoseconds int64_t get_compress_time() const { return _compress_time_ns; } // Set same bit flags for rows in block diff --git a/be/test/vec/aggregate_functions/agg_linear_histogram_test.cpp b/be/test/vec/aggregate_functions/agg_linear_histogram_test.cpp index b13ae5868bb282..7c293ba1ea27c6 100644 --- a/be/test/vec/aggregate_functions/agg_linear_histogram_test.cpp +++ b/be/test/vec/aggregate_functions/agg_linear_histogram_test.cpp @@ -204,9 +204,8 @@ class AggLinearHistogramTest : public testing::Test { << "(" << data_types[0]->get_name() << ")"; AggregateFunctionSimpleFactory factory = AggregateFunctionSimpleFactory::instance(); - auto agg_function = - factory.get("linear_histogram", data_types, false, -1, - {.enable_decimal256 = true, .column_infos = {}}); + auto agg_function = factory.get("linear_histogram", data_types, false, -1, + {.enable_decimal256 = true, .column_infos = {}}); EXPECT_NE(agg_function, nullptr); std::unique_ptr memory(new char[agg_function->size_of_data()]); diff --git a/be/test/vec/core/block_test.cpp b/be/test/vec/core/block_test.cpp index 271bde12651870..c519d970bde238 100644 --- a/be/test/vec/core/block_test.cpp +++ b/be/test/vec/core/block_test.cpp @@ -705,10 +705,7 @@ TEST(BlockTest, Constructor) { { auto col = vectorized::ColumnVector::create(); vectorized::DataTypePtr type(std::make_shared()); - vectorized::Block block({ - {col->get_ptr(), type, "col1"}, - {col->get_ptr(), type, "col2"} - }); + vectorized::Block block({{col->get_ptr(), type, "col1"}, {col->get_ptr(), type, "col2"}}); EXPECT_EQ(2, block.columns()); } @@ -721,7 +718,6 @@ TEST(BlockTest, Constructor) { vectorized::Block block(columns); EXPECT_EQ(1, block.columns()); } - } TEST(BlockTest, BasicOperations) { @@ -858,23 +854,23 @@ TEST(BlockTest, ColumnOperations) { auto another_col = vectorized::ColumnVector::create(); block.replace_by_position(1, another_col->get_ptr()); EXPECT_EQ(0, block.get_by_position(1).column->size()); - + // Test replace_by_position_if_const auto const_col = vectorized::ColumnVector::create(); const_col->insert_value(1); auto const_column = vectorized::ColumnConst::create(const_col->get_ptr(), 1); block.replace_by_position(2, const_column->get_ptr()); - + // Verify it's const column before replacement - EXPECT_NE(nullptr, typeid_cast( - block.get_by_position(2).column.get())); - + EXPECT_NE(nullptr, + typeid_cast(block.get_by_position(2).column.get())); + // Replace const column with full column block.replace_by_position_if_const(2); - + // Verify it's no longer const column after replacement - EXPECT_EQ(nullptr, typeid_cast( - block.get_by_position(2).column.get())); + EXPECT_EQ(nullptr, + typeid_cast(block.get_by_position(2).column.get())); // Test iterator functionality size_t count = 0; @@ -904,7 +900,7 @@ TEST(BlockTest, ColumnOperations) { { vectorized::Block unsorted_block; auto type = std::make_shared(); - + // Insert columns in random order { auto col_c = vectorized::ColumnVector::create(); @@ -918,31 +914,31 @@ TEST(BlockTest, ColumnOperations) { auto col_b = vectorized::ColumnVector::create(); unsorted_block.insert({std::move(col_b), type, "b"}); } - + // Verify original order auto original_names = unsorted_block.get_names(); EXPECT_EQ("c", original_names[0]); EXPECT_EQ("a", original_names[1]); EXPECT_EQ("b", original_names[2]); - + // Sort columns and verify auto sorted_block = unsorted_block.sort_columns(); auto sorted_names = sorted_block.get_names(); - + // Verify alphabetical order EXPECT_EQ("c", sorted_names[0]); EXPECT_EQ("b", sorted_names[1]); EXPECT_EQ("a", sorted_names[2]); - + // Verify original block remains unchanged original_names = unsorted_block.get_names(); EXPECT_EQ("c", original_names[0]); EXPECT_EQ("a", original_names[1]); EXPECT_EQ("b", original_names[2]); - + // Verify column count remains the same EXPECT_EQ(unsorted_block.columns(), sorted_block.columns()); - + // Verify column types are preserved EXPECT_EQ(type, sorted_block.get_data_type(0)); EXPECT_EQ(type, sorted_block.get_data_type(1)); @@ -952,51 +948,50 @@ TEST(BlockTest, ColumnOperations) { TEST(BlockTest, RowOperations) { vectorized::Block block; - + // Test empty block EXPECT_EQ(0, block.rows()); EXPECT_EQ(0, block.columns()); EXPECT_TRUE(block.empty()); EXPECT_TRUE(block.is_empty_column()); - + // Add columns with data auto col1 = vectorized::ColumnVector::create(); auto col2 = vectorized::ColumnString::create(); vectorized::DataTypePtr type1(std::make_shared()); vectorized::DataTypePtr type2(std::make_shared()); - + for (int i = 0; i < 100; ++i) { col1->insert_value(i); col2->insert_data(std::to_string(i).c_str(), std::to_string(i).length()); } - + block.insert({col1->get_ptr(), type1, "col1"}); block.insert({col2->get_ptr(), type2, "col2"}); - + // Test basic properties EXPECT_EQ(100, block.rows()); EXPECT_EQ(2, block.columns()); EXPECT_FALSE(block.empty()); EXPECT_FALSE(block.is_empty_column()); - + // Test row operations block.set_num_rows(50); // LIMIT EXPECT_EQ(50, block.rows()); - + int64_t offset = 20; block.skip_num_rows(offset); // OFFSET EXPECT_EQ(30, block.rows()); } -// 内存和大小相关测试 TEST(BlockTest, MemoryAndSize) { vectorized::Block block; - + // Test empty block (no columns) EXPECT_EQ(0, block.bytes()); EXPECT_EQ(0, block.allocated_bytes()); EXPECT_EQ("column bytes: []", block.columns_bytes()); - + // Add first column (Int32) auto col1 = vectorized::ColumnVector::create(); vectorized::DataTypePtr type1(std::make_shared()); @@ -1004,21 +999,21 @@ TEST(BlockTest, MemoryAndSize) { col1->insert_value(i); } block.insert({col1->get_ptr(), type1, "col1"}); - + // Test with valid column size_t bytes_one_col = block.bytes(); size_t allocated_bytes_one_col = block.allocated_bytes(); EXPECT_GT(bytes_one_col, 0); EXPECT_GT(allocated_bytes_one_col, 0); EXPECT_GE(allocated_bytes_one_col, bytes_one_col); - + // Test with nullptr column (should throw exception) vectorized::Block block_with_null; block_with_null.insert({nullptr, type1, "null_col"}); // bytes() should throw exception when there is a nullptr column EXPECT_THROW(block_with_null.bytes(), Exception); - + // columns_bytes() should throw exception when there is a nullptr column EXPECT_THROW(block_with_null.columns_bytes(), Exception); @@ -1033,21 +1028,21 @@ TEST(BlockTest, MemoryAndSize) { col2->insert_data(val.c_str(), val.length()); } block.insert({col2->get_ptr(), type2, "col2"}); - + // Test with two valid columns size_t bytes_two_cols = block.bytes(); EXPECT_GT(bytes_two_cols, bytes_one_col); - + // Test after erasing first column block.erase(0); EXPECT_EQ(block.bytes(), col2->byte_size()); - + // Test after clearing all columns block.clear(); EXPECT_EQ(0, block.bytes()); EXPECT_EQ(0, block.allocated_bytes()); EXPECT_EQ("column bytes: []", block.columns_bytes()); - + // Test with multiple nullptr columns vectorized::Block multi_null_block; multi_null_block.insert({nullptr, type1, "null_col1"}); @@ -1057,34 +1052,34 @@ TEST(BlockTest, MemoryAndSize) { TEST(BlockTest, DumpMethods) { vectorized::Block block; - + // Test empty block EXPECT_EQ("", block.dump_names()); EXPECT_EQ("", block.dump_types()); EXPECT_TRUE(block.dump_structure().empty()); - + // Add first column (Int32) auto col1 = vectorized::ColumnVector::create(); vectorized::DataTypePtr type1(std::make_shared()); col1->insert_value(123); col1->insert_value(456); block.insert({col1->get_ptr(), type1, "col1"}); - + // Test single column EXPECT_EQ("col1", block.dump_names()); EXPECT_EQ("Int32", block.dump_types()); - + // Add second column (String) auto col2 = vectorized::ColumnString::create(); vectorized::DataTypePtr type2(std::make_shared()); col2->insert_data("hello", 5); col2->insert_data("world", 5); block.insert({col2->get_ptr(), type2, "col2"}); - + // Test multiple columns EXPECT_EQ("col1, col2", block.dump_names()); EXPECT_EQ("Int32, String", block.dump_types()); - + // Test dump_data with different parameters { // Default parameters @@ -1095,7 +1090,7 @@ TEST(BlockTest, DumpMethods) { EXPECT_TRUE(data.find("123") != std::string::npos); EXPECT_TRUE(data.find("hello") != std::string::npos); } - + { // Test with begin offset std::string data = block.dump_data(1); @@ -1103,7 +1098,7 @@ TEST(BlockTest, DumpMethods) { EXPECT_TRUE(data.find("world") != std::string::npos); EXPECT_FALSE(data.find("123") != std::string::npos); } - + { // Test with row limit std::string data = block.dump_data(0, 1); @@ -1111,7 +1106,7 @@ TEST(BlockTest, DumpMethods) { EXPECT_TRUE(data.find("123") != std::string::npos); EXPECT_FALSE(data.find("456") != std::string::npos); } - + // Test dump_one_line { std::string line = block.dump_one_line(0, 2); @@ -1123,7 +1118,7 @@ TEST(BlockTest, DumpMethods) { line = block.dump_one_line(0, 1); EXPECT_EQ("123", line); } - + // Test dump_structure { std::string structure = block.dump_structure(); @@ -1133,14 +1128,15 @@ TEST(BlockTest, DumpMethods) { EXPECT_TRUE(structure.find("col2") != std::string::npos); EXPECT_TRUE(structure.find("String") != std::string::npos); } - + // Test with nullable column auto nullable_type = std::make_shared(type1); auto null_map = vectorized::ColumnUInt8::create(); auto nested_col = col1->clone(); - auto nullable_col = vectorized::ColumnNullable::create(nested_col->get_ptr(), null_map->get_ptr()); + auto nullable_col = + vectorized::ColumnNullable::create(nested_col->get_ptr(), null_map->get_ptr()); block.insert({nullable_col->get_ptr(), nullable_type, "nullable_col"}); - + { std::string data = block.dump_data(0, 100, true); LOG(INFO) << "dump_data with nullable:\n" << data; @@ -1155,20 +1151,20 @@ TEST(BlockTest, DumpMethods) { EXPECT_FALSE(int_dump.empty()); EXPECT_TRUE(int_dump.find("123") != std::string::npos); EXPECT_TRUE(int_dump.find("456") != std::string::npos); - + // Test String column std::string str_dump = vectorized::Block::dump_column(col2->get_ptr(), type2); LOG(INFO) << "String column dump:\n" << str_dump; EXPECT_FALSE(str_dump.empty()); EXPECT_TRUE(str_dump.find("hello") != std::string::npos); EXPECT_TRUE(str_dump.find("world") != std::string::npos); - + // Test Nullable column std::string nullable_dump = vectorized::Block::dump_column(nullable_col->get_ptr(), nullable_type); LOG(INFO) << "Nullable column dump:\n" << nullable_dump; EXPECT_FALSE(nullable_dump.empty()); EXPECT_FALSE(nullable_dump.find("123") != std::string::npos); - + // Test empty column auto empty_col = vectorized::ColumnVector::create(); auto empty_dump = vectorized::Block::dump_column(empty_col->get_ptr(), type1); @@ -1177,40 +1173,39 @@ TEST(BlockTest, DumpMethods) { } } -// 克隆和列操作测试 TEST(BlockTest, CloneOperations) { vectorized::Block block; auto col1 = vectorized::ColumnVector::create(); auto col2 = vectorized::ColumnVector::create(); vectorized::DataTypePtr type(std::make_shared()); - + col1->insert_value(1); col2->insert_value(2); - + block.insert({col1->get_ptr(), type, "col1"}); block.insert({col2->get_ptr(), type, "col2"}); - + // Test clone_empty auto empty_block = block.clone_empty(); EXPECT_EQ(block.columns(), empty_block.columns()); EXPECT_EQ(0, empty_block.rows()); - + // Test get_columns and get_columns_and_convert auto columns = block.get_columns(); auto converted_columns = block.get_columns_and_convert(); EXPECT_EQ(2, columns.size()); EXPECT_EQ(2, converted_columns.size()); - + // Test clone_empty_columns auto empty_columns = block.clone_empty_columns(); EXPECT_EQ(2, empty_columns.size()); EXPECT_EQ(0, empty_columns[0]->size()); EXPECT_EQ(0, empty_columns[1]->size()); - + // Test mutate_columns auto mutable_cols = block.mutate_columns(); EXPECT_EQ(2, mutable_cols.size()); - + // Test set_columns with const columns vectorized::Block new_block = block.clone_empty(); new_block.set_columns(columns); @@ -1224,7 +1219,7 @@ TEST(BlockTest, CloneOperations) { new_block.get_by_position(0).column.get())->get_data()[0]); EXPECT_EQ(2, assert_cast*>( new_block.get_by_position(1).column.get())->get_data()[0]); - + // Test clone_with_columns auto cloned_with_cols = block.clone_with_columns(columns); EXPECT_EQ(block.rows(), cloned_with_cols.rows()); @@ -1237,14 +1232,14 @@ TEST(BlockTest, CloneOperations) { cloned_with_cols.get_by_position(0).column.get())->get_data()[0]); EXPECT_EQ(2, assert_cast*>( cloned_with_cols.get_by_position(1).column.get())->get_data()[0]); - + // Test clone_without_columns std::vector column_offset = {0}; auto partial_block = block.clone_without_columns(&column_offset); EXPECT_EQ(1, partial_block.columns()); EXPECT_EQ("col1", partial_block.get_by_position(0).name); EXPECT_EQ(nullptr, partial_block.get_by_position(0).column.get()); - + // Test set_columns with mutable columns { auto mutable_columns = block.clone_empty_columns(); @@ -1328,12 +1323,12 @@ TEST(BlockTest, FilterAndSelector) { auto test_col1 = vectorized::ColumnVector::create(); auto test_col2 = vectorized::ColumnVector::create(); auto type = std::make_shared(); - + for (int i = 0; i < size; ++i) { test_col1->insert_value(i); test_col2->insert_value(i * 2); } - + test_block.insert({test_col1->get_ptr(), type, "col1"}); test_block.insert({test_col2->get_ptr(), type, "col2"}); return test_block; @@ -1341,27 +1336,27 @@ TEST(BlockTest, FilterAndSelector) { // Create original block auto block = create_test_block(10); - + // Test filter_block_internal with filter only { auto test_block = create_test_block(10); vectorized::IColumn::Filter filter(10, 1); // Initialize with all 1s (keep all rows) filter[0] = 0; // Filter out first row filter[5] = 0; // Filter out sixth row - + vectorized::Block::filter_block_internal(&test_block, filter); EXPECT_EQ(8, test_block.rows()); - + // Verify filtered data for both columns const auto* filtered_col1 = assert_cast*>( test_block.get_by_position(0).column.get()); const auto* filtered_col2 = assert_cast*>( test_block.get_by_position(1).column.get()); - + // Expected values after filtering std::vector expected_col1 = {1,2,3,4,6,7,8,9}; std::vector expected_col2 = {2,4,6,8,12,14,16,18}; - + for (size_t i = 0; i < expected_col1.size(); ++i) { EXPECT_EQ(expected_col1[i], filtered_col1->get_data()[i]); EXPECT_EQ(expected_col2[i], filtered_col2->get_data()[i]); @@ -1374,10 +1369,10 @@ TEST(BlockTest, FilterAndSelector) { vectorized::IColumn::Filter filter(10, 1); filter[0] = 0; std::vector columns_to_filter = {0}; // Only filter first column - + vectorized::Block::filter_block_internal(&test_block, columns_to_filter, filter); EXPECT_EQ(9, test_block.rows()); - + const auto* filtered_col1 = assert_cast*>( test_block.get_by_position(0).column.get()); const auto* filtered_col2 = assert_cast*>( @@ -1385,7 +1380,7 @@ TEST(BlockTest, FilterAndSelector) { EXPECT_EQ(1, filtered_col1->get_data()[0]); // First column filtered EXPECT_EQ(0, filtered_col2->get_data()[0]); // Second column unchanged } - + // Test filter_block_internal with column_to_keep { auto test_block = create_test_block(10); @@ -1393,23 +1388,23 @@ TEST(BlockTest, FilterAndSelector) { filter[0] = 0; // Filter out first row filter[5] = 0; // Filter out sixth row uint32_t column_to_keep = 1; // Only filter first column, keep the rest columns - + vectorized::Block::filter_block_internal(&test_block, filter, column_to_keep); - + // Verify row count after filtering EXPECT_EQ(8, test_block.rows()); EXPECT_EQ(2, test_block.columns()); - + // Verify filtered data for both columns const auto* filtered_col1 = assert_cast*>( test_block.get_by_position(0).column.get()); const auto* filtered_col2 = assert_cast*>( test_block.get_by_position(1).column.get()); - + // Expected values after filtering std::vector expected_col1 = {1,2,3,4,6,7,8,9}; std::vector expected_col2 = {0,2,4,6,8,10,12,14,16,18}; - + // Verify each value in filtered columns for (size_t i = 0; i < expected_col1.size(); ++i) { EXPECT_EQ(expected_col1[i], filtered_col1->get_data()[i]); @@ -1422,7 +1417,7 @@ TEST(BlockTest, FilterAndSelector) { // Test filter_block with nullable filter column { auto test_block = create_test_block(10); - + // Create nullable filter column auto nullable_filter = vectorized::ColumnNullable::create( vectorized::ColumnVector::create(10, 1), // all true @@ -1431,87 +1426,87 @@ TEST(BlockTest, FilterAndSelector) { auto filter_type = std::make_shared( std::make_shared() ); - + // Add filter column to block test_block.insert({nullable_filter->get_ptr(), filter_type, "filter"}); - + // Test four-parameter version std::vector columns_to_filter = {0, 1}; EXPECT_TRUE(vectorized::Block::filter_block(&test_block, columns_to_filter, 2, 2).ok()); EXPECT_EQ(10, test_block.rows()); // All rows kept - + // Test three-parameter version auto test_block2 = create_test_block(10); test_block2.insert({nullable_filter->get_ptr(), filter_type, "filter"}); EXPECT_TRUE(vectorized::Block::filter_block(&test_block2, 2, 2).ok()); EXPECT_EQ(10, test_block2.rows()); // All rows kept } - + // Test filter_block with const filter column { auto test_block = create_test_block(10); - + // Create const filter column (false) auto const_filter = vectorized::ColumnConst::create( vectorized::ColumnVector::create(1, 0), // false 10 ); auto filter_type = std::make_shared(); - + // Add filter column to block test_block.insert({const_filter->get_ptr(), filter_type, "filter"}); - + // Test four-parameter version std::vector columns_to_filter = {0, 1}; EXPECT_TRUE(vectorized::Block::filter_block(&test_block, columns_to_filter, 2, 2).ok()); EXPECT_EQ(0, test_block.rows()); // All rows filtered out - + // Test three-parameter version auto test_block2 = create_test_block(10); test_block2.insert({const_filter->get_ptr(), filter_type, "filter"}); EXPECT_TRUE(vectorized::Block::filter_block(&test_block2, 2, 2).ok()); EXPECT_EQ(0, test_block2.rows()); // All rows filtered out } - + // Test filter_block with regular filter column { auto test_block = create_test_block(10); - + // Create regular filter column auto filter_column = vectorized::ColumnVector::create(); for (size_t i = 0; i < 10; ++i) { filter_column->insert_value(i % 2); // Keep odd-indexed rows } auto filter_type = std::make_shared(); - + // Add filter column to block test_block.insert({filter_column->get_ptr(), filter_type, "filter"}); - + // Test four-parameter version std::vector columns_to_filter = {0, 1}; EXPECT_TRUE(vectorized::Block::filter_block(&test_block, columns_to_filter, 2, 2).ok()); EXPECT_EQ(5, test_block.rows()); // Half rows kept - + // Verify filtered data const auto* filtered_col1 = assert_cast*>( test_block.get_by_position(0).column.get()); const auto* filtered_col2 = assert_cast*>( test_block.get_by_position(1).column.get()); - + std::vector expected_col1 = {1,3,5,7,9}; std::vector expected_col2 = {2,6,10,14,18}; - + for (size_t i = 0; i < expected_col1.size(); ++i) { EXPECT_EQ(expected_col1[i], filtered_col1->get_data()[i]); EXPECT_EQ(expected_col2[i], filtered_col2->get_data()[i]); } - + // Test three-parameter version auto test_block2 = create_test_block(10); test_block2.insert({filter_column->get_ptr(), filter_type, "filter"}); EXPECT_TRUE(vectorized::Block::filter_block(&test_block2, 2, 2).ok()); EXPECT_EQ(5, test_block2.rows()); // Half rows kept - + // Verify filtered data filtered_col1 = assert_cast*>( test_block2.get_by_position(0).column.get()); @@ -1532,29 +1527,29 @@ TEST(BlockTest, FilterAndSelector) { dst_block.insert({type->create_column(), type, "col1"}); dst_block.insert({type->create_column(), type, "col2"}); vectorized::MutableBlock dst(&dst_block); - + // Create selector to select every other row vectorized::IColumn::Selector selector(5, 0); for (size_t i = 0; i < 5; ++i) { selector[i] = i * 2; // Select rows 0,2,4,6,8 } - + // Perform selection EXPECT_TRUE(block.append_to_block_by_selector(&dst, selector).ok()); EXPECT_EQ(5, dst.rows()); - + // Verify selected data const vectorized::Block& result_block = dst.to_block(); - + const auto* selected_col1 = assert_cast*>( result_block.get_by_position(0).column.get()); const auto* selected_col2 = assert_cast*>( result_block.get_by_position(1).column.get()); - + // Expected values after selection std::vector expected_col1 = {0,2,4,6,8}; std::vector expected_col2 = {0,4,8,12,16}; - + for (size_t i = 0; i < expected_col1.size(); ++i) { EXPECT_EQ(expected_col1[i], selected_col1->get_data()[i]); EXPECT_EQ(expected_col2[i], selected_col2->get_data()[i]); @@ -1564,22 +1559,22 @@ TEST(BlockTest, FilterAndSelector) { TEST(BlockTest, RowCheck) { vectorized::Block block; auto type = std::make_shared(); - + // Add columns with same number of rows { auto col1 = vectorized::ColumnVector::create(); col1->insert_value(1); block.insert({std::move(col1), type, "col1"}); } - + { auto col2 = vectorized::ColumnVector::create(); block.insert({std::move(col2), type, "col2"}); } - + // Test row number check EXPECT_THROW(block.check_number_of_rows(), Exception); - + // Test clear operations block.clear_column_data(1); // Clear first column and delete the rest columns EXPECT_EQ(1, block.columns()); @@ -1587,7 +1582,7 @@ TEST(BlockTest, RowCheck) { block.clear(); EXPECT_EQ(0, block.columns()); - + // Test swap operations vectorized::Block other_block; { @@ -1595,7 +1590,7 @@ TEST(BlockTest, RowCheck) { col1->insert_value(1); other_block.insert({std::move(col1), type, "col1"}); } - + block.swap(other_block); EXPECT_EQ(1, block.columns()); EXPECT_EQ(0, other_block.columns()); @@ -1603,11 +1598,11 @@ TEST(BlockTest, RowCheck) { TEST(BlockTest, ClearColumnData) { auto type = std::make_shared(); - + // Test case 1: Clear with column_size == -1 (clear all data but keep columns) { vectorized::Block block; - + // Insert two columns with data { auto col1 = vectorized::ColumnVector::create(); @@ -1621,24 +1616,24 @@ TEST(BlockTest, ClearColumnData) { col2->insert_value(4); block.insert({std::move(col2), type, "col2"}); } - + EXPECT_EQ(2, block.rows()); EXPECT_EQ(2, block.columns()); - + // Clear data with column_size = -1 block.clear_column_data(-1); - + // Verify columns are kept but data is cleared EXPECT_EQ(0, block.rows()); EXPECT_EQ(2, block.columns()); EXPECT_EQ(0, block.get_by_position(0).column->size()); EXPECT_EQ(0, block.get_by_position(1).column->size()); } - + // Test case 2: Clear with specific column_size (remove extra columns) { vectorized::Block block; - + // Insert three columns { auto col1 = vectorized::ColumnVector::create(); @@ -1655,70 +1650,69 @@ TEST(BlockTest, ClearColumnData) { col3->insert_value(3); block.insert({std::move(col3), type, "col3"}); } - + EXPECT_EQ(3, block.columns()); - + // Clear data and keep only 2 columns block.clear_column_data(2); - + // Verify extra columns are removed and remaining data is cleared EXPECT_EQ(2, block.columns()); EXPECT_EQ(0, block.rows()); EXPECT_EQ(0, block.get_by_position(0).column->size()); EXPECT_EQ(0, block.get_by_position(1).column->size()); } - + // Test case 3: Clear with column_size larger than actual size { vectorized::Block block; - + // Insert one column { auto col1 = vectorized::ColumnVector::create(); col1->insert_value(1); block.insert({std::move(col1), type, "col1"}); } - + EXPECT_EQ(1, block.columns()); - + // Clear data with column_size > actual size block.clear_column_data(2); - + // Verify column is kept but data is cleared EXPECT_EQ(1, block.columns()); EXPECT_EQ(0, block.rows()); EXPECT_EQ(0, block.get_by_position(0).column->size()); } - + // Test case 4: Clear empty block { vectorized::Block block; EXPECT_EQ(0, block.columns()); - + // Should not crash block.clear_column_data(-1); block.clear_column_data(0); block.clear_column_data(1); - + EXPECT_EQ(0, block.columns()); } - + // Test case 5: Verify row_same_bit is cleared { vectorized::Block block; - + // Insert column with data { auto col1 = vectorized::ColumnVector::create(); col1->insert_value(1); block.insert({std::move(col1), type, "col1"}); } - + // Set some row_same_bit data (if possible) // Note: This might need adjustment based on how row_same_bit is actually used - block.clear_column_data(-1); - + // Verify everything is cleared EXPECT_EQ(0, block.rows()); EXPECT_EQ(1, block.columns()); @@ -1763,7 +1757,7 @@ TEST(BlockTest, IndexByName) { // Test after modifying block structure block.erase(2); // Remove last "col1" block.initialize_index_by_name(); // Re-initialize index - + // Now the first "col1" should be found EXPECT_EQ(0, block.get_position_by_name("col1")); @@ -1778,18 +1772,18 @@ TEST(BlockTest, ReplaceIfOverflow) { vectorized::Block block; auto col = vectorized::ColumnVector::create(); vectorized::DataTypePtr type(std::make_shared()); - + // Add some data to the column auto& data = col->get_data(); for (int i = 0; i < 100; ++i) { data.push_back(i); } - + block.insert({col->get_ptr(), type, "col1"}); - + // Test replace_if_overflow block.replace_if_overflow(); - + // Verify column is still intact EXPECT_EQ(100, block.get_by_position(0).column->size()); } @@ -1797,7 +1791,7 @@ TEST(BlockTest, ReplaceIfOverflow) { TEST(BlockTest, ColumnTransformations) { vectorized::Block block; auto type = std::make_shared(); - + // Insert columns with unique data { auto col1 = vectorized::ColumnVector::create(); @@ -1817,17 +1811,17 @@ TEST(BlockTest, ColumnTransformations) { // Test shuffle_columns std::vector positions = {1, 0}; // change the order of columns block.shuffle_columns(positions); - + // Verify shuffled order EXPECT_EQ("col2", block.get_by_position(0).name); // col2 is now in the first position EXPECT_EQ("col1", block.get_by_position(1).name); // col1 is now in the second position - + // Verify column data is also correctly shuffled const auto* col1 = assert_cast*>( block.get_by_position(1).column.get()); // col1 is now in position 1 const auto* col2 = assert_cast*>( block.get_by_position(0).column.get()); // col2 is now in position 0 - + EXPECT_EQ(1, col1->get_data()[0]); // the value of col1 should be 1 EXPECT_EQ(2, col2->get_data()[0]); // the value of col2 should be 2 } @@ -1952,7 +1946,7 @@ TEST(BlockTest, BlockOperations) { { vectorized::Block block; auto type = std::make_shared(); - + // Insert three columns { auto col1 = vectorized::ColumnVector::create(); @@ -1969,7 +1963,7 @@ TEST(BlockTest, BlockOperations) { col3->insert_value(3); block.insert({std::move(col3), type, "col3"}); } - + EXPECT_EQ(3, block.columns()); vectorized::Block::erase_useless_column(&block, 2); EXPECT_EQ(2, block.columns()); @@ -1981,14 +1975,14 @@ TEST(BlockTest, BlockOperations) { { vectorized::Block original_block; auto type = std::make_shared(); - + // Create original block with data { auto col = vectorized::ColumnVector::create(); col->insert_value(1); original_block.insert({std::move(col), type, "col1"}); } - + // Test case 1: with default values (is_reserve = false) { auto new_block = original_block.create_same_struct_block(5, false); @@ -1996,7 +1990,7 @@ TEST(BlockTest, BlockOperations) { EXPECT_EQ(5, new_block->rows()); // Should have 5 default values EXPECT_EQ("col1", new_block->get_by_position(0).name); EXPECT_TRUE(new_block->get_by_position(0).type->equals(*type)); - + // Verify default values are inserted const auto* col = assert_cast*>( new_block->get_by_position(0).column.get()); @@ -2004,7 +1998,7 @@ TEST(BlockTest, BlockOperations) { EXPECT_EQ(0, col->get_data()[i]); // Default value for Int32 is 0 } } - + // Test case 2: with reserved space (is_reserve = true) { auto new_block = original_block.create_same_struct_block(5, true); @@ -2020,43 +2014,43 @@ TEST(BlockTest, BlockOperations) { vectorized::Block block1; vectorized::Block block2; auto type = std::make_shared(); - + // Prepare two blocks with test data { auto col1 = vectorized::ColumnVector::create(); col1->insert_value(1); col1->insert_value(2); block1.insert({std::move(col1), type, "col1"}); - + auto col2 = vectorized::ColumnVector::create(); col2->insert_value(3); col2->insert_value(4); block1.insert({std::move(col2), type, "col2"}); } - + { auto col1 = vectorized::ColumnVector::create(); col1->insert_value(1); col1->insert_value(3); block2.insert({std::move(col1), type, "col1"}); - + auto col2 = vectorized::ColumnVector::create(); col2->insert_value(3); col2->insert_value(4); block2.insert({std::move(col2), type, "col2"}); } - + // Test basic compare_at EXPECT_EQ(0, block1.compare_at(0, 0, block2, 1)); // First rows are equal EXPECT_LT(block1.compare_at(0, 1, block2, 1), 0); // 1 < 3 - + // Test compare_at with num_columns EXPECT_EQ(0, block1.compare_at(0, 0, 1, block2, 1)); // Compare only first column - + // Test compare_at with specific columns std::vector compare_cols = {1}; // Compare only second column EXPECT_EQ(0, block1.compare_at(0, 0, &compare_cols, block2, 1)); - + // Test compare_column_at EXPECT_EQ(0, block1.compare_column_at(0, 0, 0, block2, 1)); // Compare first column EXPECT_LT(block1.compare_column_at(0, 1, 0, block2, 1), 0); // 1 < 3 @@ -2066,24 +2060,24 @@ TEST(BlockTest, BlockOperations) { { vectorized::Block block; auto type = std::make_shared(); - + // Create block with data auto col = vectorized::ColumnVector::create(); for (int i = 0; i < 3; ++i) { col->insert_value(i); } block.insert({std::move(col), type, "col1"}); - + // Test set_same_bit std::vector same_bits = {true, false, true}; block.set_same_bit(same_bits.begin(), same_bits.end()); - + // Test get_same_bit EXPECT_TRUE(block.get_same_bit(0)); EXPECT_FALSE(block.get_same_bit(1)); EXPECT_TRUE(block.get_same_bit(2)); EXPECT_FALSE(block.get_same_bit(3)); // Out of range - + // Test clear_same_bit block.clear_same_bit(); EXPECT_FALSE(block.get_same_bit(0)); // After clear, all bits should be false @@ -2093,14 +2087,14 @@ TEST(BlockTest, BlockOperations) { { vectorized::Block block; auto type = std::make_shared(); - + // Add regular column { auto col1 = vectorized::ColumnVector::create(); col1->insert_value(1); block.insert({std::move(col1), type, "normal_col"}); } - + // Add temporary column with correct prefix { auto col2 = vectorized::ColumnVector::create(); @@ -2108,7 +2102,7 @@ TEST(BlockTest, BlockOperations) { block.insert({std::move(col2), type, std::string(BeConsts::BLOCK_TEMP_COLUMN_PREFIX) + "col"}); } - + // Add another temporary column { auto col3 = vectorized::ColumnVector::create(); @@ -2116,12 +2110,12 @@ TEST(BlockTest, BlockOperations) { block.insert({std::move(col3), type, std::string(BeConsts::BLOCK_TEMP_COLUMN_PREFIX) + "another_col"}); } - + EXPECT_EQ(3, block.columns()); block.erase_tmp_columns(); EXPECT_EQ(1, block.columns()); EXPECT_EQ("normal_col", block.get_by_position(0).name); - + // Verify temporary columns are removed EXPECT_FALSE(block.has(std::string(BeConsts::BLOCK_TEMP_COLUMN_PREFIX) + "col")); EXPECT_FALSE(block.has(std::string(BeConsts::BLOCK_TEMP_COLUMN_PREFIX) + "another_col")); @@ -2131,17 +2125,17 @@ TEST(BlockTest, BlockOperations) { { vectorized::Block block; auto type = std::make_shared(); - + // Add three columns for (int i = 0; i < 3; ++i) { auto col = vectorized::ColumnVector::create(); col->insert_value(i); block.insert({std::move(col), type, "col" + std::to_string(i)}); } - + std::vector keep_flags = {true, false, true}; block.clear_column_mem_not_keep(keep_flags, true); - + // Verify columns are kept but data is cleared for non-kept columns EXPECT_EQ(3, block.columns()); EXPECT_EQ(1, block.get_by_position(0).column->size()); // Kept @@ -2150,12 +2144,12 @@ TEST(BlockTest, BlockOperations) { } } -TEST(BlockTest, StringAndCompressionOperations) { +TEST(BlockTest, StringOperations) { using namespace std::string_literals; // Test shrink_char_type_column_suffix_zero { vectorized::Block block; - + // Add a string column with padding zeros { auto col = vectorized::ColumnString::create(); @@ -2164,7 +2158,7 @@ TEST(BlockTest, StringAndCompressionOperations) { std::string str2 = "world\0\0"s; // 7bytes, contains 2 trailing zeros col->insert_data(str1.c_str(), str1.size()); col->insert_data(str2.c_str(), str2.size()); - + auto type = std::make_shared(); block.insert({std::move(col), type, "str_col"}); } @@ -2203,61 +2197,47 @@ TEST(BlockTest, StringAndCompressionOperations) { EXPECT_EQ(2, int_col->get_data()[1]); } - // Test compression time and bytes tracking - { - vectorized::Block block; - - // Initially all metrics should be zero - EXPECT_EQ(0, block.get_decompress_time()); - EXPECT_EQ(0, block.get_decompressed_bytes()); - EXPECT_EQ(0, block.get_compress_time()); - - // Note: Actual compression/decompression operations would be tested - // in integration tests or specific compression-related test files - // Here we're just verifying the getters work - } - // Test with Array { vectorized::Block block; - + // Create Array column with padding zeros auto string_type = std::make_shared(); auto array_type = std::make_shared(string_type); - + // Add two strings with trailing zeros auto string_col = vectorized::ColumnString::create(); std::string str1 = "hello\0\0"s; std::string str2 = "world\0"s; string_col->insert_data(str1.c_str(), str1.size()); string_col->insert_data(str2.c_str(), str2.size()); - + // Create array offsets column auto array_offsets = vectorized::ColumnArray::ColumnOffsets::create(); array_offsets->get_data().push_back(2); // First array has 2 elements - + // Create array column auto array_col = vectorized::ColumnArray::create( std::move(string_col), std::move(array_offsets)); - + // Insert array column into block block.insert({std::move(array_col), array_type, "array_str_col"}); - + // Shrink array column std::vector char_type_idx = {0}; block.shrink_char_type_column_suffix_zero(char_type_idx); - + // Verify strings in array are shrunk const auto* array_col_result = assert_cast( block.get_by_position(0).column.get()); const auto* string_col_result = assert_cast( array_col_result->get_data_ptr().get()); - + // Verify first string in array StringRef ref1 = string_col_result->get_data_at(0); EXPECT_EQ(5, ref1.size); // "hello" without zeros EXPECT_EQ(0, memcmp(ref1.data, "hello", 5)); - + // Verify second string in array StringRef ref2 = string_col_result->get_data_at(1); EXPECT_EQ(5, ref2.size); // "world" without zeros From 05f564f3dad11115a415daa9ab118b0e646ecc5a Mon Sep 17 00:00:00 2001 From: yoruet <1559650411@qq.com> Date: Sun, 17 Nov 2024 17:16:12 +0800 Subject: [PATCH 04/41] code format --- be/test/vec/core/block_test.cpp | 140 ++++++++++++++++++-------------- 1 file changed, 78 insertions(+), 62 deletions(-) diff --git a/be/test/vec/core/block_test.cpp b/be/test/vec/core/block_test.cpp index c519d970bde238..9a5772299eefb1 100644 --- a/be/test/vec/core/block_test.cpp +++ b/be/test/vec/core/block_test.cpp @@ -976,11 +976,11 @@ TEST(BlockTest, RowOperations) { EXPECT_FALSE(block.is_empty_column()); // Test row operations - block.set_num_rows(50); // LIMIT + block.set_num_rows(50); // LIMIT EXPECT_EQ(50, block.rows()); int64_t offset = 20; - block.skip_num_rows(offset); // OFFSET + block.skip_num_rows(offset); // OFFSET EXPECT_EQ(30, block.rows()); } @@ -1111,10 +1111,10 @@ TEST(BlockTest, DumpMethods) { { std::string line = block.dump_one_line(0, 2); EXPECT_EQ("123 hello", line); - + line = block.dump_one_line(1, 2); EXPECT_EQ("456 world", line); - + line = block.dump_one_line(0, 1); EXPECT_EQ("123", line); } @@ -1160,7 +1160,8 @@ TEST(BlockTest, DumpMethods) { EXPECT_TRUE(str_dump.find("world") != std::string::npos); // Test Nullable column - std::string nullable_dump = vectorized::Block::dump_column(nullable_col->get_ptr(), nullable_type); + std::string nullable_dump = + vectorized::Block::dump_column(nullable_col->get_ptr(), nullable_type); LOG(INFO) << "Nullable column dump:\n" << nullable_dump; EXPECT_FALSE(nullable_dump.empty()); EXPECT_FALSE(nullable_dump.find("123") != std::string::npos); @@ -1169,7 +1170,7 @@ TEST(BlockTest, DumpMethods) { auto empty_col = vectorized::ColumnVector::create(); auto empty_dump = vectorized::Block::dump_column(empty_col->get_ptr(), type1); LOG(INFO) << "Empty column dump:\n" << empty_dump; - EXPECT_FALSE(empty_dump.empty()); // Should still return formatted empty table + EXPECT_FALSE(empty_dump.empty()); // Should still return formatted empty table } } @@ -1216,9 +1217,11 @@ TEST(BlockTest, CloneOperations) { EXPECT_EQ(type, new_block.get_by_position(0).type); EXPECT_EQ(type, new_block.get_by_position(1).type); EXPECT_EQ(1, assert_cast*>( - new_block.get_by_position(0).column.get())->get_data()[0]); + new_block.get_by_position(0).column.get()) + ->get_data()[0]); EXPECT_EQ(2, assert_cast*>( - new_block.get_by_position(1).column.get())->get_data()[0]); + new_block.get_by_position(1).column.get()) + ->get_data()[0]); // Test clone_with_columns auto cloned_with_cols = block.clone_with_columns(columns); @@ -1229,9 +1232,11 @@ TEST(BlockTest, CloneOperations) { EXPECT_EQ(type, cloned_with_cols.get_by_position(0).type); EXPECT_EQ(type, cloned_with_cols.get_by_position(1).type); EXPECT_EQ(1, assert_cast*>( - cloned_with_cols.get_by_position(0).column.get())->get_data()[0]); + cloned_with_cols.get_by_position(0).column.get()) + ->get_data()[0]); EXPECT_EQ(2, assert_cast*>( - cloned_with_cols.get_by_position(1).column.get())->get_data()[0]); + cloned_with_cols.get_by_position(1).column.get()) + ->get_data()[0]); // Test clone_without_columns std::vector column_offset = {0}; @@ -1250,23 +1255,29 @@ TEST(BlockTest, CloneOperations) { block.set_columns(std::move(mutable_columns)); EXPECT_EQ(1, block.rows()); EXPECT_EQ(3, assert_cast*>( - block.get_by_position(0).column.get())->get_data()[0]); + block.get_by_position(0).column.get()) + ->get_data()[0]); EXPECT_EQ(4, assert_cast*>( - block.get_by_position(1).column.get())->get_data()[0]); + block.get_by_position(1).column.get()) + ->get_data()[0]); } // Test clone_with_columns with mutable columns { auto new_mutable_columns = block.clone_empty_columns(); - auto* tmp_col0 = assert_cast*>(new_mutable_columns[0].get()); - auto* tmp_col1 = assert_cast*>(new_mutable_columns[1].get()); + auto* tmp_col0 = + assert_cast*>(new_mutable_columns[0].get()); + auto* tmp_col1 = + assert_cast*>(new_mutable_columns[1].get()); tmp_col0->insert_value(5); tmp_col1->insert_value(6); auto cloned_with_mutable = block.clone_with_columns(std::move(new_mutable_columns)); EXPECT_EQ(1, cloned_with_mutable.rows()); EXPECT_EQ(5, assert_cast*>( - cloned_with_mutable.get_by_position(0).column.get())->get_data()[0]); + cloned_with_mutable.get_by_position(0).column.get()) + ->get_data()[0]); EXPECT_EQ(6, assert_cast*>( - cloned_with_mutable.get_by_position(1).column.get())->get_data()[0]); + cloned_with_mutable.get_by_position(1).column.get()) + ->get_data()[0]); } // Test copy_block @@ -1278,7 +1289,8 @@ TEST(BlockTest, CloneOperations) { EXPECT_EQ("col1", single_copy.get_by_position(0).name); EXPECT_EQ(type, single_copy.get_by_position(0).type); EXPECT_EQ(3, assert_cast*>( - single_copy.get_by_position(0).column.get())->get_data()[0]); + single_copy.get_by_position(0).column.get()) + ->get_data()[0]); // Test copying multiple columns std::vector multiple_columns = {0, 1}; @@ -1289,9 +1301,11 @@ TEST(BlockTest, CloneOperations) { EXPECT_EQ(type, multi_copy.get_by_position(0).type); EXPECT_EQ(type, multi_copy.get_by_position(1).type); EXPECT_EQ(3, assert_cast*>( - multi_copy.get_by_position(0).column.get())->get_data()[0]); + multi_copy.get_by_position(0).column.get()) + ->get_data()[0]); EXPECT_EQ(4, assert_cast*>( - multi_copy.get_by_position(1).column.get())->get_data()[0]); + multi_copy.get_by_position(1).column.get()) + ->get_data()[0]); // Test copying columns in different order std::vector reordered_columns = {1, 0}; @@ -1300,9 +1314,11 @@ TEST(BlockTest, CloneOperations) { EXPECT_EQ("col2", reordered_copy.get_by_position(0).name); EXPECT_EQ("col1", reordered_copy.get_by_position(1).name); EXPECT_EQ(4, assert_cast*>( - reordered_copy.get_by_position(0).column.get())->get_data()[0]); + reordered_copy.get_by_position(0).column.get()) + ->get_data()[0]); EXPECT_EQ(3, assert_cast*>( - reordered_copy.get_by_position(1).column.get())->get_data()[0]); + reordered_copy.get_by_position(1).column.get()) + ->get_data()[0]); // Test copying same column multiple times std::vector duplicate_columns = {0, 0}; @@ -1311,9 +1327,11 @@ TEST(BlockTest, CloneOperations) { EXPECT_EQ("col1", duplicate_copy.get_by_position(0).name); EXPECT_EQ("col1", duplicate_copy.get_by_position(1).name); EXPECT_EQ(3, assert_cast*>( - duplicate_copy.get_by_position(0).column.get())->get_data()[0]); + duplicate_copy.get_by_position(0).column.get()) + ->get_data()[0]); EXPECT_EQ(3, assert_cast*>( - duplicate_copy.get_by_position(1).column.get())->get_data()[0]); + duplicate_copy.get_by_position(1).column.get()) + ->get_data()[0]); } } @@ -1340,22 +1358,22 @@ TEST(BlockTest, FilterAndSelector) { // Test filter_block_internal with filter only { auto test_block = create_test_block(10); - vectorized::IColumn::Filter filter(10, 1); // Initialize with all 1s (keep all rows) - filter[0] = 0; // Filter out first row - filter[5] = 0; // Filter out sixth row + vectorized::IColumn::Filter filter(10, 1); // Initialize with all 1s (keep all rows) + filter[0] = 0; // Filter out first row + filter[5] = 0; // Filter out sixth row vectorized::Block::filter_block_internal(&test_block, filter); EXPECT_EQ(8, test_block.rows()); // Verify filtered data for both columns const auto* filtered_col1 = assert_cast*>( - test_block.get_by_position(0).column.get()); + test_block.get_by_position(0).column.get()); const auto* filtered_col2 = assert_cast*>( - test_block.get_by_position(1).column.get()); + test_block.get_by_position(1).column.get()); // Expected values after filtering - std::vector expected_col1 = {1,2,3,4,6,7,8,9}; - std::vector expected_col2 = {2,4,6,8,12,14,16,18}; + std::vector expected_col1 = {1, 2, 3, 4, 6, 7, 8, 9}; + std::vector expected_col2 = {2, 4, 6, 8, 12, 14, 16, 18}; for (size_t i = 0; i < expected_col1.size(); ++i) { EXPECT_EQ(expected_col1[i], filtered_col1->get_data()[i]); @@ -1368,26 +1386,26 @@ TEST(BlockTest, FilterAndSelector) { auto test_block = create_test_block(10); vectorized::IColumn::Filter filter(10, 1); filter[0] = 0; - std::vector columns_to_filter = {0}; // Only filter first column + std::vector columns_to_filter = {0}; // Only filter first column vectorized::Block::filter_block_internal(&test_block, columns_to_filter, filter); EXPECT_EQ(9, test_block.rows()); const auto* filtered_col1 = assert_cast*>( - test_block.get_by_position(0).column.get()); + test_block.get_by_position(0).column.get()); const auto* filtered_col2 = assert_cast*>( - test_block.get_by_position(1).column.get()); - EXPECT_EQ(1, filtered_col1->get_data()[0]); // First column filtered - EXPECT_EQ(0, filtered_col2->get_data()[0]); // Second column unchanged + test_block.get_by_position(1).column.get()); + EXPECT_EQ(1, filtered_col1->get_data()[0]); // First column filtered + EXPECT_EQ(0, filtered_col2->get_data()[0]); // Second column unchanged } // Test filter_block_internal with column_to_keep { auto test_block = create_test_block(10); vectorized::IColumn::Filter filter(10, 1); - filter[0] = 0; // Filter out first row - filter[5] = 0; // Filter out sixth row - uint32_t column_to_keep = 1; // Only filter first column, keep the rest columns + filter[0] = 0; // Filter out first row + filter[5] = 0; // Filter out sixth row + uint32_t column_to_keep = 1; // Only filter first column, keep the rest columns vectorized::Block::filter_block_internal(&test_block, filter, column_to_keep); @@ -1397,13 +1415,13 @@ TEST(BlockTest, FilterAndSelector) { // Verify filtered data for both columns const auto* filtered_col1 = assert_cast*>( - test_block.get_by_position(0).column.get()); + test_block.get_by_position(0).column.get()); const auto* filtered_col2 = assert_cast*>( - test_block.get_by_position(1).column.get()); + test_block.get_by_position(1).column.get()); // Expected values after filtering - std::vector expected_col1 = {1,2,3,4,6,7,8,9}; - std::vector expected_col2 = {0,2,4,6,8,10,12,14,16,18}; + std::vector expected_col1 = {1, 2, 3, 4, 6, 7, 8, 9}; + std::vector expected_col2 = {2, 4, 6, 8, 12, 14, 16, 18}; // Verify each value in filtered columns for (size_t i = 0; i < expected_col1.size(); ++i) { @@ -1420,12 +1438,11 @@ TEST(BlockTest, FilterAndSelector) { // Create nullable filter column auto nullable_filter = vectorized::ColumnNullable::create( - vectorized::ColumnVector::create(10, 1), // all true - vectorized::ColumnVector::create(10, 0) // no nulls + vectorized::ColumnVector::create(10, 1), // all true + vectorized::ColumnVector::create(10, 0) // no nulls ); auto filter_type = std::make_shared( - std::make_shared() - ); + std::make_shared()); // Add filter column to block test_block.insert({nullable_filter->get_ptr(), filter_type, "filter"}); @@ -1433,13 +1450,13 @@ TEST(BlockTest, FilterAndSelector) { // Test four-parameter version std::vector columns_to_filter = {0, 1}; EXPECT_TRUE(vectorized::Block::filter_block(&test_block, columns_to_filter, 2, 2).ok()); - EXPECT_EQ(10, test_block.rows()); // All rows kept + EXPECT_EQ(10, test_block.rows()); // All rows kept // Test three-parameter version auto test_block2 = create_test_block(10); test_block2.insert({nullable_filter->get_ptr(), filter_type, "filter"}); EXPECT_TRUE(vectorized::Block::filter_block(&test_block2, 2, 2).ok()); - EXPECT_EQ(10, test_block2.rows()); // All rows kept + EXPECT_EQ(10, test_block2.rows()); // All rows kept } // Test filter_block with const filter column @@ -1448,9 +1465,8 @@ TEST(BlockTest, FilterAndSelector) { // Create const filter column (false) auto const_filter = vectorized::ColumnConst::create( - vectorized::ColumnVector::create(1, 0), // false - 10 - ); + vectorized::ColumnVector::create(1, 0), // false + 10); auto filter_type = std::make_shared(); // Add filter column to block @@ -1459,13 +1475,13 @@ TEST(BlockTest, FilterAndSelector) { // Test four-parameter version std::vector columns_to_filter = {0, 1}; EXPECT_TRUE(vectorized::Block::filter_block(&test_block, columns_to_filter, 2, 2).ok()); - EXPECT_EQ(0, test_block.rows()); // All rows filtered out + EXPECT_EQ(0, test_block.rows()); // All rows filtered out // Test three-parameter version auto test_block2 = create_test_block(10); test_block2.insert({const_filter->get_ptr(), filter_type, "filter"}); EXPECT_TRUE(vectorized::Block::filter_block(&test_block2, 2, 2).ok()); - EXPECT_EQ(0, test_block2.rows()); // All rows filtered out + EXPECT_EQ(0, test_block2.rows()); // All rows filtered out } // Test filter_block with regular filter column @@ -1475,7 +1491,7 @@ TEST(BlockTest, FilterAndSelector) { // Create regular filter column auto filter_column = vectorized::ColumnVector::create(); for (size_t i = 0; i < 10; ++i) { - filter_column->insert_value(i % 2); // Keep odd-indexed rows + filter_column->insert_value(i % 2); // Keep odd-indexed rows } auto filter_type = std::make_shared(); @@ -1485,7 +1501,7 @@ TEST(BlockTest, FilterAndSelector) { // Test four-parameter version std::vector columns_to_filter = {0, 1}; EXPECT_TRUE(vectorized::Block::filter_block(&test_block, columns_to_filter, 2, 2).ok()); - EXPECT_EQ(5, test_block.rows()); // Half rows kept + EXPECT_EQ(5, test_block.rows()); // Half rows kept // Verify filtered data const auto* filtered_col1 = assert_cast*>( @@ -1505,7 +1521,7 @@ TEST(BlockTest, FilterAndSelector) { auto test_block2 = create_test_block(10); test_block2.insert({filter_column->get_ptr(), filter_type, "filter"}); EXPECT_TRUE(vectorized::Block::filter_block(&test_block2, 2, 2).ok()); - EXPECT_EQ(5, test_block2.rows()); // Half rows kept + EXPECT_EQ(5, test_block2.rows()); // Half rows kept // Verify filtered data filtered_col1 = assert_cast*>( @@ -1531,7 +1547,7 @@ TEST(BlockTest, FilterAndSelector) { // Create selector to select every other row vectorized::IColumn::Selector selector(5, 0); for (size_t i = 0; i < 5; ++i) { - selector[i] = i * 2; // Select rows 0,2,4,6,8 + selector[i] = i * 2; // Select rows 0,2,4,6,8 } // Perform selection @@ -1542,13 +1558,13 @@ TEST(BlockTest, FilterAndSelector) { const vectorized::Block& result_block = dst.to_block(); const auto* selected_col1 = assert_cast*>( - result_block.get_by_position(0).column.get()); + result_block.get_by_position(0).column.get()); const auto* selected_col2 = assert_cast*>( - result_block.get_by_position(1).column.get()); + result_block.get_by_position(1).column.get()); // Expected values after selection - std::vector expected_col1 = {0,2,4,6,8}; - std::vector expected_col2 = {0,4,8,12,16}; + std::vector expected_col1 = {0, 2, 4, 6, 8}; + std::vector expected_col2 = {2, 4, 6, 8, 12}; for (size_t i = 0; i < expected_col1.size(); ++i) { EXPECT_EQ(expected_col1[i], selected_col1->get_data()[i]); @@ -1576,7 +1592,7 @@ TEST(BlockTest, RowCheck) { EXPECT_THROW(block.check_number_of_rows(), Exception); // Test clear operations - block.clear_column_data(1); // Clear first column and delete the rest columns + block.clear_column_data(1); // Clear first column and delete the rest columns EXPECT_EQ(1, block.columns()); From c73bc01841bfe54c1f89acdc10065da7bc56efe9 Mon Sep 17 00:00:00 2001 From: yoruet <1559650411@qq.com> Date: Sun, 17 Nov 2024 17:24:39 +0800 Subject: [PATCH 05/41] code format --- be/test/vec/core/block_test.cpp | 126 ++++++++++++++++---------------- 1 file changed, 63 insertions(+), 63 deletions(-) diff --git a/be/test/vec/core/block_test.cpp b/be/test/vec/core/block_test.cpp index 9a5772299eefb1..ec8565ec213da2 100644 --- a/be/test/vec/core/block_test.cpp +++ b/be/test/vec/core/block_test.cpp @@ -1218,10 +1218,10 @@ TEST(BlockTest, CloneOperations) { EXPECT_EQ(type, new_block.get_by_position(1).type); EXPECT_EQ(1, assert_cast*>( new_block.get_by_position(0).column.get()) - ->get_data()[0]); + ->get_data()[0]); EXPECT_EQ(2, assert_cast*>( new_block.get_by_position(1).column.get()) - ->get_data()[0]); + ->get_data()[0]); // Test clone_with_columns auto cloned_with_cols = block.clone_with_columns(columns); @@ -1232,11 +1232,11 @@ TEST(BlockTest, CloneOperations) { EXPECT_EQ(type, cloned_with_cols.get_by_position(0).type); EXPECT_EQ(type, cloned_with_cols.get_by_position(1).type); EXPECT_EQ(1, assert_cast*>( - cloned_with_cols.get_by_position(0).column.get()) - ->get_data()[0]); + cloned_with_cols.get_by_position(0).column.get()) + ->get_data()[0]); EXPECT_EQ(2, assert_cast*>( - cloned_with_cols.get_by_position(1).column.get()) - ->get_data()[0]); + cloned_with_cols.get_by_position(1).column.get()) + ->get_data()[0]); // Test clone_without_columns std::vector column_offset = {0}; @@ -1255,11 +1255,11 @@ TEST(BlockTest, CloneOperations) { block.set_columns(std::move(mutable_columns)); EXPECT_EQ(1, block.rows()); EXPECT_EQ(3, assert_cast*>( - block.get_by_position(0).column.get()) - ->get_data()[0]); + block.get_by_position(0).column.get()) + ->get_data()[0]); EXPECT_EQ(4, assert_cast*>( - block.get_by_position(1).column.get()) - ->get_data()[0]); + block.get_by_position(1).column.get()) + ->get_data()[0]); } // Test clone_with_columns with mutable columns { @@ -1273,11 +1273,11 @@ TEST(BlockTest, CloneOperations) { auto cloned_with_mutable = block.clone_with_columns(std::move(new_mutable_columns)); EXPECT_EQ(1, cloned_with_mutable.rows()); EXPECT_EQ(5, assert_cast*>( - cloned_with_mutable.get_by_position(0).column.get()) - ->get_data()[0]); + cloned_with_mutable.get_by_position(0).column.get()) + ->get_data()[0]); EXPECT_EQ(6, assert_cast*>( - cloned_with_mutable.get_by_position(1).column.get()) - ->get_data()[0]); + cloned_with_mutable.get_by_position(1).column.get()) + ->get_data()[0]); } // Test copy_block @@ -1289,8 +1289,8 @@ TEST(BlockTest, CloneOperations) { EXPECT_EQ("col1", single_copy.get_by_position(0).name); EXPECT_EQ(type, single_copy.get_by_position(0).type); EXPECT_EQ(3, assert_cast*>( - single_copy.get_by_position(0).column.get()) - ->get_data()[0]); + single_copy.get_by_position(0).column.get()) + ->get_data()[0]); // Test copying multiple columns std::vector multiple_columns = {0, 1}; @@ -1301,11 +1301,11 @@ TEST(BlockTest, CloneOperations) { EXPECT_EQ(type, multi_copy.get_by_position(0).type); EXPECT_EQ(type, multi_copy.get_by_position(1).type); EXPECT_EQ(3, assert_cast*>( - multi_copy.get_by_position(0).column.get()) - ->get_data()[0]); + multi_copy.get_by_position(0).column.get()) + ->get_data()[0]); EXPECT_EQ(4, assert_cast*>( - multi_copy.get_by_position(1).column.get()) - ->get_data()[0]); + multi_copy.get_by_position(1).column.get()) + ->get_data()[0]); // Test copying columns in different order std::vector reordered_columns = {1, 0}; @@ -1314,11 +1314,11 @@ TEST(BlockTest, CloneOperations) { EXPECT_EQ("col2", reordered_copy.get_by_position(0).name); EXPECT_EQ("col1", reordered_copy.get_by_position(1).name); EXPECT_EQ(4, assert_cast*>( - reordered_copy.get_by_position(0).column.get()) - ->get_data()[0]); + reordered_copy.get_by_position(0).column.get()) + ->get_data()[0]); EXPECT_EQ(3, assert_cast*>( - reordered_copy.get_by_position(1).column.get()) - ->get_data()[0]); + reordered_copy.get_by_position(1).column.get()) + ->get_data()[0]); // Test copying same column multiple times std::vector duplicate_columns = {0, 0}; @@ -1327,11 +1327,11 @@ TEST(BlockTest, CloneOperations) { EXPECT_EQ("col1", duplicate_copy.get_by_position(0).name); EXPECT_EQ("col1", duplicate_copy.get_by_position(1).name); EXPECT_EQ(3, assert_cast*>( - duplicate_copy.get_by_position(0).column.get()) - ->get_data()[0]); + duplicate_copy.get_by_position(0).column.get()) + ->get_data()[0]); EXPECT_EQ(3, assert_cast*>( - duplicate_copy.get_by_position(1).column.get()) - ->get_data()[0]); + duplicate_copy.get_by_position(1).column.get()) + ->get_data()[0]); } } @@ -1359,8 +1359,8 @@ TEST(BlockTest, FilterAndSelector) { { auto test_block = create_test_block(10); vectorized::IColumn::Filter filter(10, 1); // Initialize with all 1s (keep all rows) - filter[0] = 0; // Filter out first row - filter[5] = 0; // Filter out sixth row + filter[0] = 0; // Filter out first row + filter[5] = 0; // Filter out sixth row vectorized::Block::filter_block_internal(&test_block, filter); EXPECT_EQ(8, test_block.rows()); @@ -1505,12 +1505,12 @@ TEST(BlockTest, FilterAndSelector) { // Verify filtered data const auto* filtered_col1 = assert_cast*>( - test_block.get_by_position(0).column.get()); + test_block.get_by_position(0).column.get()); const auto* filtered_col2 = assert_cast*>( - test_block.get_by_position(1).column.get()); + test_block.get_by_position(1).column.get()); - std::vector expected_col1 = {1,3,5,7,9}; - std::vector expected_col2 = {2,6,10,14,18}; + std::vector expected_col1 = {1, 3, 5, 7, 9}; + std::vector expected_col2 = {2, 6, 10, 14, 18}; for (size_t i = 0; i < expected_col1.size(); ++i) { EXPECT_EQ(expected_col1[i], filtered_col1->get_data()[i]); @@ -1525,9 +1525,9 @@ TEST(BlockTest, FilterAndSelector) { // Verify filtered data filtered_col1 = assert_cast*>( - test_block2.get_by_position(0).column.get()); + test_block2.get_by_position(0).column.get()); filtered_col2 = assert_cast*>( - test_block2.get_by_position(1).column.get()); + test_block2.get_by_position(1).column.get()); for (size_t i = 0; i < expected_col1.size(); ++i) { EXPECT_EQ(expected_col1[i], filtered_col1->get_data()[i]); @@ -1771,11 +1771,11 @@ TEST(BlockTest, IndexByName) { EXPECT_EQ(nullptr, block.try_get_by_name("non_existent")); // Test after modifying block structure - block.erase(2); // Remove last "col1" + block.erase(2); // Remove last "col1" block.initialize_index_by_name(); // Re-initialize index // Now the first "col1" should be found - EXPECT_EQ(0, block.get_position_by_name("col1")); + EXPECT_EQ(0, block.get_position_by_name("col1")); // Test with empty block block.clear(); @@ -1825,21 +1825,21 @@ TEST(BlockTest, ColumnTransformations) { EXPECT_EQ("col2", block.get_by_position(1).name); // Test shuffle_columns - std::vector positions = {1, 0}; // change the order of columns + std::vector positions = {1, 0}; // change the order of columns block.shuffle_columns(positions); // Verify shuffled order - EXPECT_EQ("col2", block.get_by_position(0).name); // col2 is now in the first position - EXPECT_EQ("col1", block.get_by_position(1).name); // col1 is now in the second position + EXPECT_EQ("col2", block.get_by_position(0).name); // col2 is now in the first position + EXPECT_EQ("col1", block.get_by_position(1).name); // col1 is now in the second position // Verify column data is also correctly shuffled const auto* col1 = assert_cast*>( - block.get_by_position(1).column.get()); // col1 is now in position 1 + block.get_by_position(1).column.get()); // col1 is now in position 1 const auto* col2 = assert_cast*>( - block.get_by_position(0).column.get()); // col2 is now in position 0 + block.get_by_position(0).column.get()); // col2 is now in position 0 - EXPECT_EQ(1, col1->get_data()[0]); // the value of col1 should be 1 - EXPECT_EQ(2, col2->get_data()[0]); // the value of col2 should be 2 + EXPECT_EQ(1, col1->get_data()[0]); // the value of col1 should be 1 + EXPECT_EQ(2, col2->get_data()[0]); // the value of col2 should be 2 } TEST(BlockTest, HashUpdate) { @@ -2003,15 +2003,15 @@ TEST(BlockTest, BlockOperations) { { auto new_block = original_block.create_same_struct_block(5, false); EXPECT_EQ(original_block.columns(), new_block->columns()); - EXPECT_EQ(5, new_block->rows()); // Should have 5 default values + EXPECT_EQ(5, new_block->rows()); // Should have 5 default values EXPECT_EQ("col1", new_block->get_by_position(0).name); EXPECT_TRUE(new_block->get_by_position(0).type->equals(*type)); // Verify default values are inserted const auto* col = assert_cast*>( - new_block->get_by_position(0).column.get()); + new_block->get_by_position(0).column.get()); for (size_t i = 0; i < 5; ++i) { - EXPECT_EQ(0, col->get_data()[i]); // Default value for Int32 is 0 + EXPECT_EQ(0, col->get_data()[i]); // Default value for Int32 is 0 } } @@ -2019,7 +2019,7 @@ TEST(BlockTest, BlockOperations) { { auto new_block = original_block.create_same_struct_block(5, true); EXPECT_EQ(original_block.columns(), new_block->columns()); - EXPECT_EQ(0, new_block->rows()); // Should be empty but with reserved space + EXPECT_EQ(0, new_block->rows()); // Should be empty but with reserved space EXPECT_EQ("col1", new_block->get_by_position(0).name); EXPECT_TRUE(new_block->get_by_position(0).type->equals(*type)); } @@ -2064,7 +2064,7 @@ TEST(BlockTest, BlockOperations) { EXPECT_EQ(0, block1.compare_at(0, 0, 1, block2, 1)); // Compare only first column // Test compare_at with specific columns - std::vector compare_cols = {1}; // Compare only second column + std::vector compare_cols = {1}; // Compare only second column EXPECT_EQ(0, block1.compare_at(0, 0, &compare_cols, block2, 1)); // Test compare_column_at @@ -2092,11 +2092,11 @@ TEST(BlockTest, BlockOperations) { EXPECT_TRUE(block.get_same_bit(0)); EXPECT_FALSE(block.get_same_bit(1)); EXPECT_TRUE(block.get_same_bit(2)); - EXPECT_FALSE(block.get_same_bit(3)); // Out of range + EXPECT_FALSE(block.get_same_bit(3)); // Out of range // Test clear_same_bit block.clear_same_bit(); - EXPECT_FALSE(block.get_same_bit(0)); // After clear, all bits should be false + EXPECT_FALSE(block.get_same_bit(0)); // After clear, all bits should be false } // Test erase_tmp_columns @@ -2154,9 +2154,9 @@ TEST(BlockTest, BlockOperations) { // Verify columns are kept but data is cleared for non-kept columns EXPECT_EQ(3, block.columns()); - EXPECT_EQ(1, block.get_by_position(0).column->size()); // Kept - EXPECT_EQ(0, block.get_by_position(1).column->size()); // Cleared - EXPECT_EQ(1, block.get_by_position(2).column->size()); // Kept + EXPECT_EQ(1, block.get_by_position(0).column->size()); // Kept + EXPECT_EQ(0, block.get_by_position(1).column->size()); // Cleared + EXPECT_EQ(1, block.get_by_position(2).column->size()); // Kept } } @@ -2170,8 +2170,8 @@ TEST(BlockTest, StringOperations) { { auto col = vectorized::ColumnString::create(); // Add string with trailing zeros - std::string str1 = "hello\0\0\0"s; // 8bytes, contains 3 trailing zeros - std::string str2 = "world\0\0"s; // 7bytes, contains 2 trailing zeros + std::string str1 = "hello\0\0\0"s; // 8bytes, contains 3 trailing zeros + std::string str2 = "world\0\0"s; // 7bytes, contains 2 trailing zeros col->insert_data(str1.c_str(), str1.size()); col->insert_data(str2.c_str(), str2.size()); @@ -2189,7 +2189,7 @@ TEST(BlockTest, StringOperations) { } // Test shrinking string column - std::vector char_type_idx = {0}; // Index of string column + std::vector char_type_idx = {0}; // Index of string column block.shrink_char_type_column_suffix_zero(char_type_idx); // Verify string column is shrunk @@ -2198,12 +2198,12 @@ TEST(BlockTest, StringOperations) { // Verify first string StringRef ref1 = str_col->get_data_at(0); - EXPECT_EQ(5, ref1.size); // "hello" without zeros + EXPECT_EQ(5, ref1.size); // "hello" without zeros EXPECT_EQ(0, memcmp(ref1.data, "hello", 5)); // Verify second string StringRef ref2 = str_col->get_data_at(1); - EXPECT_EQ(5, ref2.size); // "world" without zeros + EXPECT_EQ(5, ref2.size); // "world" without zeros EXPECT_EQ(0, memcmp(ref2.data, "world", 5)); // Verify non-string column remains unchanged @@ -2230,7 +2230,7 @@ TEST(BlockTest, StringOperations) { // Create array offsets column auto array_offsets = vectorized::ColumnArray::ColumnOffsets::create(); - array_offsets->get_data().push_back(2); // First array has 2 elements + array_offsets->get_data().push_back(2); // First array has 2 elements // Create array column auto array_col = vectorized::ColumnArray::create( @@ -2251,12 +2251,12 @@ TEST(BlockTest, StringOperations) { // Verify first string in array StringRef ref1 = string_col_result->get_data_at(0); - EXPECT_EQ(5, ref1.size); // "hello" without zeros + EXPECT_EQ(5, ref1.size); // "hello" without zeros EXPECT_EQ(0, memcmp(ref1.data, "hello", 5)); // Verify second string in array StringRef ref2 = string_col_result->get_data_at(1); - EXPECT_EQ(5, ref2.size); // "world" without zeros + EXPECT_EQ(5, ref2.size); // "world" without zeros EXPECT_EQ(0, memcmp(ref2.data, "world", 5)); } } From 658cf55defad9b5ab628ddec971da1ec3d07b179 Mon Sep 17 00:00:00 2001 From: yoruet <1559650411@qq.com> Date: Sun, 17 Nov 2024 17:35:44 +0800 Subject: [PATCH 06/41] code format --- be/test/vec/core/block_test.cpp | 59 ++++++++++++++++----------------- 1 file changed, 29 insertions(+), 30 deletions(-) diff --git a/be/test/vec/core/block_test.cpp b/be/test/vec/core/block_test.cpp index ec8565ec213da2..76b28f1292aa3f 100644 --- a/be/test/vec/core/block_test.cpp +++ b/be/test/vec/core/block_test.cpp @@ -1217,11 +1217,11 @@ TEST(BlockTest, CloneOperations) { EXPECT_EQ(type, new_block.get_by_position(0).type); EXPECT_EQ(type, new_block.get_by_position(1).type); EXPECT_EQ(1, assert_cast*>( - new_block.get_by_position(0).column.get()) - ->get_data()[0]); + new_block.get_by_position(0).column.get()) + ->get_data()[0]); EXPECT_EQ(2, assert_cast*>( - new_block.get_by_position(1).column.get()) - ->get_data()[0]); + new_block.get_by_position(1).column.get()) + ->get_data()[0]); // Test clone_with_columns auto cloned_with_cols = block.clone_with_columns(columns); @@ -1232,11 +1232,11 @@ TEST(BlockTest, CloneOperations) { EXPECT_EQ(type, cloned_with_cols.get_by_position(0).type); EXPECT_EQ(type, cloned_with_cols.get_by_position(1).type); EXPECT_EQ(1, assert_cast*>( - cloned_with_cols.get_by_position(0).column.get()) - ->get_data()[0]); + cloned_with_cols.get_by_position(0).column.get()) + ->get_data()[0]); EXPECT_EQ(2, assert_cast*>( - cloned_with_cols.get_by_position(1).column.get()) - ->get_data()[0]); + cloned_with_cols.get_by_position(1).column.get()) + ->get_data()[0]); // Test clone_without_columns std::vector column_offset = {0}; @@ -1359,8 +1359,8 @@ TEST(BlockTest, FilterAndSelector) { { auto test_block = create_test_block(10); vectorized::IColumn::Filter filter(10, 1); // Initialize with all 1s (keep all rows) - filter[0] = 0; // Filter out first row - filter[5] = 0; // Filter out sixth row + filter[0] = 0; // Filter out first row + filter[5] = 0; // Filter out sixth row vectorized::Block::filter_block_internal(&test_block, filter); EXPECT_EQ(8, test_block.rows()); @@ -1528,7 +1528,7 @@ TEST(BlockTest, FilterAndSelector) { test_block2.get_by_position(0).column.get()); filtered_col2 = assert_cast*>( test_block2.get_by_position(1).column.get()); - + for (size_t i = 0; i < expected_col1.size(); ++i) { EXPECT_EQ(expected_col1[i], filtered_col1->get_data()[i]); EXPECT_EQ(expected_col2[i], filtered_col2->get_data()[i]); @@ -1595,7 +1595,6 @@ TEST(BlockTest, RowCheck) { block.clear_column_data(1); // Clear first column and delete the rest columns EXPECT_EQ(1, block.columns()); - block.clear(); EXPECT_EQ(0, block.columns()); @@ -1771,7 +1770,7 @@ TEST(BlockTest, IndexByName) { EXPECT_EQ(nullptr, block.try_get_by_name("non_existent")); // Test after modifying block structure - block.erase(2); // Remove last "col1" + block.erase(2); // Remove last "col1" block.initialize_index_by_name(); // Re-initialize index // Now the first "col1" should be found @@ -2057,19 +2056,19 @@ TEST(BlockTest, BlockOperations) { } // Test basic compare_at - EXPECT_EQ(0, block1.compare_at(0, 0, block2, 1)); // First rows are equal - EXPECT_LT(block1.compare_at(0, 1, block2, 1), 0); // 1 < 3 + EXPECT_EQ(0, block1.compare_at(0, 0, block2, 1)); // First rows are equal + EXPECT_LT(block1.compare_at(0, 1, block2, 1), 0); // 1 < 3 // Test compare_at with num_columns - EXPECT_EQ(0, block1.compare_at(0, 0, 1, block2, 1)); // Compare only first column + EXPECT_EQ(0, block1.compare_at(0, 0, 1, block2, 1)); // Compare only first column // Test compare_at with specific columns std::vector compare_cols = {1}; // Compare only second column EXPECT_EQ(0, block1.compare_at(0, 0, &compare_cols, block2, 1)); // Test compare_column_at - EXPECT_EQ(0, block1.compare_column_at(0, 0, 0, block2, 1)); // Compare first column - EXPECT_LT(block1.compare_column_at(0, 1, 0, block2, 1), 0); // 1 < 3 + EXPECT_EQ(0, block1.compare_column_at(0, 0, 0, block2, 1)); // Compare first column + EXPECT_LT(block1.compare_column_at(0, 1, 0, block2, 1), 0); // 1 < 3 } // Test same_bit operations @@ -2115,16 +2114,16 @@ TEST(BlockTest, BlockOperations) { { auto col2 = vectorized::ColumnVector::create(); col2->insert_value(2); - block.insert({std::move(col2), type, - std::string(BeConsts::BLOCK_TEMP_COLUMN_PREFIX) + "col"}); + block.insert({std::move(col2), type, + std::string(BeConsts::BLOCK_TEMP_COLUMN_PREFIX) + "col"}); } // Add another temporary column { auto col3 = vectorized::ColumnVector::create(); col3->insert_value(3); - block.insert({std::move(col3), type, - std::string(BeConsts::BLOCK_TEMP_COLUMN_PREFIX) + "another_col"}); + block.insert({std::move(col3), type, + std::string(BeConsts::BLOCK_TEMP_COLUMN_PREFIX) + "another_col"}); } EXPECT_EQ(3, block.columns()); @@ -2193,8 +2192,8 @@ TEST(BlockTest, StringOperations) { block.shrink_char_type_column_suffix_zero(char_type_idx); // Verify string column is shrunk - const auto* str_col = assert_cast( - block.get_by_position(0).column.get()); + const auto* str_col = + assert_cast(block.get_by_position(0).column.get()); // Verify first string StringRef ref1 = str_col->get_data_at(0); @@ -2208,7 +2207,7 @@ TEST(BlockTest, StringOperations) { // Verify non-string column remains unchanged const auto* int_col = assert_cast*>( - block.get_by_position(1).column.get()); + block.get_by_position(1).column.get()); EXPECT_EQ(1, int_col->get_data()[0]); EXPECT_EQ(2, int_col->get_data()[1]); } @@ -2233,8 +2232,8 @@ TEST(BlockTest, StringOperations) { array_offsets->get_data().push_back(2); // First array has 2 elements // Create array column - auto array_col = vectorized::ColumnArray::create( - std::move(string_col), std::move(array_offsets)); + auto array_col = + vectorized::ColumnArray::create(std::move(string_col), std::move(array_offsets)); // Insert array column into block block.insert({std::move(array_col), array_type, "array_str_col"}); @@ -2244,10 +2243,10 @@ TEST(BlockTest, StringOperations) { block.shrink_char_type_column_suffix_zero(char_type_idx); // Verify strings in array are shrunk - const auto* array_col_result = assert_cast( - block.get_by_position(0).column.get()); + const auto* array_col_result = + assert_cast(block.get_by_position(0).column.get()); const auto* string_col_result = assert_cast( - array_col_result->get_data_ptr().get()); + array_col_result->get_data_ptr().get()); // Verify first string in array StringRef ref1 = string_col_result->get_data_at(0); From 121304d0b11793a5fe1ddd3dc6a51fca2e077888 Mon Sep 17 00:00:00 2001 From: yoruet <1559650411@qq.com> Date: Sun, 17 Nov 2024 17:45:40 +0800 Subject: [PATCH 07/41] code format --- be/test/vec/core/block_test.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/be/test/vec/core/block_test.cpp b/be/test/vec/core/block_test.cpp index 76b28f1292aa3f..a643a17f1ffde2 100644 --- a/be/test/vec/core/block_test.cpp +++ b/be/test/vec/core/block_test.cpp @@ -2115,7 +2115,7 @@ TEST(BlockTest, BlockOperations) { auto col2 = vectorized::ColumnVector::create(); col2->insert_value(2); block.insert({std::move(col2), type, - std::string(BeConsts::BLOCK_TEMP_COLUMN_PREFIX) + "col"}); + std::string(BeConsts::BLOCK_TEMP_COLUMN_PREFIX) + "col"}); } // Add another temporary column @@ -2123,7 +2123,7 @@ TEST(BlockTest, BlockOperations) { auto col3 = vectorized::ColumnVector::create(); col3->insert_value(3); block.insert({std::move(col3), type, - std::string(BeConsts::BLOCK_TEMP_COLUMN_PREFIX) + "another_col"}); + std::string(BeConsts::BLOCK_TEMP_COLUMN_PREFIX) + "another_col"}); } EXPECT_EQ(3, block.columns()); @@ -2232,7 +2232,7 @@ TEST(BlockTest, StringOperations) { array_offsets->get_data().push_back(2); // First array has 2 elements // Create array column - auto array_col = + auto array_col = vectorized::ColumnArray::create(std::move(string_col), std::move(array_offsets)); // Insert array column into block From 99c1eae5d432c633aca91d73125aeb2777c00f4a Mon Sep 17 00:00:00 2001 From: yoruet <1559650411@qq.com> Date: Sun, 17 Nov 2024 20:17:48 +0800 Subject: [PATCH 08/41] fix bug in filterandselector --- be/test/vec/core/block_test.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/be/test/vec/core/block_test.cpp b/be/test/vec/core/block_test.cpp index a643a17f1ffde2..4efbb00f5a85bc 100644 --- a/be/test/vec/core/block_test.cpp +++ b/be/test/vec/core/block_test.cpp @@ -1421,7 +1421,7 @@ TEST(BlockTest, FilterAndSelector) { // Expected values after filtering std::vector expected_col1 = {1, 2, 3, 4, 6, 7, 8, 9}; - std::vector expected_col2 = {2, 4, 6, 8, 12, 14, 16, 18}; + std::vector expected_col2 = {0, 2, 4, 6, 8, 10, 12, 14, 16, 18}; // Verify each value in filtered columns for (size_t i = 0; i < expected_col1.size(); ++i) { @@ -1564,7 +1564,7 @@ TEST(BlockTest, FilterAndSelector) { // Expected values after selection std::vector expected_col1 = {0, 2, 4, 6, 8}; - std::vector expected_col2 = {2, 4, 6, 8, 12}; + std::vector expected_col2 = {0, 4, 8, 12, 16}; for (size_t i = 0; i < expected_col1.size(); ++i) { EXPECT_EQ(expected_col1[i], selected_col1->get_data()[i]); From 173aeb9b9a031defe0d504f3fca19cf20ebf018d Mon Sep 17 00:00:00 2001 From: yoruet <1559650411@qq.com> Date: Sun, 17 Nov 2024 20:37:42 +0800 Subject: [PATCH 09/41] code format --- be/test/vec/core/block_test.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/be/test/vec/core/block_test.cpp b/be/test/vec/core/block_test.cpp index 4efbb00f5a85bc..d4e303037673c6 100644 --- a/be/test/vec/core/block_test.cpp +++ b/be/test/vec/core/block_test.cpp @@ -2115,7 +2115,7 @@ TEST(BlockTest, BlockOperations) { auto col2 = vectorized::ColumnVector::create(); col2->insert_value(2); block.insert({std::move(col2), type, - std::string(BeConsts::BLOCK_TEMP_COLUMN_PREFIX) + "col"}); + std::string(BeConsts::BLOCK_TEMP_COLUMN_PREFIX) + "col"}); } // Add another temporary column @@ -2123,7 +2123,7 @@ TEST(BlockTest, BlockOperations) { auto col3 = vectorized::ColumnVector::create(); col3->insert_value(3); block.insert({std::move(col3), type, - std::string(BeConsts::BLOCK_TEMP_COLUMN_PREFIX) + "another_col"}); + std::string(BeConsts::BLOCK_TEMP_COLUMN_PREFIX) + "another_col"}); } EXPECT_EQ(3, block.columns()); From 8b8728d8a153471afa4323f5fddf8f603f627263 Mon Sep 17 00:00:00 2001 From: yoruet <1559650411@qq.com> Date: Tue, 19 Nov 2024 21:40:37 +0800 Subject: [PATCH 10/41] update the comments of the block.h --- be/src/vec/core/block.h | 841 +++++++++++++++++++++++++++++++++++----- 1 file changed, 740 insertions(+), 101 deletions(-) diff --git a/be/src/vec/core/block.h b/be/src/vec/core/block.h index 703fa55f3c49be..53d670b812c4d4 100644 --- a/be/src/vec/core/block.h +++ b/be/src/vec/core/block.h @@ -95,9 +95,19 @@ class Block { Block(Block&& block) = default; Block& operator=(Block&& other) = default; - /// Reserve memory for internal containers + /** + * Reserves memory for the Block's internal data structures. + * This method pre-allocates memory for both the index_by_name map and data vector + * to avoid reallocations when inserting elements up to the specified count. + * + * @param count The number of elements to reserve space for + */ void reserve(size_t count); - /// Clear all column names and name index mappings in the block + /** + * Clears all column names from the Block. + * This method removes all name mappings from the index and clears the name field + * of each column entry, while preserving the actual column data. + */ void clear_names(); /// insert the column at the specified position @@ -124,7 +134,11 @@ class Block { std::swap(data, new_data); } - /// Initialize the index by name map + /** + * Initializes or rebuilds the name-to-position index mapping for all columns in the Block. + * Creates a mapping from column names to their positions in the data array. + * This method is typically called after bulk modifications to the Block's structure. + */ void initialize_index_by_name(); /// References are invalidated after calling functions above. @@ -135,23 +149,39 @@ class Block { } const ColumnWithTypeAndName& get_by_position(size_t position) const { return data[position]; } - /// Replace column at position with rvalue column pointer + /** + * Replace column at specified position with a new column. + * These are overloaded methods that handle both move and copy semantics. + * The column type and name remain unchanged, only the column data is replaced. + * + * @param position The position of the column to replace + * @param res The new column to replace with (either by rvalue reference or const reference) + */ void replace_by_position(size_t position, ColumnPtr&& res) { this->get_by_position(position).column = std::move(res); } - - /// Replace column at position with lvalue column pointer void replace_by_position(size_t position, const ColumnPtr& res) { this->get_by_position(position).column = res; } - /// Convert const column at position to full column if it is const + /** + * Convert const column at position to full column if it is const. + * This method checks if the column at the specified position is a const column, + * and if so, converts it to a full column with actual data. + * + * @param position The position of the column to check and potentially convert + */ void replace_by_position_if_const(size_t position) { auto& element = this->get_by_position(position); element.column = element.column->convert_to_full_column_if_const(); } - /// Convert all columns to new columns if they overflow + /** + * Convert all columns to new columns if they overflow. + * This method checks each column in the block for potential overflow conditions + * and converts them to appropriate new columns if necessary. + * The conversion is done in-place using move semantics for efficiency. + */ void replace_if_overflow() { for (auto& ele : data) { ele.column = std::move(*ele.column).mutate()->convert_column_if_overflow(); @@ -166,142 +196,353 @@ class Block { ColumnWithTypeAndName& get_by_name(const std::string& name); const ColumnWithTypeAndName& get_by_name(const std::string& name) const; - // return nullptr when no such column name + // get column by name, return nullptr when no such column name ColumnWithTypeAndName* try_get_by_name(const std::string& name); const ColumnWithTypeAndName* try_get_by_name(const std::string& name) const; - /// Get an iterator to the beginning of the data container Container::iterator begin() { return data.begin(); } - /// Get an iterator to the end of the data container Container::iterator end() { return data.end(); } - /// Get a constant iterator to the beginning of the data container Container::const_iterator begin() const { return data.begin(); } - /// Get a constant iterator to the end of the data container Container::const_iterator end() const { return data.end(); } - /// Get a constant iterator to the beginning of the data container Container::const_iterator cbegin() const { return data.cbegin(); } - /// Get a constant iterator to the end of the data container Container::const_iterator cend() const { return data.cend(); } - // check if the column name exists + /** + * Checks if a column with the specified name exists in the Block. + * Uses the index_by_name map for efficient lookup. + * + * @param name The name of the column to check for + * @return true if the column exists, false otherwise + */ bool has(const std::string& name) const; - // get the position of the column by name + /** + * Gets the position of a column by its name. + * Performs a lookup in the index_by_name map and throws an exception if the column is not found. + * + * @param name The name of the column to look up + * @return The position of the column in the Block + * @throws Exception if the column name is not found + */ size_t get_position_by_name(const std::string& name) const; - // get the columns with type and name + /** + * Returns a const reference to the internal data structure containing all columns with their types and names. + * Provides direct access to the Block's underlying column data structure. + * + * @return Const reference to ColumnsWithTypeAndName container holding all column information + */ const ColumnsWithTypeAndName& get_columns_with_type_and_name() const; - // Returns a vector containing all column names in the block + /** + * Returns a vector containing all column names in the Block. + * Creates a new vector with the names of all columns in their current order. + * Pre-reserves space for efficiency. + * + * @return Vector of strings containing all column names + */ std::vector get_names() const; - // Returns a vector containing all column data types in the block + /** + * Returns a vector containing all column data types in the Block. + * Creates a new vector with the data types of all columns in their current order. + * Pre-reserves space for efficiency. + * + * @return Vector of DataTypePtr containing all column data types + */ DataTypes get_data_types() const; - // Returns the data type of the column at the specified index + /** + * Returns the data type of the column at the specified index. + * Performs bounds checking using CHECK macro. + * + * @param index The index of the column whose data type is requested + * @return DataTypePtr pointing to the column's data type + * @throws CHECK failure if index is out of bounds + */ DataTypePtr get_data_type(size_t index) const { CHECK(index < data.size()); return data[index].type; } - /// Returns number of rows from first column in block, not equal to nullptr. If no columns, returns 0. + /** + * Returns the number of rows in the Block. + * Finds the first non-null column and returns its size. + * All columns in a valid Block should have the same number of rows. + * + * @return The number of rows in the Block, or 0 if no valid columns exist + */ size_t rows() const; - // Returns a string showing the size of each column, separated by ' | ' - // Returns -1 for null columns + /** + * Returns a string representation of each column's size in the Block. + * Format: "size1 | size2 | size3 | ..." + * Uses -1 to indicate null columns. + * Useful for debugging and logging purposes. + * + * @return A string showing the size of each column separated by " | " + */ std::string each_col_size() const; - // Cut the rows in block, use in LIMIT operation + /** + * Reduces the number of rows in the Block to the specified length. + * Only performs the operation if the current number of rows is greater than the target length. + * Shrinks all columns and adjusts row_same_bit vector accordingly. + * + * @param length The target number of rows + * Note: Does nothing if current rows <= length + */ void set_num_rows(size_t length); - // Skip the rows in block, use in OFFSET, LIMIT operation - void skip_num_rows(int64_t& offset); - - /// As the assumption we used around, the number of columns won't exceed int16 range. so no need to worry when we - /// assign it to int32. + /** + * Skips a specified number of rows from the beginning of the Block. + * If length >= current rows, clears the entire Block and updates remaining length. + * Otherwise, removes first 'length' rows and keeps the rest. + * + * @param length Input/Output parameter - number of rows to skip, updated with remaining rows to skip + */ + void skip_num_rows(int64_t& length); + + /** + * Returns the number of columns in the Block. + * As the assumption we used around, the number of columns won't exceed int16 range. + * So no need to worry when we assign it to int32. + * + * @return The number of columns in the Block as a uint32_t + */ uint32_t columns() const { return static_cast(data.size()); } - /// Checks that every column in block is not nullptr and has same number of elements. + /** + * Validates that all non-null columns in the Block have the same number of rows. + * Checks for consistency in the Block's structure. + * + * @param allow_null_columns If true, skips null columns during validation + * @throws Exception if any column is null (when not allowed) or if column sizes don't match + */ void check_number_of_rows(bool allow_null_columns = false) const; - /// Approximate number of bytes in memory - for profiling and limits. + /** + * Calculates the total memory size in bytes used by all columns in the Block. + * Throws an exception if any column is null. + * + * @return Total size in bytes of all columns + * @throws Exception if any column is null, with detailed error message listing all column names + */ size_t bytes() const; - /// Get a string with the size of each column in bytes. + /** + * Returns a string representation of the memory size of each column in the Block. + * Format: "column bytes: [,size1, size2, size3, ...]" + * Throws an exception if any column is null. + * + * @return Formatted string showing byte size of each column + * @throws Exception if any column is null, with detailed error message listing all column names + */ std::string columns_bytes() const; - /// Approximate number of allocated bytes in memory - for profiling and limits. + /** + * Calculates the total allocated memory in bytes for all non-null columns in the Block. + * Unlike bytes(), this method: + * 1. Skips null columns instead of throwing exception + * 2. Uses allocated_bytes() which may include memory reserved but not used + * + * @return Total allocated memory size in bytes for all non-null columns + */ size_t allocated_bytes() const; - /** Get a list of column names separated by commas. */ + /** + * Returns a comma-separated string of all column names in the Block. + * Format: "name1, name2, name3, ..." + * Used for debugging and error reporting. + * + * @return String containing all column names separated by commas + */ std::string dump_names() const; - /** Get a list of column types separated by commas. */ + /** + * Returns a comma-separated string of all column types in the Block. + * Format: "type1, type2, type3, ..." + * Used for debugging and error reporting. + * + * @return String containing all column data types separated by commas + */ std::string dump_types() const; - /** List of names, types and lengths of columns. Designed for debugging. */ + /** + * Returns a detailed string representation of the Block's structure. + * Each column's structure is dumped on a new line. + * Format: + * col1_structure, + * col2_structure, + * col3_structure, ... + * + * Uses each column's dump_structure() method for detailed information. + * + * @return Multi-line string containing detailed structure of all columns + */ std::string dump_structure() const; - /** Get the same block, but empty. */ + /** + * Creates a new Block with the same structure but no data. + * Clones all columns as empty columns while preserving types and names. + * Useful for creating a template Block with the same schema. + * + * @return A new Block with same structure but zero rows + */ Block clone_empty() const; - /// Returns a copy of all columns, converting const columns to full columns + /** + * Returns all columns in the Block as a vector of Column objects. + * Converts any const columns to their full (non-const) representation. + * + * @return Vector of Column objects, where const columns are converted to full columns + * Note: The returned columns are shared pointers to the actual column data + */ Columns get_columns() const; - /// Returns all columns and converts const columns to full columns in place + /** + * Returns all columns and converts const columns to full columns in-place. + * Unlike get_columns(), this method modifies the original Block's columns. + * + * @return Vector of Column objects after converting const columns + * Note: This method mutates the Block by converting const columns to full columns + */ Columns get_columns_and_convert(); - /// Set the columns of the block. + /** + * Sets the Block's columns to the provided columns. + * Replaces existing column data while maintaining types and names. + * + * @param columns Vector of Column objects to set + * @throws DCHECK failure if columns.size() < data.size() + * Note: Only updates column data, keeps original column types and names + */ void set_columns(const Columns& columns); - /// Clone the block with the specified columns. + /** + * Creates a new Block with given columns while keeping original types and names. + * These are overloaded methods that handle both move and copy semantics. + * + * @param columns Columns to use in new Block + * @return New Block with provided columns and original metadata + * @throws FATAL error if column count mismatch (only in const version) + */ Block clone_with_columns(const Columns& columns) const; - /// Clone the block with the specified column offset but without data. + Block clone_with_columns(MutableColumns&& columns) const; + + /** + * Creates a new Block with null columns but preserving types and names. + * Optionally allows selecting specific columns using column_offset. + * + * @param column_offset Optional vector of column indices to include + * If null, all columns are included + * @return New Block with null columns but original metadata + * Note: Resulting Block has same structure but no actual column data + */ Block clone_without_columns(const std::vector* column_offset = nullptr) const; - /** Get empty columns with the same types as in block. */ + /** + * Creates empty mutable columns with same structure as current Block. + * For each column: + * - If source column exists: creates empty clone + * - If source column is null: creates new empty column of same type + * + * @return Vector of empty MutableColumns matching Block's structure + * Note: Returns independent columns that can be modified without affecting original Block + */ MutableColumns clone_empty_columns() const; - /** Get columns from block for mutation. Columns in block will be nullptr. */ + /** + * Creates mutable columns from current Block's columns. + * For each column: + * - If source column exists: moves and mutates it + * - If source column is null: creates new empty column + * + * @return Vector of MutableColumns + * Note: This method modifies the original Block by moving out its columns + * Note: Returns columns that can be modified independently + */ MutableColumns mutate_columns(); - /** Replace columns in a block */ + /** + * Sets Block's columns using provided MutableColumns. + * Moves ownership of columns into Block. + * + * @param columns MutableColumns to move into Block + * @throws DCHECK failure if columns.size() < data.size() + * Note: Uses move semantics to avoid copying + * Note: Original columns vector will be modified (moved from) + */ void set_columns(MutableColumns&& columns); - Block clone_with_columns(MutableColumns&& columns) const; - /** Get a block with columns that have been rearranged in the order of their names. */ + /** + * Creates a new Block with columns sorted by their names. + * Uses index_by_name map to determine column order. + * + * @return New Block with same columns but sorted by column names + * Note: Original Block remains unchanged + * Note: Column data is shared with original Block + */ Block sort_columns() const; void clear(); void swap(Block& other) noexcept; void swap(Block&& other) noexcept; - // Shuffle columns in place based on the result_column_ids + /** + * Shuffle columns in place based on the result_column_ids. + * Creates a new temporary Block with reordered columns and swaps with current Block. + * + * @param result_column_ids Vector of column indices specifying new order + * Note: Modifies current Block's column order + * Note: Only keeps columns specified in result_column_ids + */ void shuffle_columns(const std::vector& result_column_ids); - // Default column size = -1 means clear all column in block - // Else clear column [0, column_size) delete column [column_size, data.size) + /** + * Clears all column data in the Block and optionally removes excess columns. + * + * @param column_size If >= 0, removes all columns beyond this index + * If -1, keeps all columns but clears their data + * + * Note: Skips memory checking during operation + * Note: Verifies each column has single reference before clearing + * Note: Clears row_same_bit vector + * Note: Uses move semantics for efficiency + */ void clear_column_data(int64_t column_size = -1) noexcept; - // Check if the block is not empty. + /** + * Checks if the block is not empty. + * + * @return True if the block is not empty, false otherwise + */ bool mem_reuse() { return !data.empty(); } - // Check if the block has no columns + /** + * Checks if the block has no columns. + * + * @return True if the block has no columns, false otherwise + */ bool is_empty_column() { return data.empty(); } - // Check if the block has no rows (i.e. all columns have 0 rows) - // This is different from is_empty_column() which checks for absence of columns + /** + * Checks if the block has no rows (i.e. all columns have 0 rows). + * This is different from is_empty_column() which checks for absence of columns. + * + * @return True if the block has no rows, false otherwise + */ bool empty() const { return rows() == 0; } - /** - * Updates SipHash of the Block, using update method of columns. - * Returns hash for block, that could be used to differentiate blocks - * with same structure, but different data. - */ + /** + * Updates SipHash of the Block, using update method of columns. + * Returns hash for block, that could be used to differentiate blocks + * with same structure, but different data. + */ void update_hash(SipHash& hash) const; - /** - * Get block data in string. - * If code is in default_implementation_for_nulls or something likely, type and column's nullity could - * temporarily be not same. set allow_null_mismatch to true to dump it correctly. - */ + /** + * Get block data in string. + * If code is in default_implementation_for_nulls or something likely, type and column's nullity could + * temporarily be not same. set allow_null_mismatch to true to dump it correctly. + */ std::string dump_data(size_t begin = 0, size_t row_limit = 100, bool allow_null_mismatch = false) const; @@ -311,14 +552,23 @@ class Block { return b.dump_data(0, b.rows()); } - /** Get one line data from block, only use in load data */ + // get one line data from block, only use in load data std::string dump_one_line(size_t row, int column_end) const; // copy a new block by the offset column Block copy_block(const std::vector& column_offset) const; - // appends selected rows from this block to destination block based on selector - // skips const columns during append operation + /** + * Appends selected rows from this Block to destination MutableBlock based on selector. + * + * @param dst Destination MutableBlock to append data to + * @param selector Vector specifying which rows to append + * @return Status::OK() if successful, error status otherwise + * + * Note: Skips const columns during append + * Note: Requires dst to have same number of columns as source + * Note: Uses RETURN_IF_CATCH_EXCEPTION for error handling + */ Status append_to_block_by_selector(MutableBlock* dst, const IColumn::Selector& selector) const; // need exception safety @@ -330,27 +580,116 @@ class Block { // need exception safety static void filter_block_internal(Block* block, const IColumn::Filter& filter); - // Filter block by specified columns using filter column + /** + * Filters block columns based on a filter column and specified columns to filter. + * Handles three types of filter columns: + * 1. Nullable columns - combines null map with filter + * 2. Const columns - clears all data if false + * 3. Regular UInt8 columns - uses directly as filter + * + * @param block Block to filter + * @param columns_to_filter Vector of column indices to apply filter to + * @param filter_column_id Index of column containing filter + * @param column_to_keep Number of columns to keep + * @return Status::OK() if successful, error status otherwise + * + * Note: Modifies block in place + * Note: Removes unnecessary columns after filtering + */ static Status filter_block(Block* block, const std::vector& columns_to_filter, size_t filter_column_id, size_t column_to_keep); - // Filter block using filter column + /** + * Simplified version of filter_block that filters first N columns. + * Creates a vector of column indices [0, column_to_keep) and calls main filter_block. + * + * @param block Block to filter + * @param filter_column_id Index of column containing filter + * @param column_to_keep Number of columns to keep + * @return Status::OK() if successful, error status otherwise + * + * Note: Convenience wrapper around main filter_block method + * Note: Filters columns in order from 0 to column_to_keep-1 + */ static Status filter_block(Block* block, size_t filter_column_id, size_t column_to_keep); - // Remove columns after column_to_keep + /** + * Remove columns after column_to_keep from the Block. + * Static helper method that wraps Block::erase_tail. + * + * @param block Pointer to Block to modify + * @param column_to_keep Number of columns to keep (removes all columns after this index) + * + * Note: Modifies block in place + * Note: Removes all columns with index >= column_to_keep + */ static void erase_useless_column(Block* block, size_t column_to_keep) { block->erase_tail(column_to_keep); } - // serialize block to PBlock + /** + * Serializes Block data into a PBlock (Protocol Buffer) format with optional compression. + * + * @param be_exec_version Backend execution version + * @param pblock Output Protocol Buffer block + * @param uncompressed_bytes Output parameter for uncompressed size + * @param compressed_bytes Output parameter for compressed size + * @param compression_type Type of compression to use + * @param allow_transfer_large_data Whether to allow blocks larger than 2GB + * + * @return Status::OK() if successful, error status otherwise + * + * Process: + * 1. Validates version and calculates uncompressed size + * 2. Serializes column metadata and data + * 3. Optionally compresses the data + * 4. Sets appropriate PBlock fields + * + * Note: Handles memory allocation failures + * Note: Supports various compression types + * Note: Has 2GB size limit by default + */ Status serialize(int be_exec_version, PBlock* pblock, size_t* uncompressed_bytes, size_t* compressed_bytes, segment_v2::CompressionTypePB compression_type, bool allow_transfer_large_data = false) const; - // deserialize block from PBlock + /** + * Deserializes a PBlock (Protocol Buffer) into this Block. + * Handles both compressed and uncompressed data. + * + * @param pblock Protocol Buffer block to deserialize + * @return Status::OK() if successful, error status otherwise + * + * Process: + * 1. Clears current Block data + * 2. Handles version compatibility + * 3. Decompresses data if needed + * 4. Deserializes column metadata and data + * 5. Rebuilds column index + * + * Supports: + * - Multiple compression types + * - Legacy snappy compression + * - Version compatibility + * - Memory allocation failure handling + */ Status deserialize(const PBlock& pblock); - // Create empty block with same schema + /** + * Creates a new Block with same structure but different size. + * + * @param size Number of rows for the new Block + * @param is_reserve If true, reserves space; if false, fills with default values + * @return Unique pointer to new Block with same structure + * + * Creates a Block that: + * - Has same column types and names + * - Either reserves space or contains default values + * - Is independent from original Block + * + * Note: Returns unique_ptr for automatic memory management + * Note: Column data is not copied, only structure is preserved + */ std::unique_ptr create_same_struct_block(size_t size, bool is_reserve = false) const; /** Compares (*this) n-th row and rhs m-th row. @@ -369,7 +708,24 @@ class Block { return compare_at(n, m, columns(), rhs, nan_direction_hint); } - // Compare rows by first num_columns columns in sequential order (from index 0 to num_columns - 1) + /** + * Compares rows by first num_columns columns in sequential order. + * + * @param n Row index in this Block + * @param m Row index in rhs Block + * @param num_columns Number of columns to compare + * @param rhs Block to compare against + * @param nan_direction_hint Direction for NaN comparison (-1: NaN is smallest, 1: NaN is largest) + * @return -1 if this < rhs, 0 if equal, 1 if this > rhs + * + * Checks: + * - Both blocks have enough columns + * - Row indices are valid + * - Column types match + * + * Note: Compares columns sequentially until difference found + * Note: Returns 0 if all specified columns are equal + */ int compare_at(size_t n, size_t m, size_t num_columns, const Block& rhs, int nan_direction_hint) const { DCHECK_GE(columns(), num_columns); @@ -388,7 +744,24 @@ class Block { return 0; } - // Compare rows by specified columns in compare_columns + /** + * Compare rows by specified columns in compare_columns vector. + * + * @param n Row index in this Block + * @param m Row index in rhs Block + * @param compare_columns Vector of column indices to compare + * @param rhs Block to compare against + * @param nan_direction_hint Direction for NaN comparison (-1: NaN is smallest, 1: NaN is largest) + * @return -1 if this < rhs, 0 if equal, 1 if this > rhs + * + * Checks: + * - Both blocks have enough columns + * - Row indices are valid + * - Column types match for specified columns + * + * Note: Compares only columns specified in compare_columns + * Note: Returns 0 if all specified columns are equal + */ int compare_at(size_t n, size_t m, const std::vector* compare_columns, const Block& rhs, int nan_direction_hint) const { DCHECK_GE(columns(), compare_columns->size()); @@ -407,8 +780,20 @@ class Block { return 0; } - //note(wb) no DCHECK here, because this method is only used after compare_at now, so no need to repeat check here. - // If this method is used in more places, you can add DCHECK case by case. + /** + * Compare single column values between two blocks at specified row positions. + * + * @param n Row index in this Block + * @param m Row index in rhs Block + * @param col_idx Index of column to compare + * @param rhs Block to compare against + * @param nan_direction_hint Direction for NaN comparison (-1: NaN is smallest, 1: NaN is largest) + * @return -1 if this < rhs, 0 if equal, 1 if this > rhs + * + * Note: No DCHECK here as this method is typically called after compare_at + * Note: Assumes column types match and indices are valid + * Note: Direct wrapper around column's compare_at method + */ int compare_column_at(size_t n, size_t m, size_t col_idx, const Block& rhs, int nan_direction_hint) const { auto res = get_by_position(col_idx).column->compare_at( @@ -419,14 +804,21 @@ class Block { // for String type or Array type void shrink_char_type_column_suffix_zero(const std::vector& char_type_idx); - // Get time spent on decompression in nanoseconds int64_t get_decompress_time() const { return _decompress_time_ns; } - // Get total bytes after decompression int64_t get_decompressed_bytes() const { return _decompressed_bytes; } - // Get time spent on compression in nanoseconds int64_t get_compress_time() const { return _compress_time_ns; } - // Set same bit flags for rows in block + /** + * Set same bit flags for rows in block. + * + * @param begin Iterator to start of source same bits + * @param end Iterator to end of source same bits + * + * Note: Appends same bits to row_same_bit vector + * Note: Verifies final size matches number of rows + * Note: Used to track which rows are identical + * Note: Important for optimization in data processing + */ void set_same_bit(std::vector::const_iterator begin, std::vector::const_iterator end) { row_same_bit.insert(row_same_bit.end(), begin, end); @@ -434,7 +826,17 @@ class Block { DCHECK_EQ(row_same_bit.size(), rows()); } - // Get same bit flag for specified row position + /** + * Get same bit flag for specified row position. + * + * @param position Row index to check + * @return true if row is marked as same, false if different or position invalid + * + * Note: Returns false for out of range positions + * Note: Used to check if row is identical to previous + * Note: Part of row deduplication optimization + * Note: Safe access with bounds checking + */ bool get_same_bit(size_t position) { if (position >= row_same_bit.size()) { return false; @@ -442,7 +844,12 @@ class Block { return row_same_bit[position]; } - // Clear all same bit flags + /** + * Clear all same bit flags. + * + * Note: Resets all flags to false + * Note: Used to reset row deduplication tracking + */ void clear_same_bit() { row_same_bit.clear(); } // return string contains use_count() of each columns @@ -479,6 +886,19 @@ class MutableBlock { IndexByName index_by_name; public: + /** + * Static factory method to create a MutableBlock from a Block pointer. + * + * @param block Pointer to source Block, can be nullptr + * @return MutableBlock instance + * - Empty MutableBlock if input is nullptr + * - MutableBlock containing source Block's data if input is valid + * + * Note: Handles nullptr gracefully + * Note: Uses MutableBlock constructor for valid input + * Note: Convenient way to create MutableBlock + * Note: Safe conversion from Block to MutableBlock + */ static MutableBlock build_mutable_block(Block* block) { return block == nullptr ? MutableBlock() : MutableBlock(block); } @@ -507,29 +927,89 @@ class MutableBlock { _names = std::move(m_block._names); initialize_index_by_name(); } - + /** + * Get number of rows in MutableBlock. + * Returns size of first non-null column, or 0 if all columns are null. + * + * @return Number of rows in block + * + * Note: All non-null columns should have same number of rows + * Note: Returns 0 for empty block or all null columns + */ size_t rows() const; + /** + * Get number of columns in MutableBlock. + * + * @return Number of columns in block + */ size_t columns() const { return _columns.size(); } + /** + * Check if MutableBlock is empty. + * + * @return true if no rows, false otherwise + * + * Note: Simple check for zero rows + */ bool empty() const { return rows() == 0; } + /** + * Get mutable columns of MutableBlock. + * + * @return Reference to mutable columns vector + */ MutableColumns& mutable_columns() { return _columns; } + /** + * Set mutable columns of MutableBlock. + * + * @param columns MutableColumns to set + */ void set_mutable_columns(MutableColumns&& columns) { _columns = std::move(columns); } + /** + * Get data types of MutableBlock. + * + * @return Reference to data types vector + */ DataTypes& data_types() { return _data_types; } + /** + * Get column by position. + * + * @param position Column index to get + * @return Reference to mutable column pointer + */ MutableColumnPtr& get_column_by_position(size_t position) { return _columns[position]; } const MutableColumnPtr& get_column_by_position(size_t position) const { return _columns[position]; } + /** + * Get data type by position. + * + * @param position Column index to get + * @return Reference to data type pointer + */ DataTypePtr& get_datatype_by_position(size_t position) { return _data_types[position]; } const DataTypePtr& get_datatype_by_position(size_t position) const { return _data_types[position]; } - // Compare rows by specified column + /** + * Compare values in a single column between two rows in the same block. + * + * @param n First row index to compare + * @param m Second row index to compare + * @param column_id Column index to compare + * @param nan_direction_hint Direction for NaN comparison (-1: NaN is smallest, 1: NaN is largest) + * @return -1 if row n < row m, 0 if equal, 1 if row n > row m + * + * Note: Checks indices validity with DCHECK + * Note: Compares values within the same column + * Note: Handles NaN values according to direction hint + * Note: const method guarantees no modification + */ int compare_one_column(size_t n, size_t m, size_t column_id, int nan_direction_hint) const { DCHECK_LE(column_id, columns()); DCHECK_LE(n, rows()); @@ -538,7 +1018,24 @@ class MutableBlock { return column->compare_at(n, m, *column, nan_direction_hint); } - // Compare rows by first num_columns columns in sequential order (from index 0 to num_columns - 1) + /** + * Compare rows by first num_columns columns in sequential order. + * + * @param n Row index in this MutableBlock + * @param m Row index in rhs MutableBlock + * @param num_columns Number of columns to compare + * @param rhs MutableBlock to compare against + * @param nan_direction_hint Direction for NaN comparison (-1: NaN is smallest, 1: NaN is largest) + * @return -1 if this < rhs, 0 if equal, 1 if this > rhs + * + * Checks: + * - Both blocks have enough columns + * - Row indices are valid + * - Column types match + * + * Note: Compares columns sequentially until difference found + * Note: Returns 0 if all specified columns are equal + */ int compare_at(size_t n, size_t m, size_t num_columns, const MutableBlock& rhs, int nan_direction_hint) const { DCHECK_GE(columns(), num_columns); @@ -557,7 +1054,24 @@ class MutableBlock { return 0; } - // Compare rows by specified columns in compare_columns + /** + * Compare rows by specified columns in compare_columns vector. + * + * @param n Row index in this MutableBlock + * @param m Row index in rhs MutableBlock + * @param compare_columns Vector of column indices to compare + * @param rhs MutableBlock to compare against + * @param nan_direction_hint Direction for NaN comparison (-1: NaN is smallest, 1: NaN is largest) + * @return -1 if this < rhs, 0 if equal, 1 if this > rhs + * + * Checks: + * - Both blocks have enough columns + * - Row indices are valid + * - Column types match for specified columns + * + * Note: Compares only columns specified in compare_columns + * Note: Returns 0 if all specified columns are equal + */ int compare_at(size_t n, size_t m, const std::vector* compare_columns, const MutableBlock& rhs, int nan_direction_hint) const { DCHECK_GE(columns(), compare_columns->size()); @@ -576,7 +1090,16 @@ class MutableBlock { return 0; } - // Get a string representation of the block's data types + /** + * Get a string representation of the block's data types. + * + * @return Comma-separated string of column data type names + * + * Note: Creates human-readable format + * Note: Adds commas between type names + * Note: No trailing comma + * Note: Empty string for empty block + */ std::string dump_types() const { std::string res; for (auto type : _data_types) { @@ -667,16 +1190,52 @@ class MutableBlock { return Status::OK(); } - // Move the data of columns to a block. This will invalidate the MutableBlock. + /** + * Convert MutableBlock to Block, optionally selecting a range of columns. + * + * First overload: + * @param start_column Starting column index + * @param end_column Ending column index (exclusive) + * @return Block containing specified range of columns + * + * Note: Moves column data to new Block + * Note: Original MutableBlock columns become invalid in range + * Note: Preserves column types and names + */ Block to_block(int start_column = 0); Block to_block(int start_column, int end_column); - // Swap the contents of two MutableBlocks + /** + * Swap contents with another MutableBlock. + * + * @param other MutableBlock to swap with + * Swaps all members between blocks using std::swap + * + * Note: Both are noexcept operations + * Note: SCOPED_SKIP_MEMORY_CHECK disables memory tracking + * Note: Efficiently transfers ownership of resources + * Note: Complete swap/move of all internal structures + */ void swap(MutableBlock& other) noexcept; - - // Move-swap the contents of two MutableBlocks void swap(MutableBlock&& other) noexcept; + /** + * Add a single row from source Block to this MutableBlock. + * + * @param block Pointer to source Block + * @param row Index of row to copy from source Block + * + * Note: Assumes compatible column structure between blocks + * Note: Copies data from specified row for all columns + * Note: Performs column-wise insertion + * Note: No size checks or type validation + * + * Important: + * - Caller must ensure block is not null + * - Caller must ensure row index is valid + * - Caller must ensure column types match + * - Caller must ensure column counts match + */ void add_row(const Block* block, int row); // Batch add row should return error status if allocate memory failed. Status add_rows(const Block* block, const uint32_t* row_begin, const uint32_t* row_end, @@ -684,10 +1243,37 @@ class MutableBlock { Status add_rows(const Block* block, size_t row_begin, size_t length); Status add_rows(const Block* block, const std::vector& rows); - /// Remove the column with the specified name + /** + * Remove a column by name from the MutableBlock. + * + * @param name Name of column to remove + * @throws Exception if column name not found + */ void erase(const String& name); - // Get a string representation of the block's data, limited to the specified number of rows + /** + * Generate a formatted string representation of the MutableBlock data. + * + * @param row_limit Maximum number of rows to display + * @return Formatted string with ASCII table representation + * + * Format: + * +------+------+------+ + * | Col1 | Col2 | Col3 | + * +------+------+------+ + * | val1 | val2 | val3 | + * +------+------+------+ + * + * Features: + * - Column headers with data types + * - Fixed width columns (min 15 chars) + * - Truncates long values with ... + * - Shows row count if limited + * - Handles empty columns + * + * Note: Used for debugging and data inspection + * Note: Formats data in a readable table structure + */ std::string dump_data(size_t row_limit = 100) const; // Clear the block's data @@ -702,10 +1288,40 @@ class MutableBlock { // reset columns by types and names. void reset_column_data() noexcept; - // Returns the total number of bytes allocated by all columns in the block + /** + * Calculate total memory allocated by all columns in MutableBlock. + * + * @return Total number of bytes allocated + * + * Features: + * - Sums allocated memory across all columns + * - Skips null columns + * - Includes both data and metadata memory + * - const method for safe memory inspection + * + * Note: Used for memory tracking and optimization + * Note: Only counts valid columns + * Note: Delegates to column's allocated_bytes() + * Note: Important for memory management + */ size_t allocated_bytes() const; - // Returns the approximate number of bytes in memory used by the block + /** + * Calculate approximate memory usage of all columns in MutableBlock. + * + * @return Approximate total bytes used by all columns + * + * Features: + * - Returns actual data size without allocations + * - Sums byte_size() of all columns + * - Quick memory usage estimation + * - const method for safe inspection + * + * Note: Different from allocated_bytes() which includes allocations + * Note: More lightweight than allocated_bytes() + * Note: Used for size estimation + * Note: Does not skip null columns + */ size_t bytes() const { size_t res = 0; for (const auto& elem : _columns) { @@ -715,16 +1331,39 @@ class MutableBlock { return res; } - // Get the names of the columns in the block + /** + * Get the names of the columns in the block. + * + * @return Reference to the vector of column names + */ std::vector& get_names() { return _names; } - // Check if the block contains a column with the specified name + /** + * Checks if a column with the specified name exists in the Block. + * Uses the index_by_name map for efficient lookup. + * + * @param name The name of the column to check for + * @return true if the column exists, false otherwise + */ bool has(const std::string& name) const; - // Get the position of the column with the specified name + /** + * Get column position by column name. + * + * @param name Column name to look up + * @return Zero-based position index of the column + * @throws Exception if column name not found + */ size_t get_position_by_name(const std::string& name) const; - /** Get a list of column names separated by commas. */ + /** + * Get a list of column names separated by commas. + * + * @return Comma-separated string of column names + * + * Note: Joins all column names with commas + * Note: Useful for debugging and logging + */ std::string dump_names() const; private: From cf8ab087d11f8d1dbd203756b390f949876a75ff Mon Sep 17 00:00:00 2001 From: yoruet <1559650411@qq.com> Date: Mon, 25 Nov 2024 15:25:44 +0800 Subject: [PATCH 11/41] add corner case for BlockTest.Constructor --- be/test/vec/core/block_test.cpp | 64 ++++++++++++++++++++++++++++++--- 1 file changed, 59 insertions(+), 5 deletions(-) diff --git a/be/test/vec/core/block_test.cpp b/be/test/vec/core/block_test.cpp index d4e303037673c6..72e5256c4b15c2 100644 --- a/be/test/vec/core/block_test.cpp +++ b/be/test/vec/core/block_test.cpp @@ -694,29 +694,83 @@ void serialize_and_deserialize_test_array() { } TEST(BlockTest, Constructor) { - // default constructor + // Test empty block constructor { vectorized::Block block; EXPECT_EQ(0, block.columns()); EXPECT_EQ(0, block.rows()); + EXPECT_TRUE(block.empty()); } - // constructor with initializer_list + // Test constructor with regular Int32 columns { auto col = vectorized::ColumnVector::create(); vectorized::DataTypePtr type(std::make_shared()); vectorized::Block block({{col->get_ptr(), type, "col1"}, {col->get_ptr(), type, "col2"}}); EXPECT_EQ(2, block.columns()); + EXPECT_EQ(0,block.rows()); + EXPECT_TRUE(block.empty()); } - // constructor with ColumnsWithTypeAndName + // Test constructor with const column { - vectorized::ColumnsWithTypeAndName columns; auto col = vectorized::ColumnVector::create(); + col->insert_value(42); + auto const_col = vectorized::ColumnConst::create(col->get_ptr(), 10); vectorized::DataTypePtr type(std::make_shared()); - columns.emplace_back(col->get_ptr(), type, "col1"); + vectorized::Block block({{const_col->get_ptr(), type, "const_col"}}); + EXPECT_EQ(1, block.columns()); + EXPECT_EQ(10, block.rows()); + } + + // Test constructor with nullable column + { + auto col = vectorized::ColumnVector::create(); + auto nullable_col = vectorized::make_nullable(col->get_ptr()); + auto nullable_type = vectorized::make_nullable( + std::make_shared()); + vectorized::Block block({{nullable_col, nullable_type, "nullable_col"}}); + EXPECT_EQ(1, block.columns()); + } + + // Test constructor with mixed column types + { + vectorized::ColumnsWithTypeAndName columns; + + // Regular column + auto regular_col = vectorized::ColumnVector::create(); + auto regular_type = std::make_shared(); + columns.emplace_back(regular_col->get_ptr(), regular_type, "regular_col"); + + // Const column + auto const_base = vectorized::ColumnVector::create(); + const_base->insert_value(42); + auto const_col = vectorized::ColumnConst::create(const_base->get_ptr(), 10); + columns.emplace_back(const_col->get_ptr(), regular_type, "const_col"); + + // Nullable column + auto nullable_col = vectorized::make_nullable(regular_col->get_ptr()); + auto nullable_type = vectorized::make_nullable(regular_type); + columns.emplace_back(nullable_col, nullable_type, "nullable_col"); + vectorized::Block block(columns); + EXPECT_EQ(3, block.columns()); + } + + // Test constructor with empty columns + { + vectorized::ColumnsWithTypeAndName columns; + vectorized::Block block(columns); + EXPECT_EQ(0, block.columns()); + EXPECT_TRUE(block.empty()); + } + + // Test constructor with nullptr column (should handle gracefully) + { + auto type = std::make_shared(); + vectorized::Block block({{nullptr, type, "null_col"}}); EXPECT_EQ(1, block.columns()); + EXPECT_EQ(0, block.rows()); } } From cf88247752f4d67ffc7901c0d7139e4c0e07ffdf Mon Sep 17 00:00:00 2001 From: yoruet <1559650411@qq.com> Date: Mon, 25 Nov 2024 16:14:42 +0800 Subject: [PATCH 12/41] add corner cases for BlockTest.BasicOperations --- be/test/vec/core/block_test.cpp | 282 ++++++++++++++++++++++++++------ 1 file changed, 228 insertions(+), 54 deletions(-) diff --git a/be/test/vec/core/block_test.cpp b/be/test/vec/core/block_test.cpp index 72e5256c4b15c2..060ac7460c39d6 100644 --- a/be/test/vec/core/block_test.cpp +++ b/be/test/vec/core/block_test.cpp @@ -775,71 +775,245 @@ TEST(BlockTest, Constructor) { } TEST(BlockTest, BasicOperations) { - vectorized::Block block; - auto col1 = vectorized::ColumnVector::create(); - auto col2 = vectorized::ColumnVector::create(); - auto col3 = vectorized::ColumnVector::create(); - vectorized::DataTypePtr type(std::make_shared()); + // Test with empty block + { + vectorized::Block empty_block; + EXPECT_NO_THROW(empty_block.clear()); + EXPECT_NO_THROW(empty_block.clear_names()); + EXPECT_NO_THROW(empty_block.reserve(0)); + } - // test reserve - block.reserve(3); + // Test with regular columns + { + vectorized::Block block; + auto col1 = vectorized::ColumnVector::create(); + auto col2 = vectorized::ColumnVector::create(); + auto col3 = vectorized::ColumnVector::create(); + vectorized::DataTypePtr type(std::make_shared()); - // test insert at end - block.insert({col1->get_ptr(), type, "col1"}); - EXPECT_EQ(1, block.columns()); - EXPECT_EQ("col1", block.get_by_position(0).name); + // Test reserve with different sizes + EXPECT_NO_THROW(block.reserve(0)); + EXPECT_NO_THROW(block.reserve(100)); + block.reserve(3); + block.insert({col1->get_ptr(), type, "col1"}); + block.insert({col2->get_ptr(), type, "col2"}); + block.insert({col3->get_ptr(), type, "col3"}); + EXPECT_EQ(3, block.columns()); - block.insert({col3->get_ptr(), type, "col3"}); - EXPECT_EQ(2, block.columns()); - EXPECT_EQ("col3", block.get_by_position(1).name); + // Test clear_names + block.clear_names(); + EXPECT_EQ("", block.get_by_position(0).name); + EXPECT_EQ("", block.get_by_position(1).name); + EXPECT_EQ("", block.get_by_position(2).name); - // test insert at position - block.insert(1, {col2->get_ptr(), type, "col2"}); - EXPECT_EQ(3, block.columns()); - EXPECT_EQ("col2", block.get_by_position(1).name); + // Test clear + block.clear(); + EXPECT_EQ(0, block.columns()); + EXPECT_TRUE(block.empty()); - // test erase by position - block.erase(1); // Remove col2 - EXPECT_EQ(2, block.columns()); - EXPECT_EQ("col1", block.get_by_position(0).name); - EXPECT_EQ("col3", block.get_by_position(1).name); + // Test insert operations + // Insert at end + block.insert({col1->get_ptr(), type, "col1"}); + EXPECT_EQ(1, block.columns()); + EXPECT_EQ("col1", block.get_by_position(0).name); - // test erase_tail - block.insert(1, {col2->get_ptr(), type, "col2"}); - block.erase_tail(1); // Remove col2 and col3 - EXPECT_EQ(1, block.columns()); - EXPECT_EQ("col1", block.get_by_position(0).name); + // Insert duplicate name (should update existing) + block.insert({col2->get_ptr(), type, "col1"}); + EXPECT_EQ(2, block.columns()); - // test erase by set of positions - block.insert({col2->get_ptr(), type, "col2"}); - block.insert({col3->get_ptr(), type, "col3"}); - std::set positions_to_remove = {0, 2}; // Remove col1 and col3 - block.erase(positions_to_remove); - EXPECT_EQ(1, block.columns()); - EXPECT_EQ("col2", block.get_by_position(0).name); + // Insert at specific position + block.insert(0, {col3->get_ptr(), type, "col0"}); + EXPECT_EQ(3, block.columns()); - // test erase by name - block.erase("col2"); - EXPECT_EQ(0, block.columns()); + // Insert at invalid position + EXPECT_THROW(block.insert(10, {col3->get_ptr(), type, "col3"}), Exception); - // test erase_not_in - block.insert({col1->get_ptr(), type, "col1"}); - block.insert({col2->get_ptr(), type, "col2"}); - block.insert({col3->get_ptr(), type, "col3"}); - std::vector columns_to_keep = {0, 2}; // Keep col1 and col3 - block.erase_not_in(columns_to_keep); - EXPECT_EQ(2, block.columns()); - EXPECT_EQ("col1", block.get_by_position(0).name); - EXPECT_EQ("col3", block.get_by_position(1).name); + // Insert nullptr column + EXPECT_NO_THROW(block.insert({nullptr, type, "null_col"})); - // test clear_names - block.clear_names(); - EXPECT_EQ("", block.get_by_position(0).name); - EXPECT_EQ("", block.get_by_position(1).name); + // Test erase operations + // Erase by position + block.erase(0); + EXPECT_EQ(3, block.columns()); - // test clear - block.clear(); - EXPECT_EQ(0, block.columns()); + // Erase by name + block.erase("col1"); + EXPECT_EQ(2, block.columns()); + + // Erase set of positions + std::set positions = {0}; + block.erase(positions); + EXPECT_EQ(1, block.columns()); + + // Erase by invalid name + EXPECT_THROW(block.erase("non_existent"), Exception); + + // Erase with erase_not_in + std::vector empty_vec; + EXPECT_NO_THROW(block.erase_not_in(empty_vec)); + EXPECT_EQ(0, block.columns()); + } + + // Test with const columns + { + vectorized::Block block; + vectorized::DataTypePtr type(std::make_shared()); + + // Create multiple const columns + auto base_col1 = vectorized::ColumnVector::create(); + base_col1->insert_value(42); + auto const_col1 = vectorized::ColumnConst::create(base_col1->get_ptr(), 10); + block.insert({const_col1->get_ptr(), type, "const_col1"}); + + auto base_col2 = vectorized::ColumnVector::create(); + base_col2->insert_value(24); + auto const_col2 = vectorized::ColumnConst::create(base_col2->get_ptr(), 5); + block.insert({const_col2->get_ptr(), type, "const_col2"}); + + auto base_col3 = vectorized::ColumnVector::create(); + base_col3->insert_value(33); + auto const_col3 = vectorized::ColumnConst::create(base_col3->get_ptr(), 8); + block.insert({const_col3->get_ptr(), type, "const_col3"}); + + EXPECT_EQ(3, block.columns()); + EXPECT_EQ(10, block.rows()); + + // Test clear_names with const columns + block.clear_names(); + EXPECT_EQ("", block.get_by_position(0).name); + EXPECT_EQ("", block.get_by_position(1).name); + EXPECT_EQ("", block.get_by_position(2).name); + EXPECT_EQ(3, block.columns()); + EXPECT_EQ(10, block.rows()); + + // Test clear with const columns + block.clear(); + EXPECT_EQ(0, block.columns()); + EXPECT_EQ(0, block.rows()); + EXPECT_TRUE(block.empty()); + + // Test insert operations + // Insert at end + block.insert({const_col1->get_ptr(), type, "const_col1"}); + EXPECT_EQ(1, block.columns()); + EXPECT_EQ(10, block.rows()); + EXPECT_EQ("const_col1", block.get_by_position(0).name); + + // Insert duplicate name + block.insert({const_col2->get_ptr(), type, "const_col1"}); + EXPECT_EQ(2, block.columns()); + EXPECT_EQ(10, block.rows()); + + // Insert at specific position + block.insert(1, {const_col3->get_ptr(), type, "const_col3"}); + EXPECT_EQ(3, block.columns()); + EXPECT_EQ("const_col3", block.get_by_position(1).name); + + // Insert at invalid position + EXPECT_THROW(block.insert(10, {const_col1->get_ptr(), type, "invalid"}), Exception); + + // Insert nullptr column + EXPECT_NO_THROW(block.insert({nullptr, type, "null_col"})); + + // Test erase operations + // Erase by position + block.erase(0); + EXPECT_EQ(3, block.columns()); + + // Erase by name + block.erase("const_col3"); + EXPECT_EQ(2, block.columns()); + + // Erase set of positions + std::set positions = {0}; + block.erase(positions); + EXPECT_EQ(1, block.columns()); + + // Erase by invalid name + EXPECT_THROW(block.erase("non_existent"), Exception); + + // Erase with erase_not_in + std::vector empty_vec; + EXPECT_NO_THROW(block.erase_not_in(empty_vec)); + EXPECT_EQ(0, block.columns()); + } + + // Test with nullable columns + { + vectorized::Block block; + vectorized::DataTypePtr base_type(std::make_shared()); + auto nullable_type = vectorized::make_nullable(base_type); + + // Create multiple nullable columns + auto col1 = vectorized::ColumnVector::create(); + auto nullable_col1 = vectorized::make_nullable(col1->get_ptr()); + block.insert({nullable_col1, nullable_type, "nullable_col1"}); + + auto col2 = vectorized::ColumnVector::create(); + auto nullable_col2 = vectorized::make_nullable(col2->get_ptr()); + block.insert({nullable_col2, nullable_type, "nullable_col2"}); + + auto col3 = vectorized::ColumnVector::create(); + auto nullable_col3 = vectorized::make_nullable(col3->get_ptr()); + block.insert({nullable_col3, nullable_type, "nullable_col3"}); + + EXPECT_EQ(3, block.columns()); + + // Test clear_names with nullable columns + block.clear_names(); + EXPECT_EQ("", block.get_by_position(0).name); + EXPECT_EQ("", block.get_by_position(1).name); + EXPECT_EQ("", block.get_by_position(2).name); + EXPECT_EQ(3, block.columns()); + + // Test clear with nullable columns + block.clear(); + EXPECT_EQ(0, block.columns()); + EXPECT_TRUE(block.empty()); + + // Test insert operations + // Insert at end + block.insert({nullable_col1, nullable_type, "nullable_col1"}); + EXPECT_EQ(1, block.columns()); + EXPECT_EQ("nullable_col1", block.get_by_position(0).name); + + // Insert duplicate name + block.insert({nullable_col2, nullable_type, "nullable_col1"}); + EXPECT_EQ(2, block.columns()); + + // Insert at specific position + block.insert(1, {nullable_col3, nullable_type, "nullable_col3"}); + EXPECT_EQ(3, block.columns()); + EXPECT_EQ("nullable_col3", block.get_by_position(1).name); + + // Insert at invalid position + EXPECT_THROW(block.insert(10, {nullable_col1, nullable_type, "invalid"}), Exception); + + // Insert nullptr column + EXPECT_NO_THROW(block.insert({nullptr, nullable_type, "null_col"})); + + // Test erase operations + // Erase by position + block.erase(0); + EXPECT_EQ(3, block.columns()); + + // Erase by name + block.erase("nullable_col3"); + EXPECT_EQ(2, block.columns()); + + // Erase set of positions + std::set positions = {0}; + block.erase(positions); + EXPECT_EQ(1, block.columns()); + + // Erase by invalid name + EXPECT_THROW(block.erase("non_existent"), Exception); + + // Erase with erase_not_in + std::vector empty_vec; + EXPECT_NO_THROW(block.erase_not_in(empty_vec)); + EXPECT_EQ(0, block.columns()); + } } TEST(BlockTest, ColumnOperations) { From a44abe12f71f64e9dea53172272c46412a0b7b37 Mon Sep 17 00:00:00 2001 From: yoruet <1559650411@qq.com> Date: Mon, 25 Nov 2024 16:32:03 +0800 Subject: [PATCH 13/41] Refactor and format block_test.cpp for improved readability and consistency --- be/test/vec/core/block_test.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/be/test/vec/core/block_test.cpp b/be/test/vec/core/block_test.cpp index 060ac7460c39d6..4ab2acc74d4a65 100644 --- a/be/test/vec/core/block_test.cpp +++ b/be/test/vec/core/block_test.cpp @@ -708,7 +708,7 @@ TEST(BlockTest, Constructor) { vectorized::DataTypePtr type(std::make_shared()); vectorized::Block block({{col->get_ptr(), type, "col1"}, {col->get_ptr(), type, "col2"}}); EXPECT_EQ(2, block.columns()); - EXPECT_EQ(0,block.rows()); + EXPECT_EQ(0, block.rows()); EXPECT_TRUE(block.empty()); } @@ -727,8 +727,8 @@ TEST(BlockTest, Constructor) { { auto col = vectorized::ColumnVector::create(); auto nullable_col = vectorized::make_nullable(col->get_ptr()); - auto nullable_type = vectorized::make_nullable( - std::make_shared()); + auto nullable_type = + vectorized::make_nullable(std::make_shared()); vectorized::Block block({{nullable_col, nullable_type, "nullable_col"}}); EXPECT_EQ(1, block.columns()); } @@ -736,18 +736,18 @@ TEST(BlockTest, Constructor) { // Test constructor with mixed column types { vectorized::ColumnsWithTypeAndName columns; - + // Regular column auto regular_col = vectorized::ColumnVector::create(); auto regular_type = std::make_shared(); columns.emplace_back(regular_col->get_ptr(), regular_type, "regular_col"); - + // Const column auto const_base = vectorized::ColumnVector::create(); const_base->insert_value(42); auto const_col = vectorized::ColumnConst::create(const_base->get_ptr(), 10); columns.emplace_back(const_col->get_ptr(), regular_type, "const_col"); - + // Nullable column auto nullable_col = vectorized::make_nullable(regular_col->get_ptr()); auto nullable_type = vectorized::make_nullable(regular_type); From 77694e3e28d1755fd6cc46103d9198b5e3cb743d Mon Sep 17 00:00:00 2001 From: yoruet <1559650411@qq.com> Date: Mon, 25 Nov 2024 16:33:26 +0800 Subject: [PATCH 14/41] Remove unnecessary whitespace in block_test.cpp to enhance code clarity --- be/test/vec/core/block_test.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/be/test/vec/core/block_test.cpp b/be/test/vec/core/block_test.cpp index 4ab2acc74d4a65..87613d0e845c9b 100644 --- a/be/test/vec/core/block_test.cpp +++ b/be/test/vec/core/block_test.cpp @@ -747,7 +747,7 @@ TEST(BlockTest, Constructor) { const_base->insert_value(42); auto const_col = vectorized::ColumnConst::create(const_base->get_ptr(), 10); columns.emplace_back(const_col->get_ptr(), regular_type, "const_col"); - + // Nullable column auto nullable_col = vectorized::make_nullable(regular_col->get_ptr()); auto nullable_type = vectorized::make_nullable(regular_type); From 07712ac0b33cde42934fe40a96e23d6d187f77b8 Mon Sep 17 00:00:00 2001 From: yoruet <1559650411@qq.com> Date: Tue, 26 Nov 2024 19:56:49 +0800 Subject: [PATCH 15/41] Add death tests for invalid erase operations in BlockTest --- be/test/vec/core/block_test.cpp | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/be/test/vec/core/block_test.cpp b/be/test/vec/core/block_test.cpp index 87613d0e845c9b..b5d84679fcb966 100644 --- a/be/test/vec/core/block_test.cpp +++ b/be/test/vec/core/block_test.cpp @@ -18,6 +18,7 @@ #include "vec/core/block.h" #include +#include #include #include @@ -781,6 +782,7 @@ TEST(BlockTest, BasicOperations) { EXPECT_NO_THROW(empty_block.clear()); EXPECT_NO_THROW(empty_block.clear_names()); EXPECT_NO_THROW(empty_block.reserve(0)); + EXPECT_DEATH(empty_block.erase(0), "Block is empty"); } // Test with regular columns @@ -817,7 +819,7 @@ TEST(BlockTest, BasicOperations) { EXPECT_EQ(1, block.columns()); EXPECT_EQ("col1", block.get_by_position(0).name); - // Insert duplicate name (should update existing) + // Insert duplicate name block.insert({col2->get_ptr(), type, "col1"}); EXPECT_EQ(2, block.columns()); @@ -848,6 +850,9 @@ TEST(BlockTest, BasicOperations) { // Erase by invalid name EXPECT_THROW(block.erase("non_existent"), Exception); + // Erase by invalid position + EXPECT_DEATH(block.erase(10), "Position out of bound in Block::erase"); + // Erase with erase_not_in std::vector empty_vec; EXPECT_NO_THROW(block.erase_not_in(empty_vec)); @@ -932,6 +937,9 @@ TEST(BlockTest, BasicOperations) { // Erase by invalid name EXPECT_THROW(block.erase("non_existent"), Exception); + // Erase by invalid position + EXPECT_DEATH(block.erase(10), "Position out of bound in Block::erase"); + // Erase with erase_not_in std::vector empty_vec; EXPECT_NO_THROW(block.erase_not_in(empty_vec)); @@ -1009,6 +1017,9 @@ TEST(BlockTest, BasicOperations) { // Erase by invalid name EXPECT_THROW(block.erase("non_existent"), Exception); + // Erase by invalid position + EXPECT_DEATH(block.erase(10), "Position out of bound in Block::erase"); + // Erase with erase_not_in std::vector empty_vec; EXPECT_NO_THROW(block.erase_not_in(empty_vec)); From a717ea7f0ebcc1606f53c670c2a3a6ea748ee950 Mon Sep 17 00:00:00 2001 From: yoruet <1559650411@qq.com> Date: Tue, 26 Nov 2024 21:54:04 +0800 Subject: [PATCH 16/41] Enhance BlockTest with comprehensive tests for empty, const, and nullable columns; add sorting functionality tests. This update includes death tests for invalid operations and ensures proper handling of various column types. --- be/test/vec/core/block_test.cpp | 550 ++++++++++++++++++++++++-------- 1 file changed, 417 insertions(+), 133 deletions(-) diff --git a/be/test/vec/core/block_test.cpp b/be/test/vec/core/block_test.cpp index b5d84679fcb966..202e962fe85cab 100644 --- a/be/test/vec/core/block_test.cpp +++ b/be/test/vec/core/block_test.cpp @@ -1028,160 +1028,444 @@ TEST(BlockTest, BasicOperations) { } TEST(BlockTest, ColumnOperations) { - vectorized::Block block; - auto col1 = vectorized::ColumnVector::create(); - auto col2 = vectorized::ColumnVector::create(); - auto col3 = vectorized::ColumnVector::create(); - vectorized::DataTypePtr type(std::make_shared()); + // Test with empty block + { + vectorized::Block empty_block; + + // Test get operations with empty block + EXPECT_DEATH(empty_block.get_by_position(0), ""); + EXPECT_THROW(empty_block.safe_get_by_position(0), Exception); + EXPECT_THROW(empty_block.get_by_name("non_existent"), Exception); + EXPECT_EQ(nullptr, empty_block.try_get_by_name("non_existent")); + + // Test has + EXPECT_FALSE(empty_block.has("non_existent")); + + // Test get_position_by_name + EXPECT_THROW(empty_block.get_position_by_name("non_existent"), Exception); + + // Test get_names + auto names = empty_block.get_names(); + EXPECT_EQ(0, names.size()); + + // Test get_data_types + auto types = empty_block.get_data_types(); + EXPECT_EQ(0, types.size()); + + // Test replace_by_position + auto col = vectorized::ColumnVector::create(); + vectorized::DataTypePtr type(std::make_shared()); + EXPECT_DEATH(empty_block.replace_by_position(0, col->get_ptr()), ""); + + // Test replace_by_position_if_const + EXPECT_DEATH(empty_block.replace_by_position_if_const(0), ""); - // Setup test data - block.insert({col1->get_ptr(), type, "col1"}); - block.insert({col2->get_ptr(), type, "col2"}); - block.insert({col3->get_ptr(), type, "col3"}); + // Test get_columns_with_type_and_name + const auto& columns = empty_block.get_columns_with_type_and_name(); + EXPECT_EQ(0, columns.size()); + } - // Test get_by_position - EXPECT_EQ("col1", block.get_by_position(0).name); - EXPECT_EQ("col2", block.get_by_position(1).name); - EXPECT_EQ("col3", block.get_by_position(2).name); + // Test with regular columns + { + vectorized::Block block; + auto col1 = vectorized::ColumnVector::create(); + auto col2 = vectorized::ColumnVector::create(); + auto col3 = vectorized::ColumnVector::create(); + vectorized::DataTypePtr type(std::make_shared()); - // Test safe_get_by_position - EXPECT_EQ("col1", block.safe_get_by_position(0).name); - EXPECT_THROW(block.safe_get_by_position(10), Exception); + // Setup test data + block.insert({col1->get_ptr(), type, "col1"}); + block.insert({col2->get_ptr(), type, "col2"}); + block.insert({col3->get_ptr(), type, "col3"}); - // Test get_by_name - EXPECT_EQ("col1", block.get_by_name("col1").name); - EXPECT_THROW(block.get_by_name("non_existent"), Exception); + // Test get_by_position + EXPECT_EQ("col1", block.get_by_position(0).name); + EXPECT_EQ("col2", block.get_by_position(1).name); + EXPECT_EQ("col3", block.get_by_position(2).name); + EXPECT_DEATH(block.get_by_position(3), ""); + + // Test safe_get_by_position + EXPECT_EQ("col1", block.safe_get_by_position(0).name); + EXPECT_THROW(block.safe_get_by_position(10), Exception); + + // Test get_by_name + EXPECT_EQ("col1", block.get_by_name("col1").name); + EXPECT_THROW(block.get_by_name("non_existent"), Exception); + + // Test try_get_by_name + EXPECT_NE(nullptr, block.try_get_by_name("col1")); + EXPECT_EQ(nullptr, block.try_get_by_name("non_existent")); + + // Test has + EXPECT_TRUE(block.has("col1")); + EXPECT_FALSE(block.has("non_existent")); + + // Test get_position_by_name + EXPECT_EQ(0, block.get_position_by_name("col1")); + EXPECT_EQ(1, block.get_position_by_name("col2")); + EXPECT_THROW(block.get_position_by_name("non_existent"), Exception); + + // Test get_names + auto names = block.get_names(); + EXPECT_EQ(3, names.size()); + EXPECT_EQ("col1", names[0]); + EXPECT_EQ("col2", names[1]); + EXPECT_EQ("col3", names[2]); + + // Test get_data_type + EXPECT_EQ(type, block.get_data_type(0)); + EXPECT_EQ(type, block.get_data_type(1)); + EXPECT_EQ(type, block.get_data_type(2)); + + // Test get_data_types + auto types = block.get_data_types(); + EXPECT_EQ(3, types.size()); + for (const auto& t : types) { + EXPECT_EQ(type, t); + } - // Test try_get_by_name - EXPECT_NE(nullptr, block.try_get_by_name("col1")); - EXPECT_EQ(nullptr, block.try_get_by_name("non_existent")); + // Test replace_by_position + auto new_col = vectorized::ColumnVector::create(); + block.replace_by_position(0, new_col->get_ptr()); + EXPECT_EQ(0, block.get_by_position(0).column->size()); + EXPECT_DEATH(block.replace_by_position(10, new_col->get_ptr()), ""); - // Test has - EXPECT_TRUE(block.has("col1")); - EXPECT_FALSE(block.has("non_existent")); + // Test replace_by_position_if_const + auto const_col = vectorized::ColumnVector::create(); + const_col->insert_value(1); + auto const_column = vectorized::ColumnConst::create(const_col->get_ptr(), 1); + block.replace_by_position(2, const_column->get_ptr()); - // Test get_position_by_name - EXPECT_EQ(0, block.get_position_by_name("col1")); - EXPECT_EQ(1, block.get_position_by_name("col2")); - EXPECT_THROW(block.get_position_by_name("non_existent"), Exception); - - // Test get_names - auto names = block.get_names(); - EXPECT_EQ(3, names.size()); - EXPECT_EQ("col1", names[0]); - EXPECT_EQ("col2", names[1]); - EXPECT_EQ("col3", names[2]); - - // Test get_data_type - EXPECT_EQ(type, block.get_data_type(0)); - EXPECT_EQ(type, block.get_data_type(1)); - EXPECT_EQ(type, block.get_data_type(2)); - - // Test get_data_types - auto types = block.get_data_types(); - EXPECT_EQ(3, types.size()); - for (const auto& t : types) { - EXPECT_EQ(type, t); - } - - // Test replace_by_position - auto new_col = vectorized::ColumnVector::create(); - block.replace_by_position(0, new_col->get_ptr()); - EXPECT_EQ(0, block.get_by_position(0).column->size()); - - // Test replace_by_position with rvalue - auto another_col = vectorized::ColumnVector::create(); - block.replace_by_position(1, another_col->get_ptr()); - EXPECT_EQ(0, block.get_by_position(1).column->size()); - - // Test replace_by_position_if_const - auto const_col = vectorized::ColumnVector::create(); - const_col->insert_value(1); - auto const_column = vectorized::ColumnConst::create(const_col->get_ptr(), 1); - block.replace_by_position(2, const_column->get_ptr()); - - // Verify it's const column before replacement - EXPECT_NE(nullptr, - typeid_cast(block.get_by_position(2).column.get())); - - // Replace const column with full column - block.replace_by_position_if_const(2); - - // Verify it's no longer const column after replacement - EXPECT_EQ(nullptr, - typeid_cast(block.get_by_position(2).column.get())); - - // Test iterator functionality - size_t count = 0; - for (const auto& col : block) { - EXPECT_EQ(type, col.type); - count++; - } - EXPECT_EQ(3, count); - - // Test const iterator functionality - const auto& const_block = block; - count = 0; - for (const auto& col : const_block) { - EXPECT_EQ(type, col.type); - count++; - } - EXPECT_EQ(3, count); - - // Test get_columns_with_type_and_name - const auto& columns = block.get_columns_with_type_and_name(); - EXPECT_EQ(3, columns.size()); - EXPECT_EQ("col1", columns[0].name); - EXPECT_EQ("col2", columns[1].name); - EXPECT_EQ("col3", columns[2].name); - - // Test sort_columns - { - vectorized::Block unsorted_block; - auto type = std::make_shared(); + // Verify it's const column before replacement + EXPECT_NE(nullptr, + typeid_cast(block.get_by_position(2).column.get())); - // Insert columns in random order - { - auto col_c = vectorized::ColumnVector::create(); - unsorted_block.insert({std::move(col_c), type, "c"}); - } - { - auto col_a = vectorized::ColumnVector::create(); - unsorted_block.insert({std::move(col_a), type, "a"}); + // Replace const column with full column + block.replace_by_position_if_const(2); + EXPECT_DEATH(block.replace_by_position_if_const(10), ""); + + // Verify it's no longer const column after replacement + EXPECT_EQ(nullptr, + typeid_cast(block.get_by_position(2).column.get())); + + // Test get_columns_with_type_and_name + const auto& columns = block.get_columns_with_type_and_name(); + EXPECT_EQ(3, columns.size()); + EXPECT_EQ("col1", columns[0].name); + EXPECT_EQ("col2", columns[1].name); + EXPECT_EQ("col3", columns[2].name); + } + + // Test with const columns + { + vectorized::Block block; + vectorized::DataTypePtr type(std::make_shared()); + + // Create and insert const columns + auto base_col1 = vectorized::ColumnVector::create(); + base_col1->insert_value(42); + auto const_col1 = vectorized::ColumnConst::create(base_col1->get_ptr(), 10); + block.insert({const_col1->get_ptr(), type, "const_col1"}); + + auto base_col2 = vectorized::ColumnVector::create(); + base_col2->insert_value(24); + auto const_col2 = vectorized::ColumnConst::create(base_col2->get_ptr(), 5); + block.insert({const_col2->get_ptr(), type, "const_col2"}); + + // Test get_by_position + EXPECT_EQ("const_col1", block.get_by_position(0).name); + EXPECT_EQ("const_col2", block.get_by_position(1).name); + EXPECT_DEATH(block.get_by_position(2), ""); + + // Test safe_get_by_position + EXPECT_EQ("const_col1", block.safe_get_by_position(0).name); + EXPECT_THROW(block.safe_get_by_position(10), Exception); + + // Test get_by_name + EXPECT_EQ("const_col1", block.get_by_name("const_col1").name); + EXPECT_THROW(block.get_by_name("non_existent"), Exception); + + // Test try_get_by_name + EXPECT_NE(nullptr, block.try_get_by_name("const_col1")); + EXPECT_EQ(nullptr, block.try_get_by_name("non_existent")); + + // Test has + EXPECT_TRUE(block.has("const_col1")); + EXPECT_FALSE(block.has("non_existent")); + + // Test get_position_by_name + EXPECT_EQ(0, block.get_position_by_name("const_col1")); + EXPECT_EQ(1, block.get_position_by_name("const_col2")); + EXPECT_THROW(block.get_position_by_name("non_existent"), Exception); + + // Test get_names + auto names = block.get_names(); + EXPECT_EQ(2, names.size()); + EXPECT_EQ("const_col1", names[0]); + EXPECT_EQ("const_col2", names[1]); + + // Test get_data_type + EXPECT_EQ(type, block.get_data_type(0)); + EXPECT_EQ(type, block.get_data_type(1)); + + // Test get_data_types + auto types = block.get_data_types(); + EXPECT_EQ(2, types.size()); + for (const auto& t : types) { + EXPECT_EQ(type, t); } - { - auto col_b = vectorized::ColumnVector::create(); - unsorted_block.insert({std::move(col_b), type, "b"}); + + // Test replace_by_position + auto new_const_col = vectorized::ColumnVector::create(); + new_const_col->insert_value(100); + auto new_const = vectorized::ColumnConst::create(new_const_col->get_ptr(), 10); + block.replace_by_position(0, new_const->get_ptr()); + EXPECT_EQ(10, block.get_by_position(0).column->size()); + EXPECT_DEATH(block.replace_by_position(10, new_const->get_ptr()), ""); + + // Test replace_by_position_if_const + block.replace_by_position_if_const(0); + EXPECT_EQ(nullptr, + typeid_cast(block.get_by_position(0).column.get())); + EXPECT_DEATH(block.replace_by_position_if_const(10), ""); + + // Test get_columns_with_type_and_name + const auto& columns = block.get_columns_with_type_and_name(); + EXPECT_EQ(2, columns.size()); + EXPECT_EQ("const_col1", columns[0].name); + EXPECT_EQ("const_col2", columns[1].name); + } + + // Test with nullable columns + { + vectorized::Block block; + vectorized::DataTypePtr base_type(std::make_shared()); + auto nullable_type = vectorized::make_nullable(base_type); + + // Create and insert nullable columns + auto col1 = vectorized::ColumnVector::create(); + auto nullable_col1 = vectorized::make_nullable(col1->get_ptr()); + block.insert({nullable_col1, nullable_type, "nullable_col1"}); + + auto col2 = vectorized::ColumnVector::create(); + auto nullable_col2 = vectorized::make_nullable(col2->get_ptr()); + block.insert({nullable_col2, nullable_type, "nullable_col2"}); + + // Test get_by_position + EXPECT_EQ("nullable_col1", block.get_by_position(0).name); + EXPECT_EQ("nullable_col2", block.get_by_position(1).name); + EXPECT_DEATH(block.get_by_position(2), ""); + + // Test safe_get_by_position + EXPECT_EQ("nullable_col1", block.safe_get_by_position(0).name); + EXPECT_THROW(block.safe_get_by_position(10), Exception); + + // Test get_by_name + EXPECT_EQ("nullable_col1", block.get_by_name("nullable_col1").name); + EXPECT_THROW(block.get_by_name("non_existent"), Exception); + + // Test try_get_by_name + EXPECT_NE(nullptr, block.try_get_by_name("nullable_col1")); + EXPECT_EQ(nullptr, block.try_get_by_name("non_existent")); + + // Test has + EXPECT_TRUE(block.has("nullable_col1")); + EXPECT_FALSE(block.has("non_existent")); + + // Test get_position_by_name + EXPECT_EQ(0, block.get_position_by_name("nullable_col1")); + EXPECT_EQ(1, block.get_position_by_name("nullable_col2")); + EXPECT_THROW(block.get_position_by_name("non_existent"), Exception); + + // Test get_names + auto names = block.get_names(); + EXPECT_EQ(2, names.size()); + EXPECT_EQ("nullable_col1", names[0]); + EXPECT_EQ("nullable_col2", names[1]); + + // Test get_data_type + EXPECT_EQ(nullable_type, block.get_data_type(0)); + EXPECT_EQ(nullable_type, block.get_data_type(1)); + + // Test get_data_types + auto types = block.get_data_types(); + EXPECT_EQ(2, types.size()); + for (const auto& t : types) { + EXPECT_EQ(nullable_type, t); } - // Verify original order - auto original_names = unsorted_block.get_names(); - EXPECT_EQ("c", original_names[0]); - EXPECT_EQ("a", original_names[1]); - EXPECT_EQ("b", original_names[2]); + // Test replace_by_position + auto new_col = vectorized::ColumnVector::create(); + auto new_nullable = vectorized::make_nullable(new_col->get_ptr()); + block.replace_by_position(0, new_nullable); + EXPECT_EQ(0, block.get_by_position(0).column->size()); + EXPECT_DEATH(block.replace_by_position(10, new_nullable), ""); + + // Test replace_by_position_if_const + block.replace_by_position_if_const(0); + EXPECT_NE(nullptr, + typeid_cast(block.get_by_position(0).column.get())); + EXPECT_DEATH(block.replace_by_position_if_const(10), ""); + + // Test get_columns_with_type_and_name + const auto& columns = block.get_columns_with_type_and_name(); + EXPECT_EQ(2, columns.size()); + EXPECT_EQ("nullable_col1", columns[0].name); + EXPECT_EQ("nullable_col2", columns[1].name); + } +} + +TEST(BlockTest, SortColumns) { + // Test sort_columns with empty block + { + vectorized::Block empty_block; + auto sorted_empty = empty_block.sort_columns(); + EXPECT_EQ(0, sorted_empty.columns()); + EXPECT_EQ(0, sorted_empty.rows()); + } + + // Test sort_columns with regular columns + { + vectorized::Block block; + auto type = std::make_shared(); + + // Insert columns in random order + auto col_c = vectorized::ColumnVector::create(); + col_c->insert_value(1); + block.insert({col_c->get_ptr(), type, "c"}); + + auto col_a = vectorized::ColumnVector::create(); + col_a->insert_value(2); + block.insert({col_a->get_ptr(), type, "a"}); + + auto col_b = vectorized::ColumnVector::create(); + col_b->insert_value(3); + block.insert({col_b->get_ptr(), type, "b"}); + + // Sort and verify + auto sorted_block = block.sort_columns(); + auto sorted_names = sorted_block.get_names(); + EXPECT_EQ("c", sorted_names[0]); + EXPECT_EQ("b", sorted_names[1]); + EXPECT_EQ("a", sorted_names[2]); + + // Verify data is preserved + EXPECT_EQ(1, sorted_block.get_by_position(0).column->get_int(0)); + EXPECT_EQ(3, sorted_block.get_by_position(1).column->get_int(0)); + EXPECT_EQ(2, sorted_block.get_by_position(2).column->get_int(0)); + } + + // Test sort_columns with const columns + { + vectorized::Block block; + auto type = std::make_shared(); + + // Create and insert const columns in random order + auto base_c = vectorized::ColumnVector::create(); + base_c->insert_value(42); + auto const_c = vectorized::ColumnConst::create(base_c->get_ptr(), 10); + block.insert({const_c->get_ptr(), type, "c"}); + + auto base_a = vectorized::ColumnVector::create(); + base_a->insert_value(24); + auto const_a = vectorized::ColumnConst::create(base_a->get_ptr(), 10); + block.insert({const_a->get_ptr(), type, "a"}); - // Sort columns and verify - auto sorted_block = unsorted_block.sort_columns(); + auto base_b = vectorized::ColumnVector::create(); + base_b->insert_value(33); + auto const_b = vectorized::ColumnConst::create(base_b->get_ptr(), 10); + block.insert({const_b->get_ptr(), type, "b"}); + + // Sort and verify + auto sorted_block = block.sort_columns(); auto sorted_names = sorted_block.get_names(); + EXPECT_EQ("c", sorted_names[0]); + EXPECT_EQ("b", sorted_names[1]); + EXPECT_EQ("a", sorted_names[2]); + + // Verify const values are preserved + EXPECT_EQ(42, sorted_block.get_by_position(0).column->get_int(0)); + EXPECT_EQ(33, sorted_block.get_by_position(1).column->get_int(0)); + EXPECT_EQ(24, sorted_block.get_by_position(2).column->get_int(0)); + + // Verify columns remain const + for (size_t i = 0; i < 3; ++i) { + EXPECT_NE(nullptr, + typeid_cast(sorted_block.get_by_position(i).column.get())); + } + } + + // Test sort_columns with nullable columns + { + vectorized::Block block; + auto base_type = std::make_shared(); + auto nullable_type = vectorized::make_nullable(base_type); - // Verify alphabetical order + // Create and insert nullable columns in random order + auto col_c = vectorized::ColumnVector::create(); + col_c->insert_value(1); + auto nullable_c = vectorized::make_nullable(col_c->get_ptr()); + block.insert({nullable_c, nullable_type, "c"}); + + auto col_a = vectorized::ColumnVector::create(); + col_a->insert_value(2); + auto nullable_a = vectorized::make_nullable(col_a->get_ptr()); + block.insert({nullable_a, nullable_type, "a"}); + + auto col_b = vectorized::ColumnVector::create(); + col_b->insert_value(3); + auto nullable_b = vectorized::make_nullable(col_b->get_ptr()); + block.insert({nullable_b, nullable_type, "b"}); + + // Sort and verify + auto sorted_block = block.sort_columns(); + auto sorted_names = sorted_block.get_names(); EXPECT_EQ("c", sorted_names[0]); EXPECT_EQ("b", sorted_names[1]); EXPECT_EQ("a", sorted_names[2]); - // Verify original block remains unchanged - original_names = unsorted_block.get_names(); - EXPECT_EQ("c", original_names[0]); - EXPECT_EQ("a", original_names[1]); - EXPECT_EQ("b", original_names[2]); + // Verify nullable status is preserved + for (size_t i = 0; i < 3; ++i) { + EXPECT_TRUE(sorted_block.get_by_position(i).type->is_nullable()); + } + } - // Verify column count remains the same - EXPECT_EQ(unsorted_block.columns(), sorted_block.columns()); + // Test sort_columns with mixed column types + { + vectorized::Block block; + auto base_type = std::make_shared(); + auto nullable_type = vectorized::make_nullable(base_type); + + // Insert regular column + auto regular_col = vectorized::ColumnVector::create(); + regular_col->insert_value(1); + block.insert({regular_col->get_ptr(), base_type, "c"}); + + // Insert const column + auto const_base = vectorized::ColumnVector::create(); + const_base->insert_value(2); + auto const_col = vectorized::ColumnConst::create(const_base->get_ptr(), 1); + block.insert({const_col->get_ptr(), base_type, "a"}); + + // Insert nullable column + auto nullable_base = vectorized::ColumnVector::create(); + nullable_base->insert_value(3); + auto nullable_col = vectorized::make_nullable(nullable_base->get_ptr()); + block.insert({nullable_col, nullable_type, "b"}); + + // Sort and verify + auto sorted_block = block.sort_columns(); + auto sorted_names = sorted_block.get_names(); + EXPECT_EQ("c", sorted_names[0]); + EXPECT_EQ("b", sorted_names[1]); + EXPECT_EQ("a", sorted_names[2]); // Verify column types are preserved - EXPECT_EQ(type, sorted_block.get_data_type(0)); - EXPECT_EQ(type, sorted_block.get_data_type(1)); - EXPECT_EQ(type, sorted_block.get_data_type(2)); + EXPECT_EQ(nullptr, + typeid_cast(sorted_block.get_by_position(0).column.get())); + EXPECT_TRUE(sorted_block.get_by_position(1).type->is_nullable()); + EXPECT_NE(nullptr, + typeid_cast(sorted_block.get_by_position(2).column.get())); } } From 26b9f3f568f47cf067684f66c79d87337cb1fcfe Mon Sep 17 00:00:00 2001 From: yoruet <1559650411@qq.com> Date: Tue, 26 Nov 2024 22:04:05 +0800 Subject: [PATCH 17/41] Refactor whitespace in block_test.cpp to improve code readability and consistency across tests. --- be/test/vec/core/block_test.cpp | 42 ++++++++++++++++----------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/be/test/vec/core/block_test.cpp b/be/test/vec/core/block_test.cpp index 202e962fe85cab..b2ace824e44d41 100644 --- a/be/test/vec/core/block_test.cpp +++ b/be/test/vec/core/block_test.cpp @@ -1037,26 +1037,26 @@ TEST(BlockTest, ColumnOperations) { EXPECT_THROW(empty_block.safe_get_by_position(0), Exception); EXPECT_THROW(empty_block.get_by_name("non_existent"), Exception); EXPECT_EQ(nullptr, empty_block.try_get_by_name("non_existent")); - + // Test has EXPECT_FALSE(empty_block.has("non_existent")); // Test get_position_by_name EXPECT_THROW(empty_block.get_position_by_name("non_existent"), Exception); - + // Test get_names auto names = empty_block.get_names(); EXPECT_EQ(0, names.size()); - + // Test get_data_types auto types = empty_block.get_data_types(); EXPECT_EQ(0, types.size()); - + // Test replace_by_position auto col = vectorized::ColumnVector::create(); vectorized::DataTypePtr type(std::make_shared()); EXPECT_DEATH(empty_block.replace_by_position(0, col->get_ptr()), ""); - + // Test replace_by_position_if_const EXPECT_DEATH(empty_block.replace_by_position_if_const(0), ""); @@ -1097,7 +1097,7 @@ TEST(BlockTest, ColumnOperations) { EXPECT_EQ(nullptr, block.try_get_by_name("non_existent")); // Test has - EXPECT_TRUE(block.has("col1")); + EXPECT_TRUE(block.has("col1")); EXPECT_FALSE(block.has("non_existent")); // Test get_position_by_name @@ -1137,16 +1137,16 @@ TEST(BlockTest, ColumnOperations) { block.replace_by_position(2, const_column->get_ptr()); // Verify it's const column before replacement - EXPECT_NE(nullptr, - typeid_cast(block.get_by_position(2).column.get())); + EXPECT_NE(nullptr, typeid_cast( + block.get_by_position(2).column.get())); // Replace const column with full column block.replace_by_position_if_const(2); EXPECT_DEATH(block.replace_by_position_if_const(10), ""); // Verify it's no longer const column after replacement - EXPECT_EQ(nullptr, - typeid_cast(block.get_by_position(2).column.get())); + EXPECT_EQ(nullptr, typeid_cast( + block.get_by_position(2).column.get())); // Test get_columns_with_type_and_name const auto& columns = block.get_columns_with_type_and_name(); @@ -1156,7 +1156,7 @@ TEST(BlockTest, ColumnOperations) { EXPECT_EQ("col3", columns[2].name); } - // Test with const columns + // Test with const columns { vectorized::Block block; vectorized::DataTypePtr type(std::make_shared()); @@ -1225,8 +1225,8 @@ TEST(BlockTest, ColumnOperations) { // Test replace_by_position_if_const block.replace_by_position_if_const(0); - EXPECT_EQ(nullptr, - typeid_cast(block.get_by_position(0).column.get())); + EXPECT_EQ(nullptr, typeid_cast( + block.get_by_position(0).column.get())); EXPECT_DEATH(block.replace_by_position_if_const(10), ""); // Test get_columns_with_type_and_name @@ -1303,8 +1303,8 @@ TEST(BlockTest, ColumnOperations) { // Test replace_by_position_if_const block.replace_by_position_if_const(0); - EXPECT_NE(nullptr, - typeid_cast(block.get_by_position(0).column.get())); + EXPECT_NE(nullptr, typeid_cast( + block.get_by_position(0).column.get())); EXPECT_DEATH(block.replace_by_position_if_const(10), ""); // Test get_columns_with_type_and_name @@ -1390,8 +1390,8 @@ TEST(BlockTest, SortColumns) { // Verify columns remain const for (size_t i = 0; i < 3; ++i) { - EXPECT_NE(nullptr, - typeid_cast(sorted_block.get_by_position(i).column.get())); + EXPECT_NE(nullptr, typeid_cast( + sorted_block.get_by_position(i).column.get())); } } @@ -1461,11 +1461,11 @@ TEST(BlockTest, SortColumns) { EXPECT_EQ("a", sorted_names[2]); // Verify column types are preserved - EXPECT_EQ(nullptr, - typeid_cast(sorted_block.get_by_position(0).column.get())); + EXPECT_EQ(nullptr, typeid_cast( + sorted_block.get_by_position(0).column.get())); EXPECT_TRUE(sorted_block.get_by_position(1).type->is_nullable()); - EXPECT_NE(nullptr, - typeid_cast(sorted_block.get_by_position(2).column.get())); + EXPECT_EQ(nullptr, typeid_cast( + sorted_block.get_by_position(2).column.get())); } } From dc1e2324c71a46e5083d66446b9c5bb0d58f3103 Mon Sep 17 00:00:00 2001 From: yoruet <1559650411@qq.com> Date: Tue, 26 Nov 2024 22:05:38 +0800 Subject: [PATCH 18/41] Remove unnecessary whitespace in block_test.cpp to enhance code clarity and maintain consistency in test formatting. --- be/test/vec/core/block_test.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/be/test/vec/core/block_test.cpp b/be/test/vec/core/block_test.cpp index b2ace824e44d41..611ff9e61618fa 100644 --- a/be/test/vec/core/block_test.cpp +++ b/be/test/vec/core/block_test.cpp @@ -1031,7 +1031,7 @@ TEST(BlockTest, ColumnOperations) { // Test with empty block { vectorized::Block empty_block; - + // Test get operations with empty block EXPECT_DEATH(empty_block.get_by_position(0), ""); EXPECT_THROW(empty_block.safe_get_by_position(0), Exception); From 0b83471a97a7a105835ddd0ea8df6c233a374715 Mon Sep 17 00:00:00 2001 From: yoruet <1559650411@qq.com> Date: Tue, 26 Nov 2024 22:52:08 +0800 Subject: [PATCH 19/41] Enhance BlockTest with additional tests for row operations, including handling of empty, const, nullable, and mixed column types. --- be/test/vec/core/block_test.cpp | 172 +++++++++++++++++++++++++++----- 1 file changed, 145 insertions(+), 27 deletions(-) diff --git a/be/test/vec/core/block_test.cpp b/be/test/vec/core/block_test.cpp index 611ff9e61618fa..f90768c268174f 100644 --- a/be/test/vec/core/block_test.cpp +++ b/be/test/vec/core/block_test.cpp @@ -1470,41 +1470,159 @@ TEST(BlockTest, SortColumns) { } TEST(BlockTest, RowOperations) { - vectorized::Block block; - // Test empty block - EXPECT_EQ(0, block.rows()); - EXPECT_EQ(0, block.columns()); - EXPECT_TRUE(block.empty()); - EXPECT_TRUE(block.is_empty_column()); + { + vectorized::Block empty_block; + EXPECT_EQ(0, empty_block.rows()); + EXPECT_EQ(0, empty_block.columns()); + EXPECT_TRUE(empty_block.empty()); + EXPECT_TRUE(empty_block.is_empty_column()); - // Add columns with data - auto col1 = vectorized::ColumnVector::create(); - auto col2 = vectorized::ColumnString::create(); - vectorized::DataTypePtr type1(std::make_shared()); - vectorized::DataTypePtr type2(std::make_shared()); + // Test row operations on empty block + EXPECT_NO_THROW(empty_block.set_num_rows(0)); + int64_t offset = 0; + EXPECT_NO_THROW(empty_block.skip_num_rows(offset)); + } - for (int i = 0; i < 100; ++i) { - col1->insert_value(i); - col2->insert_data(std::to_string(i).c_str(), std::to_string(i).length()); + // Test with regular columns + { + vectorized::Block block; + auto col1 = vectorized::ColumnVector::create(); + auto col2 = vectorized::ColumnString::create(); + vectorized::DataTypePtr type1(std::make_shared()); + vectorized::DataTypePtr type2(std::make_shared()); + + for (int i = 0; i < 100; ++i) { + col1->insert_value(i); + col2->insert_data(std::to_string(i).c_str(), std::to_string(i).length()); + } + + block.insert({col1->get_ptr(), type1, "col1"}); + block.insert({col2->get_ptr(), type2, "col2"}); + + // Test basic properties + EXPECT_EQ(100, block.rows()); + EXPECT_EQ(2, block.columns()); + EXPECT_FALSE(block.empty()); + EXPECT_FALSE(block.is_empty_column()); + + // Test row operations + block.set_num_rows(50); + EXPECT_EQ(50, block.rows()); + + int64_t offset = 20; + block.skip_num_rows(offset); + EXPECT_EQ(30, block.rows()); } - block.insert({col1->get_ptr(), type1, "col1"}); - block.insert({col2->get_ptr(), type2, "col2"}); + // Test with const columns + { + vectorized::Block block; + vectorized::DataTypePtr type(std::make_shared()); + + // Create and insert const columns + auto base_col1 = vectorized::ColumnVector::create(); + base_col1->insert_value(42); + auto const_col1 = vectorized::ColumnConst::create(base_col1->get_ptr(), 100); + block.insert({const_col1->get_ptr(), type, "const_col1"}); + + auto base_col2 = vectorized::ColumnVector::create(); + base_col2->insert_value(24); + auto const_col2 = vectorized::ColumnConst::create(base_col2->get_ptr(), 100); + block.insert({const_col2->get_ptr(), type, "const_col2"}); + + // Test basic properties + EXPECT_EQ(100, block.rows()); + EXPECT_EQ(2, block.columns()); + EXPECT_FALSE(block.empty()); + EXPECT_FALSE(block.is_empty_column()); + + // Test row operations + block.set_num_rows(50); + EXPECT_EQ(50, block.rows()); - // Test basic properties - EXPECT_EQ(100, block.rows()); - EXPECT_EQ(2, block.columns()); - EXPECT_FALSE(block.empty()); - EXPECT_FALSE(block.is_empty_column()); + int64_t offset = 20; + block.skip_num_rows(offset); + EXPECT_EQ(30, block.rows()); + } - // Test row operations - block.set_num_rows(50); // LIMIT - EXPECT_EQ(50, block.rows()); + // Test with nullable columns + { + vectorized::Block block; + vectorized::DataTypePtr base_type(std::make_shared()); + auto nullable_type = vectorized::make_nullable(base_type); - int64_t offset = 20; - block.skip_num_rows(offset); // OFFSET - EXPECT_EQ(30, block.rows()); + // Create and insert nullable columns + auto col1 = vectorized::ColumnVector::create(); + for (int i = 0; i < 100; ++i) { + col1->insert_value(i); + } + auto nullable_col1 = vectorized::make_nullable(col1->get_ptr()); + block.insert({nullable_col1, nullable_type, "nullable_col1"}); + + auto col2 = vectorized::ColumnVector::create(); + for (int i = 0; i < 100; ++i) { + col2->insert_value(i * 2); + } + auto nullable_col2 = vectorized::make_nullable(col2->get_ptr()); + block.insert({nullable_col2, nullable_type, "nullable_col2"}); + + // Test basic properties + EXPECT_EQ(100, block.rows()); + EXPECT_EQ(2, block.columns()); + EXPECT_FALSE(block.empty()); + EXPECT_FALSE(block.is_empty_column()); + + // Test row operations + block.set_num_rows(50); + EXPECT_EQ(50, block.rows()); + + int64_t offset = 20; + block.skip_num_rows(offset); + EXPECT_EQ(30, block.rows()); + } + + // Test with mixed column types + { + vectorized::Block block; + vectorized::DataTypePtr type(std::make_shared()); + auto nullable_type = vectorized::make_nullable(type); + + // Insert regular column + auto regular_col = vectorized::ColumnVector::create(); + for (int i = 0; i < 100; ++i) { + regular_col->insert_value(i); + } + block.insert({regular_col->get_ptr(), type, "regular"}); + + // Insert const column + auto base_col = vectorized::ColumnVector::create(); + base_col->insert_value(42); + auto const_col = vectorized::ColumnConst::create(base_col->get_ptr(), 100); + block.insert({const_col->get_ptr(), type, "const"}); + + // Insert nullable column + auto nullable_base = vectorized::ColumnVector::create(); + for (int i = 0; i < 100; ++i) { + nullable_base->insert_value(i * 2); + } + auto nullable_col = vectorized::make_nullable(nullable_base->get_ptr()); + block.insert({nullable_col, nullable_type, "nullable"}); + + // Test basic properties + EXPECT_EQ(100, block.rows()); + EXPECT_EQ(3, block.columns()); + EXPECT_FALSE(block.empty()); + EXPECT_FALSE(block.is_empty_column()); + + // Test row operations + block.set_num_rows(50); + EXPECT_EQ(50, block.rows()); + + int64_t offset = 20; + block.skip_num_rows(offset); + EXPECT_EQ(30, block.rows()); + } } TEST(BlockTest, MemoryAndSize) { From a75537ef73150e7feb8ed753a298c2f9cd844071 Mon Sep 17 00:00:00 2001 From: yoruet <1559650411@qq.com> Date: Fri, 29 Nov 2024 14:11:10 +0800 Subject: [PATCH 20/41] Enhance BlockTest with comprehensive tests for empty, const, nullable, and mixed column types. --- be/test/vec/core/block_test.cpp | 211 ++++++++++++++++++++++++-------- 1 file changed, 161 insertions(+), 50 deletions(-) diff --git a/be/test/vec/core/block_test.cpp b/be/test/vec/core/block_test.cpp index f90768c268174f..7119a1ca10de7a 100644 --- a/be/test/vec/core/block_test.cpp +++ b/be/test/vec/core/block_test.cpp @@ -1626,69 +1626,180 @@ TEST(BlockTest, RowOperations) { } TEST(BlockTest, MemoryAndSize) { - vectorized::Block block; + // Test empty block + { + vectorized::Block empty_block; + EXPECT_EQ(0, empty_block.bytes()); + EXPECT_EQ(0, empty_block.allocated_bytes()); + EXPECT_EQ("column bytes: []", empty_block.columns_bytes()); + } - // Test empty block (no columns) - EXPECT_EQ(0, block.bytes()); - EXPECT_EQ(0, block.allocated_bytes()); - EXPECT_EQ("column bytes: []", block.columns_bytes()); + // Test with regular columns + { + vectorized::Block block; + auto type = std::make_shared(); - // Add first column (Int32) - auto col1 = vectorized::ColumnVector::create(); - vectorized::DataTypePtr type1(std::make_shared()); - for (int i = 0; i < 1000; ++i) { - col1->insert_value(i); + // Add first column (Int32) + auto col1 = vectorized::ColumnVector::create(); + for (int i = 0; i < 1000; ++i) { + col1->insert_value(i); + } + block.insert({col1->get_ptr(), type, "col1"}); + + // Test with single column + size_t bytes_one_col = block.bytes(); + size_t allocated_bytes_one_col = block.allocated_bytes(); + EXPECT_GT(bytes_one_col, 0); + EXPECT_GT(allocated_bytes_one_col, 0); + EXPECT_GE(allocated_bytes_one_col, bytes_one_col); + + // Add second column (String) + auto col2 = vectorized::ColumnString::create(); + auto string_type = std::make_shared(); + for (int i = 0; i < 1000; ++i) { + std::string val = "test" + std::to_string(i); + col2->insert_data(val.c_str(), val.length()); + } + block.insert({col2->get_ptr(), string_type, "col2"}); + + // Test with two columns + size_t bytes_two_cols = block.bytes(); + EXPECT_GT(bytes_two_cols, bytes_one_col); + + // Test after erasing first column + block.erase(0); + EXPECT_EQ(block.bytes(), col2->byte_size()); + + // Test after clearing + block.clear(); + EXPECT_EQ(0, block.bytes()); + EXPECT_EQ(0, block.allocated_bytes()); + EXPECT_EQ("column bytes: []", block.columns_bytes()); } - block.insert({col1->get_ptr(), type1, "col1"}); - // Test with valid column - size_t bytes_one_col = block.bytes(); - size_t allocated_bytes_one_col = block.allocated_bytes(); - EXPECT_GT(bytes_one_col, 0); - EXPECT_GT(allocated_bytes_one_col, 0); - EXPECT_GE(allocated_bytes_one_col, bytes_one_col); + // Test with const columns + { + vectorized::Block block; + auto type = std::make_shared(); - // Test with nullptr column (should throw exception) - vectorized::Block block_with_null; - block_with_null.insert({nullptr, type1, "null_col"}); + // Add first const column + auto base_col1 = vectorized::ColumnVector::create(); + base_col1->insert_value(42); + auto const_col1 = vectorized::ColumnConst::create(base_col1->get_ptr(), 1000); + block.insert({const_col1->get_ptr(), type, "const_col1"}); - // bytes() should throw exception when there is a nullptr column - EXPECT_THROW(block_with_null.bytes(), Exception); + // Test with single const column + size_t bytes_one_col = block.bytes(); + size_t allocated_bytes_one_col = block.allocated_bytes(); + EXPECT_GT(bytes_one_col, 0); + EXPECT_GT(allocated_bytes_one_col, 0); + EXPECT_GE(allocated_bytes_one_col, bytes_one_col); - // columns_bytes() should throw exception when there is a nullptr column - EXPECT_THROW(block_with_null.columns_bytes(), Exception); + // Add second const column + auto base_col2 = vectorized::ColumnVector::create(); + base_col2->insert_value(24); + auto const_col2 = vectorized::ColumnConst::create(base_col2->get_ptr(), 1000); + block.insert({const_col2->get_ptr(), type, "const_col2"}); - // allocated_bytes() should return 0 when there is a nullptr column - EXPECT_EQ(0, block_with_null.allocated_bytes()); + // Test with two const columns + size_t bytes_two_cols = block.bytes(); + EXPECT_GT(bytes_two_cols, bytes_one_col); - // Add second valid column (String) - auto col2 = vectorized::ColumnString::create(); - vectorized::DataTypePtr type2(std::make_shared()); - for (int i = 0; i < 1000; ++i) { - std::string val = "test" + std::to_string(i); - col2->insert_data(val.c_str(), val.length()); + // Test columns_bytes output + std::string bytes_info = block.columns_bytes(); + EXPECT_TRUE(bytes_info.find("column bytes") != std::string::npos); } - block.insert({col2->get_ptr(), type2, "col2"}); - // Test with two valid columns - size_t bytes_two_cols = block.bytes(); - EXPECT_GT(bytes_two_cols, bytes_one_col); + // Test with nullable columns + { + vectorized::Block block; + auto base_type = std::make_shared(); + auto nullable_type = vectorized::make_nullable(base_type); - // Test after erasing first column - block.erase(0); - EXPECT_EQ(block.bytes(), col2->byte_size()); + // Add first nullable column + auto col1 = vectorized::ColumnVector::create(); + for (int i = 0; i < 1000; ++i) { + col1->insert_value(i); + } + auto nullable_col1 = vectorized::make_nullable(col1->get_ptr()); + block.insert({nullable_col1, nullable_type, "nullable_col1"}); - // Test after clearing all columns - block.clear(); - EXPECT_EQ(0, block.bytes()); - EXPECT_EQ(0, block.allocated_bytes()); - EXPECT_EQ("column bytes: []", block.columns_bytes()); - - // Test with multiple nullptr columns - vectorized::Block multi_null_block; - multi_null_block.insert({nullptr, type1, "null_col1"}); - multi_null_block.insert({nullptr, type2, "null_col2"}); - EXPECT_THROW(multi_null_block.bytes(), Exception); + // Test with single nullable column + size_t bytes_one_col = block.bytes(); + size_t allocated_bytes_one_col = block.allocated_bytes(); + EXPECT_GT(bytes_one_col, 0); + EXPECT_GT(allocated_bytes_one_col, 0); + EXPECT_GE(allocated_bytes_one_col, bytes_one_col); + + // Add second nullable column + auto col2 = vectorized::ColumnVector::create(); + for (int i = 0; i < 1000; ++i) { + col2->insert_value(i * 2); + } + auto nullable_col2 = vectorized::make_nullable(col2->get_ptr()); + block.insert({nullable_col2, nullable_type, "nullable_col2"}); + + // Test with two nullable columns + size_t bytes_two_cols = block.bytes(); + EXPECT_GT(bytes_two_cols, bytes_one_col); + + // Test columns_bytes output + std::string bytes_info = block.columns_bytes(); + EXPECT_TRUE(bytes_info.find("column bytes") != std::string::npos); + } + + // Test with nullptr columns + { + auto type = std::make_shared(); + + // Test with single nullptr column + vectorized::Block block_with_null; + block_with_null.insert({nullptr, type, "null_col"}); + EXPECT_THROW(block_with_null.bytes(), Exception); + EXPECT_THROW(block_with_null.columns_bytes(), Exception); + EXPECT_EQ(0, block_with_null.allocated_bytes()); + + // Test with multiple nullptr columns + vectorized::Block multi_null_block; + multi_null_block.insert({nullptr, type, "null_col1"}); + multi_null_block.insert({nullptr, type, "null_col2"}); + EXPECT_THROW(multi_null_block.bytes(), Exception); + EXPECT_THROW(multi_null_block.columns_bytes(), Exception); + EXPECT_EQ(0, multi_null_block.allocated_bytes()); + } + + // Test with mixed column types + { + vectorized::Block block; + auto base_type = std::make_shared(); + auto nullable_type = vectorized::make_nullable(base_type); + + // Add regular column + auto regular_col = vectorized::ColumnVector::create(); + regular_col->insert_value(1); + block.insert({regular_col->get_ptr(), base_type, "regular"}); + + // Add const column + auto const_base = vectorized::ColumnVector::create(); + const_base->insert_value(42); + auto const_col = vectorized::ColumnConst::create(const_base->get_ptr(), 1); + block.insert({const_col->get_ptr(), base_type, "const"}); + + // Add nullable column + auto nullable_base = vectorized::ColumnVector::create(); + nullable_base->insert_value(100); + auto nullable_col = vectorized::make_nullable(nullable_base->get_ptr()); + block.insert({nullable_col, nullable_type, "nullable"}); + + // Test memory operations + EXPECT_GT(block.bytes(), 0); + EXPECT_GT(block.allocated_bytes(), 0); + EXPECT_GE(block.allocated_bytes(), block.bytes()); + + // Test columns_bytes output + std::string bytes_info = block.columns_bytes(); + EXPECT_TRUE(bytes_info.find("column bytes") != std::string::npos); + } } TEST(BlockTest, DumpMethods) { From 8c9e988cb870fd6c6aecc21411d55ccdb3347c3b Mon Sep 17 00:00:00 2001 From: yoruet <1559650411@qq.com> Date: Fri, 29 Nov 2024 15:30:14 +0800 Subject: [PATCH 21/41] Enhance BlockTest with extensive tests for empty, const, nullable, and mixed column types. --- be/test/vec/core/block_test.cpp | 335 +++++++++++++++++++++++--------- 1 file changed, 238 insertions(+), 97 deletions(-) diff --git a/be/test/vec/core/block_test.cpp b/be/test/vec/core/block_test.cpp index 7119a1ca10de7a..0b8cf50192c557 100644 --- a/be/test/vec/core/block_test.cpp +++ b/be/test/vec/core/block_test.cpp @@ -1803,126 +1803,267 @@ TEST(BlockTest, MemoryAndSize) { } TEST(BlockTest, DumpMethods) { - vectorized::Block block; - // Test empty block - EXPECT_EQ("", block.dump_names()); - EXPECT_EQ("", block.dump_types()); - EXPECT_TRUE(block.dump_structure().empty()); + { + vectorized::Block empty_block; + EXPECT_EQ("", empty_block.dump_names()); + EXPECT_EQ("", empty_block.dump_types()); + EXPECT_TRUE(empty_block.dump_structure().empty()); + EXPECT_FALSE(empty_block.dump_data().empty()); + } - // Add first column (Int32) - auto col1 = vectorized::ColumnVector::create(); - vectorized::DataTypePtr type1(std::make_shared()); - col1->insert_value(123); - col1->insert_value(456); - block.insert({col1->get_ptr(), type1, "col1"}); + // Test with regular columns + { + vectorized::Block block; + + // Add Int32 column + auto col1 = vectorized::ColumnVector::create(); + vectorized::DataTypePtr type1(std::make_shared()); + col1->insert_value(123); + col1->insert_value(456); + block.insert({col1->get_ptr(), type1, "col1"}); - // Test single column - EXPECT_EQ("col1", block.dump_names()); - EXPECT_EQ("Int32", block.dump_types()); + // Test single column dumps + EXPECT_EQ("col1", block.dump_names()); + EXPECT_EQ("Int32", block.dump_types()); + EXPECT_FALSE(block.dump_structure().empty()); - // Add second column (String) - auto col2 = vectorized::ColumnString::create(); - vectorized::DataTypePtr type2(std::make_shared()); - col2->insert_data("hello", 5); - col2->insert_data("world", 5); - block.insert({col2->get_ptr(), type2, "col2"}); + // Add String column + auto col2 = vectorized::ColumnString::create(); + vectorized::DataTypePtr type2(std::make_shared()); + col2->insert_data("hello", 5); + col2->insert_data("world", 5); + block.insert({col2->get_ptr(), type2, "col2"}); - // Test multiple columns - EXPECT_EQ("col1, col2", block.dump_names()); - EXPECT_EQ("Int32, String", block.dump_types()); + // Test multiple columns dumps + EXPECT_EQ("col1, col2", block.dump_names()); + EXPECT_EQ("Int32, String", block.dump_types()); - // Test dump_data with different parameters - { - // Default parameters - std::string data = block.dump_data(); - EXPECT_FALSE(data.empty()); - EXPECT_TRUE(data.find("col1(Int32)") != std::string::npos); - EXPECT_TRUE(data.find("col2(String)") != std::string::npos); - EXPECT_TRUE(data.find("123") != std::string::npos); - EXPECT_TRUE(data.find("hello") != std::string::npos); - } + // Test dump_data variations + std::string full_data = block.dump_data(); + EXPECT_FALSE(full_data.empty()); + EXPECT_TRUE(full_data.find("col1(Int32)") != std::string::npos); + EXPECT_TRUE(full_data.find("col2(String)") != std::string::npos); + EXPECT_TRUE(full_data.find("123") != std::string::npos); + EXPECT_TRUE(full_data.find("hello") != std::string::npos); - { - // Test with begin offset - std::string data = block.dump_data(1); - EXPECT_TRUE(data.find("456") != std::string::npos); - EXPECT_TRUE(data.find("world") != std::string::npos); - EXPECT_FALSE(data.find("123") != std::string::npos); - } + std::string offset_data = block.dump_data(1); + EXPECT_TRUE(offset_data.find("456") != std::string::npos); + EXPECT_FALSE(offset_data.find("123") != std::string::npos); - { - // Test with row limit - std::string data = block.dump_data(0, 1); - LOG(INFO) << "dump_data with limit:\n" << data; - EXPECT_TRUE(data.find("123") != std::string::npos); - EXPECT_FALSE(data.find("456") != std::string::npos); + std::string limited_data = block.dump_data(0, 1); + EXPECT_TRUE(limited_data.find("123") != std::string::npos); + EXPECT_FALSE(limited_data.find("456") != std::string::npos); + + // Test dump_one_line + EXPECT_EQ("123 hello", block.dump_one_line(0, 2)); + EXPECT_EQ("456 world", block.dump_one_line(1, 2)); + EXPECT_EQ("123", block.dump_one_line(0, 1)); + + // Test dump_column + std::string int_dump = vectorized::Block::dump_column(col1->get_ptr(), type1); + EXPECT_TRUE(int_dump.find("123") != std::string::npos); + EXPECT_TRUE(int_dump.find("456") != std::string::npos); + + std::string str_dump = vectorized::Block::dump_column(col2->get_ptr(), type2); + EXPECT_TRUE(str_dump.find("hello") != std::string::npos); + EXPECT_TRUE(str_dump.find("world") != std::string::npos); } - // Test dump_one_line + // Test with const columns { - std::string line = block.dump_one_line(0, 2); - EXPECT_EQ("123 hello", line); + vectorized::Block block; + auto type = std::make_shared(); + + // Create and insert const columns + auto base_col1 = vectorized::ColumnVector::create(); + base_col1->insert_value(42); + auto const_col1 = vectorized::ColumnConst::create(base_col1->get_ptr(), 2); + block.insert({const_col1->get_ptr(), type, "const_col1"}); + + auto base_col2 = vectorized::ColumnVector::create(); + base_col2->insert_value(24); + auto const_col2 = vectorized::ColumnConst::create(base_col2->get_ptr(), 2); + block.insert({const_col2->get_ptr(), type, "const_col2"}); + + // Test basic dumps + EXPECT_EQ("const_col1, const_col2", block.dump_names()); + EXPECT_EQ("Int32, Int32", block.dump_types()); + EXPECT_FALSE(block.dump_structure().empty()); + + // Test dump_data variations + std::string full_data = block.dump_data(); + EXPECT_TRUE(full_data.find("42") != std::string::npos); + EXPECT_TRUE(full_data.find("24") != std::string::npos); + + std::string offset_data = block.dump_data(1); + EXPECT_TRUE(offset_data.find("42") != std::string::npos); + EXPECT_TRUE(offset_data.find("24") != std::string::npos); - line = block.dump_one_line(1, 2); - EXPECT_EQ("456 world", line); + std::string limited_data = block.dump_data(0, 1); + EXPECT_TRUE(limited_data.find("42") != std::string::npos); + EXPECT_TRUE(limited_data.find("24") != std::string::npos); - line = block.dump_one_line(0, 1); - EXPECT_EQ("123", line); + // Test dump_one_line + EXPECT_EQ("42 24", block.dump_one_line(0, 2)); + EXPECT_EQ("42 24", block.dump_one_line(1, 2)); + EXPECT_EQ("42", block.dump_one_line(0, 1)); + + // Test dump_column + std::string const_dump1 = vectorized::Block::dump_column(const_col1->get_ptr(), type); + EXPECT_TRUE(const_dump1.find("42") != std::string::npos); + + std::string const_dump2 = vectorized::Block::dump_column(const_col2->get_ptr(), type); + EXPECT_TRUE(const_dump2.find("24") != std::string::npos); } - // Test dump_structure + // Test with nullable columns { - std::string structure = block.dump_structure(); - LOG(INFO) << "Structure:\n" << structure; - EXPECT_TRUE(structure.find("col1") != std::string::npos); - EXPECT_TRUE(structure.find("Int32") != std::string::npos); - EXPECT_TRUE(structure.find("col2") != std::string::npos); - EXPECT_TRUE(structure.find("String") != std::string::npos); - } + vectorized::Block block; + auto base_type = std::make_shared(); + auto nullable_type = vectorized::make_nullable(base_type); - // Test with nullable column - auto nullable_type = std::make_shared(type1); - auto null_map = vectorized::ColumnUInt8::create(); - auto nested_col = col1->clone(); - auto nullable_col = - vectorized::ColumnNullable::create(nested_col->get_ptr(), null_map->get_ptr()); - block.insert({nullable_col->get_ptr(), nullable_type, "nullable_col"}); + // Create and insert nullable columns + auto col1 = vectorized::ColumnVector::create(); + col1->insert_value(123); + col1->insert_value(456); + auto null_map1 = vectorized::ColumnUInt8::create(); + null_map1->insert_value(0); // Not null + null_map1->insert_value(1); // Null + auto nullable_col1 = vectorized::ColumnNullable::create(col1->get_ptr(), null_map1->get_ptr()); + block.insert({nullable_col1->get_ptr(), nullable_type, "nullable_col1"}); - { - std::string data = block.dump_data(0, 100, true); - LOG(INFO) << "dump_data with nullable:\n" << data; - EXPECT_TRUE(data.find("nullable_col") != std::string::npos); - EXPECT_TRUE(data.find("Nullable(Int32)") != std::string::npos); + auto col2 = vectorized::ColumnVector::create(); + col2->insert_value(789); + col2->insert_value(321); + auto null_map2 = vectorized::ColumnUInt8::create(); + null_map2->insert_value(1); // Null + null_map2->insert_value(0); // Not null + auto nullable_col2 = vectorized::ColumnNullable::create(col2->get_ptr(), null_map2->get_ptr()); + block.insert({nullable_col2->get_ptr(), nullable_type, "nullable_col2"}); + + // Test basic dumps + EXPECT_EQ("nullable_col1, nullable_col2", block.dump_names()); + EXPECT_EQ("Nullable(Int32), Nullable(Int32)", block.dump_types()); + EXPECT_FALSE(block.dump_structure().empty()); + + // Test dump_data variations + std::string full_data = block.dump_data(); + EXPECT_TRUE(full_data.find("123") != std::string::npos); + EXPECT_TRUE(full_data.find("NULL") != std::string::npos); + + std::string offset_data = block.dump_data(1); + EXPECT_TRUE(offset_data.find("321") != std::string::npos); + EXPECT_FALSE(offset_data.find("789") != std::string::npos); + + std::string limited_data = block.dump_data(0, 1); + EXPECT_TRUE(limited_data.find("123") != std::string::npos); + EXPECT_TRUE(limited_data.find("NULL") != std::string::npos); + + // Test dump_one_line + EXPECT_EQ("123 NULL", block.dump_one_line(0, 2)); + EXPECT_EQ("NULL 321", block.dump_one_line(1, 2)); + EXPECT_EQ("123", block.dump_one_line(0, 1)); + + // Test dump_column + std::string nullable_dump1 = vectorized::Block::dump_column(nullable_col1->get_ptr(), nullable_type); + EXPECT_TRUE(nullable_dump1.find("123") != std::string::npos); + EXPECT_TRUE(nullable_dump1.find("NULL") != std::string::npos); + + std::string nullable_dump2 = vectorized::Block::dump_column(nullable_col2->get_ptr(), nullable_type); + EXPECT_TRUE(nullable_dump2.find("321") != std::string::npos); + EXPECT_TRUE(nullable_dump2.find("NULL") != std::string::npos); } - // Test dump_column static method + // Test with mixed column types { - // Test Int32 column - std::string int_dump = vectorized::Block::dump_column(col1->get_ptr(), type1); - EXPECT_FALSE(int_dump.empty()); - EXPECT_TRUE(int_dump.find("123") != std::string::npos); - EXPECT_TRUE(int_dump.find("456") != std::string::npos); + vectorized::Block block; + auto base_type = std::make_shared(); + auto nullable_type = vectorized::make_nullable(base_type); - // Test String column - std::string str_dump = vectorized::Block::dump_column(col2->get_ptr(), type2); - LOG(INFO) << "String column dump:\n" << str_dump; - EXPECT_FALSE(str_dump.empty()); - EXPECT_TRUE(str_dump.find("hello") != std::string::npos); - EXPECT_TRUE(str_dump.find("world") != std::string::npos); + // Add regular column + auto regular_col = vectorized::ColumnVector::create(); + regular_col->insert_value(1); + regular_col->insert_value(2); + block.insert({regular_col->get_ptr(), base_type, "regular"}); + + // Add const column + auto const_base = vectorized::ColumnVector::create(); + const_base->insert_value(42); + auto const_col = vectorized::ColumnConst::create(const_base->get_ptr(), 2); + block.insert({const_col->get_ptr(), base_type, "const"}); + + // Add nullable column + auto nullable_base = vectorized::ColumnVector::create(); + nullable_base->insert_value(3); + nullable_base->insert_value(4); + auto null_map = vectorized::ColumnUInt8::create(); + null_map->insert_value(0); + null_map->insert_value(1); + auto nullable_col = vectorized::ColumnNullable::create(nullable_base->get_ptr(), null_map->get_ptr()); + block.insert({nullable_col->get_ptr(), nullable_type, "nullable"}); + + // Test basic dumps + EXPECT_EQ("regular, const, nullable", block.dump_names()); + EXPECT_EQ("Int32, Int32, Nullable(Int32)", block.dump_types()); + EXPECT_FALSE(block.dump_structure().empty()); - // Test Nullable column - std::string nullable_dump = - vectorized::Block::dump_column(nullable_col->get_ptr(), nullable_type); - LOG(INFO) << "Nullable column dump:\n" << nullable_dump; - EXPECT_FALSE(nullable_dump.empty()); - EXPECT_FALSE(nullable_dump.find("123") != std::string::npos); - - // Test empty column - auto empty_col = vectorized::ColumnVector::create(); - auto empty_dump = vectorized::Block::dump_column(empty_col->get_ptr(), type1); - LOG(INFO) << "Empty column dump:\n" << empty_dump; - EXPECT_FALSE(empty_dump.empty()); // Should still return formatted empty table + // Test dump_data variations + std::string full_data = block.dump_data(); + EXPECT_TRUE(full_data.find('1') != std::string::npos); + EXPECT_TRUE(full_data.find('4') != std::string::npos); + EXPECT_TRUE(full_data.find('3') != std::string::npos); + + // Test dump_one_line + EXPECT_EQ("1 42 3", block.dump_one_line(0, 3)); + EXPECT_EQ("2 42 NULL", block.dump_one_line(1, 3)); + + // Test dump_column for each type + std::string regular_dump = vectorized::Block::dump_column(regular_col->get_ptr(), base_type); + EXPECT_TRUE(regular_dump.find('1') != std::string::npos); + EXPECT_TRUE(regular_dump.find('2') != std::string::npos); + + std::string const_dump = vectorized::Block::dump_column(const_col->get_ptr(), base_type); + EXPECT_TRUE(const_dump.find("42") != std::string::npos); + + std::string nullable_dump = vectorized::Block::dump_column(nullable_col->get_ptr(), nullable_type); + EXPECT_TRUE(nullable_dump.find('3') != std::string::npos); + EXPECT_TRUE(nullable_dump.find("NULL") != std::string::npos); + } + + // Test with empty columns + { + vectorized::Block block; + auto type = std::make_shared(); + + // Add empty regular column + auto empty_regular = vectorized::ColumnVector::create(); + block.insert({empty_regular->get_ptr(), type, "empty_regular"}); + + // Add empty const column + auto empty_const_base = vectorized::ColumnVector::create(); + empty_const_base->insert_value(0); + auto empty_const = vectorized::ColumnConst::create(empty_const_base->get_ptr(), 0); + block.insert({empty_const->get_ptr(), type, "empty_const"}); + + // Test basic dumps + EXPECT_EQ("empty_regular, empty_const", block.dump_names()); + EXPECT_EQ("Int32, Int32", block.dump_types()); + EXPECT_FALSE(block.dump_structure().empty()); + + // Test dump_data + std::string data = block.dump_data(); + EXPECT_FALSE(data.empty()); + + // Test dump_one_line + EXPECT_EQ("0 0", block.dump_one_line(0, 2)); + + // Test dump_column + std::string empty_regular_dump = vectorized::Block::dump_column(empty_regular->get_ptr(), type); + EXPECT_FALSE(empty_regular_dump.empty()); + + std::string empty_const_dump = vectorized::Block::dump_column(empty_const->get_ptr(), type); + EXPECT_FALSE(empty_const_dump.empty()); } } From fb88229df73a4c67e43bf8900f4e4c5ff4eb5cbd Mon Sep 17 00:00:00 2001 From: yoruet <1559650411@qq.com> Date: Fri, 29 Nov 2024 15:37:33 +0800 Subject: [PATCH 22/41] Refactor whitespace in block_test.cpp to improve readability and maintain consistency in test formatting. --- be/test/vec/core/block_test.cpp | 48 +++++++++++++++++++-------------- 1 file changed, 28 insertions(+), 20 deletions(-) diff --git a/be/test/vec/core/block_test.cpp b/be/test/vec/core/block_test.cpp index 0b8cf50192c557..0a3c9b6b9c542f 100644 --- a/be/test/vec/core/block_test.cpp +++ b/be/test/vec/core/block_test.cpp @@ -1815,7 +1815,7 @@ TEST(BlockTest, DumpMethods) { // Test with regular columns { vectorized::Block block; - + // Add Int32 column auto col1 = vectorized::ColumnVector::create(); vectorized::DataTypePtr type1(std::make_shared()); @@ -1928,18 +1928,20 @@ TEST(BlockTest, DumpMethods) { col1->insert_value(123); col1->insert_value(456); auto null_map1 = vectorized::ColumnUInt8::create(); - null_map1->insert_value(0); // Not null - null_map1->insert_value(1); // Null - auto nullable_col1 = vectorized::ColumnNullable::create(col1->get_ptr(), null_map1->get_ptr()); + null_map1->insert_value(0); // Not null + null_map1->insert_value(1); // Null + auto nullable_col1 = + vectorized::ColumnNullable::create(col1->get_ptr(), null_map1->get_ptr()); block.insert({nullable_col1->get_ptr(), nullable_type, "nullable_col1"}); auto col2 = vectorized::ColumnVector::create(); col2->insert_value(789); col2->insert_value(321); auto null_map2 = vectorized::ColumnUInt8::create(); - null_map2->insert_value(1); // Null - null_map2->insert_value(0); // Not null - auto nullable_col2 = vectorized::ColumnNullable::create(col2->get_ptr(), null_map2->get_ptr()); + null_map2->insert_value(1); // Null + null_map2->insert_value(0); // Not null + auto nullable_col2 = + vectorized::ColumnNullable::create(col2->get_ptr(), null_map2->get_ptr()); block.insert({nullable_col2->get_ptr(), nullable_type, "nullable_col2"}); // Test basic dumps @@ -1966,11 +1968,13 @@ TEST(BlockTest, DumpMethods) { EXPECT_EQ("123", block.dump_one_line(0, 1)); // Test dump_column - std::string nullable_dump1 = vectorized::Block::dump_column(nullable_col1->get_ptr(), nullable_type); + std::string nullable_dump1 = + vectorized::Block::dump_column(nullable_col1->get_ptr(), nullable_type); EXPECT_TRUE(nullable_dump1.find("123") != std::string::npos); EXPECT_TRUE(nullable_dump1.find("NULL") != std::string::npos); - std::string nullable_dump2 = vectorized::Block::dump_column(nullable_col2->get_ptr(), nullable_type); + std::string nullable_dump2 = + vectorized::Block::dump_column(nullable_col2->get_ptr(), nullable_type); EXPECT_TRUE(nullable_dump2.find("321") != std::string::npos); EXPECT_TRUE(nullable_dump2.find("NULL") != std::string::npos); } @@ -2000,7 +2004,8 @@ TEST(BlockTest, DumpMethods) { auto null_map = vectorized::ColumnUInt8::create(); null_map->insert_value(0); null_map->insert_value(1); - auto nullable_col = vectorized::ColumnNullable::create(nullable_base->get_ptr(), null_map->get_ptr()); + auto nullable_col = + vectorized::ColumnNullable::create(nullable_base->get_ptr(), null_map->get_ptr()); block.insert({nullable_col->get_ptr(), nullable_type, "nullable"}); // Test basic dumps @@ -2019,14 +2024,16 @@ TEST(BlockTest, DumpMethods) { EXPECT_EQ("2 42 NULL", block.dump_one_line(1, 3)); // Test dump_column for each type - std::string regular_dump = vectorized::Block::dump_column(regular_col->get_ptr(), base_type); + std::string regular_dump = + vectorized::Block::dump_column(regular_col->get_ptr(), base_type); EXPECT_TRUE(regular_dump.find('1') != std::string::npos); EXPECT_TRUE(regular_dump.find('2') != std::string::npos); std::string const_dump = vectorized::Block::dump_column(const_col->get_ptr(), base_type); EXPECT_TRUE(const_dump.find("42") != std::string::npos); - std::string nullable_dump = vectorized::Block::dump_column(nullable_col->get_ptr(), nullable_type); + std::string nullable_dump = + vectorized::Block::dump_column(nullable_col->get_ptr(), nullable_type); EXPECT_TRUE(nullable_dump.find('3') != std::string::npos); EXPECT_TRUE(nullable_dump.find("NULL") != std::string::npos); } @@ -2035,33 +2042,34 @@ TEST(BlockTest, DumpMethods) { { vectorized::Block block; auto type = std::make_shared(); - + // Add empty regular column auto empty_regular = vectorized::ColumnVector::create(); block.insert({empty_regular->get_ptr(), type, "empty_regular"}); - + // Add empty const column auto empty_const_base = vectorized::ColumnVector::create(); empty_const_base->insert_value(0); auto empty_const = vectorized::ColumnConst::create(empty_const_base->get_ptr(), 0); block.insert({empty_const->get_ptr(), type, "empty_const"}); - + // Test basic dumps EXPECT_EQ("empty_regular, empty_const", block.dump_names()); EXPECT_EQ("Int32, Int32", block.dump_types()); EXPECT_FALSE(block.dump_structure().empty()); - + // Test dump_data std::string data = block.dump_data(); EXPECT_FALSE(data.empty()); - + // Test dump_one_line EXPECT_EQ("0 0", block.dump_one_line(0, 2)); - + // Test dump_column - std::string empty_regular_dump = vectorized::Block::dump_column(empty_regular->get_ptr(), type); + std::string empty_regular_dump = + vectorized::Block::dump_column(empty_regular->get_ptr(), type); EXPECT_FALSE(empty_regular_dump.empty()); - + std::string empty_const_dump = vectorized::Block::dump_column(empty_const->get_ptr(), type); EXPECT_FALSE(empty_const_dump.empty()); } From a60a460c52bbe0265721286b0f1ad2e298299b98 Mon Sep 17 00:00:00 2001 From: yoruet <1559650411@qq.com> Date: Fri, 29 Nov 2024 18:50:24 +0800 Subject: [PATCH 23/41] Enhance BlockTest with extensive tests for empty, regular, const, and nullable columns. --- be/test/vec/core/block_test.cpp | 393 +++++++++++++++++++++----------- 1 file changed, 264 insertions(+), 129 deletions(-) diff --git a/be/test/vec/core/block_test.cpp b/be/test/vec/core/block_test.cpp index 0a3c9b6b9c542f..e7a42d6a1080d1 100644 --- a/be/test/vec/core/block_test.cpp +++ b/be/test/vec/core/block_test.cpp @@ -2076,163 +2076,298 @@ TEST(BlockTest, DumpMethods) { } TEST(BlockTest, CloneOperations) { - vectorized::Block block; - auto col1 = vectorized::ColumnVector::create(); - auto col2 = vectorized::ColumnVector::create(); - vectorized::DataTypePtr type(std::make_shared()); + // Test with empty block + { + vectorized::Block empty_block; + + // Test clone_empty + auto cloned_empty = empty_block.clone_empty(); + EXPECT_EQ(0, cloned_empty.columns()); + EXPECT_EQ(0, cloned_empty.rows()); + + // Test get_columns and get_columns_and_convert + auto columns = empty_block.get_columns(); + auto converted_columns = empty_block.get_columns_and_convert(); + EXPECT_EQ(0, columns.size()); + EXPECT_EQ(0, converted_columns.size()); + + // Test clone_empty_columns + auto empty_columns = empty_block.clone_empty_columns(); + EXPECT_EQ(0, empty_columns.size()); + + // Test mutate_columns + auto mutable_cols = empty_block.mutate_columns(); + EXPECT_EQ(0, mutable_cols.size()); - col1->insert_value(1); - col2->insert_value(2); - - block.insert({col1->get_ptr(), type, "col1"}); - block.insert({col2->get_ptr(), type, "col2"}); - - // Test clone_empty - auto empty_block = block.clone_empty(); - EXPECT_EQ(block.columns(), empty_block.columns()); - EXPECT_EQ(0, empty_block.rows()); - - // Test get_columns and get_columns_and_convert - auto columns = block.get_columns(); - auto converted_columns = block.get_columns_and_convert(); - EXPECT_EQ(2, columns.size()); - EXPECT_EQ(2, converted_columns.size()); - - // Test clone_empty_columns - auto empty_columns = block.clone_empty_columns(); - EXPECT_EQ(2, empty_columns.size()); - EXPECT_EQ(0, empty_columns[0]->size()); - EXPECT_EQ(0, empty_columns[1]->size()); - - // Test mutate_columns - auto mutable_cols = block.mutate_columns(); - EXPECT_EQ(2, mutable_cols.size()); - - // Test set_columns with const columns - vectorized::Block new_block = block.clone_empty(); - new_block.set_columns(columns); - EXPECT_EQ(block.rows(), new_block.rows()); - EXPECT_EQ(block.columns(), new_block.columns()); - EXPECT_EQ("col1", new_block.get_by_position(0).name); - EXPECT_EQ("col2", new_block.get_by_position(1).name); - EXPECT_EQ(type, new_block.get_by_position(0).type); - EXPECT_EQ(type, new_block.get_by_position(1).type); - EXPECT_EQ(1, assert_cast*>( - new_block.get_by_position(0).column.get()) - ->get_data()[0]); - EXPECT_EQ(2, assert_cast*>( - new_block.get_by_position(1).column.get()) - ->get_data()[0]); - - // Test clone_with_columns - auto cloned_with_cols = block.clone_with_columns(columns); - EXPECT_EQ(block.rows(), cloned_with_cols.rows()); - EXPECT_EQ(block.columns(), cloned_with_cols.columns()); - EXPECT_EQ("col1", cloned_with_cols.get_by_position(0).name); - EXPECT_EQ("col2", cloned_with_cols.get_by_position(1).name); - EXPECT_EQ(type, cloned_with_cols.get_by_position(0).type); - EXPECT_EQ(type, cloned_with_cols.get_by_position(1).type); - EXPECT_EQ(1, assert_cast*>( - cloned_with_cols.get_by_position(0).column.get()) - ->get_data()[0]); - EXPECT_EQ(2, assert_cast*>( - cloned_with_cols.get_by_position(1).column.get()) - ->get_data()[0]); - - // Test clone_without_columns - std::vector column_offset = {0}; - auto partial_block = block.clone_without_columns(&column_offset); - EXPECT_EQ(1, partial_block.columns()); - EXPECT_EQ("col1", partial_block.get_by_position(0).name); - EXPECT_EQ(nullptr, partial_block.get_by_position(0).column.get()); - - // Test set_columns with mutable columns - { - auto mutable_columns = block.clone_empty_columns(); - auto* tmp_col0 = assert_cast*>(mutable_columns[0].get()); - auto* tmp_col1 = assert_cast*>(mutable_columns[1].get()); - tmp_col0->insert_value(3); - tmp_col1->insert_value(4); - block.set_columns(std::move(mutable_columns)); - EXPECT_EQ(1, block.rows()); - EXPECT_EQ(3, assert_cast*>( - block.get_by_position(0).column.get()) + // Test set_columns + vectorized::Block new_block = empty_block.clone_empty(); + new_block.set_columns(columns); + EXPECT_EQ(0, new_block.rows()); + EXPECT_EQ(0, new_block.columns()); + + // Test clone_with_columns + auto cloned_with_cols = empty_block.clone_with_columns(columns); + EXPECT_EQ(0, cloned_with_cols.rows()); + EXPECT_EQ(0, cloned_with_cols.columns()); + + // Test clone_without_columns + std::vector column_offset; + auto partial_block = empty_block.clone_without_columns(&column_offset); + EXPECT_EQ(0, partial_block.columns()); + + // Test copy_block with different combinations + std::vector empty_columns_indices; + auto copy = empty_block.copy_block(empty_columns_indices); + EXPECT_EQ(0, copy.columns()); + EXPECT_EQ(0, copy.rows()); + } + + // Test with regular columns + { + vectorized::Block block; + auto type = std::make_shared(); + + // Create and insert regular columns + auto col1 = vectorized::ColumnVector::create(); + auto col2 = vectorized::ColumnVector::create(); + col1->insert_value(1); + col2->insert_value(2); + block.insert({col1->get_ptr(), type, "col1"}); + block.insert({col2->get_ptr(), type, "col2"}); + + // Test clone_empty + auto empty_block = block.clone_empty(); + EXPECT_EQ(block.columns(), empty_block.columns()); + EXPECT_EQ(0, empty_block.rows()); + + // Test get_columns and get_columns_and_convert + auto columns = block.get_columns(); + auto converted_columns = block.get_columns_and_convert(); + EXPECT_EQ(2, columns.size()); + EXPECT_EQ(2, converted_columns.size()); + + // Test clone_empty_columns + auto empty_columns = block.clone_empty_columns(); + EXPECT_EQ(2, empty_columns.size()); + EXPECT_EQ(0, empty_columns[0]->size()); + EXPECT_EQ(0, empty_columns[1]->size()); + + // Test mutate_columns + auto mutable_cols = block.mutate_columns(); + EXPECT_EQ(2, mutable_cols.size()); + + // Test set_columns with const columns + vectorized::Block new_block = block.clone_empty(); + new_block.set_columns(columns); + EXPECT_EQ(block.rows(), new_block.rows()); + EXPECT_EQ(block.columns(), new_block.columns()); + EXPECT_EQ("col1", new_block.get_by_position(0).name); + EXPECT_EQ("col2", new_block.get_by_position(1).name); + EXPECT_EQ(1, assert_cast*>( + new_block.get_by_position(0).column.get()) ->get_data()[0]); - EXPECT_EQ(4, assert_cast*>( - block.get_by_position(1).column.get()) + EXPECT_EQ(2, assert_cast*>( + new_block.get_by_position(1).column.get()) ->get_data()[0]); - } - // Test clone_with_columns with mutable columns - { - auto new_mutable_columns = block.clone_empty_columns(); - auto* tmp_col0 = - assert_cast*>(new_mutable_columns[0].get()); - auto* tmp_col1 = - assert_cast*>(new_mutable_columns[1].get()); - tmp_col0->insert_value(5); - tmp_col1->insert_value(6); - auto cloned_with_mutable = block.clone_with_columns(std::move(new_mutable_columns)); - EXPECT_EQ(1, cloned_with_mutable.rows()); - EXPECT_EQ(5, assert_cast*>( - cloned_with_mutable.get_by_position(0).column.get()) + + // Test clone_with_columns + auto cloned_with_cols = block.clone_with_columns(columns); + EXPECT_EQ(block.rows(), cloned_with_cols.rows()); + EXPECT_EQ(block.columns(), cloned_with_cols.columns()); + EXPECT_EQ(1, assert_cast*>( + cloned_with_cols.get_by_position(0).column.get()) ->get_data()[0]); - EXPECT_EQ(6, assert_cast*>( - cloned_with_mutable.get_by_position(1).column.get()) + EXPECT_EQ(2, assert_cast*>( + cloned_with_cols.get_by_position(1).column.get()) ->get_data()[0]); - } - // Test copy_block - { - // Test copying single column + // Test clone_without_columns + std::vector column_offset = {0}; + auto partial_block = block.clone_without_columns(&column_offset); + EXPECT_EQ(1, partial_block.columns()); + EXPECT_EQ("col1", partial_block.get_by_position(0).name); + EXPECT_EQ(nullptr, partial_block.get_by_position(0).column.get()); + + // Test copy_block with different combinations std::vector single_column = {0}; auto single_copy = block.copy_block(single_column); EXPECT_EQ(1, single_copy.columns()); EXPECT_EQ("col1", single_copy.get_by_position(0).name); - EXPECT_EQ(type, single_copy.get_by_position(0).type); - EXPECT_EQ(3, assert_cast*>( - single_copy.get_by_position(0).column.get()) - ->get_data()[0]); - // Test copying multiple columns std::vector multiple_columns = {0, 1}; auto multi_copy = block.copy_block(multiple_columns); EXPECT_EQ(2, multi_copy.columns()); EXPECT_EQ("col1", multi_copy.get_by_position(0).name); EXPECT_EQ("col2", multi_copy.get_by_position(1).name); - EXPECT_EQ(type, multi_copy.get_by_position(0).type); - EXPECT_EQ(type, multi_copy.get_by_position(1).type); - EXPECT_EQ(3, assert_cast*>( - multi_copy.get_by_position(0).column.get()) - ->get_data()[0]); - EXPECT_EQ(4, assert_cast*>( - multi_copy.get_by_position(1).column.get()) - ->get_data()[0]); - // Test copying columns in different order std::vector reordered_columns = {1, 0}; auto reordered_copy = block.copy_block(reordered_columns); EXPECT_EQ(2, reordered_copy.columns()); EXPECT_EQ("col2", reordered_copy.get_by_position(0).name); EXPECT_EQ("col1", reordered_copy.get_by_position(1).name); - EXPECT_EQ(4, assert_cast*>( - reordered_copy.get_by_position(0).column.get()) - ->get_data()[0]); - EXPECT_EQ(3, assert_cast*>( - reordered_copy.get_by_position(1).column.get()) - ->get_data()[0]); - // Test copying same column multiple times std::vector duplicate_columns = {0, 0}; auto duplicate_copy = block.copy_block(duplicate_columns); EXPECT_EQ(2, duplicate_copy.columns()); EXPECT_EQ("col1", duplicate_copy.get_by_position(0).name); EXPECT_EQ("col1", duplicate_copy.get_by_position(1).name); - EXPECT_EQ(3, assert_cast*>( - duplicate_copy.get_by_position(0).column.get()) - ->get_data()[0]); - EXPECT_EQ(3, assert_cast*>( - duplicate_copy.get_by_position(1).column.get()) - ->get_data()[0]); + } + + // Test with const columns + { + vectorized::Block block; + auto type = std::make_shared(); + + // Create and insert const columns + auto base_col1 = vectorized::ColumnVector::create(); + base_col1->insert_value(42); + auto const_col1 = vectorized::ColumnConst::create(base_col1->get_ptr(), 1); + block.insert({const_col1->get_ptr(), type, "const_col1"}); + + auto base_col2 = vectorized::ColumnVector::create(); + base_col2->insert_value(24); + auto const_col2 = vectorized::ColumnConst::create(base_col2->get_ptr(), 1); + block.insert({const_col2->get_ptr(), type, "const_col2"}); + + // Test all clone operations + auto empty_block = block.clone_empty(); + EXPECT_EQ(block.columns(), empty_block.columns()); + EXPECT_EQ(0, empty_block.rows()); + + auto columns = block.get_columns(); + auto converted_columns = block.get_columns_and_convert(); + EXPECT_EQ(2, columns.size()); + EXPECT_EQ(2, converted_columns.size()); + + auto empty_columns = block.clone_empty_columns(); + EXPECT_EQ(2, empty_columns.size()); + EXPECT_EQ(0, empty_columns[0]->size()); + EXPECT_EQ(0, empty_columns[1]->size()); + + auto mutable_cols = block.mutate_columns(); + EXPECT_EQ(2, mutable_cols.size()); + + vectorized::Block new_block = block.clone_empty(); + new_block.set_columns(columns); + EXPECT_EQ(block.rows(), new_block.rows()); + EXPECT_EQ(block.columns(), new_block.columns()); + EXPECT_EQ("const_col1", new_block.get_by_position(0).name); + EXPECT_EQ("const_col2", new_block.get_by_position(1).name); + + auto cloned_with_cols = block.clone_with_columns(columns); + EXPECT_EQ(block.rows(), cloned_with_cols.rows()); + EXPECT_EQ(block.columns(), cloned_with_cols.columns()); + + std::vector column_offset = {0}; + auto partial_block = block.clone_without_columns(&column_offset); + EXPECT_EQ(1, partial_block.columns()); + EXPECT_EQ("const_col1", partial_block.get_by_position(0).name); + EXPECT_EQ(nullptr, partial_block.get_by_position(0).column.get()); + + // Test copy_block with different combinations + std::vector single_column = {0}; + auto single_copy = block.copy_block(single_column); + EXPECT_EQ(1, single_copy.columns()); + EXPECT_EQ("const_col1", single_copy.get_by_position(0).name); + + std::vector multiple_columns = {0, 1}; + auto multi_copy = block.copy_block(multiple_columns); + EXPECT_EQ(2, multi_copy.columns()); + EXPECT_EQ("const_col1", multi_copy.get_by_position(0).name); + EXPECT_EQ("const_col2", multi_copy.get_by_position(1).name); + + std::vector reordered_columns = {1, 0}; + auto reordered_copy = block.copy_block(reordered_columns); + EXPECT_EQ(2, reordered_copy.columns()); + EXPECT_EQ("const_col2", reordered_copy.get_by_position(0).name); + EXPECT_EQ("const_col1", reordered_copy.get_by_position(1).name); + + std::vector duplicate_columns = {0, 0}; + auto duplicate_copy = block.copy_block(duplicate_columns); + EXPECT_EQ(2, duplicate_copy.columns()); + EXPECT_EQ("const_col1", duplicate_copy.get_by_position(0).name); + EXPECT_EQ("const_col1", duplicate_copy.get_by_position(1).name); + } + + // Test with nullable columns + { + vectorized::Block block; + auto base_type = std::make_shared(); + auto nullable_type = vectorized::make_nullable(base_type); + + // Create and insert nullable columns + auto col1 = vectorized::ColumnVector::create(); + col1->insert_value(1); + auto null_map1 = vectorized::ColumnUInt8::create(); + null_map1->insert_value(0); // Not null + auto nullable_col1 = vectorized::ColumnNullable::create(col1->get_ptr(), null_map1->get_ptr()); + block.insert({nullable_col1->get_ptr(), nullable_type, "nullable_col1"}); + + auto col2 = vectorized::ColumnVector::create(); + col2->insert_value(2); + auto null_map2 = vectorized::ColumnUInt8::create(); + null_map2->insert_value(1); // Null + auto nullable_col2 = vectorized::ColumnNullable::create(col2->get_ptr(), null_map2->get_ptr()); + block.insert({nullable_col2->get_ptr(), nullable_type, "nullable_col2"}); + + // Test all clone operations + auto empty_block = block.clone_empty(); + EXPECT_EQ(block.columns(), empty_block.columns()); + EXPECT_EQ(0, empty_block.rows()); + + auto columns = block.get_columns(); + auto converted_columns = block.get_columns_and_convert(); + EXPECT_EQ(2, columns.size()); + EXPECT_EQ(2, converted_columns.size()); + + auto empty_columns = block.clone_empty_columns(); + EXPECT_EQ(2, empty_columns.size()); + EXPECT_EQ(0, empty_columns[0]->size()); + EXPECT_EQ(0, empty_columns[1]->size()); + + auto mutable_cols = block.mutate_columns(); + EXPECT_EQ(2, mutable_cols.size()); + + vectorized::Block new_block = block.clone_empty(); + new_block.set_columns(columns); + EXPECT_EQ(block.rows(), new_block.rows()); + EXPECT_EQ(block.columns(), new_block.columns()); + EXPECT_EQ("nullable_col1", new_block.get_by_position(0).name); + EXPECT_EQ("nullable_col2", new_block.get_by_position(1).name); + + auto cloned_with_cols = block.clone_with_columns(columns); + EXPECT_EQ(block.rows(), cloned_with_cols.rows()); + EXPECT_EQ(block.columns(), cloned_with_cols.columns()); + + std::vector column_offset = {0}; + auto partial_block = block.clone_without_columns(&column_offset); + EXPECT_EQ(1, partial_block.columns()); + EXPECT_EQ("nullable_col1", partial_block.get_by_position(0).name); + EXPECT_EQ(nullptr, partial_block.get_by_position(0).column.get()); + + // Test copy_block with different combinations + std::vector single_column = {0}; + auto single_copy = block.copy_block(single_column); + EXPECT_EQ(1, single_copy.columns()); + EXPECT_EQ("nullable_col1", single_copy.get_by_position(0).name); + + std::vector multiple_columns = {0, 1}; + auto multi_copy = block.copy_block(multiple_columns); + EXPECT_EQ(2, multi_copy.columns()); + EXPECT_EQ("nullable_col1", multi_copy.get_by_position(0).name); + EXPECT_EQ("nullable_col2", multi_copy.get_by_position(1).name); + + std::vector reordered_columns = {1, 0}; + auto reordered_copy = block.copy_block(reordered_columns); + EXPECT_EQ(2, reordered_copy.columns()); + EXPECT_EQ("nullable_col2", reordered_copy.get_by_position(0).name); + EXPECT_EQ("nullable_col1", reordered_copy.get_by_position(1).name); + + std::vector duplicate_columns = {0, 0}; + auto duplicate_copy = block.copy_block(duplicate_columns); + EXPECT_EQ(2, duplicate_copy.columns()); + EXPECT_EQ("nullable_col1", duplicate_copy.get_by_position(0).name); + EXPECT_EQ("nullable_col1", duplicate_copy.get_by_position(1).name); } } From ee8117110f4bb57cc219379738e1f11c1b566cab Mon Sep 17 00:00:00 2001 From: yoruet <1559650411@qq.com> Date: Mon, 2 Dec 2024 12:22:08 +0800 Subject: [PATCH 24/41] Enhance BlockTest with comprehensive tests for filtering operations on empty, regular, const, and nullable columns. --- be/test/vec/core/block_test.cpp | 882 +++++++++++++++++++++++++------- 1 file changed, 705 insertions(+), 177 deletions(-) diff --git a/be/test/vec/core/block_test.cpp b/be/test/vec/core/block_test.cpp index e7a42d6a1080d1..3a552a1d80ef35 100644 --- a/be/test/vec/core/block_test.cpp +++ b/be/test/vec/core/block_test.cpp @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -2372,242 +2373,769 @@ TEST(BlockTest, CloneOperations) { } TEST(BlockTest, FilterAndSelector) { - auto create_test_block = [](int size) { - vectorized::Block test_block; - auto test_col1 = vectorized::ColumnVector::create(); - auto test_col2 = vectorized::ColumnVector::create(); - auto type = std::make_shared(); + // Test empty block + { + vectorized::Block empty_block; + + // Test filter_block_internal + vectorized::IColumn::Filter filter(0); + EXPECT_NO_THROW(vectorized::Block::filter_block_internal(&empty_block, filter)); + EXPECT_EQ(0, empty_block.rows()); + EXPECT_EQ(0, empty_block.columns()); + + // Test filter_block + std::vector columns_to_filter; + EXPECT_DEATH(vectorized::Block::filter_block(&empty_block, columns_to_filter, 0, 0).ok(), ""); + EXPECT_EQ(0, empty_block.rows()); + + // Test append_to_block_by_selector + vectorized::Block dst_block; + vectorized::MutableBlock dst(&dst_block); + vectorized::IColumn::Selector selector(0); + EXPECT_TRUE(empty_block.append_to_block_by_selector(&dst, selector).ok()); + EXPECT_EQ(0, dst.rows()); + } - for (int i = 0; i < size; ++i) { - test_col1->insert_value(i); - test_col2->insert_value(i * 2); + // Test with regular columns + { + auto create_test_block = [](int size) { + vectorized::Block test_block; + auto test_col1 = vectorized::ColumnVector::create(); + auto test_col2 = vectorized::ColumnVector::create(); + auto type = std::make_shared(); + + for (int i = 0; i < size; ++i) { + test_col1->insert_value(i); + test_col2->insert_value(i * 2); + } + + test_block.insert({test_col1->get_ptr(), type, "col1"}); + test_block.insert({test_col2->get_ptr(), type, "col2"}); + return test_block; + }; + + // Test filter_block_internal with filter only + { + auto test_block = create_test_block(10); + vectorized::IColumn::Filter filter(10, 1); // Initialize with all 1s (keep all rows) + filter[0] = 0; // Filter out first row + filter[5] = 0; // Filter out sixth row + + vectorized::Block::filter_block_internal(&test_block, filter); + EXPECT_EQ(8, test_block.rows()); + + // Verify filtered data for both columns + const auto* filtered_col1 = assert_cast*>( + test_block.get_by_position(0).column.get()); + const auto* filtered_col2 = assert_cast*>( + test_block.get_by_position(1).column.get()); + + // Expected values after filtering + std::vector expected_col1 = {1, 2, 3, 4, 6, 7, 8, 9}; + std::vector expected_col2 = {2, 4, 6, 8, 12, 14, 16, 18}; + + for (size_t i = 0; i < expected_col1.size(); ++i) { + EXPECT_EQ(expected_col1[i], filtered_col1->get_data()[i]); + EXPECT_EQ(expected_col2[i], filtered_col2->get_data()[i]); + } } - test_block.insert({test_col1->get_ptr(), type, "col1"}); - test_block.insert({test_col2->get_ptr(), type, "col2"}); - return test_block; - }; + // Test filter_block_internal with specific columns + { + auto test_block = create_test_block(10); + vectorized::IColumn::Filter filter(10, 1); + filter[0] = 0; + std::vector columns_to_filter = {0}; // Only filter first column - // Create original block - auto block = create_test_block(10); + vectorized::Block::filter_block_internal(&test_block, columns_to_filter, filter); + EXPECT_EQ(9, test_block.rows()); - // Test filter_block_internal with filter only - { - auto test_block = create_test_block(10); - vectorized::IColumn::Filter filter(10, 1); // Initialize with all 1s (keep all rows) - filter[0] = 0; // Filter out first row - filter[5] = 0; // Filter out sixth row + const auto* filtered_col1 = assert_cast*>( + test_block.get_by_position(0).column.get()); + const auto* filtered_col2 = assert_cast*>( + test_block.get_by_position(1).column.get()); + EXPECT_EQ(1, filtered_col1->get_data()[0]); // First column filtered + EXPECT_EQ(0, filtered_col2->get_data()[0]); // Second column unchanged + } - vectorized::Block::filter_block_internal(&test_block, filter); - EXPECT_EQ(8, test_block.rows()); + // Test filter_block_internal with column_to_keep + { + auto test_block = create_test_block(10); + vectorized::IColumn::Filter filter(10, 1); + filter[0] = 0; // Filter out first row + filter[5] = 0; // Filter out sixth row + uint32_t column_to_keep = 1; // Only filter first column, keep the rest columns + + vectorized::Block::filter_block_internal(&test_block, filter, column_to_keep); + + // Verify row count after filtering + EXPECT_EQ(8, test_block.rows()); + EXPECT_EQ(2, test_block.columns()); + + // Verify filtered data for both columns + const auto* filtered_col1 = assert_cast*>( + test_block.get_by_position(0).column.get()); + const auto* filtered_col2 = assert_cast*>( + test_block.get_by_position(1).column.get()); + + // Expected values after filtering + std::vector expected_col1 = {1, 2, 3, 4, 6, 7, 8, 9}; + std::vector expected_col2 = {0, 2, 4, 6, 8, 10, 12, 14, 16, 18}; + + // Verify each value in filtered columns + for (size_t i = 0; i < expected_col1.size(); ++i) { + EXPECT_EQ(expected_col1[i], filtered_col1->get_data()[i]); + } + for (size_t i = 0; i < expected_col2.size(); ++i) { + EXPECT_EQ(expected_col2[i], filtered_col2->get_data()[i]); + } + } + + // Test filter_block with nullable filter column + { + auto test_block = create_test_block(10); - // Verify filtered data for both columns - const auto* filtered_col1 = assert_cast*>( - test_block.get_by_position(0).column.get()); - const auto* filtered_col2 = assert_cast*>( - test_block.get_by_position(1).column.get()); + // Create nullable filter column + auto nullable_filter = vectorized::ColumnNullable::create( + vectorized::ColumnVector::create(10, 1), // all true + vectorized::ColumnVector::create(10, 0) // no nulls + ); + auto filter_type = std::make_shared( + std::make_shared()); - // Expected values after filtering - std::vector expected_col1 = {1, 2, 3, 4, 6, 7, 8, 9}; - std::vector expected_col2 = {2, 4, 6, 8, 12, 14, 16, 18}; + // Add filter column to block + test_block.insert({nullable_filter->get_ptr(), filter_type, "filter"}); - for (size_t i = 0; i < expected_col1.size(); ++i) { - EXPECT_EQ(expected_col1[i], filtered_col1->get_data()[i]); - EXPECT_EQ(expected_col2[i], filtered_col2->get_data()[i]); + // Test four-parameter version + std::vector columns_to_filter = {0, 1}; + EXPECT_TRUE(vectorized::Block::filter_block(&test_block, columns_to_filter, 2, 2).ok()); + EXPECT_EQ(10, test_block.rows()); // All rows kept + + // Test three-parameter version + auto test_block2 = create_test_block(10); + test_block2.insert({nullable_filter->get_ptr(), filter_type, "filter"}); + EXPECT_TRUE(vectorized::Block::filter_block(&test_block2, 2, 2).ok()); + EXPECT_EQ(10, test_block2.rows()); // All rows kept } - } - // Test filter_block_internal with specific columns - { - auto test_block = create_test_block(10); - vectorized::IColumn::Filter filter(10, 1); - filter[0] = 0; - std::vector columns_to_filter = {0}; // Only filter first column + // Test filter_block with const filter column + { + auto test_block = create_test_block(10); - vectorized::Block::filter_block_internal(&test_block, columns_to_filter, filter); - EXPECT_EQ(9, test_block.rows()); + // Create const filter column (false) + auto const_filter = vectorized::ColumnConst::create( + vectorized::ColumnVector::create(1, 0), // false + 10); + auto filter_type = std::make_shared(); - const auto* filtered_col1 = assert_cast*>( - test_block.get_by_position(0).column.get()); - const auto* filtered_col2 = assert_cast*>( - test_block.get_by_position(1).column.get()); - EXPECT_EQ(1, filtered_col1->get_data()[0]); // First column filtered - EXPECT_EQ(0, filtered_col2->get_data()[0]); // Second column unchanged - } + // Add filter column to block + test_block.insert({const_filter->get_ptr(), filter_type, "filter"}); - // Test filter_block_internal with column_to_keep - { - auto test_block = create_test_block(10); - vectorized::IColumn::Filter filter(10, 1); - filter[0] = 0; // Filter out first row - filter[5] = 0; // Filter out sixth row - uint32_t column_to_keep = 1; // Only filter first column, keep the rest columns + // Test four-parameter version + std::vector columns_to_filter = {0, 1}; + EXPECT_TRUE(vectorized::Block::filter_block(&test_block, columns_to_filter, 2, 2).ok()); + EXPECT_EQ(0, test_block.rows()); // All rows filtered out + + // Test three-parameter version + auto test_block2 = create_test_block(10); + test_block2.insert({const_filter->get_ptr(), filter_type, "filter"}); + EXPECT_TRUE(vectorized::Block::filter_block(&test_block2, 2, 2).ok()); + EXPECT_EQ(0, test_block2.rows()); // All rows filtered out + } + + // Test filter_block with regular filter column + { + auto test_block = create_test_block(10); + + // Create regular filter column + auto filter_column = vectorized::ColumnVector::create(); + for (size_t i = 0; i < 10; ++i) { + filter_column->insert_value(i % 2); // Keep odd-indexed rows + } + auto filter_type = std::make_shared(); + + // Add filter column to block + test_block.insert({filter_column->get_ptr(), filter_type, "filter"}); - vectorized::Block::filter_block_internal(&test_block, filter, column_to_keep); + // Test four-parameter version + std::vector columns_to_filter = {0, 1}; + EXPECT_TRUE(vectorized::Block::filter_block(&test_block, columns_to_filter, 2, 2).ok()); + EXPECT_EQ(5, test_block.rows()); // Half rows kept - // Verify row count after filtering - EXPECT_EQ(8, test_block.rows()); - EXPECT_EQ(2, test_block.columns()); + // Verify filtered data + const auto* filtered_col1 = assert_cast*>( + test_block.get_by_position(0).column.get()); + const auto* filtered_col2 = assert_cast*>( + test_block.get_by_position(1).column.get()); - // Verify filtered data for both columns - const auto* filtered_col1 = assert_cast*>( - test_block.get_by_position(0).column.get()); - const auto* filtered_col2 = assert_cast*>( - test_block.get_by_position(1).column.get()); + std::vector expected_col1 = {1, 3, 5, 7, 9}; + std::vector expected_col2 = {2, 6, 10, 14, 18}; - // Expected values after filtering - std::vector expected_col1 = {1, 2, 3, 4, 6, 7, 8, 9}; - std::vector expected_col2 = {0, 2, 4, 6, 8, 10, 12, 14, 16, 18}; + for (size_t i = 0; i < expected_col1.size(); ++i) { + EXPECT_EQ(expected_col1[i], filtered_col1->get_data()[i]); + EXPECT_EQ(expected_col2[i], filtered_col2->get_data()[i]); + } - // Verify each value in filtered columns - for (size_t i = 0; i < expected_col1.size(); ++i) { - EXPECT_EQ(expected_col1[i], filtered_col1->get_data()[i]); + // Test three-parameter version + auto test_block2 = create_test_block(10); + test_block2.insert({filter_column->get_ptr(), filter_type, "filter"}); + EXPECT_TRUE(vectorized::Block::filter_block(&test_block2, 2, 2).ok()); + EXPECT_EQ(5, test_block2.rows()); // Half rows kept + + // Verify filtered data + filtered_col1 = assert_cast*>( + test_block2.get_by_position(0).column.get()); + filtered_col2 = assert_cast*>( + test_block2.get_by_position(1).column.get()); + + for (size_t i = 0; i < expected_col1.size(); ++i) { + EXPECT_EQ(expected_col1[i], filtered_col1->get_data()[i]); + EXPECT_EQ(expected_col2[i], filtered_col2->get_data()[i]); + } } - for (size_t i = 0; i < expected_col2.size(); ++i) { - EXPECT_EQ(expected_col2[i], filtered_col2->get_data()[i]); + + // Test append_to_block_by_selector + { + auto block = create_test_block(10); + // Create destination block with proper columns + auto type = std::make_shared(); + vectorized::Block dst_block; + dst_block.insert({type->create_column(), type, "col1"}); + dst_block.insert({type->create_column(), type, "col2"}); + vectorized::MutableBlock dst(&dst_block); + + // Create selector to select every other row + vectorized::IColumn::Selector selector(5, 0); + for (size_t i = 0; i < 5; ++i) { + selector[i] = i * 2; // Select rows 0,2,4,6,8 + } + + // Perform selection + EXPECT_TRUE(block.append_to_block_by_selector(&dst, selector).ok()); + EXPECT_EQ(5, dst.rows()); + + // Verify selected data + const vectorized::Block& result_block = dst.to_block(); + + const auto* selected_col1 = assert_cast*>( + result_block.get_by_position(0).column.get()); + const auto* selected_col2 = assert_cast*>( + result_block.get_by_position(1).column.get()); + + // Expected values after selection + std::vector expected_col1 = {0, 2, 4, 6, 8}; + std::vector expected_col2 = {0, 4, 8, 12, 16}; + + for (size_t i = 0; i < expected_col1.size(); ++i) { + EXPECT_EQ(expected_col1[i], selected_col1->get_data()[i]); + EXPECT_EQ(expected_col2[i], selected_col2->get_data()[i]); + } } } - // Test filter_block with nullable filter column + // Test with const columns { - auto test_block = create_test_block(10); + auto create_test_block = [](int size) { + vectorized::Block test_block; + auto type = std::make_shared(); - // Create nullable filter column - auto nullable_filter = vectorized::ColumnNullable::create( - vectorized::ColumnVector::create(10, 1), // all true - vectorized::ColumnVector::create(10, 0) // no nulls - ); - auto filter_type = std::make_shared( - std::make_shared()); + // Create const columns with fixed values + auto base_col1 = vectorized::ColumnVector::create(); + base_col1->insert_value(42); + auto const_col1 = vectorized::ColumnConst::create(base_col1->get_ptr(), size); - // Add filter column to block - test_block.insert({nullable_filter->get_ptr(), filter_type, "filter"}); + auto base_col2 = vectorized::ColumnVector::create(); + base_col2->insert_value(24); + auto const_col2 = vectorized::ColumnConst::create(base_col2->get_ptr(), size); - // Test four-parameter version - std::vector columns_to_filter = {0, 1}; - EXPECT_TRUE(vectorized::Block::filter_block(&test_block, columns_to_filter, 2, 2).ok()); - EXPECT_EQ(10, test_block.rows()); // All rows kept + test_block.insert({const_col1->get_ptr(), type, "const_col1"}); + test_block.insert({const_col2->get_ptr(), type, "const_col2"}); + return test_block; + }; - // Test three-parameter version - auto test_block2 = create_test_block(10); - test_block2.insert({nullable_filter->get_ptr(), filter_type, "filter"}); - EXPECT_TRUE(vectorized::Block::filter_block(&test_block2, 2, 2).ok()); - EXPECT_EQ(10, test_block2.rows()); // All rows kept - } + // Test filter_block_internal with filter only + { + auto test_block = create_test_block(10); + vectorized::IColumn::Filter filter(10, 1); + filter[0] = 0; + filter[5] = 0; - // Test filter_block with const filter column - { - auto test_block = create_test_block(10); + vectorized::Block::filter_block_internal(&test_block, filter); + EXPECT_EQ(8, test_block.rows()); - // Create const filter column (false) - auto const_filter = vectorized::ColumnConst::create( - vectorized::ColumnVector::create(1, 0), // false - 10); - auto filter_type = std::make_shared(); + const auto* filtered_col1 = assert_cast( + test_block.get_by_position(0).column.get()); + const auto* filtered_col2 = assert_cast( + test_block.get_by_position(1).column.get()); - // Add filter column to block - test_block.insert({const_filter->get_ptr(), filter_type, "filter"}); + EXPECT_EQ(42, filtered_col1->get_int(0)); + EXPECT_EQ(24, filtered_col2->get_int(0)); + EXPECT_EQ(8, filtered_col1->size()); + EXPECT_EQ(8, filtered_col2->size()); + } - // Test four-parameter version - std::vector columns_to_filter = {0, 1}; - EXPECT_TRUE(vectorized::Block::filter_block(&test_block, columns_to_filter, 2, 2).ok()); - EXPECT_EQ(0, test_block.rows()); // All rows filtered out + // Test filter_block_internal with specific columns + { + auto test_block = create_test_block(10); + vectorized::IColumn::Filter filter(10, 1); + filter[0] = 0; + std::vector columns_to_filter = {0}; - // Test three-parameter version - auto test_block2 = create_test_block(10); - test_block2.insert({const_filter->get_ptr(), filter_type, "filter"}); - EXPECT_TRUE(vectorized::Block::filter_block(&test_block2, 2, 2).ok()); - EXPECT_EQ(0, test_block2.rows()); // All rows filtered out - } + vectorized::Block::filter_block_internal(&test_block, columns_to_filter, filter); + EXPECT_EQ(9, test_block.rows()); - // Test filter_block with regular filter column - { - auto test_block = create_test_block(10); + const auto* filtered_col1 = assert_cast( + test_block.get_by_position(0).column.get()); + const auto* filtered_col2 = assert_cast( + test_block.get_by_position(1).column.get()); - // Create regular filter column - auto filter_column = vectorized::ColumnVector::create(); - for (size_t i = 0; i < 10; ++i) { - filter_column->insert_value(i % 2); // Keep odd-indexed rows + EXPECT_EQ(42, filtered_col1->get_int(0)); + EXPECT_EQ(24, filtered_col2->get_int(0)); + EXPECT_EQ(9, filtered_col1->size()); + EXPECT_EQ(10, filtered_col2->size()); // Second column unchanged } - auto filter_type = std::make_shared(); - // Add filter column to block - test_block.insert({filter_column->get_ptr(), filter_type, "filter"}); + // Test filter_block_internal with column_to_keep + { + auto test_block = create_test_block(10); + vectorized::IColumn::Filter filter(10, 1); + filter[0] = 0; + filter[5] = 0; + uint32_t column_to_keep = 1; - // Test four-parameter version - std::vector columns_to_filter = {0, 1}; - EXPECT_TRUE(vectorized::Block::filter_block(&test_block, columns_to_filter, 2, 2).ok()); - EXPECT_EQ(5, test_block.rows()); // Half rows kept + vectorized::Block::filter_block_internal(&test_block, filter, column_to_keep); + EXPECT_EQ(8, test_block.rows()); + EXPECT_EQ(2, test_block.columns()); - // Verify filtered data - const auto* filtered_col1 = assert_cast*>( - test_block.get_by_position(0).column.get()); - const auto* filtered_col2 = assert_cast*>( - test_block.get_by_position(1).column.get()); + const auto* filtered_col1 = assert_cast( + test_block.get_by_position(0).column.get()); + const auto* filtered_col2 = assert_cast( + test_block.get_by_position(1).column.get()); - std::vector expected_col1 = {1, 3, 5, 7, 9}; - std::vector expected_col2 = {2, 6, 10, 14, 18}; + EXPECT_EQ(42, filtered_col1->get_int(0)); + EXPECT_EQ(24, filtered_col2->get_int(0)); + EXPECT_EQ(8, filtered_col1->size()); + EXPECT_EQ(10, filtered_col2->size()); // Second column unchanged + } - for (size_t i = 0; i < expected_col1.size(); ++i) { - EXPECT_EQ(expected_col1[i], filtered_col1->get_data()[i]); - EXPECT_EQ(expected_col2[i], filtered_col2->get_data()[i]); + // Test filter_block with nullable filter column + { + auto test_block = create_test_block(10); + + auto nullable_filter = vectorized::ColumnNullable::create( + vectorized::ColumnVector::create(10, 1), + vectorized::ColumnVector::create(10, 0) + ); + auto filter_type = std::make_shared( + std::make_shared()); + + test_block.insert({nullable_filter->get_ptr(), filter_type, "filter"}); + + std::vector columns_to_filter = {0, 1}; + EXPECT_TRUE(vectorized::Block::filter_block(&test_block, columns_to_filter, 2, 2).ok()); + EXPECT_EQ(10, test_block.rows()); + + auto test_block2 = create_test_block(10); + test_block2.insert({nullable_filter->get_ptr(), filter_type, "filter"}); + EXPECT_TRUE(vectorized::Block::filter_block(&test_block2, 2, 2).ok()); + EXPECT_EQ(10, test_block2.rows()); } - // Test three-parameter version - auto test_block2 = create_test_block(10); - test_block2.insert({filter_column->get_ptr(), filter_type, "filter"}); - EXPECT_TRUE(vectorized::Block::filter_block(&test_block2, 2, 2).ok()); - EXPECT_EQ(5, test_block2.rows()); // Half rows kept + // Test filter_block with const filter column + { + auto test_block = create_test_block(10); - // Verify filtered data - filtered_col1 = assert_cast*>( - test_block2.get_by_position(0).column.get()); - filtered_col2 = assert_cast*>( - test_block2.get_by_position(1).column.get()); + auto const_filter = vectorized::ColumnConst::create( + vectorized::ColumnVector::create(1, 0), + 10); + auto filter_type = std::make_shared(); - for (size_t i = 0; i < expected_col1.size(); ++i) { - EXPECT_EQ(expected_col1[i], filtered_col1->get_data()[i]); - EXPECT_EQ(expected_col2[i], filtered_col2->get_data()[i]); + test_block.insert({const_filter->get_ptr(), filter_type, "filter"}); + + std::vector columns_to_filter = {0, 1}; + EXPECT_TRUE(vectorized::Block::filter_block(&test_block, columns_to_filter, 2, 2).ok()); + EXPECT_EQ(0, test_block.rows()); + + auto test_block2 = create_test_block(10); + test_block2.insert({const_filter->get_ptr(), filter_type, "filter"}); + EXPECT_TRUE(vectorized::Block::filter_block(&test_block2, 2, 2).ok()); + EXPECT_EQ(0, test_block2.rows()); + } + // Test filter_block with regular filter column + { + auto test_block = create_test_block(10); + + // Create regular filter column + auto filter_column = vectorized::ColumnVector::create(); + for (size_t i = 0; i < 10; ++i) { + filter_column->insert_value(i % 2); // Keep odd-indexed rows + } + auto filter_type = std::make_shared(); + + // Add filter column to block + test_block.insert({filter_column->get_ptr(), filter_type, "filter"}); + + // Test four-parameter version + std::vector columns_to_filter = {0, 1}; + EXPECT_TRUE(vectorized::Block::filter_block(&test_block, columns_to_filter, 2, 2).ok()); + EXPECT_EQ(5, test_block.rows()); // Half rows kept + + // Verify filtered data + const auto* filtered_col1 = assert_cast( + test_block.get_by_position(0).column.get()); + const auto* filtered_col2 = assert_cast( + test_block.get_by_position(1).column.get()); + + EXPECT_EQ(42, filtered_col1->get_int(0)); + EXPECT_EQ(24, filtered_col2->get_int(0)); + EXPECT_EQ(5, filtered_col1->size()); + EXPECT_EQ(5, filtered_col2->size()); + + // Test three-parameter version + auto test_block2 = create_test_block(10); + test_block2.insert({filter_column->get_ptr(), filter_type, "filter"}); + EXPECT_TRUE(vectorized::Block::filter_block(&test_block2, 2, 2).ok()); + EXPECT_EQ(5, test_block2.rows()); // Half rows kept + + // Verify filtered data + filtered_col1 = assert_cast( + test_block2.get_by_position(0).column.get()); + filtered_col2 = assert_cast( + test_block2.get_by_position(1).column.get()); + + EXPECT_EQ(42, filtered_col1->get_int(0)); + EXPECT_EQ(24, filtered_col2->get_int(0)); + EXPECT_EQ(5, filtered_col1->size()); + EXPECT_EQ(5, filtered_col2->size()); + } + + // Test append_to_block_by_selector + { + auto block = create_test_block(10); + // Create destination block with proper columns + auto type = std::make_shared(); + vectorized::Block dst_block; + dst_block.insert({type->create_column(), type, "const_col1"}); + dst_block.insert({type->create_column(), type, "const_col2"}); + vectorized::MutableBlock dst(&dst_block); + + // Create selector to select every other row + vectorized::IColumn::Selector selector(5, 0); + for (size_t i = 0; i < 5; ++i) { + selector[i] = i * 2; // Select rows 0,2,4,6,8 + } + + // Perform selection + EXPECT_TRUE(block.append_to_block_by_selector(&dst, selector).ok()); + // Skip const columns + EXPECT_EQ(0, dst.rows()); + + // Verify selected data + const vectorized::Block& result_block = dst.to_block(); + + const auto* selected_col1 = assert_cast*>( + result_block.get_by_position(0).column.get()); + const auto* selected_col2 = assert_cast*>( + result_block.get_by_position(1).column.get()); + + EXPECT_EQ(0, selected_col1->get_int(0)); + EXPECT_EQ(0, selected_col2->get_int(0)); + EXPECT_EQ(0, selected_col1->size()); + EXPECT_EQ(0, selected_col2->size()); } } - // Test append_to_block_by_selector + // Test with nullable columns { - // Create destination block with proper columns - auto type = std::make_shared(); - vectorized::Block dst_block; - dst_block.insert({type->create_column(), type, "col1"}); - dst_block.insert({type->create_column(), type, "col2"}); - vectorized::MutableBlock dst(&dst_block); + auto create_test_block = [](int size) { + vectorized::Block test_block; + auto base_type = std::make_shared(); + auto nullable_type = std::make_shared(base_type); + + // Create nullable columns + auto col1 = vectorized::ColumnNullable::create( + vectorized::ColumnVector::create(), + vectorized::ColumnVector::create()); + auto col2 = vectorized::ColumnNullable::create( + vectorized::ColumnVector::create(), + vectorized::ColumnVector::create()); + + // Insert test data + auto* nested1 = assert_cast*>( + col1->get_nested_column_ptr().get()); + auto* nested2 = assert_cast*>( + col2->get_nested_column_ptr().get()); + auto* null_map1 = assert_cast*>( + col1->get_null_map_column_ptr().get()); + auto* null_map2 = assert_cast*>( + col2->get_null_map_column_ptr().get()); + + for (int i = 0; i < size; ++i) { + nested1->insert_value(i); + nested2->insert_value(i * 2); + null_map1->insert_value(i % 2); // Even rows are not null + null_map2->insert_value((i + 1) % 2); // Odd rows are not null + } - // Create selector to select every other row - vectorized::IColumn::Selector selector(5, 0); - for (size_t i = 0; i < 5; ++i) { - selector[i] = i * 2; // Select rows 0,2,4,6,8 + test_block.insert({col1->get_ptr(), nullable_type, "nullable_col1"}); + test_block.insert({col2->get_ptr(), nullable_type, "nullable_col2"}); + return test_block; + }; + + // Test filter_block_internal with filter only + { + auto test_block = create_test_block(10); + vectorized::IColumn::Filter filter(10, 1); + filter[0] = 0; + filter[5] = 0; + + vectorized::Block::filter_block_internal(&test_block, filter); + EXPECT_EQ(8, test_block.rows()); + + const auto* filtered_col1 = assert_cast( + test_block.get_by_position(0).column.get()); + const auto* filtered_col2 = assert_cast( + test_block.get_by_position(1).column.get()); + // Verify filtered data + for (size_t i = 0; i < 8; ++i) { + size_t original_row = (i < 4) ? i + 1 : i + 2; + bool expected_null_col1 = original_row % 2; + bool expected_null_col2 = (original_row + 1) % 2; + EXPECT_EQ(expected_null_col1, filtered_col1->is_null_at(i)); + EXPECT_EQ(expected_null_col2, filtered_col2->is_null_at(i)); + + if (!filtered_col1->is_null_at(i)) { + EXPECT_EQ(original_row, assert_cast*>( + filtered_col1->get_nested_column_ptr().get())->get_data()[i]); + } + if (!filtered_col2->is_null_at(i)) { + EXPECT_EQ(original_row * 2, assert_cast*>( + filtered_col2->get_nested_column_ptr().get())->get_data()[i]); + } + } } - // Perform selection - EXPECT_TRUE(block.append_to_block_by_selector(&dst, selector).ok()); - EXPECT_EQ(5, dst.rows()); + // Test filter_block_internal with specific columns + { + auto test_block = create_test_block(10); + vectorized::IColumn::Filter filter(10, 1); + filter[0] = 0; + std::vector columns_to_filter = {0}; + + vectorized::Block::filter_block_internal(&test_block, columns_to_filter, filter); + EXPECT_EQ(9, test_block.rows()); + + const auto* filtered_col1 = assert_cast( + test_block.get_by_position(0).column.get()); + const auto* filtered_col2 = assert_cast( + test_block.get_by_position(1).column.get()); + + // Verify filtered data for col1 + for (size_t i = 0; i < 9; ++i) { + size_t original_row = i + 1; + EXPECT_EQ(original_row % 2, filtered_col1->is_null_at(i)); + if (!filtered_col1->is_null_at(i)) { + EXPECT_EQ(original_row, assert_cast*>( + filtered_col1->get_nested_column_ptr().get())->get_data()[i]); + } + } - // Verify selected data - const vectorized::Block& result_block = dst.to_block(); + // Verify col2 remains unchanged + for (size_t i = 0; i < 10; ++i) { + EXPECT_EQ((i + 1) % 2, filtered_col2->is_null_at(i)); + if (!filtered_col2->is_null_at(i)) { + EXPECT_EQ(i * 2, assert_cast*>( + filtered_col2->get_nested_column_ptr().get())->get_data()[i]); + } + } + } + // Test filter_block_internal with column_to_keep + { + auto test_block = create_test_block(10); + vectorized::IColumn::Filter filter(10, 1); + filter[0] = 0; + filter[5] = 0; + uint32_t column_to_keep = 1; + + vectorized::Block::filter_block_internal(&test_block, filter, column_to_keep); + EXPECT_EQ(8, test_block.rows()); + + const auto* filtered_col1 = assert_cast( + test_block.get_by_position(0).column.get()); + const auto* filtered_col2 = assert_cast( + test_block.get_by_position(1).column.get()); + + // Verify filtered data for col1 + for (size_t i = 0; i < 8; ++i) { + size_t original_row = (i < 4) ? i + 1 : i + 2; + bool expected_null_col1 = original_row % 2; + EXPECT_EQ(expected_null_col1, filtered_col1->is_null_at(i)); + if (!filtered_col1->is_null_at(i)) { + EXPECT_EQ(original_row, assert_cast*>( + filtered_col1->get_nested_column_ptr().get())->get_data()[i]); + } + } - const auto* selected_col1 = assert_cast*>( - result_block.get_by_position(0).column.get()); - const auto* selected_col2 = assert_cast*>( - result_block.get_by_position(1).column.get()); + // Verify col2 remains unchanged + for (size_t i = 0; i < 10; ++i) { + EXPECT_EQ((i + 1) % 2, filtered_col2->is_null_at(i)); + if (!filtered_col2->is_null_at(i)) { + EXPECT_EQ(i * 2, assert_cast*>( + filtered_col2->get_nested_column_ptr().get())->get_data()[i]); + } + } + } + + // Test filter_block with nullable filter column + { + auto test_block = create_test_block(10); + + // Create nullable filter column + auto nullable_filter = vectorized::ColumnNullable::create( + vectorized::ColumnVector::create(10, 1), + vectorized::ColumnVector::create(10, 0) + ); + auto filter_type = std::make_shared( + std::make_shared()); + + test_block.insert({nullable_filter->get_ptr(), filter_type, "filter"}); + + // Test four-parameter version + std::vector columns_to_filter = {0, 1}; + EXPECT_TRUE(vectorized::Block::filter_block(&test_block, columns_to_filter, 2, 2).ok()); + EXPECT_EQ(10, test_block.rows()); // All rows kept + + // Test three-parameter version + auto test_block2 = create_test_block(10); + test_block2.insert({nullable_filter->get_ptr(), filter_type, "filter"}); + EXPECT_TRUE(vectorized::Block::filter_block(&test_block2, 2, 2).ok()); + EXPECT_EQ(10, test_block2.rows()); // All rows kept + } + + // Test filter_block with const filter column + { + auto test_block = create_test_block(10); + + // Create const filter column (false) + auto const_filter = vectorized::ColumnConst::create( + vectorized::ColumnVector::create(1, 0), + 10); + auto filter_type = std::make_shared(); + + test_block.insert({const_filter->get_ptr(), filter_type, "filter"}); + + // Test four-parameter version + std::vector columns_to_filter = {0, 1}; + EXPECT_TRUE(vectorized::Block::filter_block(&test_block, columns_to_filter, 2, 2).ok()); + EXPECT_EQ(0, test_block.rows()); // All rows filtered out + + // Test three-parameter version + auto test_block2 = create_test_block(10); + test_block2.insert({const_filter->get_ptr(), filter_type, "filter"}); + EXPECT_TRUE(vectorized::Block::filter_block(&test_block2, 2, 2).ok()); + EXPECT_EQ(0, test_block2.rows()); // All rows filtered out + } + + // Test filter_block with regular filter column + { + auto test_block = create_test_block(10); - // Expected values after selection - std::vector expected_col1 = {0, 2, 4, 6, 8}; - std::vector expected_col2 = {0, 4, 8, 12, 16}; + // Create regular filter column + auto filter_column = vectorized::ColumnVector::create(); + for (size_t i = 0; i < 10; ++i) { + filter_column->insert_value(i % 2); // Keep odd-indexed rows + } + auto filter_type = std::make_shared(); + + test_block.insert({filter_column->get_ptr(), filter_type, "filter"}); + + // Test four-parameter version + std::vector columns_to_filter = {0, 1}; + EXPECT_TRUE(vectorized::Block::filter_block(&test_block, columns_to_filter, 2, 2).ok()); + EXPECT_EQ(5, test_block.rows()); // Half rows kept - for (size_t i = 0; i < expected_col1.size(); ++i) { - EXPECT_EQ(expected_col1[i], selected_col1->get_data()[i]); - EXPECT_EQ(expected_col2[i], selected_col2->get_data()[i]); + // Verify filtered data + const auto* filtered_col1 = assert_cast( + test_block.get_by_position(0).column.get()); + const auto* filtered_col2 = assert_cast( + test_block.get_by_position(1).column.get()); + + for (size_t i = 0; i < 5; ++i) { + size_t original_row = i * 2 + 1; + EXPECT_EQ(original_row % 2, filtered_col1->is_null_at(i)); + EXPECT_EQ((original_row + 1) % 2, filtered_col2->is_null_at(i)); + + if (!filtered_col1->is_null_at(i)) { + EXPECT_EQ(original_row, assert_cast*>( + filtered_col1->get_nested_column_ptr().get())->get_data()[i]); + } + if (!filtered_col2->is_null_at(i)) { + EXPECT_EQ(original_row * 2, assert_cast*>( + filtered_col2->get_nested_column_ptr().get())->get_data()[i]); + } + } + } + // Test append_to_block_by_selector + { + auto block = create_test_block(10); + + // Create destination block with proper columns + auto base_type = std::make_shared(); + auto nullable_type = std::make_shared(base_type); + vectorized::Block dst_block; + + // Create nullable columns for destination + auto dst_col1 = vectorized::ColumnNullable::create( + vectorized::ColumnVector::create(), + vectorized::ColumnVector::create()); + auto dst_col2 = vectorized::ColumnNullable::create( + vectorized::ColumnVector::create(), + vectorized::ColumnVector::create()); + + dst_block.insert({dst_col1->get_ptr(), nullable_type, "nullable_col1"}); + dst_block.insert({dst_col2->get_ptr(), nullable_type, "nullable_col2"}); + vectorized::MutableBlock dst(&dst_block); + + // Create selector to select specific rows + vectorized::IColumn::Selector selector(5); + for (size_t i = 0; i < 5; ++i) { + selector[i] = i * 2; // Select rows 0,2,4,6,8 + } + + // Perform selection + EXPECT_TRUE(block.append_to_block_by_selector(&dst, selector).ok()); + EXPECT_EQ(5, dst.rows()); + + // Verify selected data + const vectorized::Block& result_block = dst.to_block(); + + const auto* selected_col1 = assert_cast( + result_block.get_by_position(0).column.get()); + const auto* selected_col2 = assert_cast( + result_block.get_by_position(1).column.get()); + + // Verify data and null map for selected rows + for (size_t i = 0; i < 5; ++i) { + size_t original_row = i * 2; + + // Verify null flags + EXPECT_EQ(original_row % 2, selected_col1->is_null_at(i)); + EXPECT_EQ((original_row + 1) % 2, selected_col2->is_null_at(i)); + + // Verify values for non-null elements + if (!selected_col1->is_null_at(i)) { + EXPECT_EQ(original_row, assert_cast*>( + selected_col1->get_nested_column_ptr().get())->get_data()[i]); + } + if (!selected_col2->is_null_at(i)) { + EXPECT_EQ(original_row * 2, assert_cast*>( + selected_col2->get_nested_column_ptr().get())->get_data()[i]); + } + } } } } + TEST(BlockTest, RowCheck) { vectorized::Block block; auto type = std::make_shared(); From e17870e83891b03b05f9d97c8abe5626917bdea1 Mon Sep 17 00:00:00 2001 From: yoruet <1559650411@qq.com> Date: Mon, 2 Dec 2024 20:16:17 +0800 Subject: [PATCH 25/41] Enhance BlockTest with extensive tests for row operations on empty, regular, const, and nullable columns. --- be/test/vec/core/block_test.cpp | 180 +++++++++++++++++++++++++++----- 1 file changed, 155 insertions(+), 25 deletions(-) diff --git a/be/test/vec/core/block_test.cpp b/be/test/vec/core/block_test.cpp index 3a552a1d80ef35..b1006e7f2bebb2 100644 --- a/be/test/vec/core/block_test.cpp +++ b/be/test/vec/core/block_test.cpp @@ -3137,42 +3137,172 @@ TEST(BlockTest, FilterAndSelector) { } TEST(BlockTest, RowCheck) { - vectorized::Block block; - auto type = std::make_shared(); - - // Add columns with same number of rows + // Test with empty block { - auto col1 = vectorized::ColumnVector::create(); - col1->insert_value(1); - block.insert({std::move(col1), type, "col1"}); + vectorized::Block empty_block; + + // Test row number check + EXPECT_NO_THROW(empty_block.check_number_of_rows()); + EXPECT_EQ(0, empty_block.rows()); + + // Test clear operations + empty_block.clear_column_data(1); + EXPECT_EQ(0, empty_block.columns()); + + empty_block.clear(); + EXPECT_EQ(0, empty_block.columns()); + + // Test swap operations + vectorized::Block other_empty_block; + empty_block.swap(other_empty_block); + EXPECT_EQ(0, empty_block.columns()); + EXPECT_EQ(0, other_empty_block.columns()); } + // Test with regular columns { - auto col2 = vectorized::ColumnVector::create(); - block.insert({std::move(col2), type, "col2"}); - } + vectorized::Block block; + auto type = std::make_shared(); + + // Test row number check with different row counts + { + auto col1 = vectorized::ColumnVector::create(); + col1->insert_value(1); + block.insert({std::move(col1), type, "col1"}); + + auto col2 = vectorized::ColumnVector::create(); + block.insert({std::move(col2), type, "col2"}); - // Test row number check - EXPECT_THROW(block.check_number_of_rows(), Exception); + EXPECT_THROW(block.check_number_of_rows(), Exception); + } - // Test clear operations - block.clear_column_data(1); // Clear first column and delete the rest columns - EXPECT_EQ(1, block.columns()); + // Test clear operations + { + block.clear_column_data(1); + EXPECT_EQ(1, block.columns()); - block.clear(); - EXPECT_EQ(0, block.columns()); + block.clear(); + EXPECT_EQ(0, block.columns()); + } + + // Test swap operations + { + vectorized::Block other_block; + auto col = vectorized::ColumnVector::create(); + col->insert_value(1); + other_block.insert({std::move(col), type, "col1"}); + + block.swap(other_block); + EXPECT_EQ(1, block.columns()); + EXPECT_EQ(0, other_block.columns()); + } + } - // Test swap operations - vectorized::Block other_block; + // Test with const columns { - auto col1 = vectorized::ColumnVector::create(); - col1->insert_value(1); - other_block.insert({std::move(col1), type, "col1"}); + vectorized::Block block; + auto type = std::make_shared(); + + // Test row number check with const columns + { + auto base_col1 = vectorized::ColumnVector::create(); + base_col1->insert_value(42); + auto const_col1 = vectorized::ColumnConst::create(base_col1->get_ptr(), 5); + block.insert({const_col1->get_ptr(), type, "const_col1"}); + + auto base_col2 = vectorized::ColumnVector::create(); + base_col2->insert_value(24); + auto const_col2 = vectorized::ColumnConst::create(base_col2->get_ptr(), 5); + block.insert({const_col2->get_ptr(), type, "const_col2"}); + + EXPECT_NO_THROW(block.check_number_of_rows()); + EXPECT_EQ(5, block.rows()); + } + + // Test clear operations + { + block.clear_column_data(1); + EXPECT_EQ(1, block.columns()); + + block.clear(); + EXPECT_EQ(0, block.columns()); + } + + // Test swap operations + { + vectorized::Block other_block; + auto base_col = vectorized::ColumnVector::create(); + base_col->insert_value(42); + auto const_col = vectorized::ColumnConst::create(base_col->get_ptr(), 5); + other_block.insert({const_col->get_ptr(), type, "const_col1"}); + + block.swap(other_block); + EXPECT_EQ(1, block.columns()); + EXPECT_EQ(0, other_block.columns()); + } } - block.swap(other_block); - EXPECT_EQ(1, block.columns()); - EXPECT_EQ(0, other_block.columns()); + // Test with nullable columns + { + vectorized::Block block; + auto base_type = std::make_shared(); + auto nullable_type = std::make_shared(base_type); + + // Test row number check with nullable columns + { + auto col1 = vectorized::ColumnNullable::create( + vectorized::ColumnVector::create(), + vectorized::ColumnVector::create()); + + // Need to cast to concrete type before calling insert_value + auto* nested1 = assert_cast*>( + col1->get_nested_column_ptr().get()); + auto* null_map1 = assert_cast*>( + col1->get_null_map_column_ptr().get()); + nested1->insert_value(1); + null_map1->insert_value(0); + + block.insert({col1->get_ptr(), nullable_type, "nullable_col1"}); + + auto col2 = vectorized::ColumnNullable::create( + vectorized::ColumnVector::create(), + vectorized::ColumnVector::create()); + block.insert({col2->get_ptr(), nullable_type, "nullable_col2"}); + + EXPECT_THROW(block.check_number_of_rows(), Exception); + } + + // Test clear operations + { + block.clear_column_data(1); + EXPECT_EQ(1, block.columns()); + + block.clear(); + EXPECT_EQ(0, block.columns()); + } + + // Test swap operations + { + vectorized::Block other_block; + auto col = vectorized::ColumnNullable::create( + vectorized::ColumnVector::create(), + vectorized::ColumnVector::create()); + + // Need to cast to concrete type before calling insert_value + auto* nested = assert_cast*>( + col->get_nested_column_ptr().get()); + auto* null_map = assert_cast*>( + col->get_null_map_column_ptr().get()); + nested->insert_value(1); + null_map->insert_value(0); + + other_block.insert({col->get_ptr(), nullable_type, "nullable_col1"}); + + block.swap(other_block); + EXPECT_EQ(1, block.columns()); + EXPECT_EQ(0, other_block.columns()); + } + } } TEST(BlockTest, ClearColumnData) { From 2899971ac70bd5124edebcfc69c7f58aea5a5510 Mon Sep 17 00:00:00 2001 From: yoruet <1559650411@qq.com> Date: Sun, 8 Dec 2024 13:23:28 +0800 Subject: [PATCH 26/41] Enhance BlockTest with extensive tests for clearing column data across empty, regular, const, and nullable columns. --- be/test/vec/core/block_test.cpp | 288 +++++++++++++++++++++++--------- 1 file changed, 205 insertions(+), 83 deletions(-) diff --git a/be/test/vec/core/block_test.cpp b/be/test/vec/core/block_test.cpp index b1006e7f2bebb2..7990967d364541 100644 --- a/be/test/vec/core/block_test.cpp +++ b/be/test/vec/core/block_test.cpp @@ -3306,126 +3306,248 @@ TEST(BlockTest, RowCheck) { } TEST(BlockTest, ClearColumnData) { - auto type = std::make_shared(); - - // Test case 1: Clear with column_size == -1 (clear all data but keep columns) + // Test with empty block { - vectorized::Block block; - - // Insert two columns with data + // Test clear with column_size == -1 { - auto col1 = vectorized::ColumnVector::create(); - col1->insert_value(1); - col1->insert_value(2); - block.insert({std::move(col1), type, "col1"}); + vectorized::Block block; + EXPECT_EQ(0, block.columns()); + EXPECT_EQ(0, block.rows()); + + block.clear_column_data(-1); + EXPECT_EQ(0, block.columns()); + EXPECT_EQ(0, block.rows()); } + + // Test clear with column_size == 0 { - auto col2 = vectorized::ColumnVector::create(); - col2->insert_value(3); - col2->insert_value(4); - block.insert({std::move(col2), type, "col2"}); + vectorized::Block block; + EXPECT_EQ(0, block.columns()); + EXPECT_EQ(0, block.rows()); + + block.clear_column_data(0); + EXPECT_EQ(0, block.columns()); + EXPECT_EQ(0, block.rows()); } - EXPECT_EQ(2, block.rows()); - EXPECT_EQ(2, block.columns()); + // Test clear with column_size > 0 + { + vectorized::Block block; + EXPECT_EQ(0, block.columns()); + EXPECT_EQ(0, block.rows()); - // Clear data with column_size = -1 - block.clear_column_data(-1); + block.clear_column_data(1); + EXPECT_EQ(0, block.columns()); + EXPECT_EQ(0, block.rows()); + } - // Verify columns are kept but data is cleared - EXPECT_EQ(0, block.rows()); - EXPECT_EQ(2, block.columns()); - EXPECT_EQ(0, block.get_by_position(0).column->size()); - EXPECT_EQ(0, block.get_by_position(1).column->size()); - } + // Test clear after insert empty column + { + vectorized::Block block; + auto type = std::make_shared(); + auto col = vectorized::ColumnVector::create(); + block.insert({std::move(col), type, "empty_col"}); - // Test case 2: Clear with specific column_size (remove extra columns) - { - vectorized::Block block; + EXPECT_EQ(1, block.columns()); + EXPECT_EQ(0, block.rows()); - // Insert three columns - { - auto col1 = vectorized::ColumnVector::create(); - col1->insert_value(1); - block.insert({std::move(col1), type, "col1"}); - } - { - auto col2 = vectorized::ColumnVector::create(); - col2->insert_value(2); - block.insert({std::move(col2), type, "col2"}); + block.clear_column_data(-1); + EXPECT_EQ(1, block.columns()); + EXPECT_EQ(0, block.rows()); + EXPECT_EQ(0, block.get_by_position(0).column->size()); } + + // Test clear after multiple empty columns { - auto col3 = vectorized::ColumnVector::create(); - col3->insert_value(3); - block.insert({std::move(col3), type, "col3"}); - } + vectorized::Block block; + auto type = std::make_shared(); + + for (int i = 0; i < 3; ++i) { + auto col = vectorized::ColumnVector::create(); + block.insert({std::move(col), type, "empty_col" + std::to_string(i)}); + } - EXPECT_EQ(3, block.columns()); + EXPECT_EQ(3, block.columns()); + EXPECT_EQ(0, block.rows()); - // Clear data and keep only 2 columns - block.clear_column_data(2); + // Test clear with different column_size values + block.clear_column_data(2); + EXPECT_EQ(2, block.columns()); + EXPECT_EQ(0, block.rows()); - // Verify extra columns are removed and remaining data is cleared - EXPECT_EQ(2, block.columns()); - EXPECT_EQ(0, block.rows()); - EXPECT_EQ(0, block.get_by_position(0).column->size()); - EXPECT_EQ(0, block.get_by_position(1).column->size()); + block.clear_column_data(-1); + EXPECT_EQ(2, block.columns()); + EXPECT_EQ(0, block.rows()); + + block.clear_column_data(0); + EXPECT_EQ(0, block.columns()); + EXPECT_EQ(0, block.rows()); + } } - // Test case 3: Clear with column_size larger than actual size + // Test with regular columns { - vectorized::Block block; + auto create_test_block = [](int num_columns) { + vectorized::Block block; + auto type = std::make_shared(); + + for (int i = 0; i < num_columns; ++i) { + auto col = vectorized::ColumnVector::create(); + col->insert_value(i + 1); + block.insert({std::move(col), type, "col" + std::to_string(i + 1)}); + } + return block; + }; - // Insert one column + // Test clear with column_size == -1 { - auto col1 = vectorized::ColumnVector::create(); - col1->insert_value(1); - block.insert({std::move(col1), type, "col1"}); + auto block = create_test_block(2); + EXPECT_EQ(2, block.columns()); + EXPECT_EQ(1, block.rows()); + + block.clear_column_data(-1); + EXPECT_EQ(2, block.columns()); + EXPECT_EQ(0, block.rows()); + EXPECT_EQ(0, block.get_by_position(0).column->size()); + EXPECT_EQ(0, block.get_by_position(1).column->size()); } - EXPECT_EQ(1, block.columns()); + // Test clear with specific column_size + { + auto block = create_test_block(3); + EXPECT_EQ(3, block.columns()); + + block.clear_column_data(2); + EXPECT_EQ(2, block.columns()); + EXPECT_EQ(0, block.rows()); + EXPECT_EQ(0, block.get_by_position(0).column->size()); + EXPECT_EQ(0, block.get_by_position(1).column->size()); + } - // Clear data with column_size > actual size - block.clear_column_data(2); + // Test clear with column_size larger than actual size + { + auto block = create_test_block(1); + EXPECT_EQ(1, block.columns()); - // Verify column is kept but data is cleared - EXPECT_EQ(1, block.columns()); - EXPECT_EQ(0, block.rows()); - EXPECT_EQ(0, block.get_by_position(0).column->size()); + block.clear_column_data(2); + EXPECT_EQ(1, block.columns()); + EXPECT_EQ(0, block.rows()); + EXPECT_EQ(0, block.get_by_position(0).column->size()); + } } - // Test case 4: Clear empty block + // Test with const columns { - vectorized::Block block; - EXPECT_EQ(0, block.columns()); + auto create_test_block = [](int num_columns) { + vectorized::Block block; + auto type = std::make_shared(); + + for (int i = 0; i < num_columns; ++i) { + auto base_col = vectorized::ColumnVector::create(); + base_col->insert_value(42 + i); + auto const_col = vectorized::ColumnConst::create(base_col->get_ptr(), 5); + block.insert({const_col->get_ptr(), type, "const_col" + std::to_string(i + 1)}); + } + return block; + }; - // Should not crash - block.clear_column_data(-1); - block.clear_column_data(0); - block.clear_column_data(1); + // Test clear with column_size == -1 + { + auto block = create_test_block(2); + EXPECT_EQ(2, block.columns()); + EXPECT_EQ(5, block.rows()); - EXPECT_EQ(0, block.columns()); + block.clear_column_data(-1); + EXPECT_EQ(2, block.columns()); + EXPECT_EQ(0, block.rows()); + EXPECT_EQ(0, block.get_by_position(0).column->size()); + EXPECT_EQ(0, block.get_by_position(1).column->size()); + } + + // Test clear with specific column_size + { + auto block = create_test_block(3); + EXPECT_EQ(3, block.columns()); + + block.clear_column_data(2); + EXPECT_EQ(2, block.columns()); + EXPECT_EQ(0, block.rows()); + EXPECT_EQ(0, block.get_by_position(0).column->size()); + EXPECT_EQ(0, block.get_by_position(1).column->size()); + } + + // Test clear with column_size larger than actual size + { + auto block = create_test_block(1); + EXPECT_EQ(1, block.columns()); + + block.clear_column_data(2); + EXPECT_EQ(1, block.columns()); + EXPECT_EQ(0, block.rows()); + EXPECT_EQ(0, block.get_by_position(0).column->size()); + } } - // Test case 5: Verify row_same_bit is cleared + // Test with nullable columns { - vectorized::Block block; + auto create_test_block = [](int num_columns) { + vectorized::Block block; + auto base_type = std::make_shared(); + auto nullable_type = std::make_shared(base_type); + + for (int i = 0; i < num_columns; ++i) { + auto col = vectorized::ColumnNullable::create( + vectorized::ColumnVector::create(), + vectorized::ColumnVector::create()); + + auto* nested = assert_cast*>( + col->get_nested_column_ptr().get()); + auto* null_map = assert_cast*>( + col->get_null_map_column_ptr().get()); + + nested->insert_value(i + 1); + null_map->insert_value(i % 2); + + block.insert({col->get_ptr(), nullable_type, "nullable_col" + std::to_string(i + 1)}); + } + return block; + }; - // Insert column with data + // Test clear with column_size == -1 { - auto col1 = vectorized::ColumnVector::create(); - col1->insert_value(1); - block.insert({std::move(col1), type, "col1"}); + auto block = create_test_block(2); + EXPECT_EQ(2, block.columns()); + EXPECT_EQ(1, block.rows()); + + block.clear_column_data(-1); + EXPECT_EQ(2, block.columns()); + EXPECT_EQ(0, block.rows()); + EXPECT_EQ(0, block.get_by_position(0).column->size()); + EXPECT_EQ(0, block.get_by_position(1).column->size()); } - // Set some row_same_bit data (if possible) - // Note: This might need adjustment based on how row_same_bit is actually used - block.clear_column_data(-1); + // Test clear with specific column_size + { + auto block = create_test_block(3); + EXPECT_EQ(3, block.columns()); + + block.clear_column_data(2); + EXPECT_EQ(2, block.columns()); + EXPECT_EQ(0, block.rows()); + EXPECT_EQ(0, block.get_by_position(0).column->size()); + EXPECT_EQ(0, block.get_by_position(1).column->size()); + } - // Verify everything is cleared - EXPECT_EQ(0, block.rows()); - EXPECT_EQ(1, block.columns()); - // Could add verification for row_same_bit if there's a way to check it + // Test clear with column_size larger than actual size + { + auto block = create_test_block(1); + EXPECT_EQ(1, block.columns()); + + block.clear_column_data(2); + EXPECT_EQ(1, block.columns()); + EXPECT_EQ(0, block.rows()); + EXPECT_EQ(0, block.get_by_position(0).column->size()); + } } } From ff6abd9422f712f328c90c81285d4bde933c92aa Mon Sep 17 00:00:00 2001 From: yoruet <1559650411@qq.com> Date: Wed, 18 Dec 2024 00:24:54 +0800 Subject: [PATCH 27/41] Enhance BlockTest with comprehensive tests for index operations on empty, regular, const, and nullable columns --- be/test/vec/core/block_test.cpp | 208 ++++++++++++++++++++++++++------ 1 file changed, 173 insertions(+), 35 deletions(-) diff --git a/be/test/vec/core/block_test.cpp b/be/test/vec/core/block_test.cpp index 7990967d364541..d06f485b5e116c 100644 --- a/be/test/vec/core/block_test.cpp +++ b/be/test/vec/core/block_test.cpp @@ -3552,51 +3552,189 @@ TEST(BlockTest, ClearColumnData) { } TEST(BlockTest, IndexByName) { - vectorized::Block block; - auto col = vectorized::ColumnVector::create(); - vectorized::DataTypePtr type(std::make_shared()); + // Test with empty block + { + vectorized::Block block; + block.initialize_index_by_name(); + + // Test basic name operations + EXPECT_FALSE(block.has("col1")); + EXPECT_THROW(block.get_position_by_name("col1"), Exception); + EXPECT_THROW(block.get_by_name("col1"), Exception); + EXPECT_EQ(nullptr, block.try_get_by_name("col1")); + } - // Add columns with duplicate names - block.insert({col->get_ptr(), type, "col1"}); - block.insert({col->get_ptr(), type, "col2"}); - block.insert({col->get_ptr(), type, "col1"}); // Duplicate name + // Test with regular columns + { + vectorized::Block block; + auto type = std::make_shared(); - // Test get_position_by_name returns first occurrence - EXPECT_EQ(0, block.get_position_by_name("col1")); - EXPECT_EQ(1, block.get_position_by_name("col2")); + // Add columns with regular values + { + auto col1 = vectorized::ColumnVector::create(); + col1->insert_value(1); + block.insert({std::move(col1), type, "col1"}); - // Initialize index - block.initialize_index_by_name(); + auto col2 = vectorized::ColumnVector::create(); + col2->insert_value(2); + block.insert({std::move(col2), type, "col2"}); - // Test get_position_by_name returns last occurrence - EXPECT_EQ(2, block.get_position_by_name("col1")); - EXPECT_EQ(1, block.get_position_by_name("col2")); + auto col3 = vectorized::ColumnVector::create(); + col3->insert_value(3); + block.insert({std::move(col3), type, "col1"}); // Duplicate name + } - // Test has with duplicate names - EXPECT_TRUE(block.has("col1")); - EXPECT_TRUE(block.has("col2")); - EXPECT_FALSE(block.has("col3")); + // Test before index initialization + EXPECT_EQ(0, block.get_position_by_name("col1")); // Returns first occurrence + EXPECT_EQ(1, block.get_position_by_name("col2")); - // Test get_by_name with duplicate names - EXPECT_EQ(0, block.get_by_name("col1").column->size()); - EXPECT_THROW(block.get_by_name("col3"), Exception); + // Test after index initialization + block.initialize_index_by_name(); + EXPECT_EQ(2, block.get_position_by_name("col1")); // Returns last occurrence + EXPECT_EQ(1, block.get_position_by_name("col2")); - // Test try_get_by_name with duplicate names - EXPECT_NE(nullptr, block.try_get_by_name("col1")); - EXPECT_EQ(nullptr, block.try_get_by_name("non_existent")); + // Test has() function + EXPECT_TRUE(block.has("col1")); + EXPECT_TRUE(block.has("col2")); + EXPECT_FALSE(block.has("col3")); - // Test after modifying block structure - block.erase(2); // Remove last "col1" - block.initialize_index_by_name(); // Re-initialize index + // Test get_by_name + const auto& col1 = block.get_by_name("col1"); + EXPECT_EQ(1, col1.column->size()); + EXPECT_THROW(block.get_by_name("col3"), Exception); - // Now the first "col1" should be found - EXPECT_EQ(0, block.get_position_by_name("col1")); + // Test try_get_by_name + EXPECT_NE(nullptr, block.try_get_by_name("col1")); + EXPECT_EQ(nullptr, block.try_get_by_name("non_existent")); - // Test with empty block - block.clear(); - block.initialize_index_by_name(); - EXPECT_FALSE(block.has("col1")); - EXPECT_THROW(block.get_position_by_name("col1"), Exception); + // Test after structure modification + block.erase(2); // Remove last "col1" + block.initialize_index_by_name(); + EXPECT_EQ(0, block.get_position_by_name("col1")); // Now first "col1" is found + } + + // Test with const columns + { + vectorized::Block block; + auto type = std::make_shared(); + + // Add columns with const values + { + auto base_col1 = vectorized::ColumnVector::create(); + base_col1->insert_value(42); + auto const_col1 = vectorized::ColumnConst::create(base_col1->get_ptr(), 5); + block.insert({const_col1->get_ptr(), type, "const_col1"}); + + auto base_col2 = vectorized::ColumnVector::create(); + base_col2->insert_value(24); + auto const_col2 = vectorized::ColumnConst::create(base_col2->get_ptr(), 5); + block.insert({const_col2->get_ptr(), type, "const_col2"}); + + auto base_col3 = vectorized::ColumnVector::create(); + base_col3->insert_value(33); + auto const_col3 = vectorized::ColumnConst::create(base_col3->get_ptr(), 5); + block.insert({const_col3->get_ptr(), type, "const_col1"}); // Duplicate name + } + + // Test before index initialization + EXPECT_EQ(0, block.get_position_by_name("const_col1")); // Returns first occurrence + EXPECT_EQ(1, block.get_position_by_name("const_col2")); + + // Test after index initialization + block.initialize_index_by_name(); + EXPECT_EQ(2, block.get_position_by_name("const_col1")); // Returns last occurrence + EXPECT_EQ(1, block.get_position_by_name("const_col2")); + + // Test has() function + EXPECT_TRUE(block.has("const_col1")); + EXPECT_TRUE(block.has("const_col2")); + EXPECT_FALSE(block.has("const_col3")); + + // Test get_by_name + const auto& col1 = block.get_by_name("const_col1"); + EXPECT_EQ(5, col1.column->size()); + EXPECT_THROW(block.get_by_name("const_col3"), Exception); + + // Test try_get_by_name + EXPECT_NE(nullptr, block.try_get_by_name("const_col1")); + EXPECT_EQ(nullptr, block.try_get_by_name("non_existent")); + + // Test after structure modification + block.erase(2); // Remove last "const_col1" + block.initialize_index_by_name(); + EXPECT_EQ(0, block.get_position_by_name("const_col1")); // Now first "const_col1" is found + } + + // Test with nullable columns + { + vectorized::Block block; + auto base_type = std::make_shared(); + auto nullable_type = std::make_shared(base_type); + + // Add columns with nullable values + { + auto col1 = vectorized::ColumnNullable::create( + vectorized::ColumnVector::create(), + vectorized::ColumnVector::create()); + auto* nested1 = assert_cast*>( + col1->get_nested_column_ptr().get()); + auto* null_map1 = assert_cast*>( + col1->get_null_map_column_ptr().get()); + nested1->insert_value(1); + null_map1->insert_value(0); + block.insert({col1->get_ptr(), nullable_type, "nullable_col1"}); + + auto col2 = vectorized::ColumnNullable::create( + vectorized::ColumnVector::create(), + vectorized::ColumnVector::create()); + auto* nested2 = assert_cast*>( + col2->get_nested_column_ptr().get()); + auto* null_map2 = assert_cast*>( + col2->get_null_map_column_ptr().get()); + nested2->insert_value(2); + null_map2->insert_value(1); + block.insert({col2->get_ptr(), nullable_type, "nullable_col2"}); + + auto col3 = vectorized::ColumnNullable::create( + vectorized::ColumnVector::create(), + vectorized::ColumnVector::create()); + auto* nested3 = assert_cast*>( + col3->get_nested_column_ptr().get()); + auto* null_map3 = assert_cast*>( + col3->get_null_map_column_ptr().get()); + nested3->insert_value(3); + null_map3->insert_value(0); + block.insert({col3->get_ptr(), nullable_type, "nullable_col1"}); // Duplicate name + } + + // Test before index initialization + EXPECT_EQ(0, block.get_position_by_name("nullable_col1")); // Returns first occurrence + EXPECT_EQ(1, block.get_position_by_name("nullable_col2")); + + // Test after index initialization + block.initialize_index_by_name(); + EXPECT_EQ(2, block.get_position_by_name("nullable_col1")); // Returns last occurrence + EXPECT_EQ(1, block.get_position_by_name("nullable_col2")); + + // Test has() function + EXPECT_TRUE(block.has("nullable_col1")); + EXPECT_TRUE(block.has("nullable_col2")); + EXPECT_FALSE(block.has("nullable_col3")); + + // Test get_by_name + const auto& col1 = block.get_by_name("nullable_col1"); + EXPECT_EQ(1, col1.column->size()); + EXPECT_THROW(block.get_by_name("nullable_col3"), Exception); + + // Test try_get_by_name + EXPECT_NE(nullptr, block.try_get_by_name("nullable_col1")); + EXPECT_EQ(nullptr, block.try_get_by_name("non_existent")); + + // Test after structure modification + block.erase(2); // Remove last "nullable_col1" + block.initialize_index_by_name(); + EXPECT_EQ(0, block.get_position_by_name("nullable_col1")); // Now first "nullable_col1" is found + } } TEST(BlockTest, ReplaceIfOverflow) { From 628726fbb9af9d9df84a019e7b625ada3f894dcd Mon Sep 17 00:00:00 2001 From: yoruet <1559650411@qq.com> Date: Wed, 18 Dec 2024 00:29:37 +0800 Subject: [PATCH 28/41] Remove obsolete test for ReplaceIfOverflow from BlockTest to streamline test suite --- be/test/vec/core/block_test.cpp | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/be/test/vec/core/block_test.cpp b/be/test/vec/core/block_test.cpp index d06f485b5e116c..7f78ce84723416 100644 --- a/be/test/vec/core/block_test.cpp +++ b/be/test/vec/core/block_test.cpp @@ -3737,26 +3737,6 @@ TEST(BlockTest, IndexByName) { } } -TEST(BlockTest, ReplaceIfOverflow) { - vectorized::Block block; - auto col = vectorized::ColumnVector::create(); - vectorized::DataTypePtr type(std::make_shared()); - - // Add some data to the column - auto& data = col->get_data(); - for (int i = 0; i < 100; ++i) { - data.push_back(i); - } - - block.insert({col->get_ptr(), type, "col1"}); - - // Test replace_if_overflow - block.replace_if_overflow(); - - // Verify column is still intact - EXPECT_EQ(100, block.get_by_position(0).column->size()); -} - TEST(BlockTest, ColumnTransformations) { vectorized::Block block; auto type = std::make_shared(); From c30bc593722e4775e195e960d800724c1222a76d Mon Sep 17 00:00:00 2001 From: yoruet <1559650411@qq.com> Date: Wed, 18 Dec 2024 00:32:34 +0800 Subject: [PATCH 29/41] Enhance BlockTest with comprehensive tests for shuffling operations on empty, regular, const, and nullable columns, ensuring correct order and data preservation. --- be/test/vec/core/block_test.cpp | 161 +++++++++++++++++++++++++++----- 1 file changed, 136 insertions(+), 25 deletions(-) diff --git a/be/test/vec/core/block_test.cpp b/be/test/vec/core/block_test.cpp index 7f78ce84723416..0ca0683c6a295d 100644 --- a/be/test/vec/core/block_test.cpp +++ b/be/test/vec/core/block_test.cpp @@ -3738,41 +3738,152 @@ TEST(BlockTest, IndexByName) { } TEST(BlockTest, ColumnTransformations) { - vectorized::Block block; - auto type = std::make_shared(); + // Test with empty block + { + vectorized::Block block; + std::vector positions = {}; + block.shuffle_columns(positions); + EXPECT_EQ(0, block.columns()); + } - // Insert columns with unique data + // Test with regular columns { - auto col1 = vectorized::ColumnVector::create(); - col1->insert_value(1); - block.insert({std::move(col1), type, "col1"}); + vectorized::Block block; + auto type = std::make_shared(); + + // Insert columns with regular values + { + auto col1 = vectorized::ColumnVector::create(); + col1->insert_value(1); + block.insert({std::move(col1), type, "col1"}); + + auto col2 = vectorized::ColumnVector::create(); + col2->insert_value(2); + block.insert({std::move(col2), type, "col2"}); + } + + // Verify initial order + EXPECT_EQ("col1", block.get_by_position(0).name); + EXPECT_EQ("col2", block.get_by_position(1).name); + + // Test shuffle_columns + std::vector positions = {1, 0}; // change the order of columns + block.shuffle_columns(positions); + + // Verify shuffled order + EXPECT_EQ("col2", block.get_by_position(0).name); + EXPECT_EQ("col1", block.get_by_position(1).name); + + // Verify column data is correctly shuffled + const auto* col1 = assert_cast*>( + block.get_by_position(1).column.get()); + const auto* col2 = assert_cast*>( + block.get_by_position(0).column.get()); + + EXPECT_EQ(1, col1->get_data()[0]); + EXPECT_EQ(2, col2->get_data()[0]); } + + // Test with const columns { - auto col2 = vectorized::ColumnVector::create(); - col2->insert_value(2); - block.insert({std::move(col2), type, "col2"}); + vectorized::Block block; + auto type = std::make_shared(); + + // Insert columns with const values + { + auto base_col1 = vectorized::ColumnVector::create(); + base_col1->insert_value(42); + auto const_col1 = vectorized::ColumnConst::create(base_col1->get_ptr(), 5); + block.insert({const_col1->get_ptr(), type, "const_col1"}); + + auto base_col2 = vectorized::ColumnVector::create(); + base_col2->insert_value(24); + auto const_col2 = vectorized::ColumnConst::create(base_col2->get_ptr(), 5); + block.insert({const_col2->get_ptr(), type, "const_col2"}); + } + + // Verify initial order + EXPECT_EQ("const_col1", block.get_by_position(0).name); + EXPECT_EQ("const_col2", block.get_by_position(1).name); + + // Test shuffle_columns + std::vector positions = {1, 0}; + block.shuffle_columns(positions); + + // Verify shuffled order + EXPECT_EQ("const_col2", block.get_by_position(0).name); + EXPECT_EQ("const_col1", block.get_by_position(1).name); + + // Verify const values are preserved + const auto* col1 = assert_cast( + block.get_by_position(1).column.get()); + const auto* col2 = assert_cast( + block.get_by_position(0).column.get()); + + EXPECT_EQ(42, col1->get_int(0)); + EXPECT_EQ(24, col2->get_int(0)); } - // Verify initial order - EXPECT_EQ("col1", block.get_by_position(0).name); - EXPECT_EQ("col2", block.get_by_position(1).name); + // Test with nullable columns + { + vectorized::Block block; + auto base_type = std::make_shared(); + auto nullable_type = std::make_shared(base_type); - // Test shuffle_columns - std::vector positions = {1, 0}; // change the order of columns - block.shuffle_columns(positions); + // Insert columns with nullable values + { + auto col1 = vectorized::ColumnNullable::create( + vectorized::ColumnVector::create(), + vectorized::ColumnVector::create()); + auto* nested1 = assert_cast*>( + col1->get_nested_column_ptr().get()); + auto* null_map1 = assert_cast*>( + col1->get_null_map_column_ptr().get()); + nested1->insert_value(1); + null_map1->insert_value(0); + block.insert({col1->get_ptr(), nullable_type, "nullable_col1"}); + + auto col2 = vectorized::ColumnNullable::create( + vectorized::ColumnVector::create(), + vectorized::ColumnVector::create()); + auto* nested2 = assert_cast*>( + col2->get_nested_column_ptr().get()); + auto* null_map2 = assert_cast*>( + col2->get_null_map_column_ptr().get()); + nested2->insert_value(2); + null_map2->insert_value(1); + block.insert({col2->get_ptr(), nullable_type, "nullable_col2"}); + } + + // Verify initial order + EXPECT_EQ("nullable_col1", block.get_by_position(0).name); + EXPECT_EQ("nullable_col2", block.get_by_position(1).name); - // Verify shuffled order - EXPECT_EQ("col2", block.get_by_position(0).name); // col2 is now in the first position - EXPECT_EQ("col1", block.get_by_position(1).name); // col1 is now in the second position + // Test shuffle_columns + std::vector positions = {1, 0}; + block.shuffle_columns(positions); - // Verify column data is also correctly shuffled - const auto* col1 = assert_cast*>( - block.get_by_position(1).column.get()); // col1 is now in position 1 - const auto* col2 = assert_cast*>( - block.get_by_position(0).column.get()); // col2 is now in position 0 + // Verify shuffled order + EXPECT_EQ("nullable_col2", block.get_by_position(0).name); + EXPECT_EQ("nullable_col1", block.get_by_position(1).name); - EXPECT_EQ(1, col1->get_data()[0]); // the value of col1 should be 1 - EXPECT_EQ(2, col2->get_data()[0]); // the value of col2 should be 2 + // Verify nullable values and null states are preserved + const auto* col1 = assert_cast( + block.get_by_position(1).column.get()); + const auto* col2 = assert_cast( + block.get_by_position(0).column.get()); + + EXPECT_FALSE(col1->is_null_at(0)); + EXPECT_TRUE(col2->is_null_at(0)); + + const auto* nested1 = assert_cast*>( + col1->get_nested_column_ptr().get()); + const auto* nested2 = assert_cast*>( + col2->get_nested_column_ptr().get()); + + EXPECT_EQ(1, nested1->get_data()[0]); + EXPECT_EQ(2, nested2->get_data()[0]); + } } TEST(BlockTest, HashUpdate) { From 72569f377e0e09dfb31ca14583ab0f47b9b17ed0 Mon Sep 17 00:00:00 2001 From: yoruet <1559650411@qq.com> Date: Wed, 18 Dec 2024 00:39:52 +0800 Subject: [PATCH 30/41] Enhance BlockTest with comprehensive tests for hash updates on empty, regular, const, and nullable columns, ensuring consistent hash values for identical data and different hashes for varying data arrangements. --- be/test/vec/core/block_test.cpp | 396 +++++++++++++++++++++++++------- 1 file changed, 313 insertions(+), 83 deletions(-) diff --git a/be/test/vec/core/block_test.cpp b/be/test/vec/core/block_test.cpp index 0ca0683c6a295d..7ca95147c078ca 100644 --- a/be/test/vec/core/block_test.cpp +++ b/be/test/vec/core/block_test.cpp @@ -3887,117 +3887,347 @@ TEST(BlockTest, ColumnTransformations) { } TEST(BlockTest, HashUpdate) { - // Test case 1: Single column with single value + // Test with empty block { - vectorized::Block block; - auto col = vectorized::ColumnVector::create(); - col->insert_value(42); - auto type = std::make_shared(); - block.insert({std::move(col), type, "col1"}); + // Single empty block + { + vectorized::Block empty_block; + SipHash hash1; + empty_block.update_hash(hash1); + uint64_t hash1_value = hash1.get64(); + + // Same empty block should produce same hash + SipHash hash2; + empty_block.update_hash(hash2); + EXPECT_EQ(hash1_value, hash2.get64()); + } - SipHash hash1; - block.update_hash(hash1); - uint64_t hash1_value = hash1.get64(); + // Multiple empty blocks + { + vectorized::Block block1; + vectorized::Block block2; - // Same data should produce same hash - SipHash hash2; - block.update_hash(hash2); - EXPECT_EQ(hash1_value, hash2.get64()); + SipHash hash1, hash2; + block1.update_hash(hash1); + block2.update_hash(hash2); + EXPECT_EQ(hash1.get64(), hash2.get64()); + } } - // Test case 2: Multiple columns + // Test with regular columns { - vectorized::Block block; - auto type = std::make_shared(); - - // First column + // Single column with single value { - auto col1 = vectorized::ColumnVector::create(); - col1->insert_value(1); - block.insert({std::move(col1), type, "col1"}); + vectorized::Block block; + auto type = std::make_shared(); + + auto col = vectorized::ColumnVector::create(); + col->insert_value(42); + block.insert({std::move(col), type, "col1"}); + + SipHash hash1; + block.update_hash(hash1); + uint64_t hash1_value = hash1.get64(); + + // Same data should produce same hash + SipHash hash2; + block.update_hash(hash2); + EXPECT_EQ(hash1_value, hash2.get64()); } - // Second column + // Multiple columns { - auto col2 = vectorized::ColumnVector::create(); - col2->insert_value(2); - block.insert({std::move(col2), type, "col2"}); + vectorized::Block block1; + auto type = std::make_shared(); + + // Create first block with values [1, 2] + { + auto col1 = vectorized::ColumnVector::create(); + col1->insert_value(1); + block1.insert({std::move(col1), type, "col1"}); + + auto col2 = vectorized::ColumnVector::create(); + col2->insert_value(2); + block1.insert({std::move(col2), type, "col2"}); + } + + // Create second block with values [2, 1] + vectorized::Block block2; + { + auto col1 = vectorized::ColumnVector::create(); + col1->insert_value(2); + block2.insert({std::move(col1), type, "col1"}); + + auto col2 = vectorized::ColumnVector::create(); + col2->insert_value(1); + block2.insert({std::move(col2), type, "col2"}); + } + + // Different order of same values should produce different hash + SipHash hash1, hash2; + block1.update_hash(hash1); + block2.update_hash(hash2); + EXPECT_NE(hash1.get64(), hash2.get64()); } - SipHash hash1; - block.update_hash(hash1); - uint64_t hash1_value = hash1.get64(); + // Multiple rows + { + vectorized::Block block1; + auto type = std::make_shared(); - // Different order of same values should produce different hash - vectorized::Block block2; + // Create first block with ascending values + { + auto col = vectorized::ColumnVector::create(); + for (int i = 0; i < 5; ++i) { + col->insert_value(i); + } + block1.insert({std::move(col), type, "col1"}); + } + + // Create second block with descending values + vectorized::Block block2; + { + auto col = vectorized::ColumnVector::create(); + for (int i = 4; i >= 0; --i) { + col->insert_value(i); + } + block2.insert({std::move(col), type, "col1"}); + } + + // Different order of same values should produce different hash + SipHash hash1, hash2; + block1.update_hash(hash1); + block2.update_hash(hash2); + EXPECT_NE(hash1.get64(), hash2.get64()); + } + } + + // Test with const columns + { + // Single column with single value { - auto col1 = vectorized::ColumnVector::create(); - col1->insert_value(2); - block2.insert({std::move(col1), type, "col1"}); + vectorized::Block block; + auto type = std::make_shared(); + + auto base_col = vectorized::ColumnVector::create(); + base_col->insert_value(42); + auto const_col = vectorized::ColumnConst::create(base_col->get_ptr(), 5); + block.insert({const_col->get_ptr(), type, "const_col"}); + + SipHash hash1; + block.update_hash(hash1); + uint64_t hash1_value = hash1.get64(); + + // Same data should produce same hash + SipHash hash2; + block.update_hash(hash2); + EXPECT_EQ(hash1_value, hash2.get64()); } + + // Multiple columns { - auto col2 = vectorized::ColumnVector::create(); - col2->insert_value(1); - block2.insert({std::move(col2), type, "col2"}); + vectorized::Block block1; + auto type = std::make_shared(); + + // Create first block with const values [1, 2] + { + auto base_col1 = vectorized::ColumnVector::create(); + base_col1->insert_value(1); + auto const_col1 = vectorized::ColumnConst::create(base_col1->get_ptr(), 5); + block1.insert({const_col1->get_ptr(), type, "const_col1"}); + + auto base_col2 = vectorized::ColumnVector::create(); + base_col2->insert_value(2); + auto const_col2 = vectorized::ColumnConst::create(base_col2->get_ptr(), 5); + block1.insert({const_col2->get_ptr(), type, "const_col2"}); + } + + // Create second block with const values [2, 1] + vectorized::Block block2; + { + auto base_col1 = vectorized::ColumnVector::create(); + base_col1->insert_value(2); + auto const_col1 = vectorized::ColumnConst::create(base_col1->get_ptr(), 5); + block2.insert({const_col1->get_ptr(), type, "const_col1"}); + + auto base_col2 = vectorized::ColumnVector::create(); + base_col2->insert_value(1); + auto const_col2 = vectorized::ColumnConst::create(base_col2->get_ptr(), 5); + block2.insert({const_col2->get_ptr(), type, "const_col2"}); + } + + // Different order of same values should produce different hash + SipHash hash1, hash2; + block1.update_hash(hash1); + block2.update_hash(hash2); + EXPECT_NE(hash1.get64(), hash2.get64()); } - SipHash hash2; - block2.update_hash(hash2); - EXPECT_NE(hash1_value, hash2.get64()); + // Multiple rows (same value repeated) + { + vectorized::Block block1; + auto type = std::make_shared(); + + auto base_col = vectorized::ColumnVector::create(); + base_col->insert_value(42); + auto const_col = vectorized::ColumnConst::create(base_col->get_ptr(), 5); + block1.insert({const_col->get_ptr(), type, "const_col"}); + + // Create second block with same value but different row count + vectorized::Block block2; + auto base_col2 = vectorized::ColumnVector::create(); + base_col2->insert_value(42); + auto const_col2 = vectorized::ColumnConst::create(base_col2->get_ptr(), 3); + block2.insert({const_col2->get_ptr(), type, "const_col"}); + + // Different row counts should produce different hash + SipHash hash1, hash2; + block1.update_hash(hash1); + block2.update_hash(hash2); + EXPECT_NE(hash1.get64(), hash2.get64()); + } } - // Test case 3: Multiple rows + // Test with nullable columns { - vectorized::Block block; - auto col = vectorized::ColumnVector::create(); - for (int i = 0; i < 5; ++i) { - col->insert_value(i); + // Single column with single value + { + vectorized::Block block; + auto base_type = std::make_shared(); + auto nullable_type = std::make_shared(base_type); + + auto col = vectorized::ColumnNullable::create( + vectorized::ColumnVector::create(), + vectorized::ColumnVector::create()); + auto* nested = assert_cast*>( + col->get_nested_column_ptr().get()); + auto* null_map = assert_cast*>( + col->get_null_map_column_ptr().get()); + nested->insert_value(42); + null_map->insert_value(0); + block.insert({col->get_ptr(), nullable_type, "nullable_col"}); + + SipHash hash1; + block.update_hash(hash1); + uint64_t hash1_value = hash1.get64(); + + // Same data should produce same hash + SipHash hash2; + block.update_hash(hash2); + EXPECT_EQ(hash1_value, hash2.get64()); } - auto type = std::make_shared(); - block.insert({std::move(col), type, "col1"}); - SipHash hash1; - block.update_hash(hash1); - uint64_t hash1_value = hash1.get64(); + // Multiple columns + { + vectorized::Block block1; + auto base_type = std::make_shared(); + auto nullable_type = std::make_shared(base_type); + + // Create first block with values [1(not null), 2(null)] + { + auto col1 = vectorized::ColumnNullable::create( + vectorized::ColumnVector::create(), + vectorized::ColumnVector::create()); + auto* nested1 = assert_cast*>( + col1->get_nested_column_ptr().get()); + auto* null_map1 = assert_cast*>( + col1->get_null_map_column_ptr().get()); + nested1->insert_value(1); + null_map1->insert_value(0); + block1.insert({col1->get_ptr(), nullable_type, "nullable_col1"}); + + auto col2 = vectorized::ColumnNullable::create( + vectorized::ColumnVector::create(), + vectorized::ColumnVector::create()); + auto* nested2 = assert_cast*>( + col2->get_nested_column_ptr().get()); + auto* null_map2 = assert_cast*>( + col2->get_null_map_column_ptr().get()); + nested2->insert_value(2); + null_map2->insert_value(1); + block1.insert({col2->get_ptr(), nullable_type, "nullable_col2"}); + } - // Different order of same values should produce different hash - auto col2 = vectorized::ColumnVector::create(); - for (int i = 4; i >= 0; --i) { - col2->insert_value(i); + // Create second block with values [2(null), 1(not null)] + vectorized::Block block2; + { + auto col1 = vectorized::ColumnNullable::create( + vectorized::ColumnVector::create(), + vectorized::ColumnVector::create()); + auto* nested1 = assert_cast*>( + col1->get_nested_column_ptr().get()); + auto* null_map1 = assert_cast*>( + col1->get_null_map_column_ptr().get()); + nested1->insert_value(2); + null_map1->insert_value(1); + block2.insert({col1->get_ptr(), nullable_type, "nullable_col1"}); + + auto col2 = vectorized::ColumnNullable::create( + vectorized::ColumnVector::create(), + vectorized::ColumnVector::create()); + auto* nested2 = assert_cast*>( + col2->get_nested_column_ptr().get()); + auto* null_map2 = assert_cast*>( + col2->get_null_map_column_ptr().get()); + nested2->insert_value(1); + null_map2->insert_value(0); + block2.insert({col2->get_ptr(), nullable_type, "nullable_col2"}); + } + + // Different order of same values should produce different hash + SipHash hash1, hash2; + block1.update_hash(hash1); + block2.update_hash(hash2); + EXPECT_NE(hash1.get64(), hash2.get64()); } - vectorized::Block block2; - block2.insert({std::move(col2), type, "col1"}); - SipHash hash2; - block2.update_hash(hash2); - EXPECT_NE(hash1_value, hash2.get64()); - } + // Multiple rows + { + vectorized::Block block1; + auto base_type = std::make_shared(); + auto nullable_type = std::make_shared(base_type); - // Test case 4: Empty block - { - vectorized::Block empty_block; - SipHash hash; - empty_block.update_hash(hash); - // Should not crash - } + // Create first block with ascending values and alternating null flags + { + auto col = vectorized::ColumnNullable::create( + vectorized::ColumnVector::create(), + vectorized::ColumnVector::create()); + auto* nested = assert_cast*>( + col->get_nested_column_ptr().get()); + auto* null_map = assert_cast*>( + col->get_null_map_column_ptr().get()); + + for (int i = 0; i < 5; ++i) { + nested->insert_value(i); + null_map->insert_value(i % 2); + } + block1.insert({col->get_ptr(), nullable_type, "nullable_col"}); + } - // Test case 5: Nullable column - { - vectorized::Block block; - auto col = vectorized::ColumnVector::create(); - col->insert_value(1); - auto nullable_col = vectorized::make_nullable(std::move(col)); - auto type = vectorized::make_nullable(std::make_shared()); - block.insert({std::move(nullable_col), type, "nullable_col"}); - - SipHash hash1; - block.update_hash(hash1); - uint64_t hash1_value = hash1.get64(); - - // Same nullable column should produce same hash - SipHash hash2; - block.update_hash(hash2); - EXPECT_EQ(hash1_value, hash2.get64()); + // Create second block with descending values and alternating null flags + vectorized::Block block2; + { + auto col = vectorized::ColumnNullable::create( + vectorized::ColumnVector::create(), + vectorized::ColumnVector::create()); + auto* nested = assert_cast*>( + col->get_nested_column_ptr().get()); + auto* null_map = assert_cast*>( + col->get_null_map_column_ptr().get()); + + for (int i = 4; i >= 0; --i) { + nested->insert_value(i); + null_map->insert_value(i % 2); + } + block2.insert({col->get_ptr(), nullable_type, "nullable_col"}); + } + + // Different order of same values should produce different hash + SipHash hash1, hash2; + block1.update_hash(hash1); + block2.update_hash(hash2); + EXPECT_NE(hash1.get64(), hash2.get64()); + } } } From 11263c304e6174e337c9e046a14eadc29fb9f636 Mon Sep 17 00:00:00 2001 From: yoruet <1559650411@qq.com> Date: Wed, 18 Dec 2024 12:58:00 +0800 Subject: [PATCH 31/41] Enhance BlockTest with comprehensive tests for erase operations, compare methods, same bit operations, and temporary column management across empty, regular, const, and nullable columns, ensuring robust functionality and data integrity. --- be/test/vec/core/block_test.cpp | 943 ++++++++++++++++++++++++++++---- 1 file changed, 850 insertions(+), 93 deletions(-) diff --git a/be/test/vec/core/block_test.cpp b/be/test/vec/core/block_test.cpp index 7ca95147c078ca..e349e3fd24d1f8 100644 --- a/be/test/vec/core/block_test.cpp +++ b/be/test/vec/core/block_test.cpp @@ -4231,37 +4231,554 @@ TEST(BlockTest, HashUpdate) { } } -TEST(BlockTest, BlockOperations) { - // Test erase_useless_column +TEST(BlockTest, EraseUselessColumn) { + // Test with empty block + { + vectorized::Block block; + vectorized::Block::erase_useless_column(&block, 0); + EXPECT_EQ(0, block.columns()); + } + + // Test with regular columns { vectorized::Block block; auto type = std::make_shared(); // Insert three columns + for (int i = 1; i <= 3; ++i) { + auto col = vectorized::ColumnVector::create(); + col->insert_value(i); + block.insert({std::move(col), type, "col" + std::to_string(i)}); + } + + EXPECT_EQ(3, block.columns()); + vectorized::Block::erase_useless_column(&block, 2); + EXPECT_EQ(2, block.columns()); + EXPECT_EQ("col1", block.get_by_position(0).name); + EXPECT_EQ("col2", block.get_by_position(1).name); + } + + // Test with const columns + { + vectorized::Block block; + auto type = std::make_shared(); + + // Insert three const columns + for (int i = 1; i <= 3; ++i) { + auto base_col = vectorized::ColumnVector::create(); + base_col->insert_value(i); + auto const_col = vectorized::ColumnConst::create(base_col->get_ptr(), 5); + block.insert({const_col->get_ptr(), type, "const_col" + std::to_string(i)}); + } + + EXPECT_EQ(3, block.columns()); + vectorized::Block::erase_useless_column(&block, 2); + EXPECT_EQ(2, block.columns()); + EXPECT_EQ("const_col1", block.get_by_position(0).name); + EXPECT_EQ("const_col2", block.get_by_position(1).name); + } + + // Test with nullable columns + { + vectorized::Block block; + auto base_type = std::make_shared(); + auto nullable_type = std::make_shared(base_type); + + // Insert three nullable columns + for (int i = 1; i <= 3; ++i) { + auto col = vectorized::ColumnNullable::create( + vectorized::ColumnVector::create(), + vectorized::ColumnVector::create()); + auto* nested = assert_cast*>( + col->get_nested_column_ptr().get()); + auto* null_map = assert_cast*>( + col->get_null_map_column_ptr().get()); + nested->insert_value(i); + null_map->insert_value(i % 2); + block.insert({col->get_ptr(), nullable_type, "nullable_col" + std::to_string(i)}); + } + + EXPECT_EQ(3, block.columns()); + vectorized::Block::erase_useless_column(&block, 2); + EXPECT_EQ(2, block.columns()); + EXPECT_EQ("nullable_col1", block.get_by_position(0).name); + EXPECT_EQ("nullable_col2", block.get_by_position(1).name); + } +} + +TEST(BlockTest, CompareAt) { + // Test with empty blocks + { + vectorized::Block block1, block2; + + // Test basic compare_at + EXPECT_EQ(0, block1.compare_at(0, 0, block2, 0)); + EXPECT_DEATH(block1.compare_at(1, 1, block2, 0), ""); + + // Test compare_at with num_columns + EXPECT_DEATH(block1.compare_at(0, 0, 1, block2, 0), ""); + + // Test compare_at with specific columns + std::vector compare_cols = {0}; + EXPECT_DEATH(block1.compare_at(0, 0, &compare_cols, block2, 0), ""); + } + + // Test with regular columns + { + vectorized::Block block1, block2; + auto type = std::make_shared(); + + // Create first block with ascending values { + // First column: [1, 2] auto col1 = vectorized::ColumnVector::create(); col1->insert_value(1); - block.insert({std::move(col1), type, "col1"}); + col1->insert_value(2); + block1.insert({std::move(col1), type, "col1"}); + + // Second column: [3, 4] + auto col2 = vectorized::ColumnVector::create(); + col2->insert_value(3); + col2->insert_value(4); + block1.insert({std::move(col2), type, "col2"}); } + + // Create second block with different values { + // First column: [1, 3] + auto col1 = vectorized::ColumnVector::create(); + col1->insert_value(1); + col1->insert_value(3); + block2.insert({std::move(col1), type, "col1"}); + + // Second column: [3, 5] auto col2 = vectorized::ColumnVector::create(); - col2->insert_value(2); - block.insert({std::move(col2), type, "col2"}); + col2->insert_value(3); + col2->insert_value(5); + block2.insert({std::move(col2), type, "col2"}); + } + + // Test basic compare_at + EXPECT_EQ(0, block1.compare_at(0, 0, block2, 0)); // Equal rows + EXPECT_LT(block1.compare_at(0, 1, block2, 1), 0); // [1,3] < [3,5] + EXPECT_GT(block1.compare_at(1, 0, block2, 0), 0); // [2,4] > [1,3] + + // Test compare_at with num_columns + EXPECT_EQ(0, block1.compare_at(0, 0, 1, block2, 0)); // Compare only first column + + // Test compare_at with specific columns + std::vector compare_cols = {1}; // Compare only second column + EXPECT_EQ(0, block1.compare_at(0, 0, &compare_cols, block2, 0)); + } + + // Test with const columns + { + vectorized::Block block1, block2; + auto type = std::make_shared(); + + // Create first block with const columns + { + // First column: const(1) + auto base_col1 = vectorized::ColumnVector::create(); + base_col1->insert_value(1); + auto const_col1 = vectorized::ColumnConst::create(base_col1->get_ptr(), 2); + block1.insert({const_col1->get_ptr(), type, "col1"}); + + // Second column: const(2) + auto base_col2 = vectorized::ColumnVector::create(); + base_col2->insert_value(2); + auto const_col2 = vectorized::ColumnConst::create(base_col2->get_ptr(), 2); + block1.insert({const_col2->get_ptr(), type, "col2"}); + } + + // Create second block with different const values + { + // First column: const(1) + auto base_col1 = vectorized::ColumnVector::create(); + base_col1->insert_value(1); + auto const_col1 = vectorized::ColumnConst::create(base_col1->get_ptr(), 2); + block2.insert({const_col1->get_ptr(), type, "col1"}); + + // Second column: const(3) + auto base_col2 = vectorized::ColumnVector::create(); + base_col2->insert_value(3); + auto const_col2 = vectorized::ColumnConst::create(base_col2->get_ptr(), 2); + block2.insert({const_col2->get_ptr(), type, "col2"}); + } + + // Test basic compare_at + EXPECT_EQ(-1, block1.compare_at(0, 0, block2, 0)); + EXPECT_LT(block1.compare_at(0, 1, block2, 1), 0); + EXPECT_EQ(-1, block1.compare_at(1, 0, block2, 0)); + + // Test compare_at with num_columns + EXPECT_EQ(0, block1.compare_at(0, 0, 1, block2, 0)); // Compare only first column + + // Test compare_at with specific columns + std::vector compare_cols = {1}; // Compare only second column + EXPECT_LT(block1.compare_at(0, 0, &compare_cols, block2, 0), 0); // const(2) < const(3) + } + + // Test with nullable columns + { + vectorized::Block block1, block2; + auto base_type = std::make_shared(); + auto nullable_type = std::make_shared(base_type); + + // Create first block with nullable columns + { + // First column: [1, null] + auto col1 = vectorized::ColumnNullable::create( + vectorized::ColumnVector::create(), + vectorized::ColumnVector::create()); + auto* nested1 = assert_cast*>( + col1->get_nested_column_ptr().get()); + auto* null_map1 = assert_cast*>( + col1->get_null_map_column_ptr().get()); + nested1->insert_value(1); + nested1->insert_value(2); + null_map1->insert_value(0); // not null + null_map1->insert_value(1); // null + block1.insert({col1->get_ptr(), nullable_type, "col1"}); + + // Second column: [null, 4] + auto col2 = vectorized::ColumnNullable::create( + vectorized::ColumnVector::create(), + vectorized::ColumnVector::create()); + auto* nested2 = assert_cast*>( + col2->get_nested_column_ptr().get()); + auto* null_map2 = assert_cast*>( + col2->get_null_map_column_ptr().get()); + nested2->insert_value(3); + nested2->insert_value(4); + null_map2->insert_value(1); // null + null_map2->insert_value(0); // not null + block1.insert({col2->get_ptr(), nullable_type, "col2"}); + } + + // Create second block with different nullable values + { + // First column: [1, 3] + auto col1 = vectorized::ColumnNullable::create( + vectorized::ColumnVector::create(), + vectorized::ColumnVector::create()); + auto* nested1 = assert_cast*>( + col1->get_nested_column_ptr().get()); + auto* null_map1 = assert_cast*>( + col1->get_null_map_column_ptr().get()); + nested1->insert_value(1); + nested1->insert_value(3); + null_map1->insert_value(0); // not null + null_map1->insert_value(0); // not null + block2.insert({col1->get_ptr(), nullable_type, "col1"}); + + // Second column: [3, null] + auto col2 = vectorized::ColumnNullable::create( + vectorized::ColumnVector::create(), + vectorized::ColumnVector::create()); + auto* nested2 = assert_cast*>( + col2->get_nested_column_ptr().get()); + auto* null_map2 = assert_cast*>( + col2->get_null_map_column_ptr().get()); + nested2->insert_value(3); + nested2->insert_value(5); + null_map2->insert_value(0); // not null + null_map2->insert_value(1); // null + block2.insert({col2->get_ptr(), nullable_type, "col2"}); + } + + // Test basic compare_at + EXPECT_EQ(0, block1.compare_at(0, 0, block2, 0)); // Equal non-null values + EXPECT_LT(block1.compare_at(0, 1, block2, 1), 0); // null < non-null + EXPECT_GT(block1.compare_at(1, 0, block2, 0), 0); // non-null > null + + // Test compare_at with num_columns + EXPECT_EQ(0, block1.compare_at(0, 0, 1, block2, 0)); // Compare only first column + + // Test compare_at with specific columns + std::vector compare_cols = {1}; // Compare only second column + EXPECT_EQ(0, block1.compare_at(0, 0, &compare_cols, block2, 0)); // null > non-null + } +} + +TEST(BlockTest, CompareColumnAt) { + // Test with empty blocks + { + vectorized::Block block1, block2; + EXPECT_DEATH(block1.compare_column_at(0, 0, 0, block2, 0), ""); // 空块不应该比较列 + } + + // Test with regular columns + { + vectorized::Block block1, block2; + auto type = std::make_shared(); + + // Create first block with ascending values + { + auto col1 = vectorized::ColumnVector::create(); + col1->insert_value(1); + col1->insert_value(2); + block1.insert({std::move(col1), type, "col1"}); + + auto col2 = vectorized::ColumnVector::create(); + col2->insert_value(3); + col2->insert_value(4); + block1.insert({std::move(col2), type, "col2"}); + } + + // Create second block with different values + { + auto col1 = vectorized::ColumnVector::create(); + col1->insert_value(1); + col1->insert_value(3); + block2.insert({std::move(col1), type, "col1"}); + + auto col2 = vectorized::ColumnVector::create(); + col2->insert_value(3); + col2->insert_value(5); + block2.insert({std::move(col2), type, "col2"}); + } + + // Test compare_column_at for each column + EXPECT_EQ(0, block1.compare_column_at(0, 0, 0, block2, 0)); // First column, equal values + EXPECT_LT(block1.compare_column_at(0, 1, 0, block2, 1), 0); // First column, 2 < 3 + EXPECT_EQ(0, block1.compare_column_at(0, 0, 1, block2, 0)); // Second column, equal values + EXPECT_LT(block1.compare_column_at(0, 1, 1, block2, 1), 0); // Second column, 4 < 5 + } + + // Test with const columns + { + vectorized::Block block1, block2; + auto type = std::make_shared(); + + // Create first block with const columns + { + auto base_col1 = vectorized::ColumnVector::create(); + base_col1->insert_value(1); + auto const_col1 = vectorized::ColumnConst::create(base_col1->get_ptr(), 2); + block1.insert({const_col1->get_ptr(), type, "col1"}); + + auto base_col2 = vectorized::ColumnVector::create(); + base_col2->insert_value(2); + auto const_col2 = vectorized::ColumnConst::create(base_col2->get_ptr(), 2); + block1.insert({const_col2->get_ptr(), type, "col2"}); + } + + // Create second block with different const values + { + auto base_col1 = vectorized::ColumnVector::create(); + base_col1->insert_value(1); + auto const_col1 = vectorized::ColumnConst::create(base_col1->get_ptr(), 2); + block2.insert({const_col1->get_ptr(), type, "col1"}); + + auto base_col2 = vectorized::ColumnVector::create(); + base_col2->insert_value(3); + auto const_col2 = vectorized::ColumnConst::create(base_col2->get_ptr(), 2); + block2.insert({const_col2->get_ptr(), type, "col2"}); + } + + // Test compare_column_at for each column + EXPECT_EQ(0, block1.compare_column_at(0, 0, 0, block2, 0)); // First column, equal values + EXPECT_EQ(0, block1.compare_column_at(1, 1, 0, block2, 1)); // First column, equal values + EXPECT_LT(block1.compare_column_at(0, 0, 1, block2, 0), 0); // Second column, 2 < 3 + EXPECT_LT(block1.compare_column_at(1, 1, 1, block2, 1), 0); // Second column, 2 < 3 + } + + // Test with nullable columns + { + vectorized::Block block1, block2; + auto base_type = std::make_shared(); + auto nullable_type = std::make_shared(base_type); + + // Create first block with nullable columns + { + auto col1 = vectorized::ColumnNullable::create( + vectorized::ColumnVector::create(), + vectorized::ColumnVector::create()); + auto* nested1 = assert_cast*>( + col1->get_nested_column_ptr().get()); + auto* null_map1 = assert_cast*>( + col1->get_null_map_column_ptr().get()); + nested1->insert_value(1); + nested1->insert_value(2); + null_map1->insert_value(0); // not null + null_map1->insert_value(1); // null + block1.insert({col1->get_ptr(), nullable_type, "col1"}); + + auto col2 = vectorized::ColumnNullable::create( + vectorized::ColumnVector::create(), + vectorized::ColumnVector::create()); + auto* nested2 = assert_cast*>( + col2->get_nested_column_ptr().get()); + auto* null_map2 = assert_cast*>( + col2->get_null_map_column_ptr().get()); + nested2->insert_value(3); + nested2->insert_value(4); + null_map2->insert_value(1); // null + null_map2->insert_value(0); // not null + block1.insert({col2->get_ptr(), nullable_type, "col2"}); + } + + // Create second block with different nullable values + { + auto col1 = vectorized::ColumnNullable::create( + vectorized::ColumnVector::create(), + vectorized::ColumnVector::create()); + auto* nested1 = assert_cast*>( + col1->get_nested_column_ptr().get()); + auto* null_map1 = assert_cast*>( + col1->get_null_map_column_ptr().get()); + nested1->insert_value(1); + nested1->insert_value(3); + null_map1->insert_value(0); // not null + null_map1->insert_value(0); // not null + block2.insert({col1->get_ptr(), nullable_type, "col1"}); + + auto col2 = vectorized::ColumnNullable::create( + vectorized::ColumnVector::create(), + vectorized::ColumnVector::create()); + auto* nested2 = assert_cast*>( + col2->get_nested_column_ptr().get()); + auto* null_map2 = assert_cast*>( + col2->get_null_map_column_ptr().get()); + nested2->insert_value(3); + nested2->insert_value(5); + null_map2->insert_value(0); // not null + null_map2->insert_value(1); // null + block2.insert({col2->get_ptr(), nullable_type, "col2"}); + } + + // Test compare_column_at for each column + EXPECT_EQ(0, block1.compare_column_at(0, 0, 0, block2, 0)); // First column, equal non-null values + EXPECT_GT(block1.compare_column_at(1, 1, 0, block2, 1), 0); // First column, null < non-null + EXPECT_EQ(block1.compare_column_at(0, 0, 1, block2, 0), 0); // Second column, non-null > null + EXPECT_LT(block1.compare_column_at(1, 1, 1, block2, 1), 0); // Second column, non-null < null + } +} + +TEST(BlockTest, SameBitOperations) { + // Test with empty block + { + vectorized::Block block; + std::vector same_bits = {}; + block.set_same_bit(same_bits.begin(), same_bits.end()); + EXPECT_FALSE(block.get_same_bit(0)); + } + + // Test with regular columns + { + vectorized::Block block; + auto type = std::make_shared(); + + // Create block with data + auto col = vectorized::ColumnVector::create(); + for (int i = 0; i < 3; ++i) { + col->insert_value(i); + } + block.insert({std::move(col), type, "col1"}); + + // Test set_same_bit + std::vector same_bits = {true, false, true}; + block.set_same_bit(same_bits.begin(), same_bits.end()); + + // Test get_same_bit + EXPECT_TRUE(block.get_same_bit(0)); + EXPECT_FALSE(block.get_same_bit(1)); + EXPECT_TRUE(block.get_same_bit(2)); + EXPECT_FALSE(block.get_same_bit(3)); // Out of range + + // Test clear_same_bit + block.clear_same_bit(); + for (int i = 0; i < 3; ++i) { + EXPECT_FALSE(block.get_same_bit(i)); + } + } + + // Test with const columns + { + vectorized::Block block; + auto type = std::make_shared(); + + // Create const column + auto base_col = vectorized::ColumnVector::create(); + base_col->insert_value(1); + auto const_col = vectorized::ColumnConst::create(base_col->get_ptr(), 3); + block.insert({const_col->get_ptr(), type, "const_col"}); + + // Test set_same_bit + std::vector same_bits = {true, false, true}; + block.set_same_bit(same_bits.begin(), same_bits.end()); + + // Test get_same_bit + EXPECT_TRUE(block.get_same_bit(0)); + EXPECT_FALSE(block.get_same_bit(1)); + EXPECT_TRUE(block.get_same_bit(2)); + + // Test clear_same_bit + block.clear_same_bit(); + for (int i = 0; i < 3; ++i) { + EXPECT_FALSE(block.get_same_bit(i)); + } + } + + // Test with nullable columns + { + vectorized::Block block; + auto base_type = std::make_shared(); + auto nullable_type = std::make_shared(base_type); + + // Create nullable column + auto col = vectorized::ColumnNullable::create( + vectorized::ColumnVector::create(), + vectorized::ColumnVector::create()); + for (int i = 0; i < 3; ++i) { + auto* nested = assert_cast*>( + col->get_nested_column_ptr().get()); + auto* null_map = assert_cast*>( + col->get_null_map_column_ptr().get()); + nested->insert_value(i); + null_map->insert_value(i % 2); + } + block.insert({col->get_ptr(), nullable_type, "nullable_col"}); + + // Test set_same_bit + std::vector same_bits = {true, false, true}; + block.set_same_bit(same_bits.begin(), same_bits.end()); + + // Test get_same_bit + EXPECT_TRUE(block.get_same_bit(0)); + EXPECT_FALSE(block.get_same_bit(1)); + EXPECT_TRUE(block.get_same_bit(2)); + + // Test clear_same_bit + block.clear_same_bit(); + for (int i = 0; i < 3; ++i) { + EXPECT_FALSE(block.get_same_bit(i)); } + } +} + +TEST(BlockTest, CreateSameStructBlock) { + // Test with empty block + { + // Test case 1: with default values (is_reserve = false) { - auto col3 = vectorized::ColumnVector::create(); - col3->insert_value(3); - block.insert({std::move(col3), type, "col3"}); + vectorized::Block original_block; + auto new_block = original_block.create_same_struct_block(5, false); + EXPECT_EQ(0, new_block->columns()); + EXPECT_EQ(0, new_block->rows()); } - EXPECT_EQ(3, block.columns()); - vectorized::Block::erase_useless_column(&block, 2); - EXPECT_EQ(2, block.columns()); - EXPECT_EQ("col1", block.get_by_position(0).name); - EXPECT_EQ("col2", block.get_by_position(1).name); + // Test case 2: with reserved space (is_reserve = true) + { + vectorized::Block original_block; + auto new_block = original_block.create_same_struct_block(5, true); + EXPECT_EQ(0, new_block->columns()); + EXPECT_EQ(0, new_block->rows()); + } } - // Test create_same_struct_block + // Test with regular columns { vectorized::Block original_block; auto type = std::make_shared(); @@ -4299,138 +4816,378 @@ TEST(BlockTest, BlockOperations) { } } - // Test compare_at methods + // Test with const columns { - vectorized::Block block1; - vectorized::Block block2; + vectorized::Block original_block; auto type = std::make_shared(); - // Prepare two blocks with test data + // Create original block with data { - auto col1 = vectorized::ColumnVector::create(); - col1->insert_value(1); - col1->insert_value(2); - block1.insert({std::move(col1), type, "col1"}); + auto base_col = vectorized::ColumnVector::create(); + base_col->insert_value(42); + auto const_col = vectorized::ColumnConst::create(base_col->get_ptr(), 1); + original_block.insert({const_col->get_ptr(), type, "const_col"}); + } - auto col2 = vectorized::ColumnVector::create(); - col2->insert_value(3); - col2->insert_value(4); - block1.insert({std::move(col2), type, "col2"}); + // Test case 1: with default values (is_reserve = false) + { + auto new_block = original_block.create_same_struct_block(5, false); + EXPECT_EQ(original_block.columns(), new_block->columns()); + EXPECT_EQ(5, new_block->rows()); // Should have 5 default values + EXPECT_EQ("const_col", new_block->get_by_position(0).name); + EXPECT_TRUE(new_block->get_by_position(0).type->equals(*type)); + + // Verify default values are inserted + const auto* col = assert_cast*>( + new_block->get_by_position(0).column.get()); + for (size_t i = 0; i < 5; ++i) { + EXPECT_EQ(0, col->get_data()[i]); // Default value for Int32 is 0 + } } + // Test case 2: with reserved space (is_reserve = true) { - auto col1 = vectorized::ColumnVector::create(); - col1->insert_value(1); - col1->insert_value(3); - block2.insert({std::move(col1), type, "col1"}); + auto new_block = original_block.create_same_struct_block(5, true); + EXPECT_EQ(original_block.columns(), new_block->columns()); + EXPECT_EQ(0, new_block->rows()); // Should be empty but with reserved space + EXPECT_EQ("const_col", new_block->get_by_position(0).name); + EXPECT_TRUE(new_block->get_by_position(0).type->equals(*type)); + } + } - auto col2 = vectorized::ColumnVector::create(); - col2->insert_value(3); - col2->insert_value(4); - block2.insert({std::move(col2), type, "col2"}); + // Test with nullable columns + { + vectorized::Block original_block; + auto base_type = std::make_shared(); + auto nullable_type = std::make_shared(base_type); + + // Create original block with data + { + auto col = vectorized::ColumnNullable::create( + vectorized::ColumnVector::create(), + vectorized::ColumnVector::create()); + auto* nested = assert_cast*>( + col->get_nested_column_ptr().get()); + auto* null_map = assert_cast*>( + col->get_null_map_column_ptr().get()); + nested->insert_value(1); + null_map->insert_value(0); + original_block.insert({col->get_ptr(), nullable_type, "nullable_col"}); } - // Test basic compare_at - EXPECT_EQ(0, block1.compare_at(0, 0, block2, 1)); // First rows are equal - EXPECT_LT(block1.compare_at(0, 1, block2, 1), 0); // 1 < 3 + // Test case 1: with default values (is_reserve = false) + { + auto new_block = original_block.create_same_struct_block(5, false); + EXPECT_EQ(original_block.columns(), new_block->columns()); + EXPECT_EQ(5, new_block->rows()); // Should have 5 default values + EXPECT_EQ("nullable_col", new_block->get_by_position(0).name); + EXPECT_TRUE(new_block->get_by_position(0).type->equals(*nullable_type)); - // Test compare_at with num_columns - EXPECT_EQ(0, block1.compare_at(0, 0, 1, block2, 1)); // Compare only first column + // Verify default values are inserted + const auto* col = assert_cast( + new_block->get_by_position(0).column.get()); + const auto* nested = assert_cast*>( + col->get_nested_column_ptr().get()); + const auto* null_map = assert_cast*>( + col->get_null_map_column_ptr().get()); + for (size_t i = 0; i < 5; ++i) { + EXPECT_EQ(0, nested->get_data()[i]); // Default value for Int32 is 0 + EXPECT_EQ(1, null_map->get_data()[i]); // Default is null + } + } - // Test compare_at with specific columns - std::vector compare_cols = {1}; // Compare only second column - EXPECT_EQ(0, block1.compare_at(0, 0, &compare_cols, block2, 1)); + // Test case 2: with reserved space (is_reserve = true) + { + auto new_block = original_block.create_same_struct_block(5, true); + EXPECT_EQ(original_block.columns(), new_block->columns()); + EXPECT_EQ(0, new_block->rows()); // Should be empty but with reserved space + EXPECT_EQ("nullable_col", new_block->get_by_position(0).name); + EXPECT_TRUE(new_block->get_by_position(0).type->equals(*nullable_type)); + } + } +} - // Test compare_column_at - EXPECT_EQ(0, block1.compare_column_at(0, 0, 0, block2, 1)); // Compare first column - EXPECT_LT(block1.compare_column_at(0, 1, 0, block2, 1), 0); // 1 < 3 +TEST(BlockTest, EraseTmpColumns) { + // Test with empty block + { + vectorized::Block block; + block.erase_tmp_columns(); + EXPECT_EQ(0, block.columns()); } - // Test same_bit operations + // Test with regular columns { vectorized::Block block; auto type = std::make_shared(); - // Create block with data - auto col = vectorized::ColumnVector::create(); - for (int i = 0; i < 3; ++i) { + // Add regular columns + for (int i = 1; i <= 3; ++i) { + auto col = vectorized::ColumnVector::create(); col->insert_value(i); + block.insert({std::move(col), type, "normal_col" + std::to_string(i)}); } - block.insert({std::move(col), type, "col1"}); - // Test set_same_bit - std::vector same_bits = {true, false, true}; - block.set_same_bit(same_bits.begin(), same_bits.end()); + // Add temporary columns + for (int i = 1; i <= 2; ++i) { + auto col = vectorized::ColumnVector::create(); + col->insert_value(i + 10); + block.insert({std::move(col), type, + std::string(BeConsts::BLOCK_TEMP_COLUMN_PREFIX) + "tmp_col" + std::to_string(i)}); + } - // Test get_same_bit - EXPECT_TRUE(block.get_same_bit(0)); - EXPECT_FALSE(block.get_same_bit(1)); - EXPECT_TRUE(block.get_same_bit(2)); - EXPECT_FALSE(block.get_same_bit(3)); // Out of range + EXPECT_EQ(5, block.columns()); + block.erase_tmp_columns(); + EXPECT_EQ(3, block.columns()); - // Test clear_same_bit - block.clear_same_bit(); - EXPECT_FALSE(block.get_same_bit(0)); // After clear, all bits should be false + // Verify regular columns are kept + for (int i = 1; i <= 3; ++i) { + std::string col_name = "normal_col" + std::to_string(i); + EXPECT_TRUE(block.has(col_name)); + EXPECT_EQ(col_name, block.get_by_position(i-1).name); + } + + // Verify temporary columns are removed + for (int i = 1; i <= 2; ++i) { + EXPECT_FALSE(block.has(std::string(BeConsts::BLOCK_TEMP_COLUMN_PREFIX) + "tmp_col" + std::to_string(i))); + } } - // Test erase_tmp_columns + // Test with const columns { vectorized::Block block; auto type = std::make_shared(); - // Add regular column - { - auto col1 = vectorized::ColumnVector::create(); - col1->insert_value(1); - block.insert({std::move(col1), type, "normal_col"}); + // Add regular const columns + for (int i = 1; i <= 2; ++i) { + auto base_col = vectorized::ColumnVector::create(); + base_col->insert_value(i); + auto const_col = vectorized::ColumnConst::create(base_col->get_ptr(), 5); + block.insert({const_col->get_ptr(), type, "const_col" + std::to_string(i)}); } - // Add temporary column with correct prefix - { - auto col2 = vectorized::ColumnVector::create(); - col2->insert_value(2); - block.insert({std::move(col2), type, - std::string(BeConsts::BLOCK_TEMP_COLUMN_PREFIX) + "col"}); + // Add temporary const columns + for (int i = 1; i <= 2; ++i) { + auto base_col = vectorized::ColumnVector::create(); + base_col->insert_value(i + 10); + auto const_col = vectorized::ColumnConst::create(base_col->get_ptr(), 5); + block.insert({const_col->get_ptr(), type, + std::string(BeConsts::BLOCK_TEMP_COLUMN_PREFIX) + "tmp_const" + std::to_string(i)}); } - // Add another temporary column - { - auto col3 = vectorized::ColumnVector::create(); - col3->insert_value(3); - block.insert({std::move(col3), type, - std::string(BeConsts::BLOCK_TEMP_COLUMN_PREFIX) + "another_col"}); + EXPECT_EQ(4, block.columns()); + block.erase_tmp_columns(); + EXPECT_EQ(2, block.columns()); + + // Verify regular const columns are kept + for (int i = 1; i <= 2; ++i) { + std::string col_name = "const_col" + std::to_string(i); + EXPECT_TRUE(block.has(col_name)); + EXPECT_EQ(col_name, block.get_by_position(i-1).name); } - EXPECT_EQ(3, block.columns()); + // Verify temporary const columns are removed + for (int i = 1; i <= 2; ++i) { + EXPECT_FALSE(block.has(std::string(BeConsts::BLOCK_TEMP_COLUMN_PREFIX) + "tmp_const" + std::to_string(i))); + } + } + + // Test with nullable columns + { + vectorized::Block block; + auto base_type = std::make_shared(); + auto nullable_type = std::make_shared(base_type); + + // Add regular nullable columns + for (int i = 1; i <= 2; ++i) { + auto col = vectorized::ColumnNullable::create( + vectorized::ColumnVector::create(), + vectorized::ColumnVector::create()); + auto* nested = assert_cast*>( + col->get_nested_column_ptr().get()); + auto* null_map = assert_cast*>( + col->get_null_map_column_ptr().get()); + nested->insert_value(i); + null_map->insert_value(i % 2); + block.insert({col->get_ptr(), nullable_type, "nullable_col" + std::to_string(i)}); + } + + // Add temporary nullable columns + for (int i = 1; i <= 2; ++i) { + auto col = vectorized::ColumnNullable::create( + vectorized::ColumnVector::create(), + vectorized::ColumnVector::create()); + auto* nested = assert_cast*>( + col->get_nested_column_ptr().get()); + auto* null_map = assert_cast*>( + col->get_null_map_column_ptr().get()); + nested->insert_value(i + 10); + null_map->insert_value((i + 1) % 2); + block.insert({col->get_ptr(), nullable_type, + std::string(BeConsts::BLOCK_TEMP_COLUMN_PREFIX) + "tmp_null" + std::to_string(i)}); + } + + EXPECT_EQ(4, block.columns()); block.erase_tmp_columns(); - EXPECT_EQ(1, block.columns()); - EXPECT_EQ("normal_col", block.get_by_position(0).name); + EXPECT_EQ(2, block.columns()); - // Verify temporary columns are removed - EXPECT_FALSE(block.has(std::string(BeConsts::BLOCK_TEMP_COLUMN_PREFIX) + "col")); - EXPECT_FALSE(block.has(std::string(BeConsts::BLOCK_TEMP_COLUMN_PREFIX) + "another_col")); + // Verify regular nullable columns are kept + for (int i = 1; i <= 2; ++i) { + std::string col_name = "nullable_col" + std::to_string(i); + EXPECT_TRUE(block.has(col_name)); + EXPECT_EQ(col_name, block.get_by_position(i-1).name); + } + + // Verify temporary nullable columns are removed + for (int i = 1; i <= 2; ++i) { + EXPECT_FALSE(block.has(std::string(BeConsts::BLOCK_TEMP_COLUMN_PREFIX) + "tmp_null" + std::to_string(i))); + } + } +} + +TEST(BlockTest, ClearColumnMemNotKeep) { + // Test with empty block + { + vectorized::Block block; + std::vector keep_flags; + EXPECT_DEATH(block.clear_column_mem_not_keep(keep_flags, true), ""); } - // Test clear_column_mem_not_keep + // Test with regular columns { vectorized::Block block; auto type = std::make_shared(); - // Add three columns - for (int i = 0; i < 3; ++i) { + // Add multiple columns with regular values + const int num_columns = 3; + for (int i = 0; i < num_columns; ++i) { auto col = vectorized::ColumnVector::create(); - col->insert_value(i); + for (int j = 0; j < 5; ++j) { + col->insert_value(i * 10 + j); + } block.insert({std::move(col), type, "col" + std::to_string(i)}); } - std::vector keep_flags = {true, false, true}; + std::vector keep_flags(num_columns); + keep_flags[0] = true; + keep_flags[1] = false; + keep_flags[2] = true; + + EXPECT_EQ(keep_flags.size(), block.columns()); + block.clear_column_mem_not_keep(keep_flags, true); // Verify columns are kept but data is cleared for non-kept columns - EXPECT_EQ(3, block.columns()); - EXPECT_EQ(1, block.get_by_position(0).column->size()); // Kept + EXPECT_EQ(num_columns, block.columns()); + EXPECT_EQ(5, block.get_by_position(0).column->size()); // Kept + EXPECT_EQ(0, block.get_by_position(1).column->size()); // Cleared + EXPECT_EQ(5, block.get_by_position(2).column->size()); // Kept + + // Verify data in kept columns remains intact + const auto* col0 = assert_cast*>( + block.get_by_position(0).column.get()); + const auto* col2 = assert_cast*>( + block.get_by_position(2).column.get()); + + for (int i = 0; i < 5; ++i) { + EXPECT_EQ(i, col0->get_data()[i]); + EXPECT_EQ(20 + i, col2->get_data()[i]); + } + } + + // Test with const columns + { + vectorized::Block block; + auto type = std::make_shared(); + + // Add multiple const columns + const int num_columns = 3; + for (int i = 0; i < num_columns; ++i) { + auto base_col = vectorized::ColumnVector::create(); + base_col->insert_value(i * 10); + auto const_col = vectorized::ColumnConst::create(base_col->get_ptr(), 5); + block.insert({const_col->get_ptr(), type, "const_col" + std::to_string(i)}); + } + + std::vector keep_flags(num_columns); + keep_flags[0] = true; + keep_flags[1] = false; + keep_flags[2] = true; + + EXPECT_EQ(keep_flags.size(), block.columns()); + + block.clear_column_mem_not_keep(keep_flags, true); + + // Verify columns are kept but data is cleared for non-kept columns + EXPECT_EQ(num_columns, block.columns()); + EXPECT_EQ(5, block.get_by_position(0).column->size()); // Kept + EXPECT_EQ(0, block.get_by_position(1).column->size()); // Cleared + EXPECT_EQ(5, block.get_by_position(2).column->size()); // Kept + + // Verify const values in kept columns remain intact + const auto* col0 = assert_cast( + block.get_by_position(0).column.get()); + const auto* col2 = assert_cast( + block.get_by_position(2).column.get()); + + EXPECT_EQ(0, col0->get_int(0)); + EXPECT_EQ(20, col2->get_int(0)); + } + + // Test with nullable columns + { + vectorized::Block block; + auto base_type = std::make_shared(); + auto nullable_type = std::make_shared(base_type); + + // Add multiple nullable columns + const int num_columns = 3; + for (int i = 0; i < num_columns; ++i) { + auto col = vectorized::ColumnNullable::create( + vectorized::ColumnVector::create(), + vectorized::ColumnVector::create()); + auto* nested = assert_cast*>( + col->get_nested_column_ptr().get()); + auto* null_map = assert_cast*>( + col->get_null_map_column_ptr().get()); + + for (int j = 0; j < 5; ++j) { + nested->insert_value(i * 10 + j); + null_map->insert_value(j % 2); // Alternate between null and non-null + } + block.insert({col->get_ptr(), nullable_type, "nullable_col" + std::to_string(i)}); + } + + std::vector keep_flags(num_columns); + keep_flags[0] = true; + keep_flags[1] = false; + keep_flags[2] = true; + + EXPECT_EQ(keep_flags.size(), block.columns()); + + block.clear_column_mem_not_keep(keep_flags, true); + + // Verify columns are kept but data is cleared for non-kept columns + EXPECT_EQ(num_columns, block.columns()); + EXPECT_EQ(5, block.get_by_position(0).column->size()); // Kept EXPECT_EQ(0, block.get_by_position(1).column->size()); // Cleared - EXPECT_EQ(1, block.get_by_position(2).column->size()); // Kept + EXPECT_EQ(5, block.get_by_position(2).column->size()); // Kept + + // Verify data and null states in kept columns remain intact + const auto* col0 = assert_cast( + block.get_by_position(0).column.get()); + const auto* col2 = assert_cast( + block.get_by_position(2).column.get()); + + for (int i = 0; i < 5; ++i) { + EXPECT_EQ(i % 2, col0->is_null_at(i)); + EXPECT_EQ(i % 2, col2->is_null_at(i)); + if (!col0->is_null_at(i)) { + EXPECT_EQ(i, col0->get_nested_column_ptr()->get_int(i)); + } + if (!col2->is_null_at(i)) { + EXPECT_EQ(20 + i, col2->get_nested_column_ptr()->get_int(i)); + } + } } } From cb87c0b71c4786dd83a83b7db60699be47d23dbb Mon Sep 17 00:00:00 2001 From: yoruet <1559650411@qq.com> Date: Wed, 18 Dec 2024 13:02:18 +0800 Subject: [PATCH 32/41] Enhance BlockTest with comprehensive tests for string operations, including shrinking string columns with trailing zeros and verifying data integrity across various scenarios, such as empty strings and array columns. --- be/test/vec/core/block_test.cpp | 142 ++++++++++++++++++++++---------- 1 file changed, 99 insertions(+), 43 deletions(-) diff --git a/be/test/vec/core/block_test.cpp b/be/test/vec/core/block_test.cpp index e349e3fd24d1f8..e730420f54af37 100644 --- a/be/test/vec/core/block_test.cpp +++ b/be/test/vec/core/block_test.cpp @@ -5193,55 +5193,86 @@ TEST(BlockTest, ClearColumnMemNotKeep) { TEST(BlockTest, StringOperations) { using namespace std::string_literals; - // Test shrink_char_type_column_suffix_zero + + // Test with empty block + { + vectorized::Block block; + std::vector char_type_idx; + block.shrink_char_type_column_suffix_zero(char_type_idx); + EXPECT_EQ(0, block.columns()); + } + + // Test with regular string column { vectorized::Block block; // Add a string column with padding zeros { auto col = vectorized::ColumnString::create(); - // Add string with trailing zeros - std::string str1 = "hello\0\0\0"s; // 8bytes, contains 3 trailing zeros - std::string str2 = "world\0\0"s; // 7bytes, contains 2 trailing zeros - col->insert_data(str1.c_str(), str1.size()); - col->insert_data(str2.c_str(), str2.size()); + std::vector> test_strings = { + {"hello\0\0\0"s, 8}, // 8 bytes, 3 trailing zeros + {"world\0\0"s, 7}, // 7 bytes, 2 trailing zeros + {"test\0"s, 5}, // 5 bytes, 1 trailing zero + {""s, 0} // empty string + }; + + for (const auto& [str, size] : test_strings) { + col->insert_data(str.c_str(), size); + } auto type = std::make_shared(); block.insert({std::move(col), type, "str_col"}); } - // Add a non-string column + // Add a non-string column for comparison { auto col = vectorized::ColumnVector::create(); - col->insert_value(1); - col->insert_value(2); + std::vector test_values = {1, 2, 3, 4}; + for (const auto& val : test_values) { + col->insert_value(val); + } auto type = std::make_shared(); block.insert({std::move(col), type, "int_col"}); } + // Verify initial state + ASSERT_EQ(2, block.columns()); + // Test shrinking string column std::vector char_type_idx = {0}; // Index of string column + ASSERT_LT(char_type_idx[0], block.columns()); block.shrink_char_type_column_suffix_zero(char_type_idx); // Verify string column is shrunk - const auto* str_col = - assert_cast(block.get_by_position(0).column.get()); - - // Verify first string - StringRef ref1 = str_col->get_data_at(0); - EXPECT_EQ(5, ref1.size); // "hello" without zeros - EXPECT_EQ(0, memcmp(ref1.data, "hello", 5)); + const auto* str_col = assert_cast( + block.get_by_position(0).column.get()); + ASSERT_NE(nullptr, str_col); + + // Verify each string + std::vector> expected_results = { + {"hello", 5}, + {"world", 5}, + {"test", 4}, + {"", 0} + }; - // Verify second string - StringRef ref2 = str_col->get_data_at(1); - EXPECT_EQ(5, ref2.size); // "world" without zeros - EXPECT_EQ(0, memcmp(ref2.data, "world", 5)); + ASSERT_EQ(expected_results.size(), str_col->size()); + for (size_t i = 0; i < expected_results.size(); ++i) { + StringRef ref = str_col->get_data_at(i); + const auto& [expected_str, expected_size] = expected_results[i]; + EXPECT_EQ(expected_size, ref.size); + if (expected_size > 0) { + EXPECT_EQ(0, memcmp(ref.data, expected_str.c_str(), expected_size)); + } + } // Verify non-string column remains unchanged const auto* int_col = assert_cast*>( block.get_by_position(1).column.get()); - EXPECT_EQ(1, int_col->get_data()[0]); - EXPECT_EQ(2, int_col->get_data()[1]); + ASSERT_NE(nullptr, int_col); + for (size_t i = 0; i < 4; ++i) { + EXPECT_EQ(i + 1, int_col->get_data()[i]); + } } // Test with Array @@ -5252,43 +5283,68 @@ TEST(BlockTest, StringOperations) { auto string_type = std::make_shared(); auto array_type = std::make_shared(string_type); - // Add two strings with trailing zeros + // Add strings with trailing zeros auto string_col = vectorized::ColumnString::create(); - std::string str1 = "hello\0\0"s; - std::string str2 = "world\0"s; - string_col->insert_data(str1.c_str(), str1.size()); - string_col->insert_data(str2.c_str(), str2.size()); + std::vector> test_strings = { + {"hello\0\0"s, 7}, + {"world\0"s, 6}, + {"test\0\0\0"s, 8}, + {""s, 0} + }; + + for (const auto& [str, size] : test_strings) { + string_col->insert_data(str.c_str(), size); + } // Create array offsets column auto array_offsets = vectorized::ColumnArray::ColumnOffsets::create(); - array_offsets->get_data().push_back(2); // First array has 2 elements + array_offsets->get_data().push_back(2); // First array: ["hello", "world"] + array_offsets->get_data().push_back(4); // Second array: ["test", ""] - // Create array column - auto array_col = - vectorized::ColumnArray::create(std::move(string_col), std::move(array_offsets)); - - // Insert array column into block + // Create and insert array column + auto array_col = vectorized::ColumnArray::create(std::move(string_col), std::move(array_offsets)); block.insert({std::move(array_col), array_type, "array_str_col"}); + // Verify initial state + ASSERT_EQ(1, block.columns()); + // Shrink array column std::vector char_type_idx = {0}; + ASSERT_LT(char_type_idx[0], block.columns()); block.shrink_char_type_column_suffix_zero(char_type_idx); // Verify strings in array are shrunk - const auto* array_col_result = - assert_cast(block.get_by_position(0).column.get()); + const auto* array_col_result = assert_cast( + block.get_by_position(0).column.get()); + ASSERT_NE(nullptr, array_col_result); + const auto* string_col_result = assert_cast( array_col_result->get_data_ptr().get()); + ASSERT_NE(nullptr, string_col_result); + + // Verify each string in the arrays + std::vector> expected_results = { + {"hello", 5}, + {"world", 5}, + {"test", 4}, + {"", 0} + }; - // Verify first string in array - StringRef ref1 = string_col_result->get_data_at(0); - EXPECT_EQ(5, ref1.size); // "hello" without zeros - EXPECT_EQ(0, memcmp(ref1.data, "hello", 5)); + ASSERT_EQ(expected_results.size(), string_col_result->size()); + for (size_t i = 0; i < expected_results.size(); ++i) { + StringRef ref = string_col_result->get_data_at(i); + const auto& [expected_str, expected_size] = expected_results[i]; + EXPECT_EQ(expected_size, ref.size); + if (expected_size > 0) { + EXPECT_EQ(0, memcmp(ref.data, expected_str.c_str(), expected_size)); + } + } - // Verify second string in array - StringRef ref2 = string_col_result->get_data_at(1); - EXPECT_EQ(5, ref2.size); // "world" without zeros - EXPECT_EQ(0, memcmp(ref2.data, "world", 5)); + // Verify array structure remains intact + const auto& offsets = array_col_result->get_offsets(); + ASSERT_EQ(2, offsets.size()); + EXPECT_EQ(2, offsets[0]); + EXPECT_EQ(4, offsets[1]); } } From 00f7d1ab73e1e2df11a78ff37a2f73a8ec6e6614 Mon Sep 17 00:00:00 2001 From: yoruet <1559650411@qq.com> Date: Wed, 18 Dec 2024 13:13:59 +0800 Subject: [PATCH 33/41] code format --- be/test/vec/core/block_test.cpp | 38 +++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/be/test/vec/core/block_test.cpp b/be/test/vec/core/block_test.cpp index e730420f54af37..277551dd87ba95 100644 --- a/be/test/vec/core/block_test.cpp +++ b/be/test/vec/core/block_test.cpp @@ -2080,7 +2080,7 @@ TEST(BlockTest, CloneOperations) { // Test with empty block { vectorized::Block empty_block; - + // Test clone_empty auto cloned_empty = empty_block.clone_empty(); EXPECT_EQ(0, cloned_empty.columns()); @@ -2301,14 +2301,16 @@ TEST(BlockTest, CloneOperations) { col1->insert_value(1); auto null_map1 = vectorized::ColumnUInt8::create(); null_map1->insert_value(0); // Not null - auto nullable_col1 = vectorized::ColumnNullable::create(col1->get_ptr(), null_map1->get_ptr()); + auto nullable_col1 = + vectorized::ColumnNullable::create(col1->get_ptr(), null_map1->get_ptr()); block.insert({nullable_col1->get_ptr(), nullable_type, "nullable_col1"}); auto col2 = vectorized::ColumnVector::create(); col2->insert_value(2); auto null_map2 = vectorized::ColumnUInt8::create(); null_map2->insert_value(1); // Null - auto nullable_col2 = vectorized::ColumnNullable::create(col2->get_ptr(), null_map2->get_ptr()); + auto nullable_col2 = + vectorized::ColumnNullable::create(col2->get_ptr(), null_map2->get_ptr()); block.insert({nullable_col2->get_ptr(), nullable_type, "nullable_col2"}); // Test all clone operations @@ -2376,7 +2378,7 @@ TEST(BlockTest, FilterAndSelector) { // Test empty block { vectorized::Block empty_block; - + // Test filter_block_internal vectorized::IColumn::Filter filter(0); EXPECT_NO_THROW(vectorized::Block::filter_block_internal(&empty_block, filter)); @@ -2385,9 +2387,10 @@ TEST(BlockTest, FilterAndSelector) { // Test filter_block std::vector columns_to_filter; - EXPECT_DEATH(vectorized::Block::filter_block(&empty_block, columns_to_filter, 0, 0).ok(), ""); + EXPECT_DEATH(vectorized::Block::filter_block(&empty_block, columns_to_filter, 0, 0).ok(), + ""); EXPECT_EQ(0, empty_block.rows()); - + // Test append_to_block_by_selector vectorized::Block dst_block; vectorized::MutableBlock dst(&dst_block); @@ -2724,8 +2727,7 @@ TEST(BlockTest, FilterAndSelector) { auto nullable_filter = vectorized::ColumnNullable::create( vectorized::ColumnVector::create(10, 1), - vectorized::ColumnVector::create(10, 0) - ); + vectorized::ColumnVector::create(10, 0)); auto filter_type = std::make_shared( std::make_shared()); @@ -2746,8 +2748,7 @@ TEST(BlockTest, FilterAndSelector) { auto test_block = create_test_block(10); auto const_filter = vectorized::ColumnConst::create( - vectorized::ColumnVector::create(1, 0), - 10); + vectorized::ColumnVector::create(1, 0), 10); auto filter_type = std::make_shared(); test_block.insert({const_filter->get_ptr(), filter_type, "filter"}); @@ -2873,7 +2874,7 @@ TEST(BlockTest, FilterAndSelector) { for (int i = 0; i < size; ++i) { nested1->insert_value(i); nested2->insert_value(i * 2); - null_map1->insert_value(i % 2); // Even rows are not null + null_map1->insert_value(i % 2); // Even rows are not null null_map2->insert_value((i + 1) % 2); // Odd rows are not null } @@ -2906,11 +2907,14 @@ TEST(BlockTest, FilterAndSelector) { if (!filtered_col1->is_null_at(i)) { EXPECT_EQ(original_row, assert_cast*>( - filtered_col1->get_nested_column_ptr().get())->get_data()[i]); + filtered_col1->get_nested_column_ptr().get()) + ->get_data()[i]); } if (!filtered_col2->is_null_at(i)) { - EXPECT_EQ(original_row * 2, assert_cast*>( - filtered_col2->get_nested_column_ptr().get())->get_data()[i]); + EXPECT_EQ(original_row * 2, + assert_cast*>( + filtered_col2->get_nested_column_ptr().get()) + ->get_data()[i]); } } } @@ -2936,7 +2940,8 @@ TEST(BlockTest, FilterAndSelector) { EXPECT_EQ(original_row % 2, filtered_col1->is_null_at(i)); if (!filtered_col1->is_null_at(i)) { EXPECT_EQ(original_row, assert_cast*>( - filtered_col1->get_nested_column_ptr().get())->get_data()[i]); + filtered_col1->get_nested_column_ptr().get()) + ->get_data()[i]); } } @@ -2945,7 +2950,8 @@ TEST(BlockTest, FilterAndSelector) { EXPECT_EQ((i + 1) % 2, filtered_col2->is_null_at(i)); if (!filtered_col2->is_null_at(i)) { EXPECT_EQ(i * 2, assert_cast*>( - filtered_col2->get_nested_column_ptr().get())->get_data()[i]); + filtered_col2->get_nested_column_ptr().get()) + ->get_data()[i]); } } } From ab454cd4f0d3fae37c06a83f77a146cd61100fc6 Mon Sep 17 00:00:00 2001 From: yoruet <1559650411@qq.com> Date: Wed, 18 Dec 2024 13:20:16 +0800 Subject: [PATCH 34/41] code format --- be/test/vec/core/block_test.cpp | 42 +++++++++++++++++++-------------- 1 file changed, 24 insertions(+), 18 deletions(-) diff --git a/be/test/vec/core/block_test.cpp b/be/test/vec/core/block_test.cpp index 277551dd87ba95..128a3ea66068d0 100644 --- a/be/test/vec/core/block_test.cpp +++ b/be/test/vec/core/block_test.cpp @@ -2978,7 +2978,8 @@ TEST(BlockTest, FilterAndSelector) { EXPECT_EQ(expected_null_col1, filtered_col1->is_null_at(i)); if (!filtered_col1->is_null_at(i)) { EXPECT_EQ(original_row, assert_cast*>( - filtered_col1->get_nested_column_ptr().get())->get_data()[i]); + filtered_col1->get_nested_column_ptr().get()) + ->get_data()[i]); } } @@ -2987,7 +2988,8 @@ TEST(BlockTest, FilterAndSelector) { EXPECT_EQ((i + 1) % 2, filtered_col2->is_null_at(i)); if (!filtered_col2->is_null_at(i)) { EXPECT_EQ(i * 2, assert_cast*>( - filtered_col2->get_nested_column_ptr().get())->get_data()[i]); + filtered_col2->get_nested_column_ptr().get()) + ->get_data()[i]); } } } @@ -2999,8 +3001,7 @@ TEST(BlockTest, FilterAndSelector) { // Create nullable filter column auto nullable_filter = vectorized::ColumnNullable::create( vectorized::ColumnVector::create(10, 1), - vectorized::ColumnVector::create(10, 0) - ); + vectorized::ColumnVector::create(10, 0)); auto filter_type = std::make_shared( std::make_shared()); @@ -3024,8 +3025,7 @@ TEST(BlockTest, FilterAndSelector) { // Create const filter column (false) auto const_filter = vectorized::ColumnConst::create( - vectorized::ColumnVector::create(1, 0), - 10); + vectorized::ColumnVector::create(1, 0), 10); auto filter_type = std::make_shared(); test_block.insert({const_filter->get_ptr(), filter_type, "filter"}); @@ -3073,18 +3073,21 @@ TEST(BlockTest, FilterAndSelector) { if (!filtered_col1->is_null_at(i)) { EXPECT_EQ(original_row, assert_cast*>( - filtered_col1->get_nested_column_ptr().get())->get_data()[i]); + filtered_col1->get_nested_column_ptr().get()) + ->get_data()[i]); } if (!filtered_col2->is_null_at(i)) { - EXPECT_EQ(original_row * 2, assert_cast*>( - filtered_col2->get_nested_column_ptr().get())->get_data()[i]); + EXPECT_EQ(original_row * 2, + assert_cast*>( + filtered_col2->get_nested_column_ptr().get()) + ->get_data()[i]); } } } // Test append_to_block_by_selector { auto block = create_test_block(10); - + // Create destination block with proper columns auto base_type = std::make_shared(); auto nullable_type = std::make_shared(base_type); @@ -3123,7 +3126,7 @@ TEST(BlockTest, FilterAndSelector) { // Verify data and null map for selected rows for (size_t i = 0; i < 5; ++i) { size_t original_row = i * 2; - + // Verify null flags EXPECT_EQ(original_row % 2, selected_col1->is_null_at(i)); EXPECT_EQ((original_row + 1) % 2, selected_col2->is_null_at(i)); @@ -3131,11 +3134,14 @@ TEST(BlockTest, FilterAndSelector) { // Verify values for non-null elements if (!selected_col1->is_null_at(i)) { EXPECT_EQ(original_row, assert_cast*>( - selected_col1->get_nested_column_ptr().get())->get_data()[i]); + selected_col1->get_nested_column_ptr().get()) + ->get_data()[i]); } if (!selected_col2->is_null_at(i)) { - EXPECT_EQ(original_row * 2, assert_cast*>( - selected_col2->get_nested_column_ptr().get())->get_data()[i]); + EXPECT_EQ(original_row * 2, + assert_cast*>( + selected_col2->get_nested_column_ptr().get()) + ->get_data()[i]); } } } @@ -3259,7 +3265,7 @@ TEST(BlockTest, RowCheck) { auto col1 = vectorized::ColumnNullable::create( vectorized::ColumnVector::create(), vectorized::ColumnVector::create()); - + // Need to cast to concrete type before calling insert_value auto* nested1 = assert_cast*>( col1->get_nested_column_ptr().get()); @@ -3267,7 +3273,7 @@ TEST(BlockTest, RowCheck) { col1->get_null_map_column_ptr().get()); nested1->insert_value(1); null_map1->insert_value(0); - + block.insert({col1->get_ptr(), nullable_type, "nullable_col1"}); auto col2 = vectorized::ColumnNullable::create( @@ -3293,7 +3299,7 @@ TEST(BlockTest, RowCheck) { auto col = vectorized::ColumnNullable::create( vectorized::ColumnVector::create(), vectorized::ColumnVector::create()); - + // Need to cast to concrete type before calling insert_value auto* nested = assert_cast*>( col->get_nested_column_ptr().get()); @@ -3301,7 +3307,7 @@ TEST(BlockTest, RowCheck) { col->get_null_map_column_ptr().get()); nested->insert_value(1); null_map->insert_value(0); - + other_block.insert({col->get_ptr(), nullable_type, "nullable_col1"}); block.swap(other_block); From 9ec8a8caa6b58ea0babb02543465b18b8000c224 Mon Sep 17 00:00:00 2001 From: yoruet <1559650411@qq.com> Date: Wed, 18 Dec 2024 13:32:11 +0800 Subject: [PATCH 35/41] code format --- be/test/vec/core/block_test.cpp | 206 ++++++++++++++++---------------- 1 file changed, 103 insertions(+), 103 deletions(-) diff --git a/be/test/vec/core/block_test.cpp b/be/test/vec/core/block_test.cpp index 128a3ea66068d0..da2b28b4880a57 100644 --- a/be/test/vec/core/block_test.cpp +++ b/be/test/vec/core/block_test.cpp @@ -3373,7 +3373,7 @@ TEST(BlockTest, ClearColumnData) { { vectorized::Block block; auto type = std::make_shared(); - + for (int i = 0; i < 3; ++i) { auto col = vectorized::ColumnVector::create(); block.insert({std::move(col), type, "empty_col" + std::to_string(i)}); @@ -3402,7 +3402,7 @@ TEST(BlockTest, ClearColumnData) { auto create_test_block = [](int num_columns) { vectorized::Block block; auto type = std::make_shared(); - + for (int i = 0; i < num_columns; ++i) { auto col = vectorized::ColumnVector::create(); col->insert_value(i + 1); @@ -3453,7 +3453,7 @@ TEST(BlockTest, ClearColumnData) { auto create_test_block = [](int num_columns) { vectorized::Block block; auto type = std::make_shared(); - + for (int i = 0; i < num_columns; ++i) { auto base_col = vectorized::ColumnVector::create(); base_col->insert_value(42 + i); @@ -3506,21 +3506,22 @@ TEST(BlockTest, ClearColumnData) { vectorized::Block block; auto base_type = std::make_shared(); auto nullable_type = std::make_shared(base_type); - + for (int i = 0; i < num_columns; ++i) { auto col = vectorized::ColumnNullable::create( vectorized::ColumnVector::create(), vectorized::ColumnVector::create()); - + auto* nested = assert_cast*>( col->get_nested_column_ptr().get()); auto* null_map = assert_cast*>( col->get_null_map_column_ptr().get()); - + nested->insert_value(i + 1); null_map->insert_value(i % 2); - - block.insert({col->get_ptr(), nullable_type, "nullable_col" + std::to_string(i + 1)}); + + block.insert( + {col->get_ptr(), nullable_type, "nullable_col" + std::to_string(i + 1)}); } return block; }; @@ -3568,7 +3569,7 @@ TEST(BlockTest, IndexByName) { { vectorized::Block block; block.initialize_index_by_name(); - + // Test basic name operations EXPECT_FALSE(block.has("col1")); EXPECT_THROW(block.get_position_by_name("col1"), Exception); @@ -3745,7 +3746,8 @@ TEST(BlockTest, IndexByName) { // Test after structure modification block.erase(2); // Remove last "nullable_col1" block.initialize_index_by_name(); - EXPECT_EQ(0, block.get_position_by_name("nullable_col1")); // Now first "nullable_col1" is found + EXPECT_EQ(0, block.get_position_by_name( + "nullable_col1")); // Now first "nullable_col1" is found } } @@ -3827,10 +3829,10 @@ TEST(BlockTest, ColumnTransformations) { EXPECT_EQ("const_col1", block.get_by_position(1).name); // Verify const values are preserved - const auto* col1 = assert_cast( - block.get_by_position(1).column.get()); - const auto* col2 = assert_cast( - block.get_by_position(0).column.get()); + const auto* col1 = + assert_cast(block.get_by_position(1).column.get()); + const auto* col2 = + assert_cast(block.get_by_position(0).column.get()); EXPECT_EQ(42, col1->get_int(0)); EXPECT_EQ(24, col2->get_int(0)); @@ -3932,7 +3934,7 @@ TEST(BlockTest, HashUpdate) { { vectorized::Block block; auto type = std::make_shared(); - + auto col = vectorized::ColumnVector::create(); col->insert_value(42); block.insert({std::move(col), type, "col1"}); @@ -4208,7 +4210,7 @@ TEST(BlockTest, HashUpdate) { col->get_nested_column_ptr().get()); auto* null_map = assert_cast*>( col->get_null_map_column_ptr().get()); - + for (int i = 0; i < 5; ++i) { nested->insert_value(i); null_map->insert_value(i % 2); @@ -4226,7 +4228,7 @@ TEST(BlockTest, HashUpdate) { col->get_nested_column_ptr().get()); auto* null_map = assert_cast*>( col->get_null_map_column_ptr().get()); - + for (int i = 4; i >= 0; --i) { nested->insert_value(i); null_map->insert_value(i % 2); @@ -4322,11 +4324,11 @@ TEST(BlockTest, CompareAt) { // Test with empty blocks { vectorized::Block block1, block2; - + // Test basic compare_at EXPECT_EQ(0, block1.compare_at(0, 0, block2, 0)); EXPECT_DEATH(block1.compare_at(1, 1, block2, 0), ""); - + // Test compare_at with num_columns EXPECT_DEATH(block1.compare_at(0, 0, 1, block2, 0), ""); @@ -4371,15 +4373,15 @@ TEST(BlockTest, CompareAt) { } // Test basic compare_at - EXPECT_EQ(0, block1.compare_at(0, 0, block2, 0)); // Equal rows - EXPECT_LT(block1.compare_at(0, 1, block2, 1), 0); // [1,3] < [3,5] - EXPECT_GT(block1.compare_at(1, 0, block2, 0), 0); // [2,4] > [1,3] + EXPECT_EQ(0, block1.compare_at(0, 0, block2, 0)); // Equal rows + EXPECT_LT(block1.compare_at(0, 1, block2, 1), 0); // [1,3] < [3,5] + EXPECT_GT(block1.compare_at(1, 0, block2, 0), 0); // [2,4] > [1,3] // Test compare_at with num_columns - EXPECT_EQ(0, block1.compare_at(0, 0, 1, block2, 0)); // Compare only first column + EXPECT_EQ(0, block1.compare_at(0, 0, 1, block2, 0)); // Compare only first column // Test compare_at with specific columns - std::vector compare_cols = {1}; // Compare only second column + std::vector compare_cols = {1}; // Compare only second column EXPECT_EQ(0, block1.compare_at(0, 0, &compare_cols, block2, 0)); } @@ -4424,11 +4426,11 @@ TEST(BlockTest, CompareAt) { EXPECT_EQ(-1, block1.compare_at(1, 0, block2, 0)); // Test compare_at with num_columns - EXPECT_EQ(0, block1.compare_at(0, 0, 1, block2, 0)); // Compare only first column + EXPECT_EQ(0, block1.compare_at(0, 0, 1, block2, 0)); // Compare only first column // Test compare_at with specific columns - std::vector compare_cols = {1}; // Compare only second column - EXPECT_LT(block1.compare_at(0, 0, &compare_cols, block2, 0), 0); // const(2) < const(3) + std::vector compare_cols = {1}; // Compare only second column + EXPECT_LT(block1.compare_at(0, 0, &compare_cols, block2, 0), 0); // const(2) < const(3) } // Test with nullable columns @@ -4449,8 +4451,8 @@ TEST(BlockTest, CompareAt) { col1->get_null_map_column_ptr().get()); nested1->insert_value(1); nested1->insert_value(2); - null_map1->insert_value(0); // not null - null_map1->insert_value(1); // null + null_map1->insert_value(0); // not null + null_map1->insert_value(1); // null block1.insert({col1->get_ptr(), nullable_type, "col1"}); // Second column: [null, 4] @@ -4463,8 +4465,8 @@ TEST(BlockTest, CompareAt) { col2->get_null_map_column_ptr().get()); nested2->insert_value(3); nested2->insert_value(4); - null_map2->insert_value(1); // null - null_map2->insert_value(0); // not null + null_map2->insert_value(1); // null + null_map2->insert_value(0); // not null block1.insert({col2->get_ptr(), nullable_type, "col2"}); } @@ -4480,8 +4482,8 @@ TEST(BlockTest, CompareAt) { col1->get_null_map_column_ptr().get()); nested1->insert_value(1); nested1->insert_value(3); - null_map1->insert_value(0); // not null - null_map1->insert_value(0); // not null + null_map1->insert_value(0); // not null + null_map1->insert_value(0); // not null block2.insert({col1->get_ptr(), nullable_type, "col1"}); // Second column: [3, null] @@ -4494,21 +4496,21 @@ TEST(BlockTest, CompareAt) { col2->get_null_map_column_ptr().get()); nested2->insert_value(3); nested2->insert_value(5); - null_map2->insert_value(0); // not null - null_map2->insert_value(1); // null + null_map2->insert_value(0); // not null + null_map2->insert_value(1); // null block2.insert({col2->get_ptr(), nullable_type, "col2"}); } // Test basic compare_at - EXPECT_EQ(0, block1.compare_at(0, 0, block2, 0)); // Equal non-null values - EXPECT_LT(block1.compare_at(0, 1, block2, 1), 0); // null < non-null - EXPECT_GT(block1.compare_at(1, 0, block2, 0), 0); // non-null > null + EXPECT_EQ(0, block1.compare_at(0, 0, block2, 0)); // Equal non-null values + EXPECT_LT(block1.compare_at(0, 1, block2, 1), 0); // null < non-null + EXPECT_GT(block1.compare_at(1, 0, block2, 0), 0); // non-null > null // Test compare_at with num_columns EXPECT_EQ(0, block1.compare_at(0, 0, 1, block2, 0)); // Compare only first column // Test compare_at with specific columns - std::vector compare_cols = {1}; // Compare only second column + std::vector compare_cols = {1}; // Compare only second column EXPECT_EQ(0, block1.compare_at(0, 0, &compare_cols, block2, 0)); // null > non-null } } @@ -4552,10 +4554,10 @@ TEST(BlockTest, CompareColumnAt) { } // Test compare_column_at for each column - EXPECT_EQ(0, block1.compare_column_at(0, 0, 0, block2, 0)); // First column, equal values - EXPECT_LT(block1.compare_column_at(0, 1, 0, block2, 1), 0); // First column, 2 < 3 - EXPECT_EQ(0, block1.compare_column_at(0, 0, 1, block2, 0)); // Second column, equal values - EXPECT_LT(block1.compare_column_at(0, 1, 1, block2, 1), 0); // Second column, 4 < 5 + EXPECT_EQ(0, block1.compare_column_at(0, 0, 0, block2, 0)); // First column, equal values + EXPECT_LT(block1.compare_column_at(0, 1, 0, block2, 1), 0); // First column, 2 < 3 + EXPECT_EQ(0, block1.compare_column_at(0, 0, 1, block2, 0)); // Second column, equal values + EXPECT_LT(block1.compare_column_at(0, 1, 1, block2, 1), 0); // Second column, 4 < 5 } // Test with const columns @@ -4590,10 +4592,10 @@ TEST(BlockTest, CompareColumnAt) { } // Test compare_column_at for each column - EXPECT_EQ(0, block1.compare_column_at(0, 0, 0, block2, 0)); // First column, equal values - EXPECT_EQ(0, block1.compare_column_at(1, 1, 0, block2, 1)); // First column, equal values - EXPECT_LT(block1.compare_column_at(0, 0, 1, block2, 0), 0); // Second column, 2 < 3 - EXPECT_LT(block1.compare_column_at(1, 1, 1, block2, 1), 0); // Second column, 2 < 3 + EXPECT_EQ(0, block1.compare_column_at(0, 0, 0, block2, 0)); // First column, equal values + EXPECT_EQ(0, block1.compare_column_at(1, 1, 0, block2, 1)); // First column, equal values + EXPECT_LT(block1.compare_column_at(0, 0, 1, block2, 0), 0); // Second column, 2 < 3 + EXPECT_LT(block1.compare_column_at(1, 1, 1, block2, 1), 0); // Second column, 2 < 3 } // Test with nullable columns @@ -4613,8 +4615,8 @@ TEST(BlockTest, CompareColumnAt) { col1->get_null_map_column_ptr().get()); nested1->insert_value(1); nested1->insert_value(2); - null_map1->insert_value(0); // not null - null_map1->insert_value(1); // null + null_map1->insert_value(0); // not null + null_map1->insert_value(1); // null block1.insert({col1->get_ptr(), nullable_type, "col1"}); auto col2 = vectorized::ColumnNullable::create( @@ -4626,8 +4628,8 @@ TEST(BlockTest, CompareColumnAt) { col2->get_null_map_column_ptr().get()); nested2->insert_value(3); nested2->insert_value(4); - null_map2->insert_value(1); // null - null_map2->insert_value(0); // not null + null_map2->insert_value(1); // null + null_map2->insert_value(0); // not null block1.insert({col2->get_ptr(), nullable_type, "col2"}); } @@ -4642,8 +4644,8 @@ TEST(BlockTest, CompareColumnAt) { col1->get_null_map_column_ptr().get()); nested1->insert_value(1); nested1->insert_value(3); - null_map1->insert_value(0); // not null - null_map1->insert_value(0); // not null + null_map1->insert_value(0); // not null + null_map1->insert_value(0); // not null block2.insert({col1->get_ptr(), nullable_type, "col1"}); auto col2 = vectorized::ColumnNullable::create( @@ -4655,16 +4657,19 @@ TEST(BlockTest, CompareColumnAt) { col2->get_null_map_column_ptr().get()); nested2->insert_value(3); nested2->insert_value(5); - null_map2->insert_value(0); // not null - null_map2->insert_value(1); // null + null_map2->insert_value(0); // not null + null_map2->insert_value(1); // null block2.insert({col2->get_ptr(), nullable_type, "col2"}); } // Test compare_column_at for each column - EXPECT_EQ(0, block1.compare_column_at(0, 0, 0, block2, 0)); // First column, equal non-null values - EXPECT_GT(block1.compare_column_at(1, 1, 0, block2, 1), 0); // First column, null < non-null - EXPECT_EQ(block1.compare_column_at(0, 0, 1, block2, 0), 0); // Second column, non-null > null - EXPECT_LT(block1.compare_column_at(1, 1, 1, block2, 1), 0); // Second column, non-null < null + EXPECT_EQ(0, block1.compare_column_at(0, 0, 0, block2, + 0)); // First column, equal non-null values + EXPECT_GT(block1.compare_column_at(1, 1, 0, block2, 1), 0); // First column, null < non-null + EXPECT_EQ(block1.compare_column_at(0, 0, 1, block2, 0), + 0); // Second column, non-null > null + EXPECT_LT(block1.compare_column_at(1, 1, 1, block2, 1), + 0); // Second column, non-null < null } } @@ -4903,7 +4908,7 @@ TEST(BlockTest, CreateSameStructBlock) { const auto* null_map = assert_cast*>( col->get_null_map_column_ptr().get()); for (size_t i = 0; i < 5; ++i) { - EXPECT_EQ(0, nested->get_data()[i]); // Default value for Int32 is 0 + EXPECT_EQ(0, nested->get_data()[i]); // Default value for Int32 is 0 EXPECT_EQ(1, null_map->get_data()[i]); // Default is null } } @@ -4944,7 +4949,8 @@ TEST(BlockTest, EraseTmpColumns) { auto col = vectorized::ColumnVector::create(); col->insert_value(i + 10); block.insert({std::move(col), type, - std::string(BeConsts::BLOCK_TEMP_COLUMN_PREFIX) + "tmp_col" + std::to_string(i)}); + std::string(BeConsts::BLOCK_TEMP_COLUMN_PREFIX) + "tmp_col" + + std::to_string(i)}); } EXPECT_EQ(5, block.columns()); @@ -4955,12 +4961,13 @@ TEST(BlockTest, EraseTmpColumns) { for (int i = 1; i <= 3; ++i) { std::string col_name = "normal_col" + std::to_string(i); EXPECT_TRUE(block.has(col_name)); - EXPECT_EQ(col_name, block.get_by_position(i-1).name); + EXPECT_EQ(col_name, block.get_by_position(i - 1).name); } // Verify temporary columns are removed for (int i = 1; i <= 2; ++i) { - EXPECT_FALSE(block.has(std::string(BeConsts::BLOCK_TEMP_COLUMN_PREFIX) + "tmp_col" + std::to_string(i))); + EXPECT_FALSE(block.has(std::string(BeConsts::BLOCK_TEMP_COLUMN_PREFIX) + "tmp_col" + + std::to_string(i))); } } @@ -4983,7 +4990,8 @@ TEST(BlockTest, EraseTmpColumns) { base_col->insert_value(i + 10); auto const_col = vectorized::ColumnConst::create(base_col->get_ptr(), 5); block.insert({const_col->get_ptr(), type, - std::string(BeConsts::BLOCK_TEMP_COLUMN_PREFIX) + "tmp_const" + std::to_string(i)}); + std::string(BeConsts::BLOCK_TEMP_COLUMN_PREFIX) + "tmp_const" + + std::to_string(i)}); } EXPECT_EQ(4, block.columns()); @@ -4994,12 +5002,13 @@ TEST(BlockTest, EraseTmpColumns) { for (int i = 1; i <= 2; ++i) { std::string col_name = "const_col" + std::to_string(i); EXPECT_TRUE(block.has(col_name)); - EXPECT_EQ(col_name, block.get_by_position(i-1).name); + EXPECT_EQ(col_name, block.get_by_position(i - 1).name); } // Verify temporary const columns are removed for (int i = 1; i <= 2; ++i) { - EXPECT_FALSE(block.has(std::string(BeConsts::BLOCK_TEMP_COLUMN_PREFIX) + "tmp_const" + std::to_string(i))); + EXPECT_FALSE(block.has(std::string(BeConsts::BLOCK_TEMP_COLUMN_PREFIX) + "tmp_const" + + std::to_string(i))); } } @@ -5035,7 +5044,8 @@ TEST(BlockTest, EraseTmpColumns) { nested->insert_value(i + 10); null_map->insert_value((i + 1) % 2); block.insert({col->get_ptr(), nullable_type, - std::string(BeConsts::BLOCK_TEMP_COLUMN_PREFIX) + "tmp_null" + std::to_string(i)}); + std::string(BeConsts::BLOCK_TEMP_COLUMN_PREFIX) + "tmp_null" + + std::to_string(i)}); } EXPECT_EQ(4, block.columns()); @@ -5046,12 +5056,13 @@ TEST(BlockTest, EraseTmpColumns) { for (int i = 1; i <= 2; ++i) { std::string col_name = "nullable_col" + std::to_string(i); EXPECT_TRUE(block.has(col_name)); - EXPECT_EQ(col_name, block.get_by_position(i-1).name); + EXPECT_EQ(col_name, block.get_by_position(i - 1).name); } // Verify temporary nullable columns are removed for (int i = 1; i <= 2; ++i) { - EXPECT_FALSE(block.has(std::string(BeConsts::BLOCK_TEMP_COLUMN_PREFIX) + "tmp_null" + std::to_string(i))); + EXPECT_FALSE(block.has(std::string(BeConsts::BLOCK_TEMP_COLUMN_PREFIX) + "tmp_null" + + std::to_string(i))); } } } @@ -5085,7 +5096,7 @@ TEST(BlockTest, ClearColumnMemNotKeep) { keep_flags[2] = true; EXPECT_EQ(keep_flags.size(), block.columns()); - + block.clear_column_mem_not_keep(keep_flags, true); // Verify columns are kept but data is cleared for non-kept columns @@ -5099,7 +5110,7 @@ TEST(BlockTest, ClearColumnMemNotKeep) { block.get_by_position(0).column.get()); const auto* col2 = assert_cast*>( block.get_by_position(2).column.get()); - + for (int i = 0; i < 5; ++i) { EXPECT_EQ(i, col0->get_data()[i]); EXPECT_EQ(20 + i, col2->get_data()[i]); @@ -5136,11 +5147,11 @@ TEST(BlockTest, ClearColumnMemNotKeep) { EXPECT_EQ(5, block.get_by_position(2).column->size()); // Kept // Verify const values in kept columns remain intact - const auto* col0 = assert_cast( - block.get_by_position(0).column.get()); - const auto* col2 = assert_cast( - block.get_by_position(2).column.get()); - + const auto* col0 = + assert_cast(block.get_by_position(0).column.get()); + const auto* col2 = + assert_cast(block.get_by_position(2).column.get()); + EXPECT_EQ(0, col0->get_int(0)); EXPECT_EQ(20, col2->get_int(0)); } @@ -5161,7 +5172,7 @@ TEST(BlockTest, ClearColumnMemNotKeep) { col->get_nested_column_ptr().get()); auto* null_map = assert_cast*>( col->get_null_map_column_ptr().get()); - + for (int j = 0; j < 5; ++j) { nested->insert_value(i * 10 + j); null_map->insert_value(j % 2); // Alternate between null and non-null @@ -5189,7 +5200,7 @@ TEST(BlockTest, ClearColumnMemNotKeep) { block.get_by_position(0).column.get()); const auto* col2 = assert_cast( block.get_by_position(2).column.get()); - + for (int i = 0; i < 5; ++i) { EXPECT_EQ(i % 2, col0->is_null_at(i)); EXPECT_EQ(i % 2, col2->is_null_at(i)); @@ -5222,10 +5233,10 @@ TEST(BlockTest, StringOperations) { { auto col = vectorized::ColumnString::create(); std::vector> test_strings = { - {"hello\0\0\0"s, 8}, // 8 bytes, 3 trailing zeros - {"world\0\0"s, 7}, // 7 bytes, 2 trailing zeros - {"test\0"s, 5}, // 5 bytes, 1 trailing zero - {""s, 0} // empty string + {"hello\0\0\0"s, 8}, // 8 bytes, 3 trailing zeros + {"world\0\0"s, 7}, // 7 bytes, 2 trailing zeros + {"test\0"s, 5}, // 5 bytes, 1 trailing zero + {""s, 0} // empty string }; for (const auto& [str, size] : test_strings) { @@ -5249,24 +5260,20 @@ TEST(BlockTest, StringOperations) { // Verify initial state ASSERT_EQ(2, block.columns()); - + // Test shrinking string column std::vector char_type_idx = {0}; // Index of string column ASSERT_LT(char_type_idx[0], block.columns()); block.shrink_char_type_column_suffix_zero(char_type_idx); // Verify string column is shrunk - const auto* str_col = assert_cast( - block.get_by_position(0).column.get()); + const auto* str_col = + assert_cast(block.get_by_position(0).column.get()); ASSERT_NE(nullptr, str_col); // Verify each string std::vector> expected_results = { - {"hello", 5}, - {"world", 5}, - {"test", 4}, - {"", 0} - }; + {"hello", 5}, {"world", 5}, {"test", 4}, {"", 0}}; ASSERT_EQ(expected_results.size(), str_col->size()); for (size_t i = 0; i < expected_results.size(); ++i) { @@ -5298,11 +5305,7 @@ TEST(BlockTest, StringOperations) { // Add strings with trailing zeros auto string_col = vectorized::ColumnString::create(); std::vector> test_strings = { - {"hello\0\0"s, 7}, - {"world\0"s, 6}, - {"test\0\0\0"s, 8}, - {""s, 0} - }; + {"hello\0\0"s, 7}, {"world\0"s, 6}, {"test\0\0\0"s, 8}, {""s, 0}}; for (const auto& [str, size] : test_strings) { string_col->insert_data(str.c_str(), size); @@ -5314,7 +5317,8 @@ TEST(BlockTest, StringOperations) { array_offsets->get_data().push_back(4); // Second array: ["test", ""] // Create and insert array column - auto array_col = vectorized::ColumnArray::create(std::move(string_col), std::move(array_offsets)); + auto array_col = + vectorized::ColumnArray::create(std::move(string_col), std::move(array_offsets)); block.insert({std::move(array_col), array_type, "array_str_col"}); // Verify initial state @@ -5326,8 +5330,8 @@ TEST(BlockTest, StringOperations) { block.shrink_char_type_column_suffix_zero(char_type_idx); // Verify strings in array are shrunk - const auto* array_col_result = assert_cast( - block.get_by_position(0).column.get()); + const auto* array_col_result = + assert_cast(block.get_by_position(0).column.get()); ASSERT_NE(nullptr, array_col_result); const auto* string_col_result = assert_cast( @@ -5336,11 +5340,7 @@ TEST(BlockTest, StringOperations) { // Verify each string in the arrays std::vector> expected_results = { - {"hello", 5}, - {"world", 5}, - {"test", 4}, - {"", 0} - }; + {"hello", 5}, {"world", 5}, {"test", 4}, {"", 0}}; ASSERT_EQ(expected_results.size(), string_col_result->size()); for (size_t i = 0; i < expected_results.size(); ++i) { From 037e553e26bbbd4bcfca9604c6d23b1d221e0b17 Mon Sep 17 00:00:00 2001 From: yoruet <1559650411@qq.com> Date: Wed, 18 Dec 2024 14:14:09 +0800 Subject: [PATCH 36/41] code format --- be/test/vec/core/block_test.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/be/test/vec/core/block_test.cpp b/be/test/vec/core/block_test.cpp index da2b28b4880a57..74156e267acc52 100644 --- a/be/test/vec/core/block_test.cpp +++ b/be/test/vec/core/block_test.cpp @@ -3319,8 +3319,7 @@ TEST(BlockTest, RowCheck) { TEST(BlockTest, ClearColumnData) { // Test with empty block - { - // Test clear with column_size == -1 + { // Test clear with column_size == -1 { vectorized::Block block; EXPECT_EQ(0, block.columns()); @@ -3395,7 +3394,7 @@ TEST(BlockTest, ClearColumnData) { EXPECT_EQ(0, block.columns()); EXPECT_EQ(0, block.rows()); } - } + } // namespace doris // Test with regular columns { From 84a859a2898f9aa774e5da673f0309dad1657722 Mon Sep 17 00:00:00 2001 From: yoruet <1559650411@qq.com> Date: Wed, 18 Dec 2024 14:15:11 +0800 Subject: [PATCH 37/41] code format --- be/test/vec/core/block_test.cpp | 390 ++++++++++++++++---------------- 1 file changed, 194 insertions(+), 196 deletions(-) diff --git a/be/test/vec/core/block_test.cpp b/be/test/vec/core/block_test.cpp index 74156e267acc52..08c694ed505465 100644 --- a/be/test/vec/core/block_test.cpp +++ b/be/test/vec/core/block_test.cpp @@ -3319,249 +3319,247 @@ TEST(BlockTest, RowCheck) { TEST(BlockTest, ClearColumnData) { // Test with empty block - { // Test clear with column_size == -1 - { - vectorized::Block block; - EXPECT_EQ(0, block.columns()); - EXPECT_EQ(0, block.rows()); - - block.clear_column_data(-1); - EXPECT_EQ(0, block.columns()); - EXPECT_EQ(0, block.rows()); - } + {// Test clear with column_size == -1 + {vectorized::Block block; + EXPECT_EQ(0, block.columns()); + EXPECT_EQ(0, block.rows()); + + block.clear_column_data(-1); + EXPECT_EQ(0, block.columns()); + EXPECT_EQ(0, block.rows()); +} - // Test clear with column_size == 0 - { - vectorized::Block block; - EXPECT_EQ(0, block.columns()); - EXPECT_EQ(0, block.rows()); +// Test clear with column_size == 0 +{ + vectorized::Block block; + EXPECT_EQ(0, block.columns()); + EXPECT_EQ(0, block.rows()); - block.clear_column_data(0); - EXPECT_EQ(0, block.columns()); - EXPECT_EQ(0, block.rows()); - } - - // Test clear with column_size > 0 - { - vectorized::Block block; - EXPECT_EQ(0, block.columns()); - EXPECT_EQ(0, block.rows()); + block.clear_column_data(0); + EXPECT_EQ(0, block.columns()); + EXPECT_EQ(0, block.rows()); +} - block.clear_column_data(1); - EXPECT_EQ(0, block.columns()); - EXPECT_EQ(0, block.rows()); - } +// Test clear with column_size > 0 +{ + vectorized::Block block; + EXPECT_EQ(0, block.columns()); + EXPECT_EQ(0, block.rows()); - // Test clear after insert empty column - { - vectorized::Block block; - auto type = std::make_shared(); - auto col = vectorized::ColumnVector::create(); - block.insert({std::move(col), type, "empty_col"}); + block.clear_column_data(1); + EXPECT_EQ(0, block.columns()); + EXPECT_EQ(0, block.rows()); +} - EXPECT_EQ(1, block.columns()); - EXPECT_EQ(0, block.rows()); +// Test clear after insert empty column +{ + vectorized::Block block; + auto type = std::make_shared(); + auto col = vectorized::ColumnVector::create(); + block.insert({std::move(col), type, "empty_col"}); - block.clear_column_data(-1); - EXPECT_EQ(1, block.columns()); - EXPECT_EQ(0, block.rows()); - EXPECT_EQ(0, block.get_by_position(0).column->size()); - } + EXPECT_EQ(1, block.columns()); + EXPECT_EQ(0, block.rows()); - // Test clear after multiple empty columns - { - vectorized::Block block; - auto type = std::make_shared(); + block.clear_column_data(-1); + EXPECT_EQ(1, block.columns()); + EXPECT_EQ(0, block.rows()); + EXPECT_EQ(0, block.get_by_position(0).column->size()); +} - for (int i = 0; i < 3; ++i) { - auto col = vectorized::ColumnVector::create(); - block.insert({std::move(col), type, "empty_col" + std::to_string(i)}); - } +// Test clear after multiple empty columns +{ + vectorized::Block block; + auto type = std::make_shared(); - EXPECT_EQ(3, block.columns()); - EXPECT_EQ(0, block.rows()); + for (int i = 0; i < 3; ++i) { + auto col = vectorized::ColumnVector::create(); + block.insert({std::move(col), type, "empty_col" + std::to_string(i)}); + } - // Test clear with different column_size values - block.clear_column_data(2); - EXPECT_EQ(2, block.columns()); - EXPECT_EQ(0, block.rows()); + EXPECT_EQ(3, block.columns()); + EXPECT_EQ(0, block.rows()); - block.clear_column_data(-1); - EXPECT_EQ(2, block.columns()); - EXPECT_EQ(0, block.rows()); + // Test clear with different column_size values + block.clear_column_data(2); + EXPECT_EQ(2, block.columns()); + EXPECT_EQ(0, block.rows()); - block.clear_column_data(0); - EXPECT_EQ(0, block.columns()); - EXPECT_EQ(0, block.rows()); - } - } // namespace doris + block.clear_column_data(-1); + EXPECT_EQ(2, block.columns()); + EXPECT_EQ(0, block.rows()); - // Test with regular columns - { - auto create_test_block = [](int num_columns) { - vectorized::Block block; - auto type = std::make_shared(); - - for (int i = 0; i < num_columns; ++i) { - auto col = vectorized::ColumnVector::create(); - col->insert_value(i + 1); - block.insert({std::move(col), type, "col" + std::to_string(i + 1)}); - } - return block; - }; + block.clear_column_data(0); + EXPECT_EQ(0, block.columns()); + EXPECT_EQ(0, block.rows()); +} +} // namespace doris - // Test clear with column_size == -1 - { - auto block = create_test_block(2); - EXPECT_EQ(2, block.columns()); - EXPECT_EQ(1, block.rows()); +// Test with regular columns +{ + auto create_test_block = [](int num_columns) { + vectorized::Block block; + auto type = std::make_shared(); - block.clear_column_data(-1); - EXPECT_EQ(2, block.columns()); - EXPECT_EQ(0, block.rows()); - EXPECT_EQ(0, block.get_by_position(0).column->size()); - EXPECT_EQ(0, block.get_by_position(1).column->size()); + for (int i = 0; i < num_columns; ++i) { + auto col = vectorized::ColumnVector::create(); + col->insert_value(i + 1); + block.insert({std::move(col), type, "col" + std::to_string(i + 1)}); } + return block; + }; - // Test clear with specific column_size - { - auto block = create_test_block(3); - EXPECT_EQ(3, block.columns()); + // Test clear with column_size == -1 + { + auto block = create_test_block(2); + EXPECT_EQ(2, block.columns()); + EXPECT_EQ(1, block.rows()); - block.clear_column_data(2); - EXPECT_EQ(2, block.columns()); - EXPECT_EQ(0, block.rows()); - EXPECT_EQ(0, block.get_by_position(0).column->size()); - EXPECT_EQ(0, block.get_by_position(1).column->size()); - } + block.clear_column_data(-1); + EXPECT_EQ(2, block.columns()); + EXPECT_EQ(0, block.rows()); + EXPECT_EQ(0, block.get_by_position(0).column->size()); + EXPECT_EQ(0, block.get_by_position(1).column->size()); + } - // Test clear with column_size larger than actual size - { - auto block = create_test_block(1); - EXPECT_EQ(1, block.columns()); + // Test clear with specific column_size + { + auto block = create_test_block(3); + EXPECT_EQ(3, block.columns()); - block.clear_column_data(2); - EXPECT_EQ(1, block.columns()); - EXPECT_EQ(0, block.rows()); - EXPECT_EQ(0, block.get_by_position(0).column->size()); - } + block.clear_column_data(2); + EXPECT_EQ(2, block.columns()); + EXPECT_EQ(0, block.rows()); + EXPECT_EQ(0, block.get_by_position(0).column->size()); + EXPECT_EQ(0, block.get_by_position(1).column->size()); } - // Test with const columns + // Test clear with column_size larger than actual size { - auto create_test_block = [](int num_columns) { - vectorized::Block block; - auto type = std::make_shared(); + auto block = create_test_block(1); + EXPECT_EQ(1, block.columns()); - for (int i = 0; i < num_columns; ++i) { - auto base_col = vectorized::ColumnVector::create(); - base_col->insert_value(42 + i); - auto const_col = vectorized::ColumnConst::create(base_col->get_ptr(), 5); - block.insert({const_col->get_ptr(), type, "const_col" + std::to_string(i + 1)}); - } - return block; - }; + block.clear_column_data(2); + EXPECT_EQ(1, block.columns()); + EXPECT_EQ(0, block.rows()); + EXPECT_EQ(0, block.get_by_position(0).column->size()); + } +} - // Test clear with column_size == -1 - { - auto block = create_test_block(2); - EXPECT_EQ(2, block.columns()); - EXPECT_EQ(5, block.rows()); +// Test with const columns +{ + auto create_test_block = [](int num_columns) { + vectorized::Block block; + auto type = std::make_shared(); - block.clear_column_data(-1); - EXPECT_EQ(2, block.columns()); - EXPECT_EQ(0, block.rows()); - EXPECT_EQ(0, block.get_by_position(0).column->size()); - EXPECT_EQ(0, block.get_by_position(1).column->size()); + for (int i = 0; i < num_columns; ++i) { + auto base_col = vectorized::ColumnVector::create(); + base_col->insert_value(42 + i); + auto const_col = vectorized::ColumnConst::create(base_col->get_ptr(), 5); + block.insert({const_col->get_ptr(), type, "const_col" + std::to_string(i + 1)}); } + return block; + }; - // Test clear with specific column_size - { - auto block = create_test_block(3); - EXPECT_EQ(3, block.columns()); + // Test clear with column_size == -1 + { + auto block = create_test_block(2); + EXPECT_EQ(2, block.columns()); + EXPECT_EQ(5, block.rows()); - block.clear_column_data(2); - EXPECT_EQ(2, block.columns()); - EXPECT_EQ(0, block.rows()); - EXPECT_EQ(0, block.get_by_position(0).column->size()); - EXPECT_EQ(0, block.get_by_position(1).column->size()); - } + block.clear_column_data(-1); + EXPECT_EQ(2, block.columns()); + EXPECT_EQ(0, block.rows()); + EXPECT_EQ(0, block.get_by_position(0).column->size()); + EXPECT_EQ(0, block.get_by_position(1).column->size()); + } - // Test clear with column_size larger than actual size - { - auto block = create_test_block(1); - EXPECT_EQ(1, block.columns()); + // Test clear with specific column_size + { + auto block = create_test_block(3); + EXPECT_EQ(3, block.columns()); - block.clear_column_data(2); - EXPECT_EQ(1, block.columns()); - EXPECT_EQ(0, block.rows()); - EXPECT_EQ(0, block.get_by_position(0).column->size()); - } + block.clear_column_data(2); + EXPECT_EQ(2, block.columns()); + EXPECT_EQ(0, block.rows()); + EXPECT_EQ(0, block.get_by_position(0).column->size()); + EXPECT_EQ(0, block.get_by_position(1).column->size()); } - // Test with nullable columns + // Test clear with column_size larger than actual size { - auto create_test_block = [](int num_columns) { - vectorized::Block block; - auto base_type = std::make_shared(); - auto nullable_type = std::make_shared(base_type); + auto block = create_test_block(1); + EXPECT_EQ(1, block.columns()); - for (int i = 0; i < num_columns; ++i) { - auto col = vectorized::ColumnNullable::create( - vectorized::ColumnVector::create(), - vectorized::ColumnVector::create()); + block.clear_column_data(2); + EXPECT_EQ(1, block.columns()); + EXPECT_EQ(0, block.rows()); + EXPECT_EQ(0, block.get_by_position(0).column->size()); + } +} - auto* nested = assert_cast*>( - col->get_nested_column_ptr().get()); - auto* null_map = assert_cast*>( - col->get_null_map_column_ptr().get()); +// Test with nullable columns +{ + auto create_test_block = [](int num_columns) { + vectorized::Block block; + auto base_type = std::make_shared(); + auto nullable_type = std::make_shared(base_type); - nested->insert_value(i + 1); - null_map->insert_value(i % 2); + for (int i = 0; i < num_columns; ++i) { + auto col = vectorized::ColumnNullable::create( + vectorized::ColumnVector::create(), + vectorized::ColumnVector::create()); - block.insert( - {col->get_ptr(), nullable_type, "nullable_col" + std::to_string(i + 1)}); - } - return block; - }; + auto* nested = assert_cast*>( + col->get_nested_column_ptr().get()); + auto* null_map = assert_cast*>( + col->get_null_map_column_ptr().get()); - // Test clear with column_size == -1 - { - auto block = create_test_block(2); - EXPECT_EQ(2, block.columns()); - EXPECT_EQ(1, block.rows()); + nested->insert_value(i + 1); + null_map->insert_value(i % 2); - block.clear_column_data(-1); - EXPECT_EQ(2, block.columns()); - EXPECT_EQ(0, block.rows()); - EXPECT_EQ(0, block.get_by_position(0).column->size()); - EXPECT_EQ(0, block.get_by_position(1).column->size()); + block.insert({col->get_ptr(), nullable_type, "nullable_col" + std::to_string(i + 1)}); } + return block; + }; - // Test clear with specific column_size - { - auto block = create_test_block(3); - EXPECT_EQ(3, block.columns()); + // Test clear with column_size == -1 + { + auto block = create_test_block(2); + EXPECT_EQ(2, block.columns()); + EXPECT_EQ(1, block.rows()); - block.clear_column_data(2); - EXPECT_EQ(2, block.columns()); - EXPECT_EQ(0, block.rows()); - EXPECT_EQ(0, block.get_by_position(0).column->size()); - EXPECT_EQ(0, block.get_by_position(1).column->size()); - } + block.clear_column_data(-1); + EXPECT_EQ(2, block.columns()); + EXPECT_EQ(0, block.rows()); + EXPECT_EQ(0, block.get_by_position(0).column->size()); + EXPECT_EQ(0, block.get_by_position(1).column->size()); + } - // Test clear with column_size larger than actual size - { - auto block = create_test_block(1); - EXPECT_EQ(1, block.columns()); + // Test clear with specific column_size + { + auto block = create_test_block(3); + EXPECT_EQ(3, block.columns()); - block.clear_column_data(2); - EXPECT_EQ(1, block.columns()); - EXPECT_EQ(0, block.rows()); - EXPECT_EQ(0, block.get_by_position(0).column->size()); - } + block.clear_column_data(2); + EXPECT_EQ(2, block.columns()); + EXPECT_EQ(0, block.rows()); + EXPECT_EQ(0, block.get_by_position(0).column->size()); + EXPECT_EQ(0, block.get_by_position(1).column->size()); + } + + // Test clear with column_size larger than actual size + { + auto block = create_test_block(1); + EXPECT_EQ(1, block.columns()); + + block.clear_column_data(2); + EXPECT_EQ(1, block.columns()); + EXPECT_EQ(0, block.rows()); + EXPECT_EQ(0, block.get_by_position(0).column->size()); } } +} TEST(BlockTest, IndexByName) { // Test with empty block From 48076f697e030d4b9e95d18dc03d727a3fd62053 Mon Sep 17 00:00:00 2001 From: yoruet <1559650411@qq.com> Date: Wed, 18 Dec 2024 14:19:05 +0800 Subject: [PATCH 38/41] code format --- be/test/vec/core/block_test.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/be/test/vec/core/block_test.cpp b/be/test/vec/core/block_test.cpp index 08c694ed505465..d519f2d3a7bf09 100644 --- a/be/test/vec/core/block_test.cpp +++ b/be/test/vec/core/block_test.cpp @@ -3402,7 +3402,7 @@ TEST(BlockTest, ClearColumnData) { auto type = std::make_shared(); for (int i = 0; i < num_columns; ++i) { - auto col = vectorized::ColumnVector::create(); + auto col = vectorized::ColumnVector::create(); col->insert_value(i + 1); block.insert({std::move(col), type, "col" + std::to_string(i + 1)}); } From 52e7a7bd05068e0adcaa9438e095c2a1b813dd8a Mon Sep 17 00:00:00 2001 From: yoruet <1559650411@qq.com> Date: Wed, 18 Dec 2024 14:26:41 +0800 Subject: [PATCH 39/41] code format --- be/test/vec/core/block_test.cpp | 572 ++++++++++++++++---------------- 1 file changed, 283 insertions(+), 289 deletions(-) diff --git a/be/test/vec/core/block_test.cpp b/be/test/vec/core/block_test.cpp index d519f2d3a7bf09..2a87a4be6979f8 100644 --- a/be/test/vec/core/block_test.cpp +++ b/be/test/vec/core/block_test.cpp @@ -3899,214 +3899,301 @@ TEST(BlockTest, ColumnTransformations) { TEST(BlockTest, HashUpdate) { // Test with empty block - { - // Single empty block - { - vectorized::Block empty_block; - SipHash hash1; - empty_block.update_hash(hash1); - uint64_t hash1_value = hash1.get64(); - - // Same empty block should produce same hash - SipHash hash2; - empty_block.update_hash(hash2); - EXPECT_EQ(hash1_value, hash2.get64()); - } + {// Single empty block + {vectorized::Block empty_block; + SipHash hash1; + empty_block.update_hash(hash1); + uint64_t hash1_value = hash1.get64(); + + // Same empty block should produce same hash + SipHash hash2; + empty_block.update_hash(hash2); + EXPECT_EQ(hash1_value, hash2.get64()); +} - // Multiple empty blocks - { - vectorized::Block block1; - vectorized::Block block2; +// Multiple empty blocks +{ + vectorized::Block block1; + vectorized::Block block2; - SipHash hash1, hash2; - block1.update_hash(hash1); - block2.update_hash(hash2); - EXPECT_EQ(hash1.get64(), hash2.get64()); - } - } + SipHash hash1, hash2; + block1.update_hash(hash1); + block2.update_hash(hash2); + EXPECT_EQ(hash1.get64(), hash2.get64()); +} +} - // Test with regular columns - { - // Single column with single value - { - vectorized::Block block; - auto type = std::make_shared(); +// Test with regular columns +{// Single column with single value + {vectorized::Block block; +auto type = std::make_shared(); + +auto col = vectorized::ColumnVector::create(); +col->insert_value(42); +block.insert({std::move(col), type, "col1"}); + +SipHash hash1; +block.update_hash(hash1); +uint64_t hash1_value = hash1.get64(); + +// Same data should produce same hash +SipHash hash2; +block.update_hash(hash2); +EXPECT_EQ(hash1_value, hash2.get64()); +} - auto col = vectorized::ColumnVector::create(); - col->insert_value(42); - block.insert({std::move(col), type, "col1"}); +// Multiple columns +{ + vectorized::Block block1; + auto type = std::make_shared(); - SipHash hash1; - block.update_hash(hash1); - uint64_t hash1_value = hash1.get64(); + // Create first block with values [1, 2] + { + auto col1 = vectorized::ColumnVector::create(); + col1->insert_value(1); + block1.insert({std::move(col1), type, "col1"}); - // Same data should produce same hash - SipHash hash2; - block.update_hash(hash2); - EXPECT_EQ(hash1_value, hash2.get64()); - } + auto col2 = vectorized::ColumnVector::create(); + col2->insert_value(2); + block1.insert({std::move(col2), type, "col2"}); + } - // Multiple columns - { - vectorized::Block block1; - auto type = std::make_shared(); + // Create second block with values [2, 1] + vectorized::Block block2; + { + auto col1 = vectorized::ColumnVector::create(); + col1->insert_value(2); + block2.insert({std::move(col1), type, "col1"}); - // Create first block with values [1, 2] - { - auto col1 = vectorized::ColumnVector::create(); - col1->insert_value(1); - block1.insert({std::move(col1), type, "col1"}); + auto col2 = vectorized::ColumnVector::create(); + col2->insert_value(1); + block2.insert({std::move(col2), type, "col2"}); + } - auto col2 = vectorized::ColumnVector::create(); - col2->insert_value(2); - block1.insert({std::move(col2), type, "col2"}); - } + // Different order of same values should produce different hash + SipHash hash1, hash2; + block1.update_hash(hash1); + block2.update_hash(hash2); + EXPECT_NE(hash1.get64(), hash2.get64()); +} - // Create second block with values [2, 1] - vectorized::Block block2; - { - auto col1 = vectorized::ColumnVector::create(); - col1->insert_value(2); - block2.insert({std::move(col1), type, "col1"}); +// Multiple rows +{ + vectorized::Block block1; + auto type = std::make_shared(); - auto col2 = vectorized::ColumnVector::create(); - col2->insert_value(1); - block2.insert({std::move(col2), type, "col2"}); - } + // Create first block with ascending values + { + auto col = vectorized::ColumnVector::create(); + for (int i = 0; i < 5; ++i) { + col->insert_value(i); + } + block1.insert({std::move(col), type, "col1"}); + } - // Different order of same values should produce different hash - SipHash hash1, hash2; - block1.update_hash(hash1); - block2.update_hash(hash2); - EXPECT_NE(hash1.get64(), hash2.get64()); + // Create second block with descending values + vectorized::Block block2; + { + auto col = vectorized::ColumnVector::create(); + for (int i = 4; i >= 0; --i) { + col->insert_value(i); } + block2.insert({std::move(col), type, "col1"}); + } - // Multiple rows - { - vectorized::Block block1; - auto type = std::make_shared(); + // Different order of same values should produce different hash + SipHash hash1, hash2; + block1.update_hash(hash1); + block2.update_hash(hash2); + EXPECT_NE(hash1.get64(), hash2.get64()); +} +} - // Create first block with ascending values - { - auto col = vectorized::ColumnVector::create(); - for (int i = 0; i < 5; ++i) { - col->insert_value(i); - } - block1.insert({std::move(col), type, "col1"}); - } +// Test with const columns +{// Single column with single value + {vectorized::Block block; +auto type = std::make_shared(); + +auto base_col = vectorized::ColumnVector::create(); +base_col->insert_value(42); +auto const_col = vectorized::ColumnConst::create(base_col->get_ptr(), 5); +block.insert({const_col->get_ptr(), type, "const_col"}); + +SipHash hash1; +block.update_hash(hash1); +uint64_t hash1_value = hash1.get64(); + +// Same data should produce same hash +SipHash hash2; +block.update_hash(hash2); +EXPECT_EQ(hash1_value, hash2.get64()); +} - // Create second block with descending values - vectorized::Block block2; - { - auto col = vectorized::ColumnVector::create(); - for (int i = 4; i >= 0; --i) { - col->insert_value(i); - } - block2.insert({std::move(col), type, "col1"}); - } +// Multiple columns +{ + vectorized::Block block1; + auto type = std::make_shared(); - // Different order of same values should produce different hash - SipHash hash1, hash2; - block1.update_hash(hash1); - block2.update_hash(hash2); - EXPECT_NE(hash1.get64(), hash2.get64()); - } + // Create first block with const values [1, 2] + { + auto base_col1 = vectorized::ColumnVector::create(); + base_col1->insert_value(1); + auto const_col1 = vectorized::ColumnConst::create(base_col1->get_ptr(), 5); + block1.insert({const_col1->get_ptr(), type, "const_col1"}); + + auto base_col2 = vectorized::ColumnVector::create(); + base_col2->insert_value(2); + auto const_col2 = vectorized::ColumnConst::create(base_col2->get_ptr(), 5); + block1.insert({const_col2->get_ptr(), type, "const_col2"}); } - // Test with const columns + // Create second block with const values [2, 1] + vectorized::Block block2; { - // Single column with single value - { - vectorized::Block block; - auto type = std::make_shared(); + auto base_col1 = vectorized::ColumnVector::create(); + base_col1->insert_value(2); + auto const_col1 = vectorized::ColumnConst::create(base_col1->get_ptr(), 5); + block2.insert({const_col1->get_ptr(), type, "const_col1"}); - auto base_col = vectorized::ColumnVector::create(); - base_col->insert_value(42); - auto const_col = vectorized::ColumnConst::create(base_col->get_ptr(), 5); - block.insert({const_col->get_ptr(), type, "const_col"}); + auto base_col2 = vectorized::ColumnVector::create(); + base_col2->insert_value(1); + auto const_col2 = vectorized::ColumnConst::create(base_col2->get_ptr(), 5); + block2.insert({const_col2->get_ptr(), type, "const_col2"}); + } - SipHash hash1; - block.update_hash(hash1); - uint64_t hash1_value = hash1.get64(); + // Different order of same values should produce different hash + SipHash hash1, hash2; + block1.update_hash(hash1); + block2.update_hash(hash2); + EXPECT_NE(hash1.get64(), hash2.get64()); +} - // Same data should produce same hash - SipHash hash2; - block.update_hash(hash2); - EXPECT_EQ(hash1_value, hash2.get64()); - } +// Multiple rows (same value repeated) +{ + vectorized::Block block1; + auto type = std::make_shared(); - // Multiple columns - { - vectorized::Block block1; - auto type = std::make_shared(); + auto base_col = vectorized::ColumnVector::create(); + base_col->insert_value(42); + auto const_col = vectorized::ColumnConst::create(base_col->get_ptr(), 5); + block1.insert({const_col->get_ptr(), type, "const_col"}); + + // Create second block with same value but different row count + vectorized::Block block2; + auto base_col2 = vectorized::ColumnVector::create(); + base_col2->insert_value(42); + auto const_col2 = vectorized::ColumnConst::create(base_col2->get_ptr(), 3); + block2.insert({const_col2->get_ptr(), type, "const_col"}); + + // Different row counts should produce different hash + SipHash hash1, hash2; + block1.update_hash(hash1); + block2.update_hash(hash2); + EXPECT_NE(hash1.get64(), hash2.get64()); +} +} - // Create first block with const values [1, 2] - { - auto base_col1 = vectorized::ColumnVector::create(); - base_col1->insert_value(1); - auto const_col1 = vectorized::ColumnConst::create(base_col1->get_ptr(), 5); - block1.insert({const_col1->get_ptr(), type, "const_col1"}); - - auto base_col2 = vectorized::ColumnVector::create(); - base_col2->insert_value(2); - auto const_col2 = vectorized::ColumnConst::create(base_col2->get_ptr(), 5); - block1.insert({const_col2->get_ptr(), type, "const_col2"}); - } +// Test with nullable columns +{ + // Single column with single value + { + vectorized::Block block; + auto base_type = std::make_shared(); + auto nullable_type = std::make_shared(base_type); - // Create second block with const values [2, 1] - vectorized::Block block2; - { - auto base_col1 = vectorized::ColumnVector::create(); - base_col1->insert_value(2); - auto const_col1 = vectorized::ColumnConst::create(base_col1->get_ptr(), 5); - block2.insert({const_col1->get_ptr(), type, "const_col1"}); - - auto base_col2 = vectorized::ColumnVector::create(); - base_col2->insert_value(1); - auto const_col2 = vectorized::ColumnConst::create(base_col2->get_ptr(), 5); - block2.insert({const_col2->get_ptr(), type, "const_col2"}); - } + auto col = vectorized::ColumnNullable::create( + vectorized::ColumnVector::create(), + vectorized::ColumnVector::create()); + auto* nested = + assert_cast*>(col->get_nested_column_ptr().get()); + auto* null_map = assert_cast*>( + col->get_null_map_column_ptr().get()); + nested->insert_value(42); + null_map->insert_value(0); + block.insert({col->get_ptr(), nullable_type, "nullable_col"}); - // Different order of same values should produce different hash - SipHash hash1, hash2; - block1.update_hash(hash1); - block2.update_hash(hash2); - EXPECT_NE(hash1.get64(), hash2.get64()); - } + SipHash hash1; + block.update_hash(hash1); + uint64_t hash1_value = hash1.get64(); + + // Same data should produce same hash + SipHash hash2; + block.update_hash(hash2); + EXPECT_EQ(hash1_value, hash2.get64()); + } + + // Multiple columns + { + vectorized::Block block1; + auto base_type = std::make_shared(); + auto nullable_type = std::make_shared(base_type); - // Multiple rows (same value repeated) + // Create first block with values [1(not null), 2(null)] { - vectorized::Block block1; - auto type = std::make_shared(); + auto col1 = vectorized::ColumnNullable::create( + vectorized::ColumnVector::create(), + vectorized::ColumnVector::create()); + auto* nested1 = assert_cast*>( + col1->get_nested_column_ptr().get()); + auto* null_map1 = assert_cast*>( + col1->get_null_map_column_ptr().get()); + nested1->insert_value(1); + null_map1->insert_value(0); + block1.insert({col1->get_ptr(), nullable_type, "nullable_col1"}); - auto base_col = vectorized::ColumnVector::create(); - base_col->insert_value(42); - auto const_col = vectorized::ColumnConst::create(base_col->get_ptr(), 5); - block1.insert({const_col->get_ptr(), type, "const_col"}); + auto col2 = vectorized::ColumnNullable::create( + vectorized::ColumnVector::create(), + vectorized::ColumnVector::create()); + auto* nested2 = assert_cast*>( + col2->get_nested_column_ptr().get()); + auto* null_map2 = assert_cast*>( + col2->get_null_map_column_ptr().get()); + nested2->insert_value(2); + null_map2->insert_value(1); + block1.insert({col2->get_ptr(), nullable_type, "nullable_col2"}); + } - // Create second block with same value but different row count - vectorized::Block block2; - auto base_col2 = vectorized::ColumnVector::create(); - base_col2->insert_value(42); - auto const_col2 = vectorized::ColumnConst::create(base_col2->get_ptr(), 3); - block2.insert({const_col2->get_ptr(), type, "const_col"}); - - // Different row counts should produce different hash - SipHash hash1, hash2; - block1.update_hash(hash1); - block2.update_hash(hash2); - EXPECT_NE(hash1.get64(), hash2.get64()); + // Create second block with values [2(null), 1(not null)] + vectorized::Block block2; + { + auto col1 = vectorized::ColumnNullable::create( + vectorized::ColumnVector::create(), + vectorized::ColumnVector::create()); + auto* nested1 = assert_cast*>( + col1->get_nested_column_ptr().get()); + auto* null_map1 = assert_cast*>( + col1->get_null_map_column_ptr().get()); + nested1->insert_value(2); + null_map1->insert_value(1); + block2.insert({col1->get_ptr(), nullable_type, "nullable_col1"}); + + auto col2 = vectorized::ColumnNullable::create( + vectorized::ColumnVector::create(), + vectorized::ColumnVector::create()); + auto* nested2 = assert_cast*>( + col2->get_nested_column_ptr().get()); + auto* null_map2 = assert_cast*>( + col2->get_null_map_column_ptr().get()); + nested2->insert_value(1); + null_map2->insert_value(0); + block2.insert({col2->get_ptr(), nullable_type, "nullable_col2"}); } + + // Different order of same values should produce different hash + SipHash hash1, hash2; + block1.update_hash(hash1); + block2.update_hash(hash2); + EXPECT_NE(hash1.get64(), hash2.get64()); } - // Test with nullable columns + // Multiple rows { - // Single column with single value - { - vectorized::Block block; - auto base_type = std::make_shared(); - auto nullable_type = std::make_shared(base_type); + vectorized::Block block1; + auto base_type = std::make_shared(); + auto nullable_type = std::make_shared(base_type); + // Create first block with ascending values and alternating null flags + { auto col = vectorized::ColumnNullable::create( vectorized::ColumnVector::create(), vectorized::ColumnVector::create()); @@ -4114,133 +4201,40 @@ TEST(BlockTest, HashUpdate) { col->get_nested_column_ptr().get()); auto* null_map = assert_cast*>( col->get_null_map_column_ptr().get()); - nested->insert_value(42); - null_map->insert_value(0); - block.insert({col->get_ptr(), nullable_type, "nullable_col"}); - - SipHash hash1; - block.update_hash(hash1); - uint64_t hash1_value = hash1.get64(); - - // Same data should produce same hash - SipHash hash2; - block.update_hash(hash2); - EXPECT_EQ(hash1_value, hash2.get64()); - } - - // Multiple columns - { - vectorized::Block block1; - auto base_type = std::make_shared(); - auto nullable_type = std::make_shared(base_type); - // Create first block with values [1(not null), 2(null)] - { - auto col1 = vectorized::ColumnNullable::create( - vectorized::ColumnVector::create(), - vectorized::ColumnVector::create()); - auto* nested1 = assert_cast*>( - col1->get_nested_column_ptr().get()); - auto* null_map1 = assert_cast*>( - col1->get_null_map_column_ptr().get()); - nested1->insert_value(1); - null_map1->insert_value(0); - block1.insert({col1->get_ptr(), nullable_type, "nullable_col1"}); - - auto col2 = vectorized::ColumnNullable::create( - vectorized::ColumnVector::create(), - vectorized::ColumnVector::create()); - auto* nested2 = assert_cast*>( - col2->get_nested_column_ptr().get()); - auto* null_map2 = assert_cast*>( - col2->get_null_map_column_ptr().get()); - nested2->insert_value(2); - null_map2->insert_value(1); - block1.insert({col2->get_ptr(), nullable_type, "nullable_col2"}); + for (int i = 0; i < 5; ++i) { + nested->insert_value(i); + null_map->insert_value(i % 2); } - - // Create second block with values [2(null), 1(not null)] - vectorized::Block block2; - { - auto col1 = vectorized::ColumnNullable::create( - vectorized::ColumnVector::create(), - vectorized::ColumnVector::create()); - auto* nested1 = assert_cast*>( - col1->get_nested_column_ptr().get()); - auto* null_map1 = assert_cast*>( - col1->get_null_map_column_ptr().get()); - nested1->insert_value(2); - null_map1->insert_value(1); - block2.insert({col1->get_ptr(), nullable_type, "nullable_col1"}); - - auto col2 = vectorized::ColumnNullable::create( - vectorized::ColumnVector::create(), - vectorized::ColumnVector::create()); - auto* nested2 = assert_cast*>( - col2->get_nested_column_ptr().get()); - auto* null_map2 = assert_cast*>( - col2->get_null_map_column_ptr().get()); - nested2->insert_value(1); - null_map2->insert_value(0); - block2.insert({col2->get_ptr(), nullable_type, "nullable_col2"}); - } - - // Different order of same values should produce different hash - SipHash hash1, hash2; - block1.update_hash(hash1); - block2.update_hash(hash2); - EXPECT_NE(hash1.get64(), hash2.get64()); + block1.insert({col->get_ptr(), nullable_type, "nullable_col"}); } - // Multiple rows + // Create second block with descending values and alternating null flags + vectorized::Block block2; { - vectorized::Block block1; - auto base_type = std::make_shared(); - auto nullable_type = std::make_shared(base_type); - - // Create first block with ascending values and alternating null flags - { - auto col = vectorized::ColumnNullable::create( - vectorized::ColumnVector::create(), - vectorized::ColumnVector::create()); - auto* nested = assert_cast*>( - col->get_nested_column_ptr().get()); - auto* null_map = assert_cast*>( - col->get_null_map_column_ptr().get()); - - for (int i = 0; i < 5; ++i) { - nested->insert_value(i); - null_map->insert_value(i % 2); - } - block1.insert({col->get_ptr(), nullable_type, "nullable_col"}); - } + auto col = vectorized::ColumnNullable::create( + vectorized::ColumnVector::create(), + vectorized::ColumnVector::create()); + auto* nested = assert_cast*>( + col->get_nested_column_ptr().get()); + auto* null_map = assert_cast*>( + col->get_null_map_column_ptr().get()); - // Create second block with descending values and alternating null flags - vectorized::Block block2; - { - auto col = vectorized::ColumnNullable::create( - vectorized::ColumnVector::create(), - vectorized::ColumnVector::create()); - auto* nested = assert_cast*>( - col->get_nested_column_ptr().get()); - auto* null_map = assert_cast*>( - col->get_null_map_column_ptr().get()); - - for (int i = 4; i >= 0; --i) { - nested->insert_value(i); - null_map->insert_value(i % 2); - } - block2.insert({col->get_ptr(), nullable_type, "nullable_col"}); + for (int i = 4; i >= 0; --i) { + nested->insert_value(i); + null_map->insert_value(i % 2); } - - // Different order of same values should produce different hash - SipHash hash1, hash2; - block1.update_hash(hash1); - block2.update_hash(hash2); - EXPECT_NE(hash1.get64(), hash2.get64()); + block2.insert({col->get_ptr(), nullable_type, "nullable_col"}); } + + // Different order of same values should produce different hash + SipHash hash1, hash2; + block1.update_hash(hash1); + block2.update_hash(hash2); + EXPECT_NE(hash1.get64(), hash2.get64()); } } +} TEST(BlockTest, EraseUselessColumn) { // Test with empty block From 47ef29c66f1ea2ee8196f235f8957078430daee6 Mon Sep 17 00:00:00 2001 From: yoruet <1559650411@qq.com> Date: Wed, 18 Dec 2024 14:29:36 +0800 Subject: [PATCH 40/41] code format --- be/test/vec/core/block_test.cpp | 244 ++++++++++++++++---------------- 1 file changed, 121 insertions(+), 123 deletions(-) diff --git a/be/test/vec/core/block_test.cpp b/be/test/vec/core/block_test.cpp index 2a87a4be6979f8..32bbbea81e1645 100644 --- a/be/test/vec/core/block_test.cpp +++ b/be/test/vec/core/block_test.cpp @@ -4768,152 +4768,150 @@ TEST(BlockTest, SameBitOperations) { TEST(BlockTest, CreateSameStructBlock) { // Test with empty block - { - // Test case 1: with default values (is_reserve = false) - { - vectorized::Block original_block; - auto new_block = original_block.create_same_struct_block(5, false); - EXPECT_EQ(0, new_block->columns()); - EXPECT_EQ(0, new_block->rows()); - } + {// Test case 1: with default values (is_reserve = false) + {vectorized::Block original_block; + auto new_block = original_block.create_same_struct_block(5, false); + EXPECT_EQ(0, new_block->columns()); + EXPECT_EQ(0, new_block->rows()); +} - // Test case 2: with reserved space (is_reserve = true) - { - vectorized::Block original_block; - auto new_block = original_block.create_same_struct_block(5, true); - EXPECT_EQ(0, new_block->columns()); - EXPECT_EQ(0, new_block->rows()); - } +// Test case 2: with reserved space (is_reserve = true) +{ + vectorized::Block original_block; + auto new_block = original_block.create_same_struct_block(5, true); + EXPECT_EQ(0, new_block->columns()); + EXPECT_EQ(0, new_block->rows()); +} +} + +// Test with regular columns +{ + vectorized::Block original_block; + auto type = std::make_shared(); + + // Create original block with data + { + auto col = vectorized::ColumnVector::create(); + col->insert_value(1); + original_block.insert({std::move(col), type, "col1"}); } - // Test with regular columns + // Test case 1: with default values (is_reserve = false) { - vectorized::Block original_block; - auto type = std::make_shared(); + auto new_block = original_block.create_same_struct_block(5, false); + EXPECT_EQ(original_block.columns(), new_block->columns()); + EXPECT_EQ(5, new_block->rows()); // Should have 5 default values + EXPECT_EQ("col1", new_block->get_by_position(0).name); + EXPECT_TRUE(new_block->get_by_position(0).type->equals(*type)); - // Create original block with data - { - auto col = vectorized::ColumnVector::create(); - col->insert_value(1); - original_block.insert({std::move(col), type, "col1"}); + // Verify default values are inserted + const auto* col = assert_cast*>( + new_block->get_by_position(0).column.get()); + for (size_t i = 0; i < 5; ++i) { + EXPECT_EQ(0, col->get_data()[i]); // Default value for Int32 is 0 } + } - // Test case 1: with default values (is_reserve = false) - { - auto new_block = original_block.create_same_struct_block(5, false); - EXPECT_EQ(original_block.columns(), new_block->columns()); - EXPECT_EQ(5, new_block->rows()); // Should have 5 default values - EXPECT_EQ("col1", new_block->get_by_position(0).name); - EXPECT_TRUE(new_block->get_by_position(0).type->equals(*type)); - - // Verify default values are inserted - const auto* col = assert_cast*>( - new_block->get_by_position(0).column.get()); - for (size_t i = 0; i < 5; ++i) { - EXPECT_EQ(0, col->get_data()[i]); // Default value for Int32 is 0 - } - } + // Test case 2: with reserved space (is_reserve = true) + { + auto new_block = original_block.create_same_struct_block(5, true); + EXPECT_EQ(original_block.columns(), new_block->columns()); + EXPECT_EQ(0, new_block->rows()); // Should be empty but with reserved space + EXPECT_EQ("col1", new_block->get_by_position(0).name); + EXPECT_TRUE(new_block->get_by_position(0).type->equals(*type)); + } +} - // Test case 2: with reserved space (is_reserve = true) - { - auto new_block = original_block.create_same_struct_block(5, true); - EXPECT_EQ(original_block.columns(), new_block->columns()); - EXPECT_EQ(0, new_block->rows()); // Should be empty but with reserved space - EXPECT_EQ("col1", new_block->get_by_position(0).name); - EXPECT_TRUE(new_block->get_by_position(0).type->equals(*type)); - } +// Test with const columns +{ + vectorized::Block original_block; + auto type = std::make_shared(); + + // Create original block with data + { + auto base_col = vectorized::ColumnVector::create(); + base_col->insert_value(42); + auto const_col = vectorized::ColumnConst::create(base_col->get_ptr(), 1); + original_block.insert({const_col->get_ptr(), type, "const_col"}); } - // Test with const columns + // Test case 1: with default values (is_reserve = false) { - vectorized::Block original_block; - auto type = std::make_shared(); + auto new_block = original_block.create_same_struct_block(5, false); + EXPECT_EQ(original_block.columns(), new_block->columns()); + EXPECT_EQ(5, new_block->rows()); // Should have 5 default values + EXPECT_EQ("const_col", new_block->get_by_position(0).name); + EXPECT_TRUE(new_block->get_by_position(0).type->equals(*type)); - // Create original block with data - { - auto base_col = vectorized::ColumnVector::create(); - base_col->insert_value(42); - auto const_col = vectorized::ColumnConst::create(base_col->get_ptr(), 1); - original_block.insert({const_col->get_ptr(), type, "const_col"}); - } - - // Test case 1: with default values (is_reserve = false) - { - auto new_block = original_block.create_same_struct_block(5, false); - EXPECT_EQ(original_block.columns(), new_block->columns()); - EXPECT_EQ(5, new_block->rows()); // Should have 5 default values - EXPECT_EQ("const_col", new_block->get_by_position(0).name); - EXPECT_TRUE(new_block->get_by_position(0).type->equals(*type)); - - // Verify default values are inserted - const auto* col = assert_cast*>( - new_block->get_by_position(0).column.get()); - for (size_t i = 0; i < 5; ++i) { - EXPECT_EQ(0, col->get_data()[i]); // Default value for Int32 is 0 - } + // Verify default values are inserted + const auto* col = assert_cast*>( + new_block->get_by_position(0).column.get()); + for (size_t i = 0; i < 5; ++i) { + EXPECT_EQ(0, col->get_data()[i]); // Default value for Int32 is 0 } + } - // Test case 2: with reserved space (is_reserve = true) - { - auto new_block = original_block.create_same_struct_block(5, true); - EXPECT_EQ(original_block.columns(), new_block->columns()); - EXPECT_EQ(0, new_block->rows()); // Should be empty but with reserved space - EXPECT_EQ("const_col", new_block->get_by_position(0).name); - EXPECT_TRUE(new_block->get_by_position(0).type->equals(*type)); - } + // Test case 2: with reserved space (is_reserve = true) + { + auto new_block = original_block.create_same_struct_block(5, true); + EXPECT_EQ(original_block.columns(), new_block->columns()); + EXPECT_EQ(0, new_block->rows()); // Should be empty but with reserved space + EXPECT_EQ("const_col", new_block->get_by_position(0).name); + EXPECT_TRUE(new_block->get_by_position(0).type->equals(*type)); } +} - // Test with nullable columns +// Test with nullable columns +{ + vectorized::Block original_block; + auto base_type = std::make_shared(); + auto nullable_type = std::make_shared(base_type); + + // Create original block with data { - vectorized::Block original_block; - auto base_type = std::make_shared(); - auto nullable_type = std::make_shared(base_type); + auto col = vectorized::ColumnNullable::create( + vectorized::ColumnVector::create(), + vectorized::ColumnVector::create()); + auto* nested = + assert_cast*>(col->get_nested_column_ptr().get()); + auto* null_map = assert_cast*>( + col->get_null_map_column_ptr().get()); + nested->insert_value(1); + null_map->insert_value(0); + original_block.insert({col->get_ptr(), nullable_type, "nullable_col"}); + } - // Create original block with data - { - auto col = vectorized::ColumnNullable::create( - vectorized::ColumnVector::create(), - vectorized::ColumnVector::create()); - auto* nested = assert_cast*>( - col->get_nested_column_ptr().get()); - auto* null_map = assert_cast*>( - col->get_null_map_column_ptr().get()); - nested->insert_value(1); - null_map->insert_value(0); - original_block.insert({col->get_ptr(), nullable_type, "nullable_col"}); - } + // Test case 1: with default values (is_reserve = false) + { + auto new_block = original_block.create_same_struct_block(5, false); + EXPECT_EQ(original_block.columns(), new_block->columns()); + EXPECT_EQ(5, new_block->rows()); // Should have 5 default values + EXPECT_EQ("nullable_col", new_block->get_by_position(0).name); + EXPECT_TRUE(new_block->get_by_position(0).type->equals(*nullable_type)); - // Test case 1: with default values (is_reserve = false) - { - auto new_block = original_block.create_same_struct_block(5, false); - EXPECT_EQ(original_block.columns(), new_block->columns()); - EXPECT_EQ(5, new_block->rows()); // Should have 5 default values - EXPECT_EQ("nullable_col", new_block->get_by_position(0).name); - EXPECT_TRUE(new_block->get_by_position(0).type->equals(*nullable_type)); - - // Verify default values are inserted - const auto* col = assert_cast( - new_block->get_by_position(0).column.get()); - const auto* nested = assert_cast*>( - col->get_nested_column_ptr().get()); - const auto* null_map = assert_cast*>( - col->get_null_map_column_ptr().get()); - for (size_t i = 0; i < 5; ++i) { - EXPECT_EQ(0, nested->get_data()[i]); // Default value for Int32 is 0 - EXPECT_EQ(1, null_map->get_data()[i]); // Default is null - } + // Verify default values are inserted + const auto* col = assert_cast( + new_block->get_by_position(0).column.get()); + const auto* nested = assert_cast*>( + col->get_nested_column_ptr().get()); + const auto* null_map = assert_cast*>( + col->get_null_map_column_ptr().get()); + for (size_t i = 0; i < 5; ++i) { + EXPECT_EQ(0, nested->get_data()[i]); // Default value for Int32 is 0 + EXPECT_EQ(1, null_map->get_data()[i]); // Default is null } + } - // Test case 2: with reserved space (is_reserve = true) - { - auto new_block = original_block.create_same_struct_block(5, true); - EXPECT_EQ(original_block.columns(), new_block->columns()); - EXPECT_EQ(0, new_block->rows()); // Should be empty but with reserved space - EXPECT_EQ("nullable_col", new_block->get_by_position(0).name); - EXPECT_TRUE(new_block->get_by_position(0).type->equals(*nullable_type)); - } + // Test case 2: with reserved space (is_reserve = true) + { + auto new_block = original_block.create_same_struct_block(5, true); + EXPECT_EQ(original_block.columns(), new_block->columns()); + EXPECT_EQ(0, new_block->rows()); // Should be empty but with reserved space + EXPECT_EQ("nullable_col", new_block->get_by_position(0).name); + EXPECT_TRUE(new_block->get_by_position(0).type->equals(*nullable_type)); } } +} TEST(BlockTest, EraseTmpColumns) { // Test with empty block From 1ce54ee27d2179baa900854df101b01336a47b61 Mon Sep 17 00:00:00 2001 From: yoruet <1559650411@qq.com> Date: Wed, 18 Dec 2024 15:24:59 +0800 Subject: [PATCH 41/41] Fix BlockTest assertion to check for non-null ColumnConst in sorted block --- be/test/vec/core/block_test.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/be/test/vec/core/block_test.cpp b/be/test/vec/core/block_test.cpp index 32bbbea81e1645..8ae9548b13073d 100644 --- a/be/test/vec/core/block_test.cpp +++ b/be/test/vec/core/block_test.cpp @@ -1465,7 +1465,7 @@ TEST(BlockTest, SortColumns) { EXPECT_EQ(nullptr, typeid_cast( sorted_block.get_by_position(0).column.get())); EXPECT_TRUE(sorted_block.get_by_position(1).type->is_nullable()); - EXPECT_EQ(nullptr, typeid_cast( + EXPECT_NE(nullptr, typeid_cast( sorted_block.get_by_position(2).column.get())); } }