From 316b822c3776d02acdf09b3da07a11ceeab7c355 Mon Sep 17 00:00:00 2001 From: Logan Riggs Date: Wed, 25 Oct 2023 13:37:02 -0700 Subject: [PATCH] Cleanup and test --- cpp/src/gandiva/annotator.cc | 58 +--- cpp/src/gandiva/array_ops.cc | 89 +---- cpp/src/gandiva/bitmap_accumulator.h | 2 - cpp/src/gandiva/expr_decomposer.cc | 10 - cpp/src/gandiva/field_descriptor.h | 1 - cpp/src/gandiva/function_registry.cc | 3 - cpp/src/gandiva/gdv_function_stubs.cc | 1 - cpp/src/gandiva/llvm_generator.cc | 279 +-------------- cpp/src/gandiva/llvm_types.h | 2 +- cpp/src/gandiva/lvalue.h | 4 - cpp/src/gandiva/projector.cc | 73 +--- cpp/src/gandiva/tree_expr_builder.cc | 5 - .../main/cpp/expression_registry_helper.cc | 8 +- java/gandiva/src/main/cpp/jni_common.cc | 64 +--- .../gandiva/evaluator/ListVectorExpander.java | 63 +--- .../arrow/gandiva/evaluator/Projector.java | 323 +----------------- .../gandiva/evaluator/VectorExpander.java | 1 - 17 files changed, 35 insertions(+), 951 deletions(-) diff --git a/cpp/src/gandiva/annotator.cc b/cpp/src/gandiva/annotator.cc index dbc1cc50babaf..7fc8ab94d3c05 100644 --- a/cpp/src/gandiva/annotator.cc +++ b/cpp/src/gandiva/annotator.cc @@ -53,7 +53,6 @@ FieldDescriptorPtr Annotator::MakeDesc(FieldPtr field, bool is_output) { } if (field->type()->id() == arrow::Type::LIST) { - //std::cout << "LR Annotator::MakeDesc 1" << std::endl; offsets_idx = buffer_count_++; if (arrow::is_binary_like(field->type()->field(0)->type()->id())) { child_offsets_idx = buffer_count_++; @@ -64,10 +63,7 @@ FieldDescriptorPtr Annotator::MakeDesc(FieldPtr field, bool is_output) { data_buffer_ptr_idx = buffer_count_++; } int child_valid_buffer_ptr_idx = FieldDescriptor::kInvalidIdx; - //if (is_output) { - child_valid_buffer_ptr_idx = buffer_count_++; - //std::cout << "LR Annotator::MakeDesc 2 child_valid_buffer_ptr_idx=" << child_valid_buffer_ptr_idx << std::endl; - //} + child_valid_buffer_ptr_idx = buffer_count_++; return std::make_shared(field, data_idx, validity_idx, offsets_idx, data_buffer_ptr_idx, child_offsets_idx, child_valid_buffer_ptr_idx); } @@ -86,56 +82,45 @@ void Annotator::PrepareBuffersForField(const FieldDescriptor& desc, // The validity buffer is optional. Use nullptr if it does not have one. if (array_data.buffers[buffer_idx]) { uint8_t* validity_buf = const_cast(array_data.buffers[buffer_idx]->data()); - //std::cout << "LR Annotator::PrepareBuffersForField setting eval buffer -6 " << &validity_buf << std::endl; eval_batch->SetBuffer(desc.validity_idx(), validity_buf, array_data.offset); } else { - //std::cout << "LR Annotator::PrepareBuffersForField setting eval buffer -5 null " << std::endl; eval_batch->SetBuffer(desc.validity_idx(), nullptr, array_data.offset); } ++buffer_idx; if (desc.HasOffsetsIdx()) { uint8_t* offsets_buf = const_cast(array_data.buffers[buffer_idx]->data()); - //std::cout << "LR Annotator::PrepareBuffersForField setting eval buffer -4 " << &offsets_buf << " using idx=" << buffer_idx << std::endl; eval_batch->SetBuffer(desc.offsets_idx(), offsets_buf, array_data.offset); if (desc.HasChildOffsetsIdx()) { - //std::cout << "LR Annotator::PrepareBuffersForField 1 for field " << desc.Name() << " type is " << array_data.type->id() << std::endl; if (is_output) { // if list field is output field, we should put buffer pointer into eval batch // for resizing uint8_t* child_offsets_buf = reinterpret_cast( array_data.child_data.at(0)->buffers[buffer_idx].get()); - //std::cout << "LR Annotator::PrepareBuffersForField setting eval buffer -3a " << &child_offsets_buf << std::endl; eval_batch->SetBuffer(desc.child_data_offsets_idx(), child_offsets_buf, array_data.child_data.at(0)->offset); uint8_t* child_valid_buf = reinterpret_cast( array_data.child_data.at(0)->buffers[0].get()); - //std::cout << "LR Annotator::PrepareBuffersForField setting eval buffer -3b " << &child_valid_buf << std::endl; eval_batch->SetBuffer(desc.child_data_validity_idx(), child_valid_buf, array_data.child_data.at(0)->offset); } else { - //std::cout << "LR Annotator::PrepareBuffersForField 2" << std::endl; // if list field is input field, just put buffer data into eval batch uint8_t* child_offsets_buf = const_cast( array_data.child_data.at(0)->buffers[buffer_idx]->data()); - //std::cout << "LR Annotator::PrepareBuffersForField setting eval buffer -2a " << &child_offsets_buf << std::endl; eval_batch->SetBuffer(desc.child_data_offsets_idx(), child_offsets_buf, array_data.child_data.at(0)->offset); uint8_t* child_valid_buf = const_cast( array_data.child_data.at(0)->buffers[0]->data()); - //std::cout << "LR Annotator::PrepareBuffersForField setting eval buffer -2b " << &child_valid_buf << std::endl; eval_batch->SetBuffer(desc.child_data_offsets_idx(), child_valid_buf, array_data.child_data.at(0)->offset); } } if (array_data.type->id() != arrow::Type::LIST || arrow::is_binary_like(array_data.type->field(0)->type()->id())) { - //std::cout << "LR Annotator::PrepareBuffersForField 3" << std::endl; - // primitive type list data buffer index is 1 // binary like type list data buffer index is 2 ++buffer_idx; @@ -143,39 +128,15 @@ void Annotator::PrepareBuffersForField(const FieldDescriptor& desc, } if (array_data.type->id() != arrow::Type::LIST) { - //std::cout << "LR Annotator::PrepareBuffersForField 4" << std::endl; - - //std::cout << "LR Annotator::PrepareBuffersForField 4 buffer_idx " << buffer_idx << std::endl; uint8_t* data_buf = const_cast(array_data.buffers[buffer_idx]->data()); - //std::cout << "LR Annotator::PrepareBuffersForField 4a" << std::endl; - //std::cout << "LR Annotator::PrepareBuffersForField setting eval buffer -1 " << &data_buf << std::endl; eval_batch->SetBuffer(desc.data_idx(), data_buf, array_data.offset); - //std::cout << "LR Annotator::PrepareBuffersForField 4b" << std::endl; } else { - //std::cout << "LR Annotator::PrepareBuffersForField 5 " << desc.Name() << " buffer_idx " << buffer_idx << std::endl; - //std::cout << "LR Annotator::PrepareBuffersForField 5 array_data child size " << array_data.child_data.size() << std::endl; - - //std::cout << "LR array_data.child_data.at(0)->buffers[0]=" << array_data.child_data.at(0)->buffers[0] << std::endl; - //uint8_t* data_valid_buf = - // const_cast(array_data.child_data.at(0)->buffers[0]->data()); - //std::cout << "LR Annotator::PrepareBuffersForField setting offset eval data_valid_buf idx=" << 0 << " data_valid_buf=" << &data_valid_buf << std::endl; - //eval_batch->SetBuffer(desc.child_data_validity_idx(), data_valid_buf, array_data.child_data.at(0)->offset); - - uint8_t* data_buf = const_cast(array_data.child_data.at(0)->buffers[buffer_idx]->data()); - //std::cout << "LR Annotator::PrepareBuffersForField setting data buffer desc.data_idx()=" << desc.data_idx() << " idx=" << buffer_idx << " data=" << data_buf << std::endl; eval_batch->SetBuffer(desc.data_idx(), data_buf, array_data.child_data.at(0)->offset); - //std::cout << "LR Annotator::PrepareBuffersForField 5a" << std::endl; - - - //std::cout << "LR array_data.child_data.at(0)->buffers[0]->data() is " << array_data.child_data.at(0)->buffers[0] << std::endl; if (array_data.child_data.at(0)->buffers[0] ) { uint8_t* child_valid_buf = const_cast( array_data.child_data.at(0)->buffers[0]->data()); - //desc.set_child_data_validity_idx(4); - // std::cout << "LR Annotator::PrepareBuffersForField setting child valid buffer -5b " << - //" name=" << desc.Name() << " idx=" << desc.child_data_validity_idx() << " child_data_buf=" << *child_valid_buf << std::endl; eval_batch->SetBuffer(desc.child_data_validity_idx(), child_valid_buf, 0); } @@ -187,16 +148,11 @@ void Annotator::PrepareBuffersForField(const FieldDescriptor& desc, if (array_data.type->id() != arrow::Type::LIST) { uint8_t* data_buf_ptr = reinterpret_cast(array_data.buffers[buffer_idx].get()); - //std::cout << "LR Annotator::PrepareBuffersForField setting eval buffer 1 " << &data_buf_ptr << std::endl; eval_batch->SetBuffer(desc.data_buffer_ptr_idx(), data_buf_ptr, array_data.offset); } else { - //std::cout << "LR Annotator::PrepareBuffersForField is_output index " << desc.data_buffer_ptr_idx() << std::endl; - // list data buffer is in child data buffer uint8_t* data_buf_ptr = reinterpret_cast( array_data.child_data.at(0)->buffers[buffer_idx].get()); - //std::cout << "LR Annotator::PrepareBuffersForField setting eval data buffer " << buffer_idx << " data=" << &data_buf_ptr << std::endl; - eval_batch->SetBuffer(desc.data_buffer_ptr_idx(), data_buf_ptr, array_data.child_data.at(0)->offset); } @@ -209,7 +165,6 @@ EvalBatchPtr Annotator::PrepareEvalBatch(const arrow::RecordBatch& record_batch, EvalBatchPtr eval_batch = std::make_shared( record_batch.num_rows(), buffer_count_, local_bitmap_count_); - //std::cout << "LR PrepareEvalBatch 1" << std::endl; // Fill in the entries for the input fields. for (int i = 0; i < record_batch.num_columns(); ++i) { const std::string& name = record_batch.column_name(i); @@ -218,28 +173,17 @@ EvalBatchPtr Annotator::PrepareEvalBatch(const arrow::RecordBatch& record_batch, // skip columns not involved in the expression. continue; } - - /*std::cout << "LR PrepareEvalBatch 1a i=" << i << " record batch schema " << record_batch.schema()->ToString() - << " num rows " << record_batch.num_rows() - << " num columns " << record_batch.num_columns() - << " data size " << record_batch.column_data().size() - << " col 1 " << record_batch.column(0)->ToString() - << std::endl;*/ - - //std::cout << "LR PrepareEvalBatch 1a i=" << i << " record batch data " << record_batch.ToString() << std::endl; PrepareBuffersForField(*(found->second), *(record_batch.column_data(i)), eval_batch.get(), false /*is_output*/); } // Fill in the entries for the output fields. - //std::cout << "LR PrepareEvalBatch preparing output fields" << std::endl; int idx = 0; for (auto& arraydata : out_vector) { const FieldDescriptorPtr& desc = out_descs_.at(idx); PrepareBuffersForField(*desc, *arraydata, eval_batch.get(), true /*is_output*/); ++idx; } - //std::cout << "LR PrepareEvalBatch 2" << std::endl; return eval_batch; } diff --git a/cpp/src/gandiva/array_ops.cc b/cpp/src/gandiva/array_ops.cc index d83cd0a8986e6..b9ff34b4a8a4f 100644 --- a/cpp/src/gandiva/array_ops.cc +++ b/cpp/src/gandiva/array_ops.cc @@ -74,18 +74,15 @@ bool array_int32_contains_int32(int64_t context_ptr, const int32_t* entry_buf, return false; } +//LR TODO int32_t* array_int32_make_array(int64_t context_ptr, int32_t contains_data, int32_t* out_len) { - //std::cout << "LR array_int32_make_array offset data=" << contains_data << std::endl; int integers[] = { contains_data, 21, 3, contains_data, 5 }; *out_len = 5;// * 4; //length is number of items, but buffers must account for byte size. uint8_t* ret = gdv_fn_context_arena_malloc(context_ptr, *out_len * 4); memcpy(ret, integers, *out_len * 4); - //std::cout << "LR made a buffer length" << *out_len * 4 << " item 3 is = " << int32_t(ret[3*4]) << std::endl; - - //return reinterpret_cast(ret); return reinterpret_cast(ret); } @@ -94,7 +91,6 @@ bool array_int64_contains_int64(int64_t context_ptr, const int64_t* entry_buf, int64_t contains_data, bool entry_validWhat, int64_t loop_var, int64_t validity_index_var, bool* valid_row) { - //std::cout << "LR array_int64_contains_int64 offset length=" << entry_offsets_len << std::endl; if (!combined_row_validity) { *valid_row = false; return false; @@ -108,8 +104,7 @@ bool array_int64_contains_int64(int64_t context_ptr, const int64_t* entry_buf, if (!arrow::bit_util::GetBit(reinterpret_cast(entry_validityAdjusted), validityBitIndex + i)) { continue; } - int64_t entry_len = *(entry_buf + (i*2)); //LR TODO sizeof int64? - //std::cout << "LR checking value " << entry_len << " against target " << contains_data << std::endl; + int64_t entry_len = *(entry_buf + (i*2)); if (entry_len == contains_data) { return true; } @@ -120,85 +115,29 @@ bool array_int64_contains_int64(int64_t context_ptr, const int64_t* entry_buf, int32_t* array_int32_remove(int64_t context_ptr, const int32_t* entry_buf, int32_t entry_len, const int32_t* entry_validity, bool combined_row_validity, int32_t remove_data, bool entry_validWhat, - /*const int32_t* array_valid_bits,*/ int64_t loop_var, int64_t validity_index_var, + int64_t loop_var, int64_t validity_index_var, bool* valid_row, int32_t* out_len, int32_t** valid_ptr) { - //std::cout << "LR array_int32_remove data=" << remove_data - // << " entry_offsets_len " << entry_offsets_len << std::endl; - //std::cout << "LR array_int32_remove " << loop_var << std::endl; std::vector newInts; - - - /*std::bitset<8> validBits(*entry_valid); //LR TODO handle size. - std::bitset<8> outputValidBits; - std::cout << "LR Entry bitset is " << validBits << std::endl; - for (int i = 0; i < entry_offsets_len; i++) { - //std::cout << "LR going to check " << entry_buf + i << std::endl; - int32_t entry_item = *(entry_buf + (i * 1)); - //std::cout << "LR checking value " << entry_len << " against target " << remove_data << std::endl; - if (entry_item == remove_data) { - continue; - } else if (!validBits[i]) { - outputValidBits[i] = 0; - newInts.push_back(0); //This will be marked invalid, so data doesn't matter. - } else { - outputValidBits[i] = 1; - //Note the vector can have n elements, while validbits might have n+1. - newInts.push_back(entry_item); - } - }*/ - - //std::cout << "LR entry_buf=" << entry_buf << " *entry_buf=" << entry_buf << std::endl; - //std::cout << "LR notSureWhatThisIs=" << notSureWhatThisIs << " *notSureWhatThisIs=" << *notSureWhatThisIs << std::endl; - std::cout << "LR combined_row_validity=" << combined_row_validity << " entry_validWhat=" << entry_validWhat << " validity_index_var=" << validity_index_var << - " entry_validity=" << entry_validity << std::endl; - //<< " *notSureWhatThisIs=" << *notSureWhatThisIs << std::endl; //LR TODO not sure what entry_validWhat is. //LR TODO I'm not sure why entry_validty increases for each loop. It starts as the pointer to the validity buffer, so adjust here. const int32_t* entry_validityAdjusted = entry_validity - (loop_var ); - //std::bitset<15> maybeInputBits (*notSureWhatThisIsAdjusted); - //std::cout << "LR maybeInputBits=" << maybeInputBits << std::endl; - - int64_t validityBitIndex = 0; - //for (int i = 0; i < loop_var; i++) { - // validityBitIndex += *(offsets + i); - // std::cout << "LR i=" << i << " adding offset " << *(offsets + i) << " offset is " << offsets << std::endl; - //} - //The validity index already has the current row length added to it, so decrement. -validityBitIndex = validity_index_var - entry_len; - //TODO temp until the buffer is worked out. - //validityBitIndex -= (loop_var); - - - //std::cout << "Using validityBitIndex=" << validityBitIndex << std::endl; - - - + validityBitIndex = validity_index_var - entry_len; entry_validWhat = true; - //std::bitset<10> outputValidBits; - std::vector outValid; for (int i = 0; i < entry_len; i++) { - //std::cout << "LR going to check " << entry_buf + i << std::endl; int32_t entry_item = *(entry_buf + (i * 1)); - //std::cout << "LR checking value " << entry_len << " against target " << remove_data << std::endl; if (entry_item == remove_data) { - //outValid.push_back(false); - //newInts.push_back(42); - //entry_validWhat = false; - //TODO temp until buffer is worked out } else if (!arrow::bit_util::GetBit(reinterpret_cast(array_valid_bits), validityBitIndex + i)) { + //Do not add the item to remove. } else if (!arrow::bit_util::GetBit(reinterpret_cast(entry_validityAdjusted), validityBitIndex + i)) { outValid.push_back(false); newInts.push_back(0); - //outputValidBits[i] = 0; } else { outValid.push_back(true); - //Note the vector can have n elements, while validbits might have n+1. newInts.push_back(entry_item); - //outputValidBits[i] = 1; } } @@ -215,31 +154,13 @@ validityBitIndex = validity_index_var - entry_len; //length is number of items, but buffers must account for byte size. uint8_t* ret = gdv_fn_context_arena_malloc(context_ptr, outBufferLength); memcpy(ret, newInts.data(), outBufferLength); - //std::cout << "LR made a buffer length" << *out_len * 4 << " item 3 is = " << int32_t(ret[3*4]) << std::endl; - - *valid_row = true; - - - //unsigned long ll = outputValidBits.to_ulong(); if (!combined_row_validity) { - //ll = 0; *out_len = 0; *valid_row = false; //this one is what works for the top level validity. entry_validWhat = false; } - //LR no need, set along the way. memcpy(validRet, &ll, 1); - //*valid_len = 1; - //std::cout << "LR valid_buf is " << valid_buf << std::endl; - //std::cout << "LR outputValidBits is " << outputValidBits << std::endl; - //valid_buf = reinterpret_cast(validRet); - *valid_ptr = reinterpret_cast(validRet); - //std::cout << "LR setting valid_ptr=" << valid_ptr << " *valid_ptr=" << *valid_ptr << " **valid_ptr=" << **valid_ptr << " valid_ptr bitset data is " << std::bitset<8>(**valid_ptr) - // << " return value is " << reinterpret_cast(ret) << std::endl; - - - //return reinterpret_cast(ret); return reinterpret_cast(ret); } diff --git a/cpp/src/gandiva/bitmap_accumulator.h b/cpp/src/gandiva/bitmap_accumulator.h index f67b58847ce70..52d73696c788c 100644 --- a/cpp/src/gandiva/bitmap_accumulator.h +++ b/cpp/src/gandiva/bitmap_accumulator.h @@ -37,11 +37,9 @@ class GANDIVA_EXPORT BitMapAccumulator : public DexDefaultVisitor { void Visit(const VectorReadValidityDex& dex) { int idx = dex.ValidityIdx(); - //std::cout << "LR BitMapAccumulator visiting " << idx << std::endl; auto bitmap = eval_batch_.GetBuffer(idx); // The bitmap could be null. Ignore it in this case. if (bitmap != NULLPTR) { - //std::cout << "LR BitMapAccumulator is not null " << bitmap << std::endl; src_maps_.push_back(bitmap); src_map_offsets_.push_back(eval_batch_.GetBufferOffset(idx)); } diff --git a/cpp/src/gandiva/expr_decomposer.cc b/cpp/src/gandiva/expr_decomposer.cc index f35b3bc5cc5e8..e14fcbc8952cb 100644 --- a/cpp/src/gandiva/expr_decomposer.cc +++ b/cpp/src/gandiva/expr_decomposer.cc @@ -38,28 +38,20 @@ namespace gandiva { Status ExprDecomposer::Visit(const FieldNode& node) { auto desc = annotator_.CheckAndAddInputFieldDescriptor(node.field()); - //std::cout << "LR ExprDecomposer" << std::endl; DexPtr validity_dex = std::make_shared(desc); DexPtr value_dex; if (desc->HasChildOffsetsIdx()) { - //std::cout << "LR ExprDecomposer 1" << std::endl; // handle list type value_dex = std::make_shared(desc); } else if (desc->HasOffsetsIdx()) { - //std::cout << "LR ExprDecomposer 2" << std::endl; if (desc->field()->type()->id() == arrow::Type::LIST) { // handle list type - //std::cout << "LR ExprDecomposer 3" << std::endl; auto p = std::make_shared(desc); value_dex = p; - //int v = p->DataIdx(); - //std::cout << "LR primitive list type " v << " " << } else { - //std::cout << "LR ExprDecomposer 4" << std::endl; value_dex = std::make_shared(desc); } } else { - //std::cout << "LR ExprDecomposer 5" << std::endl; value_dex = std::make_shared(desc); } result_ = std::make_shared(validity_dex, value_dex); @@ -126,9 +118,7 @@ Status ExprDecomposer::Visit(const FunctionNode& in_node) { } else { DCHECK(native_function->result_nullable_type() == kResultNullInternal); - //LR TODO Need validity? // Add a local bitmap to track the output validity. - std::cout << "LR Making a nullable function holder with validity." << std::endl; int local_bitmap_idx = annotator_.AddLocalBitMap(); auto validity_dex = std::make_shared(local_bitmap_idx); diff --git a/cpp/src/gandiva/field_descriptor.h b/cpp/src/gandiva/field_descriptor.h index 0df7d4f2f2aaa..dfcf6872d501d 100644 --- a/cpp/src/gandiva/field_descriptor.h +++ b/cpp/src/gandiva/field_descriptor.h @@ -39,7 +39,6 @@ class FieldDescriptor { data_buffer_ptr_idx_(data_buffer_ptr_idx), child_offsets_idx_(child_offsets_idx), child_validity_idx_(child_validity_idx) { - //std::cout << "LR FieldDescriptor=" << Name() << " " << data_idx_ << "," << data_buffer_ptr_idx_ << "," << child_validity_idx_ << std::endl; } /// Index of validity array in the array-of-buffers diff --git a/cpp/src/gandiva/function_registry.cc b/cpp/src/gandiva/function_registry.cc index 021100678a08e..616ef8530c02b 100644 --- a/cpp/src/gandiva/function_registry.cc +++ b/cpp/src/gandiva/function_registry.cc @@ -72,10 +72,7 @@ SignatureMap FunctionRegistry::InitPCMap() { pc_registry_.insert(std::end(pc_registry_), v7.begin(), v7.end()); for (auto& elem : pc_registry_) { - //std::cout << "LR pc_registry_ item " << elem.pc_name() << " first signature name " << elem.signatures()[0].base_name() << std::endl; for (auto& func_signature : elem.signatures()) { - //std::cout << "LR Adding function to map " << func_signature.base_name() << std::endl; - //std::cout << " LR args " << func_signature.param_types map.insert(std::make_pair(&(func_signature), &elem)); } } diff --git a/cpp/src/gandiva/gdv_function_stubs.cc b/cpp/src/gandiva/gdv_function_stubs.cc index 38d61590613c1..2ca9529fa846b 100644 --- a/cpp/src/gandiva/gdv_function_stubs.cc +++ b/cpp/src/gandiva/gdv_function_stubs.cc @@ -166,7 +166,6 @@ int32_t gdv_fn_populate_varlen_vector(int64_t context_ptr, int8_t* data_ptr, TYPE* entry_buf, int32_t entry_len, int32_t** valid_ptr) { \ auto buffer = reinterpret_cast(data_ptr); \ int32_t offset = static_cast(buffer->size()); \ - std::cout << "LR gdv_fn_populate_list_" << slot << std::endl; \ auto status = buffer->Resize(offset + entry_len * SCALE, false /*shrink*/); \ if (!status.ok()) { \ gandiva::ExecutionContext* context = \ diff --git a/cpp/src/gandiva/llvm_generator.cc b/cpp/src/gandiva/llvm_generator.cc index 7fa7073a24948..9d8786e28c5bb 100644 --- a/cpp/src/gandiva/llvm_generator.cc +++ b/cpp/src/gandiva/llvm_generator.cc @@ -92,7 +92,6 @@ Status LLVMGenerator::Add(const ExpressionPtr expr, const FieldDescriptorPtr out std::unique_ptr compiled_expr(new CompiledExpr(value_validity, output)); std::string fn_name = "expr_" + std::to_string(idx) + "_" + std::to_string(static_cast(selection_vector_mode_)); - //std::cout << "LR LLVMGenerator::Add " << fn_name << std::endl; if (!cached_) { ARROW_RETURN_NOT_OK(engine_->LoadFunctionIRs()); ARROW_RETURN_NOT_OK(CodeGenExprValue(value_validity->value_expr(), @@ -101,7 +100,6 @@ Status LLVMGenerator::Add(const ExpressionPtr expr, const FieldDescriptorPtr out } compiled_expr->SetFunctionName(selection_vector_mode_, fn_name); compiled_exprs_.push_back(std::move(compiled_expr)); - //std::cout << "LR LLVMGenerator::Add Done" << std::endl; return Status::OK(); } @@ -110,18 +108,13 @@ Status LLVMGenerator::Add(const ExpressionPtr expr, const FieldDescriptorPtr out Status LLVMGenerator::Build(const ExpressionVector& exprs, SelectionVector::Mode mode) { selection_vector_mode_ = mode; - //std::cout << "LR LLVMGenerator::Build " << std::endl; for (auto& expr : exprs) { auto output = annotator_.AddOutputFieldDescriptor(expr->result()); ARROW_RETURN_NOT_OK(Add(expr, output)); } - //std::cout << "LR LLVMGenerator::Build 2" << std::endl; - //Too much logging. needle in haystack? - std::cout << "LR LLVMGenerator::Build 2 IR is " << engine_->DumpIR() << std::endl; // Compile and inject into the process' memory the generated function. ARROW_RETURN_NOT_OK(engine_->FinalizeModule()); - //std::cout << "LR LLVMGenerator::Build FinalizeModule" << std::endl; // setup the jit functions for each expression. for (auto& compiled_expr : compiled_exprs_) { @@ -130,7 +123,6 @@ Status LLVMGenerator::Build(const ExpressionVector& exprs, SelectionVector::Mode compiled_expr->SetJITFunction(selection_vector_mode_, jit_fn); } - //std::cout << "LR LLVMGenerator::Build Done" << std::endl; return Status::OK(); } @@ -152,12 +144,10 @@ Status LLVMGenerator::Execute(const arrow::RecordBatch& record_batch, const SelectionVector* selection_vector, const ArrayDataVector& output_vector) const { DCHECK_GT(record_batch.num_rows(), 0); - //std::cout << "LR LLVMGenerator::Execute 1"<< std::endl; auto eval_batch = annotator_.PrepareEvalBatch(record_batch, output_vector); DCHECK_GT(eval_batch->GetNumBuffers(), 0); - //std::cout << "LR LLVMGenerator::Execute 2" << std::endl; auto mode = SelectionVector::MODE_NONE; if (selection_vector != nullptr) { mode = selection_vector->GetMode(); @@ -167,7 +157,6 @@ Status LLVMGenerator::Execute(const arrow::RecordBatch& record_batch, selection_vector_mode_, " received vector with mode ", mode); } - // std::cout << "LR LLVMGenerator::Execute 3" << std::endl; for (auto& compiled_expr : compiled_exprs_) { // generate data/offset vectors. const uint8_t* selection_buffer = nullptr; @@ -177,7 +166,6 @@ Status LLVMGenerator::Execute(const arrow::RecordBatch& record_batch, num_output_rows = selection_vector->GetNumSlots(); } - //std::cout << "LR LLVMGenerator::Execute A1" << std::endl; EvalFunc jit_function = compiled_expr->GetJITFunction(mode); jit_function(eval_batch->GetBufferArray(), eval_batch->GetBufferOffsetArray(), eval_batch->GetLocalBitMapArray(), annotator_.GetHolderPointersArray(), @@ -189,7 +177,6 @@ Status LLVMGenerator::Execute(const arrow::RecordBatch& record_batch, eval_batch->GetExecutionContext()->has_error(), Status::ExecutionError(eval_batch->GetExecutionContext()->get_error())); - // std::cout << "LR LLVMGenerator::Execute A2" << std::endl; // generate validity vectors. ComputeBitMapsForExpr(*compiled_expr, selection_vector, eval_batch.get()); } @@ -209,7 +196,6 @@ llvm::Value* LLVMGenerator::GetValidityReference(llvm::Value* arg_addrs, int idx FieldPtr field) { const std::string& name = field->name(); llvm::Value* load = LoadVectorAtIndex(arg_addrs, types()->i64_type(), idx, name); - // std::cout << "LR LLVMGenerator::GetValidityReference name=" << name << " idx=" << idx << std::endl; return ir_builder()->CreateIntToPtr(load, types()->i64_ptr_type(), name + "_varray"); } @@ -218,7 +204,6 @@ llvm::Value* LLVMGenerator::GetDataBufferPtrReference(llvm::Value* arg_addrs, in FieldPtr field) { const std::string& name = field->name(); llvm::Value* load = LoadVectorAtIndex(arg_addrs, types()->i64_type(), idx, name); - // std::cout << "LR LLVMGenerator::GetDataBufferPtrReference name=" << name << " idx=" << idx << std::endl; return ir_builder()->CreateIntToPtr(load, types()->i8_ptr_type(), name + "_buf_ptr"); } @@ -314,8 +299,6 @@ Status LLVMGenerator::CodeGenExprValue(DexPtr value_expr, int buffer_count, FieldDescriptorPtr output, int suffix_idx, std::string& fn_name, SelectionVector::Mode selection_vector_mode) { - //std::cout << "LR CodeGenExprValue for output field " << output->Name() - // << " type " << output->Type()->ToString() << " output type id " << output->Type()->id() << std::endl; try { llvm::IRBuilder<>* builder = ir_builder(); // Create fn prototype : @@ -423,15 +406,11 @@ Status LLVMGenerator::CodeGenExprValue(DexPtr value_expr, int buffer_count, } // The visitor can add code to both the entry/loop blocks. - //std::cout << "LR calling visitor to get output data for [" << fn_name << "]" << std::endl; Visitor visitor(this, fn, loop_entry, arg_addrs, arg_local_bitmaps, arg_holder_ptrs, slice_offsets, arg_context_ptr, position_var, validity_index_var); value_expr->Accept(visitor); LValuePtr output_value = visitor.result(); - //std::cout << "LR addfunctioncall for " << full_name << " == value->getType " << str2 << " ret_type " << str << std::endl; - //std::cout << "LR output_value from visitor is " << output_value->to_string() << std::endl; - // The "current" block may have changed due to code generation in the visitor. llvm::BasicBlock* loop_body_tail = builder->GetInsertBlock(); @@ -458,12 +437,10 @@ Status LLVMGenerator::CodeGenExprValue(DexPtr value_expr, int buffer_count, {arg_context_ptr, output_buffer_ptr_ref, output_offset_ref, loop_var, output_value->data(), output_value->length()}); } else if (output_type_id == arrow::Type::STRUCT) { - //std::cout << "LR creating struct type to store the result." << std::endl; auto slot_offset = builder->CreateGEP(types()->IRType(output_type_id), output_ref, loop_var); builder->CreateStore(output_value->data(), slot_offset); } else if (output_type_id == arrow::Type::LIST) { auto output_list_internal_type = output->Type()->field(0)->type()->id(); - //std::cout << "LR creating list type to store the result with internal type " << output_list_internal_type << std::endl; if (arrow::is_binary_like(output_list_internal_type)) { auto output_list_value = std::dynamic_pointer_cast(output_value); @@ -475,22 +452,6 @@ Status LLVMGenerator::CodeGenExprValue(DexPtr value_expr, int buffer_count, child_output_offset_ref, loop_var, output_list_value->data(), output_list_value->child_offsets(), output_list_value->offsets_length()}); } else if (output_list_internal_type == arrow::Type::INT32) { - - - std::string str1; - llvm::raw_string_ostream output1(str1); - output_value->data()->print(output1); - - std::string str2; - llvm::raw_string_ostream output2(str2); - output_value->length()->print(output2); - - - // std::cout << "LR gdv_fn_populate_list_int32_t_vector params are " << arg_context_ptr << "," << output_buffer_ptr_ref << "," - // << output_offset_ref << "," << loop_var << - // " output_value->data() " << output_value->data() << " output_value->validity() " << output_value->validity() << - // " output_value->length() " << output_value->length() << std::endl; - AddFunctionCall("gdv_fn_populate_list_int32_t_vector", types()->i32_type(), {arg_context_ptr, output_buffer_ptr_ref, output_offset_ref, loop_var, output_value->data(), output_value->length(), output_value->validity()}); @@ -515,13 +476,6 @@ Status LLVMGenerator::CodeGenExprValue(DexPtr value_expr, int buffer_count, return Status::NotImplemented("output type ", output->Type()->ToString(), " not supported"); } - //LR HACK somehow this caused a crash???? - //std::cout << "LR saving result " << output->Name() << " value " << - // printType(output_value->data()) << std::endl; - - //ADD_TRACE("saving result 2 " + output->Name() + " value %T", output_value->data()); - //int jello = 0; - //std::cout << "LR CodeGenExprValue " << jello++ << std::endl; if (visitor.has_arena_allocs()) { // Reset allocations to avoid excessive memory usage. Once the result is copied to // the output vector (store instruction above), any memory allocations in this @@ -531,23 +485,19 @@ Status LLVMGenerator::CodeGenExprValue(DexPtr value_expr, int buffer_count, AddFunctionCall("gdv_fn_context_arena_reset", types()->void_type(), reset_args); } - //std::cout << "LR CodeGenExprValue " << jello++ << std::endl; // check loop_var loop_var->addIncoming(types()->i64_constant(0), loop_entry); llvm::Value* loop_update = builder->CreateAdd(loop_var, types()->i64_constant(1), "loop_var+1"); loop_var->addIncoming(loop_update, loop_body_tail); - //std::cout << "LR CodeGenExprValue " << jello++ << std::endl; llvm::Value* loop_var_check = builder->CreateICmpSLT(loop_update, arg_nrecords, "loop_var < nrec"); builder->CreateCondBr(loop_var_check, loop_body, loop_exit); - //std::cout << "LR CodeGenExprValue " << jello++ << std::endl; // Loop exit builder->SetInsertPoint(loop_exit); builder->CreateRet(types()->i32_constant(0)); - //std::cout << "LR CodeGenExprValue " << jello++ << std::endl; return Status::OK(); } catch (std::exception& e) { std::cout << e.what() << std::endl; @@ -615,12 +565,6 @@ void LLVMGenerator::ComputeBitMapsForExpr(const CompiledExpr& compiled_expr, uint8_t* dst_bitmap = eval_batch->GetBuffer(out_idx); // Compute the destination bitmap. if (selection_vector == nullptr) { - // std::cout << "LR blarg" << std::endl; - //std::cout << "LR bitmap array buffer index is " << out_idx << " bitset is " << std::bitset<8>(*dst_bitmap) << std::endl; - //std::cout << "LR bitmap array buffer index is " << 0 << " bitset is " << std::bitset<8>(* eval_batch->GetBuffer(0)) << std::endl; - //std::cout << "LR bitmap thing getting the validity buffer " << compiled_expr.output()->validity_idx() << std::endl; - //std::cout << "LR Eval buffer has " << eval_batch->GetNumBuffers() << std::endl; - // << " bitset is " << std::bitset<8>(* eval_batch->GetBuffer(compiled_expr.output()->child_data_validity_idx() )) << std::endl; accumulator.ComputeResult(dst_bitmap); } else { /// The output bitmap is an intersection of some input/local bitmaps. However, with a @@ -633,19 +577,6 @@ void LLVMGenerator::ComputeBitMapsForExpr(const CompiledExpr& compiled_expr, LocalBitMapsHolder bit_map_holder(eval_batch->num_records(), 1); uint8_t* temp_bitmap = bit_map_holder.GetLocalBitMap(0); accumulator.ComputeResult(temp_bitmap); - - - //std::cout << "LR computing bitmap. Size is " << bit_map_holder.GetLocalBitMapSize() << std::endl; - // for (int i = 0; i < bit_map_holder.GetLocalBitMapSize(); i++) { - // uint8_t* arr = bit_map_holder.GetLocalBitMap(i); - // std::cout << "LR bitmap array [" << i << "] size is " << bit_map_holder.GetNumRecords() << " bitset is " << std::bitset<8>(*arr) << std::endl; - - //} - - - - - auto num_out_records = selection_vector->GetNumSlots(); // the memset isn't required, doing it just for valgrind. memset(dst_bitmap, 0, arrow::bit_util::BytesForBits(num_out_records)); @@ -683,8 +614,6 @@ llvm::Value* LLVMGenerator::AddFunctionCall(const std::string& full_name, llvm::raw_string_ostream output2(str2); ret_type->print(output); value->getType()->print(output2); - //std::cout << "LR addfunctioncall for " << full_name << " == value->getType " << str2 << " ret_type " << str << std::endl; - DCHECK(value->getType() == ret_type); } @@ -770,23 +699,14 @@ void LLVMGenerator::Visitor::Visit(const VectorReadFixedLenValueListDex& dex) { auto types = generator_->types(); auto type = types->IRType(dex.FieldType()->id()); - //std::cout << "LR Visitor::Visit(const VectorReadFixedLenValueListDex& dex)" << std::endl; - //std::cout << "LR VectorReadFixedLenValueListDex dex.FieldType()->id() " << dex.FieldType()->id() << " types->DataVecType( " << printType(types->DataVecType(dex.FieldType())) << std::endl; - //std::cout << "LR VectorReadFixedLenValueListDex IRType is " << printType(type) << std::endl; arrow::Type::type at = arrow::Type::INT32; type = types->IRType(at); - //type = types->DataVecType(dex.FieldType()); - //std::cout << "LR VectorReadFixedLenValueListDex went with type " << printType(type) << std::endl; // compute list len from the offsets array. llvm::Value* offsets_slot_ref = GetBufferReference(dex.OffsetsIdx(), kBufferTypeOffsets, dex.Field()); llvm::Value* offsets_slot_index = builder->CreateAdd(loop_var_, GetSliceOffset(dex.OffsetsIdx())); - //std::cout << "LR VectorReadFixedLenValueListDex values " << printType(offsets_slot_ref) << " [next] " << - // printType(offsets_slot_index) << std::endl; - - // => offset_start = offsets[loop_var] slot = builder->CreateGEP(type, offsets_slot_ref, offsets_slot_index); llvm::Value* offset_start = builder->CreateLoad(type, slot, "offset_start"); @@ -808,15 +728,6 @@ void LLVMGenerator::Visitor::Visit(const VectorReadFixedLenValueListDex& dex) { builder->CreateAdd(offset_start_int64, GetSliceOffset(dex.DataIdx())); llvm::Value* data_list = builder->CreateGEP(type, slot_ref, slot_index); -//LR-VAR - // auto valid_var = builder->CreateIntCast(list_len, types->i64_type(), true); - //builder->CreateStore(valid_var, validity_index_var_); - - - - - - auto list_len_var = builder->CreateIntCast(list_len, types->i64_type(), true); llvm::Value* vv_end = builder->CreateLoad(generator_->types()->i64_type(),validity_index_var_, "vv_end"); @@ -824,17 +735,6 @@ llvm::Value* updated_validity_index_var = builder->CreateAdd( vv_end, list_len_var, "validity_index_var+offset"); builder->CreateStore(updated_validity_index_var, validity_index_var_); - //builder->CreateStore(updated_validity_index_var, validity_index_var_); - - - - // TODO: handle bool type bitmap - //Validity bitmap. - //llvm::Value* b_slot_ref = GetBufferReference(dex.ValidityIdx(), kBufferTypeValidity, dex.Field()); - //llvm::Value* b_slot_index = - // builder->CreateAdd(loop_var_, GetSliceOffset(dex.ValidityIdx())); - //llvm::Value* validity = generator_->GetPackedValidityBitValue(b_slot_ref, b_slot_index); - llvm::Value* b_slot_index = builder->CreateAdd(loop_var_, GetSliceOffset(dex.ValidityIdx())); llvm::Value* b_slot_ref = GetBufferReference(dex.ChildValidityIdx(), kBufferTypeValidity, dex.Field()); @@ -845,13 +745,6 @@ llvm::Value* updated_validity_index_var = builder->CreateAdd( llvm::raw_string_ostream output3(str3); validity->print(output3); } - //std::cout << "LR VectorReadFixedLenValueListDex using validity " << str3 << std::endl; - - // TODO: handle decimal precision and scale - - //std::cout << "LR VectorReadFixedLenValueListDex slot_ref " << printType(slot_ref) << std::endl; - //std::cout << "LR VectorReadFixedLenValueListDex visit fixed-len data list vector " << dex.FieldName() << - // " length " << printType(list_len) << " data_list " << printType(data_list) << std::endl; ADD_VISITOR_TRACE("visit fixed-len data list vector " + dex.FieldName() + " length %T", list_len); ADD_VISITOR_TRACE("visit fixed-len data list vector " + dex.FieldName() + " updated_validity_index_var %T", @@ -914,10 +807,7 @@ void LLVMGenerator::Visitor::Visit(const VectorReadVarLenValueListDex& dex) { llvm::Value* slot; auto types = generator_->types(); auto type = types->IRType(dex.FieldType()->id()); - //std::cout << "LR dex.FieldType()->id() " << dex.FieldType()->id() << " types->DataVecType( " << printType(types->DataVecType(dex.FieldType())) << std::endl; - //std::cout << "LR IRType is " << printType(type) << std::endl; - //type = types->DataVecType(dex.FieldType()); - //LR HACK. Original was type = types->DataVecType(dex.FieldType()); + arrow::Type::type at = arrow::Type::INT32; type = types->IRType(at); @@ -930,29 +820,24 @@ void LLVMGenerator::Visitor::Visit(const VectorReadVarLenValueListDex& dex) { int i = 0; std::cout << "VectorReadVarLenValueListDex " << i++ << std::endl; // => offset_start = offsets[loop_var] - //std::cout << "LR Type is " << printType(type) << std::endl; slot = builder->CreateGEP(type, offsets_slot_ref, offsets_slot_index); std::cout << "VectorReadVarLenValueListDex " << i++ << std::endl; llvm::Value* offset_start = builder->CreateLoad(type, slot, "offset_start"); - //std::cout << "VectorReadVarLenValueListDex " << i++ << std::endl; // => offset_end = offsets[loop_var + 1] llvm::Value* offsets_slot_index_next = builder->CreateAdd( offsets_slot_index, generator_->types()->i64_constant(1), "loop_var+1"); slot = builder->CreateGEP(type, offsets_slot_ref, offsets_slot_index_next); llvm::Value* offset_end = builder->CreateLoad(type, slot, "offset_end"); - //std::cout << "VectorReadVarLenValueListDex " << i++ << std::endl; // => list_data_length = offset_end - offset_start llvm::Value* list_data_length = builder->CreateSub(offset_end, offset_start, "offsets_len"); - //std::cout << "VectorReadVarLenValueListDex " << i++ << std::endl; // get the child offsets array from the child offsets array, // start from offset 'offset_start' llvm::Value* child_offset_slot_ref = GetBufferReference(dex.ChildOffsetsIdx(), kBufferTypeChildOffsets, dex.Field()); - //std::cout << "VectorReadVarLenValueListDex " << i++ << std::endl; // do not forget slice offset llvm::Value* offset_start_int64 = builder->CreateIntCast(offset_start, generator_->types()->i64_type(), true); @@ -963,13 +848,11 @@ void LLVMGenerator::Visitor::Visit(const VectorReadVarLenValueListDex& dex) { llvm::Value* child_offset_start = builder->CreateLoad(type, child_offsets, "child_offset_start"); - //std::cout << "VectorReadVarLenValueListDex " << i++ << std::endl; // get the data array llvm::Value* data_slot_ref = GetBufferReference(dex.DataIdx(), kBufferTypeData, dex.Field()); llvm::Value* data_value = builder->CreateGEP(type, data_slot_ref, child_offset_start); - //std::cout << "VectorReadVarLenValueListDex " << i++ << std::endl; result_.reset(new ListLValue(data_value, child_offsets, list_data_length)); } @@ -1013,7 +896,6 @@ void LLVMGenerator::Visitor::Visit(const LiteralDex& dex) { llvm::Value* value = nullptr; llvm::Value* len = nullptr; - //std::cout << "LR LiteralDex type " << dex.type()->id() << std::endl; switch (dex.type()->id()) { case arrow::Type::BOOL: value = types->i1_constant(std::get(dex.holder())); @@ -1054,7 +936,6 @@ void LLVMGenerator::Visitor::Visit(const LiteralDex& dex) { case arrow::Type::STRING: case arrow::Type::BINARY: { const std::string& str = std::get(dex.holder()); - //std::cout << "LR Literal string " << str << std::endl; value = ir_builder()->CreateGlobalStringPtr(str.c_str()); len = types->i32_constant(static_cast(str.length())); break; @@ -1108,8 +989,6 @@ void LLVMGenerator::Visitor::Visit(const NonNullableFuncDex& dex) { native_function->NeedsContext()); auto arrow_return_type = dex.func_descriptor()->return_type(); - //std::cout << "LR NonNullableFunc 1 result_type " << printType(generator_->types()->DataVecType(arrow_return_type)) << " arrow_return_type " << arrow_return_type->ToString() << " old type " << printType(generator_->types()->IRType(arrow_return_type->id())) << std::endl; - if (native_function->CanReturnErrors()) { // slow path : if a function can return errors, skip invoking the function // unless all of the input args are valid. Otherwise, it can cause spurious errors. @@ -1118,10 +997,6 @@ void LLVMGenerator::Visitor::Visit(const NonNullableFuncDex& dex) { LLVMTypes* types = generator_->types(); auto arrow_type_id = arrow_return_type->id(); auto result_type = types->DataVecType(arrow_return_type); - //Result type array/list is special. - //auto result_type = types->IRType(arrow_type_id); - //std::cout << "LR NonNullableFunc 2 result_type " << printType(result_type) << " arrow_return_type " << arrow_return_type->ToString() << " old type " << types->IRType(arrow_type_id) << std::endl; - // Build combined validity of the args. llvm::Value* is_valid = types->true_constant(); for (auto& pair : dex.args()) { @@ -1193,52 +1068,18 @@ void LLVMGenerator::Visitor::Visit(const NullableInternalFuncDex& dex) { } if (passLoopVars) { - //Pointer to validity bitmap and bit starting index for accessing validity bits in the called function. - //llvm::Value* b_slot_ref = GetBufferReference(dex.ChildValidityIdx(), kBufferTypeValidity, dex.Field()); - //llvm::Value* validity = b_slot_ref; - - //Compute the bit offset. - //int64_t validIndex = 0; - //for (int i = 0; i < loop_var_; i++) { - // validIndex += *(arg_offsets_ + i); - //} - - /*std::string str3 = "validity:"; - if (validity) { - llvm::raw_string_ostream output3(str3); - validity->print(output3); - }*/ std::string str32 = "loopvar:"; if (loop_var_) { llvm::raw_string_ostream output3(str32); loop_var_->print(output3); } - //std::cout << "LR VectorReadFixedLenValueListDex loopvar=" << str32 << " result()->length()=" << result()->length() << std::endl; - //TODO params.push_back(validity); - params.push_back(loop_var_); -//LR-VAR - //llvm::Value* updated_validity_index_var = builder->CreateAdd( - // validity_index_var_, result()->length(), "validity_index_var+offset"); - // check loop_var - //loop_var->addIncoming(types()->i64_constant(0), loop_entry); - - //builder->CreateStore(updated_validity_index_var, validity_index_var_); - auto valid_var = builder->CreateLoad(types->i64_type(), validity_index_var_, "loaded_var"); + params.push_back(loop_var_); + auto valid_var = builder->CreateLoad(types->i64_type(), validity_index_var_, "loaded_var"); params.push_back(valid_var); } - - - - - - - - - - // add an extra arg for validity (allocated on stack). llvm::AllocaInst* result_valid_ptr = new llvm::AllocaInst(types->i8_type(), 0, "result_valid", entry_block_); @@ -1250,96 +1091,10 @@ void LLVMGenerator::Visitor::Visit(const NullableInternalFuncDex& dex) { // load the result validity and truncate to i1. auto result_valid_i8 = builder->CreateLoad(types->i8_type(), result_valid_ptr); llvm::Value* result_valid = builder->CreateTrunc(result_valid_i8, types->i1_type()); - - //std::bitset<8> bs(dex.local_bitmap_idx()); - //std::cout <<"LR NullableInternal validity from dex.local_bitmap_idx()=" << bs << std::endl; - - - // auto result_valid_i8ptr = builder->CreateLoad(types->i8_ptr_type(), result_valid_ptr); - - // std::cout << "LR NullableInternal function param validity=" << result_valid_i8ptr << std::endl; // set validity bit in the local bitmap. ClearLocalBitMapIfNotValid(dex.local_bitmap_idx(), result_valid); } -/* -void LLVMGenerator::Visitor::Visit(const NullableInternalListFuncDex& dex) { - ADD_VISITOR_TRACE("visit NullableInternalListFuncDex base function " + - dex.func_descriptor()->name()); - llvm::IRBuilder<>* builder = ir_builder(); - LLVMTypes* types = generator_->types(); - - const NativeFunction* native_function = dex.native_function(); - - // build function params along with validity. - auto params = BuildParams(dex.get_holder_idx(), dex.args(), true, - native_function->NeedsContext()); - - auto arrow_return_type = dex.func_descriptor()->return_type(); - - - - auto arrow_type_id = arrow_return_type->arrow_return_type->id(); - - if (arrow_return_type_id == arrow::Type::LIST) - { - //Pointer to validity bitmap and bit starting index for accessing validity bits in the called function. - llvm::Value* b_slot_index = - builder->CreateAdd(loop_var_, GetSliceOffset(dex.ValidityIdx())); - llvm::Value* b_slot_ref = GetBufferReference(dex.ChildValidityIdx(), kBufferTypeValidity, dex.Field()); - //llvm::Value* validity = builder->CreateGEP(type, b_slot_ref, 0); - llvm::Value* validity = b_slot_ref; - - //Compute the bit offset. - //int64_t validIndex = 0; - //for (int i = 0; i < loop_var_; i++) { - // validIndex += *(arg_offsets_ + i); - //} - - std::string str3 = "validity:"; - if (validity) { - llvm::raw_string_ostream output3(str3); - validity->print(output3); - } - std::string str32 = "loopvar:"; - if (loop_var_) { - llvm::raw_string_ostream output3(str32); - loop_var_->print(output3); - } - std::cout << "LR VectorReadFixedLenValueListDex loopvar=" + str32 + " using validity " << str3 << std::endl; - params.push_back(validity); - params.push_back(loop_var_); - params.push_back(arg_offsets_); - } - - - - - - - - // add an extra arg for validity (allocated on stack). - llvm::AllocaInst* result_valid_ptr = - new llvm::AllocaInst(types->i8_type(), 0, "result_valid", entry_block_); - params.push_back(result_valid_ptr); - - result_ = BuildFunctionCall(native_function, arrow_return_type, ¶ms); - - // load the result validity and truncate to i1. - auto result_valid_i8 = builder->CreateLoad(types->i8_type(), result_valid_ptr); - llvm::Value* result_valid = builder->CreateTrunc(result_valid_i8, types->i1_type()); - - std::bitset<8> bs(dex.local_bitmap_idx()); - std::cout <<"LR NullableInternalListFuncDex validity from dex.local_bitmap_idx()=" << bs << std::endl; - - - auto result_valid_i8ptr = builder->CreateLoad(types->i8_ptr_type(), result_valid_ptr); - - std::cout << "LR NullableInternalListFuncDex function param validity=" << result_valid_i8ptr << std::endl; - // set validity bit in the local bitmap. - ClearLocalBitMapIfNotValid(dex.local_bitmap_idx(), result_valid); -}*/ - void LLVMGenerator::Visitor::Visit(const IfDex& dex) { ADD_VISITOR_TRACE("visit IfExpression"); llvm::IRBuilder<>* builder = ir_builder(); @@ -1705,7 +1460,6 @@ LValuePtr LLVMGenerator::Visitor::BuildIfElse(llvm::Value* condition, LValuePtr LLVMGenerator::Visitor::BuildValueAndValidity(const ValueValidityPair& pair) { // generate code for value - // std::cout << "LR LLVMGenerator::Visitor::BuildValueAndValidity" << std::endl; auto value_expr = pair.value_expr(); value_expr->Accept(*this); auto value = result()->data(); @@ -1725,10 +1479,6 @@ LValuePtr LLVMGenerator::Visitor::BuildFunctionCall(const NativeFunction* func, auto llvm_return_type = types->DataVecType(arrow_return_type); DecimalIR decimalIR(generator_->engine_.get()); - //std::cout << "LR LLVMGenerator::Visitor::BuildFunctionCall for " << func->pc_name() << " llvm return type is " << printType(llvm_return_type) << std::endl; - //for (unsigned int i = 0; i < params->size(); i++) { - // std::cout << "LR param " << i << printType(params->at(i)) << std::endl; - //} if (arrow_return_type_id == arrow::Type::DECIMAL) { // For decimal fns, the output precision/scale are passed along as parameters. // @@ -1757,7 +1507,6 @@ LValuePtr LLVMGenerator::Visitor::BuildFunctionCall(const NativeFunction* func, llvm::AllocaInst* result_len_ptr = nullptr; llvm::AllocaInst* valid_ptr = nullptr; if (arrow::is_binary_like(arrow_return_type_id)) { - //std::cout << "LR LLVMGenerator::Visitor::BuildFunctionCall is binary like" << std::endl; result_len_ptr = new llvm::AllocaInst(generator_->types()->i32_type(), 0, "result_len", entry_block_); params->push_back(result_len_ptr); @@ -1765,26 +1514,16 @@ LValuePtr LLVMGenerator::Visitor::BuildFunctionCall(const NativeFunction* func, } if (arrow_return_type_id == arrow::Type::LIST) { - //std::cout << "LR LLVMGenerator::Visitor::BuildFunctionCall is list" << std::endl; + result_len_ptr = new llvm::AllocaInst(generator_->types()->i32_type(), 0, "result_len", entry_block_); params->push_back(result_len_ptr); has_arena_allocs_ = true; - valid_ptr = new llvm::AllocaInst(generator_->types()->i32_ptr_type(), 0, "valid_ptr", entry_block_); - // std::cout << "LR allocinst for valid_ptr=" << printType(valid_ptr) << std::endl; params->push_back(valid_ptr); } - //std::cout << "LR LLVMGenerator::Visitor::BuildFunctionCall params are: " << std::endl; - /*for (auto p : *params) { - std::string str1; - llvm::raw_string_ostream output1(str1); - p->print(output1); - std::cout << str1 << std::endl; - }*/ - // Make the function call llvm::IRBuilder<>* builder = ir_builder(); auto value = @@ -1799,8 +1538,6 @@ LValuePtr LLVMGenerator::Visitor::BuildFunctionCall(const NativeFunction* func, (valid_ptr == nullptr) ? nullptr : builder->CreateLoad(generator_->types()->i32_ptr_type(), valid_ptr); - // std::cout << "LR LLVMGenerator::Visitor::BuildFunctionCall is DONE. using validity=" << validity << " ptr=" << valid_ptr << std::endl; - // std::cout << "LR LLVMGenerator::Visitor::BuildFunctionCall is DONE. using value_len=" << value_len << " ptr=" << result_len_ptr << std::endl; return std::make_shared(value, value_len, validity); } } @@ -1816,7 +1553,6 @@ std::vector LLVMGenerator::Visitor::BuildParams( params.push_back(arg_context_ptr_); } - //std::cout << "LR BuildParams1" << std::endl; // if the function has holder, add the holder pointer. if (holder_idx != -1) { auto builder = ir_builder(); @@ -1825,7 +1561,6 @@ std::vector LLVMGenerator::Visitor::BuildParams( llvm::BasicBlock* saved_block = builder->GetInsertBlock(); builder->SetInsertPoint(entry_block_); - // std::cout << "LR BuildParams1a" << std::endl; auto holder = generator_->LoadVectorAtIndex( arg_holder_ptrs_, generator_->types()->i64_type(), holder_idx, "holder"); @@ -1833,25 +1568,21 @@ std::vector LLVMGenerator::Visitor::BuildParams( params.push_back(holder); } - // std::cout << "LR BuildParams2" << std::endl; // build the function params, along with the validities. for (auto& pair : args) { // build value. DexPtr value_expr = pair->value_expr(); - // std::cout << "LR BuildParams2a" << std::endl; value_expr->Accept(*this); - // std::cout << "LR BuildParams2b" << std::endl; LValue& result_ref = *result(); // append all the parameters corresponding to this LValue. result_ref.AppendFunctionParams(¶ms); - // std::cout << "LR BuildParams2c" << std::endl; + // build validity. if (with_validity) { llvm::Value* validity_expr = BuildCombinedValidity(pair->validity_exprs()); params.push_back(validity_expr); - // std::cout << "LR BuildParams2d adding combined validity" << std::endl; } } diff --git a/cpp/src/gandiva/llvm_types.h b/cpp/src/gandiva/llvm_types.h index fc875c14d380a..5e43eb74abcdf 100644 --- a/cpp/src/gandiva/llvm_types.h +++ b/cpp/src/gandiva/llvm_types.h @@ -126,7 +126,7 @@ class GANDIVA_EXPORT LLVMTypes { // offsets buffer is to separate data into list // not support nested list if (data_type->id() == arrow::Type::LIST) { - //LR HACK + //LR TODO //std::cout << "LR Returning list type as type " << data_type->field(0)->type()->id()<< " for IR " << std::endl; //return IRType(data_type->field(0)->type()->id()); //return IRType(data_type->id()); diff --git a/cpp/src/gandiva/lvalue.h b/cpp/src/gandiva/lvalue.h index 3b2bbd3b0ec96..2f33a97788c6c 100644 --- a/cpp/src/gandiva/lvalue.h +++ b/cpp/src/gandiva/lvalue.h @@ -33,7 +33,6 @@ class GANDIVA_EXPORT LValue { explicit LValue(llvm::Value* data, llvm::Value* length = NULLPTR, llvm::Value* validity = NULLPTR) : data_(data), length_(length), validity_(validity) { - //std::cout << "LR created LValue " << to_string() << std::endl; } virtual ~LValue() = default; @@ -45,7 +44,6 @@ class GANDIVA_EXPORT LValue { // Append the params required when passing this as a function parameter. virtual void AppendFunctionParams(std::vector* params) { - // std::cout << "LR LValue::AppendFunctionParams" << std::endl; params->push_back(data_); if (length_ != NULLPTR) { params->push_back(length_); @@ -112,7 +110,6 @@ class GANDIVA_EXPORT ListLValue : public LValue { : LValue(data, NULLPTR, validity), child_offsets_(child_offsets), offsets_length_(offsets_length) { - //std::cout << "LR Creating ListLValue " << std::endl; } llvm::Value* child_offsets() { return child_offsets_; } @@ -120,7 +117,6 @@ class GANDIVA_EXPORT ListLValue : public LValue { llvm::Value* offsets_length() { return offsets_length_; } void AppendFunctionParams(std::vector* params) override { - // std::cout << "LR ListLValue::AppendFunctionParams" << std::endl; LValue::AppendFunctionParams(params); params->push_back(child_offsets_); params->push_back(offsets_length_); diff --git a/cpp/src/gandiva/projector.cc b/cpp/src/gandiva/projector.cc index b0d5331a3ee48..a3cccca11191d 100644 --- a/cpp/src/gandiva/projector.cc +++ b/cpp/src/gandiva/projector.cc @@ -169,7 +169,6 @@ Status Projector::Make(SchemaPtr schema, const ExpressionVector& exprs, ARROW_RETURN_IF(configuration == nullptr, Status::Invalid("Configuration cannot be null")); - //std::cout << "LR Projector::Make 1" << std::endl; // see if equivalent projector was already built std::shared_ptr>> cache = LLVMGenerator::GetCache(); @@ -192,7 +191,6 @@ Status Projector::Make(SchemaPtr schema, const ExpressionVector& exprs, std::unique_ptr llvm_gen; ARROW_RETURN_NOT_OK(LLVMGenerator::Make(configuration, is_cached, &llvm_gen)); - //std::cout << "LR Projector::Make 2" << std::endl; if (!is_cached && sec_cache != nullptr) { std::shared_ptr arrow_buffer = sec_cache->Get(GetSecondaryCacheKey(cache_key.ToString())); @@ -210,7 +208,6 @@ Status Projector::Make(SchemaPtr schema, const ExpressionVector& exprs, // Run the validation on the expressions. // Return if any of the expression is invalid since // we will not be able to process further. - //std::cout << "LR Projector::Make 3" << std::endl; if (!is_cached) { ExprValidator expr_validator(llvm_gen->types(), schema); for (auto& expr : exprs) { @@ -230,13 +227,11 @@ Status Projector::Make(SchemaPtr schema, const ExpressionVector& exprs, output_fields.push_back(expr->result()); } - //std::cout << "LR Projector::Make 4" << std::endl; // Instantiate the projector with the completely built llvm generator *projector = std::shared_ptr( new Projector(std::move(llvm_gen), schema, output_fields, configuration)); projector->get()->SetBuiltFromCache(is_cached); - //std::cout << "LR Projector::Make 5" << std::endl; if (sec_cache != nullptr && is_cached == false) { std::shared_ptr sec_cached_obj = cache->GetObjectCode(cache_key); llvm::StringRef string_buffer = sec_cached_obj->getBuffer(); @@ -245,7 +240,6 @@ Status Projector::Make(SchemaPtr schema, const ExpressionVector& exprs, sec_cache->Set(GetSecondaryCacheKey(cache_key.ToString()), arrow_buffer); } - //std::cout << "LR Projector::Make DONE" << std::endl; return Status::OK(); } @@ -259,7 +253,6 @@ Status Projector::Evaluate(const arrow::RecordBatch& batch, const ArrayDataVector& output_data_vecs) const { ARROW_RETURN_NOT_OK(ValidateEvaluateArgsCommon(batch)); - //std::cout << "LR the other Projector::Evaluate" << std::endl; if (output_data_vecs.size() != output_fields_.size()) { std::stringstream ss; ss << "number of buffers for output_data_vecs is " << output_data_vecs.size() @@ -267,10 +260,8 @@ Status Projector::Evaluate(const arrow::RecordBatch& batch, return Status::Invalid(ss.str()); } -//std::cout << "LR the other Projector::Evaluate 1a" << std::endl; int idx = 0; for (auto& array_data : output_data_vecs) { - //std::cout << "LR the other Projector::Evaluate checking array_data" << std::endl; if (array_data == nullptr) { std::stringstream ss; ss << "array for output field " << output_fields_[idx]->name() << "is null."; @@ -280,58 +271,13 @@ Status Projector::Evaluate(const arrow::RecordBatch& batch, auto num_rows = selection_vector == nullptr ? batch.num_rows() : selection_vector->GetNumSlots(); - //std::cout << "LR the other Projector::Evaluate about to validate capacity" << std::endl; ARROW_RETURN_NOT_OK( ValidateArrayDataCapacity(*array_data, *(output_fields_[idx]), num_rows)); ++idx; } - //std::cout << "LR the other Projector::Evaluate 2" << std::endl; ARROW_RETURN_NOT_OK( llvm_generator_->Execute(batch, selection_vector, output_data_vecs)); - // Create and return array arrays. - - /* for (auto& array_data : output_data_vecs) { - - if (array_data->type->id() == arrow::Type::LIST) { - auto child_data = array_data->child_data[0]; - //std::cout << "LR the other Projector::Evaluate modifying child array " << - //child_data->buffers[1]->ToString() << std::endl; - //std::cout << "LR the other Projector::Evaluate child array[3] " << - //int32_t( (*child_data->buffers[1])[3*4]) << std::endl; - //std::cout << "LR the other Projector::Evaluate modifying child0 array " << - //child_data->buffers[0]->ToString() << std::endl; - - int64_t child_data_size = 1; - if (arrow::is_binary_like(child_data->type->id())) { - - child_data_size = child_data->buffers[1]->size() / 4 - 1; - } else if (child_data->type->id() == arrow::Type::INT32) { - child_data_size = child_data->buffers[1]->size() / 4; - } else if (child_data->type->id() == arrow::Type::INT64) { - child_data_size = child_data->buffers[1]->size() / 8; - } else if (child_data->type->id() == arrow::Type::FLOAT) { - child_data_size = child_data->buffers[1]->size() / 4; - } else if (child_data->type->id() == arrow::Type::DOUBLE) { - child_data_size = child_data->buffers[1]->size() / 8; - } - auto new_child_data = arrow::ArrayData::Make( - child_data->type, child_data_size, child_data->buffers, child_data->offset); - array_data->child_data.clear(); - array_data->child_data.push_back(new_child_data); - - //std::cout << "LR the other Projector::Evaluate child data size " << child_data_size << std::endl; - //std::cout << "LR the other Projector::Evaluate after modifying child array[3] " << - //int32_t( (*(array_data->child_data[0])->buffers[1])[3*4]) << std::endl; - - //array_data = arrow::ArrayData::Make(array_data->type, array_data->length, - // array_data->buffers, {new_child_data}, - // array_data->null_count, array_data->offset); - } - - }*/ - - return Status::OK(); } @@ -343,14 +289,12 @@ Status Projector::Evaluate(const arrow::RecordBatch& batch, arrow::MemoryPool* p Status Projector::Evaluate(const arrow::RecordBatch& batch, const SelectionVector* selection_vector, arrow::MemoryPool* pool, arrow::ArrayVector* output) const { - //std::cout << "LR Projector::Evaluate" << std::endl; ARROW_RETURN_NOT_OK(ValidateEvaluateArgsCommon(batch)); ARROW_RETURN_IF(output == nullptr, Status::Invalid("Output must be non-null.")); ARROW_RETURN_IF(pool == nullptr, Status::Invalid("Memory pool must be non-null.")); auto num_rows = selection_vector == nullptr ? batch.num_rows() : selection_vector->GetNumSlots(); - //std::cout << "LR Projector::Evaluate num_rows" << num_rows << std::endl; // Allocate the output data vecs. ArrayDataVector output_data_vecs; for (auto& field : output_fields_) { @@ -394,7 +338,6 @@ Status Projector::Evaluate(const arrow::RecordBatch& batch, array_data = arrow::ArrayData::Make(array_data->type, array_data->length, array_data->buffers, {new_child_data}, array_data->null_count, array_data->offset); - // std::cout << "LR Making array data length " << array_data->length << std::endl; } output->push_back(arrow::MakeArray(array_data)); @@ -402,14 +345,12 @@ Status Projector::Evaluate(const arrow::RecordBatch& batch, return Status::OK(); } -// TODO : handle complex vectors (list/map/..) Status Projector::AllocArrayData(const DataTypePtr& type, int64_t num_records, arrow::MemoryPool* pool, ArrayDataPtr* array_data) const { arrow::Status astatus; std::vector> buffers; - //std::cout << "LR Projector::AllocArrayData Enter" << std::endl; // The output vector always has a null bitmap. int64_t size = arrow::bit_util::BytesForBits(num_records); ARROW_ASSIGN_OR_RAISE(auto bitmap_buffer, arrow::AllocateBuffer(size, pool)); @@ -463,22 +404,16 @@ Status Projector::AllocArrayData(const DataTypePtr& type, int64_t num_records, } buffers.push_back(std::move(data_buffer)); - - //LR TODO not sure this is needed. ARROW_ASSIGN_OR_RAISE(auto data_valid_buffer, arrow::AllocateResizableBuffer(data_len, pool)); - //std::cout << "LR Projector::AllocArrayData 1" << std::endl; if (type->id() == arrow::Type::LIST) { - // std::cout << "LR Projector::AllocArrayData List. There are number of buffers=" << buffers.size() << std::endl; auto internal_type = type->field(0)->type(); ArrayDataPtr child_data; if (arrow::is_primitive(internal_type->id())) { - //std::cout << "LR Projector::AllocArrayData List 1" << std::endl; child_data = arrow::ArrayData::Make(internal_type, 0 /*initialize length*/, {std::move(data_valid_buffer), std::move(buffers[2])}, 0); } if (arrow::is_binary_like(internal_type->id())) { - //std::cout << "LR Projector::AllocArrayData List 2" << std::endl; child_data = arrow::ArrayData::Make( internal_type, 0 /*initialize length*/, {nullptr, std::move(buffers[2]), std::move(buffers[3])}, 0); @@ -490,7 +425,6 @@ Status Projector::AllocArrayData(const DataTypePtr& type, int64_t num_records, *array_data = arrow::ArrayData::Make(type, num_records, std::move(buffers)); } - // std::cout << "LR Projector::AllocArrayData Done" << std::endl; return Status::OK(); } @@ -509,20 +443,15 @@ Status Projector::ValidateArrayDataCapacity(const arrow::ArrayData& array_data, ARROW_RETURN_IF(array_data.buffers.size() < 2, Status::Invalid("ArrayData must have at least 2 buffers")); -//std::cout << "LR ValidateArrayDataCapacity" << std::endl; int64_t min_bitmap_len = arrow::bit_util::BytesForBits(num_records); - //std::cout << "LR ValidateArrayDataCapacity arra_data 0 is " << array_data.buffers[0] << std::endl; int64_t bitmap_len = array_data.buffers[0]->capacity(); - //std::cout << "LR ValidateArrayDataCapacity" << std::endl; ARROW_RETURN_IF( bitmap_len < min_bitmap_len, Status::Invalid("Bitmap buffer too small for ", field.name(), " expected minimum ", min_bitmap_len, " actual size ", bitmap_len)); auto type_id = field.type()->id(); - //std::cout << "LR ValidateArrayDataCapacity" << std::endl; - //LR TODO - if (arrow::is_binary_like(type_id)) { //|| type_id == arrow::Type::LIST) { + if (arrow::is_binary_like(type_id)) { // validate size of offsets buffer. int64_t min_offsets_len = arrow::bit_util::BytesForBits((num_records + 1) * 32); int64_t offsets_len = array_data.buffers[1]->capacity(); diff --git a/cpp/src/gandiva/tree_expr_builder.cc b/cpp/src/gandiva/tree_expr_builder.cc index c43285843a1ee..1946aadfef16f 100644 --- a/cpp/src/gandiva/tree_expr_builder.cc +++ b/cpp/src/gandiva/tree_expr_builder.cc @@ -144,10 +144,7 @@ NodePtr TreeExprBuilder::MakeOr(const NodeVector& children) { static bool print_expr = false; ExpressionPtr TreeExprBuilder::MakeExpression(NodePtr root_node, FieldPtr result_field) { - //std::cout << "LR Expression: " << root_node->ToString() << "\n"; - if (result_field == nullptr) { - //std::cout << "LR MakeExpression result_field is null" << std::endl; return nullptr; } return ExpressionPtr(new Expression(root_node, result_field)); @@ -164,9 +161,7 @@ ExpressionPtr TreeExprBuilder::MakeExpression(const std::string& function, auto node = MakeField(field); field_nodes.push_back(node); } - //std::cout << "LR MakeExpression making function for " << function << std::endl; auto func_node = MakeFunction(function, field_nodes, out_field->type()); - //std::cout << "LR MakeExpression function is " << func_node->ToString() << std::endl; return MakeExpression(func_node, out_field); } diff --git a/java/gandiva/src/main/cpp/expression_registry_helper.cc b/java/gandiva/src/main/cpp/expression_registry_helper.cc index 0efb2e412e873..c74a1b7271788 100644 --- a/java/gandiva/src/main/cpp/expression_registry_helper.cc +++ b/java/gandiva/src/main/cpp/expression_registry_helper.cc @@ -136,6 +136,7 @@ void ArrowToProtobuf(DataTypePtr type, types::ExtGandivaType* gandiva_data_type) gandiva_data_type->set_type(types::GandivaType::INTERVAL); gandiva_data_type->set_intervaltype(types::IntervalType::DAY_TIME); break; + //LR TODO case arrow::Type::STRUCT: gandiva_data_type->set_type(types::GandivaType::STRUCT); break; @@ -146,6 +147,7 @@ void ArrowToProtobuf(DataTypePtr type, types::ExtGandivaType* gandiva_data_type) // un-supported types. test ensures that // when one of these are added build breaks. //DCHECK(false); + //LR TODO printf("LR Found unsupported type %d\n", type->id()); fflush(stdout); } @@ -176,16 +178,10 @@ Java_org_apache_arrow_gandiva_evaluator_ExpressionRegistryJniHelper_getGandivaSu JNIEXPORT jbyteArray JNICALL Java_org_apache_arrow_gandiva_evaluator_ExpressionRegistryJniHelper_getGandivaSupportedFunctions( // NOLINT JNIEnv* env, jobject types_helper) { - printf("LR Entering JNI call getGandivaSupportedFunctions\n"); - fflush(stdout); - ExpressionRegistry expr_registry; types::GandivaFunctions gandiva_functions; for (auto function = expr_registry.function_signature_begin(); function != expr_registry.function_signature_end(); function++) { - printf("LR getGandivaSupportedFunctions Functions: %s\n", (*function).base_name().c_str()); - printf("LR getGandivaSupportedFunctions Functions: %s\n", (*function).ToString().c_str()); - fflush(stdout); types::FunctionSignature* function_signature = gandiva_functions.add_function(); function_signature->set_name((*function).base_name()); diff --git a/java/gandiva/src/main/cpp/jni_common.cc b/java/gandiva/src/main/cpp/jni_common.cc index 5f5f3fb02d920..41b2593d501cd 100644 --- a/java/gandiva/src/main/cpp/jni_common.cc +++ b/java/gandiva/src/main/cpp/jni_common.cc @@ -90,11 +90,8 @@ static jmethodID listvector_expander_method_; static jfieldID vector_expander_ret_address_; static jfieldID vector_expander_ret_capacity_; static jfieldID list_expander_ret_address_; -static jfieldID list_expander_valid_address_; static jfieldID list_expander_outer_valid_address_; static jfieldID list_expander_ret_capacity_; -static jfieldID list_expander_offset_ret_address_; -static jfieldID list_expander_offset_ret_capacity_; static jclass secondary_cache_class_; static jmethodID cache_get_method_; @@ -162,14 +159,8 @@ jint JNI_OnLoad(JavaVM* vm, void* reserved) { env->GetFieldID(list_expander_ret_class_, "address", "J"); list_expander_ret_capacity_ = env->GetFieldID(list_expander_ret_class_, "capacity", "J"); - list_expander_offset_ret_address_ = - env->GetFieldID(list_expander_ret_class_, "offsetaddress", "J"); - list_expander_offset_ret_capacity_ = - env->GetFieldID(list_expander_ret_class_, "offsetcapacity", "J"); - list_expander_valid_address_ = - env->GetFieldID(list_expander_ret_class_, "validityaddress", "J"); list_expander_outer_valid_address_ = - env->GetFieldID(list_expander_ret_class_, "outervalidityaddress", "J"); + env->GetFieldID(list_expander_ret_class_, "validityaddress", "J"); jclass local_cache_class = env->FindClass("org/apache/arrow/gandiva/evaluator/JavaSecondaryCacheInterface"); @@ -337,7 +328,6 @@ FieldPtr ProtoTypeToField(const types::Field& f) { NodePtr ProtoTypeToFieldNode(const types::FieldNode& node) { FieldPtr field_ptr = ProtoTypeToField(node.field()); - //std::cout << "LR created field " << field_ptr->ToString(true) << std::endl; if (field_ptr == nullptr) { std::cerr << "Unable to create field node from protobuf\n"; return nullptr; @@ -509,7 +499,6 @@ NodePtr ProtoTypeToNullNode(const types::NullNode& node) { NodePtr ProtoTypeToNode(const types::TreeNode& node) { if (node.has_fieldnode()) { - //std::cout << "LR Found ProtoTypeToNode fieldnode " << std::endl; return ProtoTypeToFieldNode(node.fieldnode()); } @@ -558,7 +547,6 @@ NodePtr ProtoTypeToNode(const types::TreeNode& node) { } if (node.has_stringnode()) { - //std::cout << "LR Found StringNode" << std::endl; return TreeExprBuilder::MakeStringLiteral(node.stringnode().value()); } @@ -646,8 +634,6 @@ Status make_record_batch_with_buf_addrs(SchemaPtr schema, int num_rows, auto validity = std::shared_ptr( new arrow::Buffer(reinterpret_cast(validity_addr), validity_size)); buffers.push_back(validity); - //std::cout << "LR make_record_batch_with_buf_addrs adding validity_addr buffer=" << validity_addr << " idx=" << buf_idx - 1 << std::endl; - if (buf_idx >= in_bufs_len) { return Status::Invalid("insufficient number of in_buf_addrs"); } @@ -656,7 +642,6 @@ Status make_record_batch_with_buf_addrs(SchemaPtr schema, int num_rows, auto data = std::shared_ptr( new arrow::Buffer(reinterpret_cast(value_addr), value_size)); buffers.push_back(data); - // std::cout << "LR make_record_batch_with_buf_addrs adding value_addr buffer=" << value_addr << " idx=" << buf_idx - 1 << std::endl; if (arrow::is_binary_like(field->type()->id())) { if (buf_idx >= in_bufs_len) { @@ -669,9 +654,8 @@ Status make_record_batch_with_buf_addrs(SchemaPtr schema, int num_rows, auto offsets = std::shared_ptr( new arrow::Buffer(reinterpret_cast(offsets_addr), offsets_size)); buffers.push_back(offsets); - // std::cout << "LR make_record_batch_with_buf_addrs adding offsets_addr buffer=" << offsets_addr << " idx=" << buf_idx - 1 << std::endl; } -////////// + @@ -939,25 +923,15 @@ Status JavaResizableBuffer::Reserve(const int64_t new_capacity) { if (isList) { jlong ret_address = env_->GetLongField(ret, list_expander_ret_address_); jlong ret_capacity = env_->GetLongField(ret, list_expander_ret_capacity_); - jlong offset_ret_address = env_->GetLongField(ret, list_expander_offset_ret_address_); - jlong offset_ret_capacity = env_->GetLongField(ret, list_expander_offset_ret_capacity_); - jlong valid_address = env_->GetLongField(ret, list_expander_valid_address_); jlong outer_valid_address = env_->GetLongField(ret, list_expander_outer_valid_address_); std::cout << "Buffer expand: New capacity is " << new_capacity << " vector id " << vector_idx_ << " expander method " << method_ << " jexpander_ " << jexpander_ << " returned size is " << ret_capacity << - " and the original buffer ptr=" << reinterpret_cast(data_) << " and the new ptr=" << ret_address << - " and the original offset ptr=" << reinterpret_cast(offsetBuffer) << " and the new ptr=" << offset_ret_address << std::endl; + " and the original buffer ptr=" << reinterpret_cast(data_) << " and the new ptr=" << ret_address << std::endl; data_ = reinterpret_cast(ret_address); capacity_ = ret_capacity; - - offsetBuffer = reinterpret_cast(offset_ret_address); - offsetCapacity = offset_ret_capacity; - std::cout << "LR Setting buffer validityBuffer to " << validityBuffer << std::endl; - validityBuffer = reinterpret_cast(valid_address); - outerValidityBuffer = reinterpret_cast(outer_valid_address); } else { jlong ret_address = env_->GetLongField(ret, vector_expander_ret_address_); jlong ret_capacity = env_->GetLongField(ret, vector_expander_ret_capacity_); @@ -1003,7 +977,6 @@ Java_org_apache_arrow_gandiva_evaluator_JniWrapper_evaluateProjector( jlongArray buf_addrs, jlongArray buf_sizes, jint sel_vec_type, jint sel_vec_rows, jlong sel_vec_addr, jlong sel_vec_size, jlongArray out_buf_addrs, jlongArray out_buf_sizes) { - //std::cout << "LR Java_org_apache_arrow_gandiva_evaluator_JniWrapper_evaluateProjector " << std::endl; Status status; std::shared_ptr holder = projector_modules_.Lookup(module_id); if (holder == nullptr) { @@ -1039,21 +1012,6 @@ Java_org_apache_arrow_gandiva_evaluator_JniWrapper_evaluateProjector( if (!status.ok()) { break; } - /*std::cout << "LR Java_org_apache_arrow_gandiva_evaluator_JniWrapper_evaluateProjector " - << " Made a recordbatch num_rows " << num_rows - << in_batch->ToString() - << " there are " << out_bufs_len << " buffers " - << std::endl;*/ - //std::cout << "LR Java_org_apache_arrow_gandiva_evaluator_JniWrapper_evaluateProjector " - //<< " there are " << out_bufs_len << " buffers " - //<< std::endl; - //for (int i = 0; i < out_bufs_len; i++) { - // std::cout << "LR Java_org_apache_arrow_gandiva_evaluator_JniWrapper_evaluateProjector " - // << " buffer " << i - // << "length " << out_sizes[i] - // << std::endl; - // } - std::shared_ptr selection_vector; auto selection_buffer = std::make_shared( reinterpret_cast(sel_vec_addr), sel_vec_size); @@ -1089,14 +1047,12 @@ Java_org_apache_arrow_gandiva_evaluator_JniWrapper_evaluateProjector( for (FieldPtr field : ret_types) { std::vector> buffers; - // std::cout << "LR Java_org_apache_arrow_gandiva_evaluator_JniWrapper_evaluateProjector -2 adding buffer idx=" << buf_idx << std::endl; CHECK_OUT_BUFFER_IDX_AND_BREAK(buf_idx, out_bufs_len); uint8_t* validity_buf = reinterpret_cast(out_bufs[buf_idx++]); jlong bitmap_sz = out_sizes[sz_idx++]; buffers.push_back(std::make_shared(validity_buf, bitmap_sz)); if (arrow::is_binary_like(field->type()->id())) { - // std::cout << "LR Java_org_apache_arrow_gandiva_evaluator_JniWrapper_evaluateProjector -1 adding bufferbuffer idx=" << buf_idx << std::endl; CHECK_OUT_BUFFER_IDX_AND_BREAK(buf_idx, out_bufs_len); uint8_t* offsets_buf = reinterpret_cast(out_bufs[buf_idx++]); jlong offsets_sz = out_sizes[sz_idx++]; @@ -1115,13 +1071,9 @@ Java_org_apache_arrow_gandiva_evaluator_JniWrapper_evaluateProjector( break; } - // std::cout << "LR Java_org_apache_arrow_gandiva_evaluator_JniWrapper_evaluateProjector 1 adding buffer buffer idx=" << buf_idx - 1 << " size=" << data_sz << std::endl; - buffers.push_back(std::make_shared( + buffers.push_back(std::make_shared( env, jexpander, vector_expander_method_, output_vector_idx, value_buf, data_sz)); } else if (field->type()->id() == arrow::Type::LIST) { - // std::cout << "LR Java_org_apache_arrow_gandiva_evaluator_JniWrapper_evaluateProjector 2 adding list offset buffer idx=" << buf_idx - 1 << " size=" << data_sz << std::endl; - // std::cout << " size=" << out_sizes[sz_idx - 1] << " outsize index=" << sz_idx - 1 << " address " << out_bufs[buf_idx - 1] - // << " output_vector_idx=" << output_vector_idx << std::endl; buffers.push_back(std::make_shared( env, jexpander, vector_expander_method_, output_vector_idx, value_buf, data_sz)); } else { @@ -1141,17 +1093,11 @@ Java_org_apache_arrow_gandiva_evaluator_JniWrapper_evaluateProjector( } data_sz = out_sizes[sz_idx++]; - // std::cout << "LR Java_org_apache_arrow_gandiva_evaluator_JniWrapper_evaluateProjector 3 adding child nbuffer " << buf_idx - // << " size=" << data_sz << std::endl; CHECK_OUT_BUFFER_IDX_AND_BREAK(buf_idx, out_bufs_len); uint8_t* child_offset_buf = reinterpret_cast(out_bufs[buf_idx++]); child_buffers.push_back(std::make_shared( env, jListExpander, listvector_expander_method_, output_vector_idx, child_offset_buf, data_sz)); - - // std::cout << "LR Java_org_apache_arrow_gandiva_evaluator_JniWrapper_evaluateProjector 4 adding child buffer " << buf_idx - // << " size=" << out_sizes[sz_idx] << " outsize index=" << sz_idx << " address " << out_bufs[buf_idx] - // << " output_vector_idx=" << output_vector_idx << std::endl; data_sz = out_sizes[sz_idx++]; CHECK_OUT_BUFFER_IDX_AND_BREAK(buf_idx, out_bufs_len); uint8_t* child_data_buf = reinterpret_cast(out_bufs[buf_idx++]); @@ -1161,7 +1107,7 @@ Java_org_apache_arrow_gandiva_evaluator_JniWrapper_evaluateProjector( outBufJava->offsetBuffer = reinterpret_cast(out_bufs[1]); outBufJava->offsetCapacity = out_sizes[1]; outBufJava->validityBuffer = reinterpret_cast(out_bufs[2]); - outBufJava->outerValidityBuffer = reinterpret_cast(out_bufs[0]); + //outBufJava->outerValidityBuffer = reinterpret_cast(out_bufs[0]); child_buffers.push_back(outBufJava); std::shared_ptr dt2 = std::make_shared(); diff --git a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/ListVectorExpander.java b/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/ListVectorExpander.java index 3b2778c7f21a4..4430674d19a72 100644 --- a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/ListVectorExpander.java +++ b/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/ListVectorExpander.java @@ -20,41 +20,34 @@ import org.apache.arrow.vector.complex.ListVector; /** - * This class provides the functionality to expand output vectors using a callback mechanism from + * This class provides the functionality to expand output ListVectors using a callback mechanism from * gandiva. */ public class ListVectorExpander { - private final ListVector[] vectors; + private final ListVector[] bufferVectors; - public ListVectorExpander(ListVector[] vectors) { - this.vectors = vectors; + public ListVectorExpander(ListVector[] bufferVectors) { + this.bufferVectors = bufferVectors; } /** - * Result of vector expansion. + * Result of ListVector expansion. */ public static class ExpandResult { public long address; public long capacity; - public long offsetaddress; - public long offsetcapacity; public long validityaddress; - public long outervalidityaddress; /** - * fdsfsdfds. - * @param address dsfds - * @param capacity dfsdf - * @param offsetad dsfdsfsd - * @param offsetcap dfsfs + * Result of expanding the buffer. + * @param address Data buffer address + * @param capacity Capacity + * @param validAdd Validity buffer address * */ - public ExpandResult(long address, long capacity, long offsetad, long offsetcap, long outValidAdd, long validAdd) { + public ExpandResult(long address, long capacity, long validAdd) { this.address = address; this.capacity = capacity; - this.offsetaddress = offsetad; - this.offsetcapacity = offsetcap; - this.outervalidityaddress = outValidAdd; this.validityaddress = validAdd; } } @@ -69,50 +62,22 @@ public ExpandResult(long address, long capacity, long offsetad, long offsetcap, * @return address and size of the buffer after expansion. */ public ExpandResult expandOutputVectorAtIndex(int index, long toCapacity) { - if (index >= vectors.length || vectors[index] == null) { + if (index >= bufferVectors.length || bufferVectors[index] == null) { throw new IllegalArgumentException("invalid index " + index); } - - //ArrowBuf ab = vectors[index].getValidityBuffer(); - //String s = "Before validity = ["; - //for (int i = 0; i < 20; i++) { - // s += ab.getInt(i) + ","; - //} - //System.out.println(s); - - int valueBufferIndex = 1; - int validBufferIndex = 0; - ListVector vector = vectors[index]; + int validityBufferIndex = 0; + ListVector vector = bufferVectors[index]; while (vector.getDataVector().getFieldBuffers().get(valueBufferIndex).capacity() < toCapacity) { //Just realloc the data vector. vector.getDataVector().reAlloc(); } - System.out.println("LR Expanding ListVector. New capacity=" + - vector.getDataVector().getFieldBuffers().get(valueBufferIndex).capacity()); - System.out.println("LR Expanding ListVector. new data is "); - /*ArrowBuf ab2 = vector.getValidityBuffer(); - s = "After validity = ["; - for (int i = 0; i < 20; i++) { - s += ab2.getInt(i) + ","; - } - System.out.println(s);*/ - /*ArrowBuf ab = vector.getOffsetBuffer(); - String s = "offsetBuffer = ["; - for (int i = 0; i < 20; i++) { - s += ab.getInt(i) + ","; - } - System.out.println(s); - */ return new ExpandResult( vector.getDataVector().getFieldBuffers().get(valueBufferIndex).memoryAddress(), vector.getDataVector().getFieldBuffers().get(valueBufferIndex).capacity(), - vector.getOffsetBuffer().memoryAddress(), - vector.getOffsetBuffer().capacity(), - vector.getValidityBuffer().memoryAddress(), - vector.getDataVector().getFieldBuffers().get(validBufferIndex).memoryAddress()); + vector.getDataVector().getFieldBuffers().get(validityBufferIndex).memoryAddress()); } } diff --git a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/Projector.java b/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/Projector.java index 61e6c8ffacc39..7d677927f0ced 100644 --- a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/Projector.java +++ b/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/Projector.java @@ -332,7 +332,6 @@ private void evaluate(int numRows, List buffers, List buf throw new EvaluatorClosedException(); } - logger.error("LR Projector.java evaluate"); if (numExprs != outColumns.size()) { logger.info("Expected " + numExprs + " columns, got " + outColumns.size()); throw new GandivaException("Incorrect number of columns for the output vector"); @@ -362,19 +361,11 @@ private void evaluate(int numRows, List buffers, List buf int outColumnIdx = 0; for (ValueVector valueVector : outColumns) { if (valueVector instanceof ListVector) { - //LR HACK there is only one column. - logger.error("LR Projector.java evaluate out columns=" + outColumns.size()); outAddrs = new long[5 * outColumns.size()]; outSizes = new long[5 * outColumns.size()]; } - /*boolean isFixedWith = valueVector instanceof FixedWidthVector;*/ boolean isVarWidth = valueVector instanceof VariableWidthVector; - /*if (!isFixedWith && !isVarWidth) { - throw new UnsupportedTypeException( - "Unsupported value vector type " + valueVector.getField().getFieldType()); - }*/ - outAddrs[idx] = valueVector.getValidityBuffer().memoryAddress(); outSizes[idx++] = valueVector.getValidityBuffer().capacity(); if (isVarWidth) { @@ -393,51 +384,17 @@ private void evaluate(int numRows, List buffers, List buf if (valueVector instanceof ListVector) { hasVariableWidthColumns = true; resizableListVectors[outColumnIdx] = (ListVector) valueVector; - //LR TODO figure out what to use here resizableVectors[outColumnIdx] = (BaseVariableWidthVector) valueVector; - //resizableVectors[outColumnIdx] = (BaseVariableWidthVector) valueVector; - //resizeableVectors[outColumnIdx] = ((ListVector) valueVector).getDataVector().getFieldBuffers().get(0); - List fieldBufs = ((ListVector) valueVector).getDataVector().getFieldBuffers(); - logger.error("LR Projector.java evaluate ListVector has buffers=" + fieldBufs.size()); - - - logger.error("LR Projector.java evaluate isVarlistvector Width setting buffer=" + idx); outAddrs[idx] = valueVector.getOffsetBuffer().memoryAddress(); outSizes[idx++] = valueVector.getOffsetBuffer().capacity(); //vector valid - logger.error("LR Projector.java evaluate isVarlistvector Width setting vector validity buffer=" + idx); - //outAddrs[idx] = ((ListVector) valueVector).getDataVector().getValidityBufferAddress(); - //outSizes[idx++] = ((ListVector) valueVector).getDataVector().getFieldBuffers().get(0).capacity(); outAddrs[idx] = ((ListVector) valueVector).getDataVector().getFieldBuffers().get(0).memoryAddress(); outSizes[idx++] = ((ListVector) valueVector).getDataVector().getFieldBuffers().get(0).capacity(); //vector offset - logger.error("LR Projector.java evaluate ListVector passing data buffer as " + idx); - - logger.error("LR Projector.java evaluate isVarlistvector Width setting buffer=" + idx); - //The realloc avoids dynamic resizing, will have to be fixed later. outAddrs[idx] = ((ListVector) valueVector).getDataVector().getFieldBuffers().get(1).memoryAddress(); outSizes[idx++] = ((ListVector) valueVector).getDataVector().getFieldBuffers().get(1).capacity(); - //logger.error("LR Projector.java evaluate ListVector set buffer " + idx + - // " as ptr=" + outAddrs[idx - 1] + " size " + outSizes[idx - 1]); - - //vector data - //outAddrs[idx] = ((ListVector) valueVector).getDataVector().getFieldBuffers().get(2).memoryAddress(); - //outSizes[idx++] = ((ListVector) valueVector).getDataVector().getFieldBuffers().get(2).capacity(); - - //LR HACK TODO ((ListVector) valueVector).getDataVector().capacity(); - - - - - - - - - - - } else { outAddrs[idx] = valueVector.getDataBuffer().memoryAddress(); outSizes[idx++] = valueVector.getDataBuffer().capacity(); @@ -446,10 +403,6 @@ private void evaluate(int numRows, List buffers, List buf valueVector.setValueCount(selectionVectorRecordCount); outColumnIdx++; } - - //logger.error("LR Projector.java evaluate calling evaluateProjector with buffers=" + idx); - //logger.error("LR Projector.java before evaluateProjector buffer[3]=" + outAddrs[3]); - //logger.error("LR Projector.java before evaluateProjector buffer[1]=" + outAddrs[1]); wrapper.evaluateProjector( hasVariableWidthColumns ? new VectorExpander(resizableVectors) : null, hasVariableWidthColumns ? new ListVectorExpander(resizableListVectors) : null, @@ -458,286 +411,12 @@ private void evaluate(int numRows, List buffers, List buf selectionVectorAddr, selectionVectorSize, outAddrs, outSizes); - //outColumns.clear(); - //FieldType ft = new FieldType(true, int32, null); - //ListVector lv = new ListVector("res", allocator, ft, null); - //System.out.println(intVector.getDataVector()); - - - //logger.error("LR Projector.java after evaluateProjector buffer[3]=" + outAddrs[3]); - //logger.error("LR Projector.java after evaluateProjector buffer[1]=" + outAddrs[1]); for (ValueVector valueVector : outColumns) { if (valueVector instanceof ListVector) { - //LR HACK - - //int numRecordsFound = 5 * 100; - //int numRecordsFound = Math.toIntExact(outSizes[3]) / 4; - //logger.error("LR Projector.java using numRecords=" + numRecordsFound + " outSizes[3]=" + outSizes[3]); - - //LR HACK 9-13 10:34 - /*public void startList() { - vector.startNewValue(idx()); - writer.setPosition(vector.getOffsetBuffer().getInt((idx() + 1L) * OFFSET_WIDTH)); - listStarted = true; - } - - @Override - public void endList() { - vector.getOffsetBuffer().setInt((idx() + 1L) * OFFSET_WIDTH, writer.idx()); - setPosition(idx() + 1); - listStarted = false; - */ - - //ArrowBuf ab = new ArrowBuf(ReferenceManager.NO_OP, null, outSizes[2], outAddrs[2]); - - - //ArrowBuf ab2 = new ArrowBuf(ReferenceManager.NO_OP, null, outSizes[3], outAddrs[3]); - - // logger.error("LR Projector.java using numRecords=" + - // selectionVectorRecordCount + " outSizes[3]=" + outSizes[3]); - - //import org.apache.arrow.vector.complex.impl.UnionListWriter; - /*UnionListWriter writer = ((ListVector) valueVector).getWriter(); - for (int i = 0; i < selectionVectorRecordCount; i++) { - writer.startList(); - writer.setPosition(i); - for (int j = 0; j < 5; j++) { - int index = ((j + (5 * i)) * 4); - //Not sure whats going on. Buffer too small? - try { - writer.writeInt(ab2.getInt(index)); - //writer.writeInt(42); - } catch (IndexOutOfBoundsException e) { - continue; - } - } - writer.setValueCount(5); - writer.endList(); - } - ((ListVector) valueVector).setValueCount(selectionVectorRecordCount);*/ - - - //offsetBuffer = [0,83886080,327680,1280,5,167772160,655360,2560,10,251658240,983040,3840,15, - //335544320,1310720,5120,20, - //419430400,1638400,6400,25,503316480,1966080,7680,30,587202560,2293760,8960,35,671088640,2621440,10240,40, - //754974720,2949120,11520, - - - - - - - - - - - /* - String s = ""; - List fv = ((ListVector) valueVector).getDataVector().getFieldBuffers(); - for (ArrowBuf ab : fv) { - s = ""; - for (int i = 0; i < 20; i++) { - s += ab.getInt(i) + ","; - } - logger.error("LR Projector.java before updating listvector. size=" + - ab.capacity() + " buffer=" + s); - } - - ArrowBuf fvv = ((ListVector) valueVector).getValidityBuffer(); - s = ""; - for (int i = 0; i < 20; i++) { - s += fvv.getInt(i) + ","; - } - logger.error("LR Projector.java before updating listvector. getValidityBuffer=" + - fvv.capacity() + " buffer=" + s); - - ArrowBuf fvvv = ((ListVector) valueVector).getOffsetBuffer(); - s = ""; - for (int i = 0; i < 20; i++) { - s += fvvv.getInt(i) + ","; - } - logger.error("LR Projector.java before updating listvector. getOffsetBuffer=" + - fvvv.capacity() + " buffer=" + s); - */ - - - - - - - //((ListVector) valueVector).getDataVector().setValueCount(selectionVectorRecordCount * 5); - + //LR TODO check if this is necessary. ((ListVector) valueVector).setLastSet(selectionVectorRecordCount - 1); - - /* - ArrowBuf mabb2 = new ArrowBuf(ReferenceManager.NO_OP, null, outSizes[2], outAddrs[2]); - s = "validity? buffer mabb2, outAddrs[2]="; - for (int i = 0; i < 20; i++) { - s += mabb2.getInt(i) + ","; - } - System.out.println(s); - */ - /* - //Validity then data. - ArrowBuf abb = new ArrowBuf(ReferenceManager.NO_OP, null, outSizes[2], outAddrs[2]); - ArrowBuf abb2 = new ArrowBuf(ReferenceManager.NO_OP, null, outSizes[3], outAddrs[3]); - List outBufsNew = new ArrayList(); - - //outBufsNew.add(ab0); - outBufsNew.add(abb); - outBufsNew.add(abb2); - ArrowFieldNode afn = new ArrowFieldNode(selectionVectorRecordCount * 5, 0); - ((ListVector) valueVector).getDataVector().clear(); - ((ListVector) valueVector).getDataVector().loadFieldBuffers(afn, outBufsNew); - - //TODO Need to get validity [0] and offset [1] buffer for the listvector. - //((ListVector) valueVector).getDataVector().loadFieldBuffers(afn, outBufsNew); - - List outBufsNew2 = new ArrayList(); - - - - ArrowBuf mabb22 = new ArrowBuf(ReferenceManager.NO_OP, null, selectionVectorRecordCount, outAddrs[0]); - for (int i = 0; i < selectionVectorRecordCount; i++) { - BitVectorHelper.setBit(mabb22, i); - } - - ArrowBuf mabb2 = new ArrowBuf(ReferenceManager.NO_OP, null, outSizes[1], outAddrs[1]); - //for (int i = 0; i < selectionVectorRecordCount; i++) { - // mabb2.setInt(i * 4, 5 * i); - //} - s = "offset? buffer mabb2, outAddrs[0]="; - for (int i = 0; i < 20; i++) { - s += mabb2.getInt(i) + ","; - } - System.out.println(s); - - outBufsNew2.add(mabb22); - outBufsNew2.add(mabb2); - ArrowFieldNode afn2 = new ArrowFieldNode(selectionVectorRecordCount, 0); - ((ListVector) valueVector).loadFieldBuffers(afn2, outBufsNew2); - - - */ - - //((ListVector) valueVector).setValueCount(selectionVectorRecordCount); - //((ListVector) valueVector).getDataVector().setValueCount(selectionVectorRecordCount); - - /*TODO NEeD THIS int simple = 0; - try { - for (int i = 0; i < selectionVectorRecordCount * 5; i++) { - BitVectorHelper.setBit(((ListVector) valueVector).getDataVector().getValidityBuffer(), i); - simple++; - } - } catch (IndexOutOfBoundsException e) { - simple = 0; - } - */ - /* int simple = 0; - import org.apache.arrow.vector.BitVectorHelper; - try { - for (int i = 0; i < selectionVectorRecordCount; i++) { - BitVectorHelper.setBit(((ListVector) valueVector).getValidityBuffer(), i); - simple++; - } - } catch (IndexOutOfBoundsException e) { - simple = 0; - } -*/ - - - - - - /* - - - - try { - for (int i = 0; i < selectionVectorRecordCount; i++) { - BitVectorHelper.setBit(((ListVector) valueVector).getValidityBuffer(), i); - simple++; - } - } catch (IndexOutOfBoundsException e) { - simple = 0; - } - - - for (int i = 0; i < selectionVectorRecordCount; i++) { - ((ListVector) valueVector).getOffsetBuffer().setInt(i * 4, 5 * i); - } - */ - - - - - - - - //LR HACK 9-13 10:34 All the multiline comment - /* - import org.apache.arrow.memory.ReferenceManager; - import org.apache.arrow.vector.BitVectorHelper; - import org.apache.arrow.vector.ipc.message.ArrowFieldNode; - */ - //ArrowBuf ab0 = new ArrowBuf(ReferenceManager.NO_OP, null, outSizes[2], outAddrs[2]); - /*ArrowBuf abb = new ArrowBuf(ReferenceManager.NO_OP, null, outSizes[2], outAddrs[2]); - ArrowBuf abb2 = new ArrowBuf(ReferenceManager.NO_OP, null, outSizes[3], outAddrs[3]); - List outBufsNew = new ArrayList(); - - StringBuilder sbb = new StringBuilder(); - abb.print(sbb, 1); - System.out.println("LR abb=" + sbb); - - //outBufsNew.add(ab0); - outBufsNew.add(abb); - outBufsNew.add(abb2); - ArrowFieldNode afn = new ArrowFieldNode(numRecordsFound, 0); - ((ListVector) valueVector).getDataVector().clear(); - ((ListVector) valueVector).getDataVector().loadFieldBuffers(afn, outBufsNew); - - //LR HACK 9-12 10:09 - //ArrowBuf offBuff = ((ListVector) valueVector).getOffsetBuffer(); - //for (int i = 0; i < 101; i++) { - // offBuff.setInt(i, 5 * i * 4); - //} - - - - - - //byte[] valid = new byte[outsizes[2]]; - //LR HACK - //for (int i = 0; i < outSizes[2]; i++) { - int simple = 0; - try { - for (int i = 0; i < numRecordsFound * 4; i++) { - BitVectorHelper.setBit(((ListVector) valueVector).getDataVector().getValidityBuffer(), i); - simple++; - //BitVectorHelper.setBit(((ListVector) valueVector).getValidityBuffer(), i); - } - } catch (IndexOutOfBoundsException e) { - simple = 0; - } - ArrowBuf ab3 = ((ListVector) valueVector).getDataVector().getFieldBuffers().get(0); - for (int i = 0; i < 50; i++) { - System.out.println("LR arrowbuf after=" + Integer.reverseBytes(ab3.getInt(i))); - System.out.println("LR arrowbuf after=" + ab3.getInt(i)); - System.out.println("LR arrowbuf after=" + ab3.getShort(i)); - } - ArrowBuf ab3a = ((ListVector) valueVector).getDataVector().getFieldBuffers().get(1); - for (int i = 0; i < 50; i++) { - System.out.println("LR arrowbuf aftera=" + Integer.reverseBytes(ab3a.getInt(i))); - System.out.println("LR arrowbuf aftera=" + ab3a.getInt(i)); - System.out.println("LR arrowbuf aftera=" + ab3a.getShort(i)); - } - IntVector iv = (IntVector) ((ListVector) valueVector).getDataVector(); - for (int i = 0; i < 50; i++) { - System.out.println("LR IntVector=" + iv.get(i)); - }*/ } } - } /** diff --git a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/VectorExpander.java b/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/VectorExpander.java index d3c75413957a1..f22ebbd37878f 100644 --- a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/VectorExpander.java +++ b/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/VectorExpander.java @@ -57,7 +57,6 @@ public ExpandResult expandOutputVectorAtIndex(int index, long toCapacity) { throw new IllegalArgumentException("invalid index " + index); } - System.out.println("LR Expanding VectorExpander."); BaseVariableWidthVector vector = vectors[index]; while (vector.getDataBuffer().capacity() < toCapacity) { vector.reallocDataBuffer();