From ff0f9ab55de45c03d5f226b5e8cf591cbc8d2530 Mon Sep 17 00:00:00 2001 From: Logan Riggs Date: Wed, 1 Nov 2023 11:11:31 -0700 Subject: [PATCH] More cleanup. --- build_release.sh | 34 -- build_testing.sh | 38 -- cpp/src/arrow/type_fwd.h | 10 +- cpp/src/gandiva/array_ops.cc | 401 +++++------------- cpp/src/gandiva/array_ops.h | 7 +- cpp/src/gandiva/array_ops_test.cc | 26 -- cpp/src/gandiva/bitmap_accumulator.h | 1 - cpp/src/gandiva/compiled_expr.h | 3 +- cpp/src/gandiva/engine.cc | 5 +- cpp/src/gandiva/function_registry.cc | 1 - cpp/src/gandiva/function_registry_array.cc | 21 +- cpp/src/gandiva/function_signature.cc | 1 - cpp/src/gandiva/llvm_generator.cc | 73 +--- cpp/src/gandiva/llvm_generator_test.cc | 68 --- cpp/src/gandiva/llvm_types.h | 9 - cpp/src/gandiva/tests/list_test.cc | 354 +--------------- cpp/src/gandiva/tree_expr_builder.cc | 2 +- .../main/cpp/expression_registry_helper.cc | 34 +- java/gandiva/src/main/cpp/jni_common.cc | 20 +- .../gandiva/evaluator/ExpressionRegistry.java | 2 - .../arrow/gandiva/evaluator/Projector.java | 2 - .../gandiva/expression/ArrowTypeHelper.java | 5 - .../gandiva/expression/FunctionNode.java | 1 - .../arrow/gandiva/expression/TreeBuilder.java | 15 - .../gandiva/evaluator/ProjectorTest.java | 50 --- 25 files changed, 134 insertions(+), 1049 deletions(-) delete mode 100755 build_release.sh delete mode 100755 build_testing.sh diff --git a/build_release.sh b/build_release.sh deleted file mode 100755 index 5afaff588237c..0000000000000 --- a/build_release.sh +++ /dev/null @@ -1,34 +0,0 @@ -rm -rf cpp-jni java-dist java-jni cpp/debug -mkdir cpp/debug -cd cpp/debug - -arch -x86_64 cmake -DCMAKE_BUILD_TYPE=RELEASE -DARROW_GANDIVA=ON -DARROW_JEMALLOC=OFF -DARROW_GANDIVA_JAVA=ON -DARROW_BUILD_TESTS=OFF .. -arch -x86_64 make -j 8 -if [ $? -ne 0 ] -then - echo "failed" - exit 1 -fi - -cd ../../ -mkdir -p java-jni cpp-jni - -arch -x86_64 cmake -S cpp -B cpp-jni -DARROW_BUILD_SHARED=OFF -DARROW_JEMALLOC=OFF -DARROW_CSV=ON -DARROW_DATASET=ON -DARROW_DEPENDENCY_SOURCE=BUNDLED -DARROW_DEPENDENCY_USE_SHARED=OFF -DARROW_FILESYSTEM=ON -DARROW_GANDIVA=ON -DARROW_GANDIVA_STATIC_LIBSTDCPP=ON -DARROW_ORC=ON -DARROW_PARQUET=ON -DARROW_S3=ON -DARROW_USE_CCACHE=ON -DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_INSTALL_LIBDIR=lib/x86_64 -DCMAKE_INSTALL_PREFIX=java-dist -DCMAKE_UNITY_BUILD=ON -arch -x86_64 cmake --build cpp-jni --target install --config Release -if [ $? -ne 0 ] -then - echo "failed" - exit 1 -fi - -arch -x86_64 cmake -S java -B java-jni -DARROW_JAVA_JNI_ENABLE_C=OFF -DARROW_JEMALLOC=OFF -DARROW_JAVA_JNI_ENABLE_DEFAULT=ON -DBUILD_TESTING=OFF -DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_INSTALL_LIBDIR=lib/x86_64 -DCMAKE_INSTALL_PREFIX=java-dist -DCMAKE_PREFIX_PATH=$PWD/java-dist/lib/x86_64/cmake -arch -x86_64 cmake --build java-jni --target install --config Release -if [ $? -ne 0 ] -then - echo "failed" - exit 1 -fi - -cd java -/opt/homebrew/bin/mvn -DskipTests -Darrow.c.jni.dist.dir=/Users/logan.riggs/github/arrow/java-dist/lib -Darrow.cpp.build.dir=/Users/logan.riggs/github/arrow/java-dist/lib -Parrow-jni clean install -cp gandiva/target/arrow-gandiva-12.0.1.jar /Users/logan.riggs/github/dremio/enterprise/distribution/server/target/dremio-enterprise-24.3.0-SNAPSHOT/dremio-enterprise-24.3.0-SNAPSHOT/jars/3rdparty/ diff --git a/build_testing.sh b/build_testing.sh deleted file mode 100755 index 9604ba5678ff2..0000000000000 --- a/build_testing.sh +++ /dev/null @@ -1,38 +0,0 @@ -rm -rf cpp-jni java-dist java-jni cpp/debug -mkdir cpp/debug -cd cpp/debug - -echo "====CPP====" -arch -x86_64 cmake -DCMAKE_BUILD_TYPE=DEBUG -DARROW_GANDIVA=ON -DARROW_JEMALLOC=OFF -DARROW_GANDIVA_JAVA=ON -DARROW_BUILD_TESTS=ON .. -arch -x86_64 make -j 8 -if [ $? -ne 0 ] -then - echo "failed" - exit 1 -fi - -cd ../../ -mkdir -p java-jni cpp-jni - -echo "====CPP-JNI====" -arch -x86_64 cmake -S cpp -B cpp-jni -DARROW_BUILD_SHARED=OFF -DARROW_JEMALLOC=OFF -DARROW_CSV=ON -DARROW_DATASET=ON -DARROW_DEPENDENCY_SOURCE=BUNDLED -DARROW_DEPENDENCY_USE_SHARED=OFF -DARROW_FILESYSTEM=ON -DARROW_GANDIVA=ON -DARROW_GANDIVA_STATIC_LIBSTDCPP=ON -DARROW_ORC=ON -DARROW_PARQUET=ON -DARROW_S3=ON -DARROW_USE_CCACHE=ON -DCMAKE_BUILD_TYPE=Debug -DCMAKE_INSTALL_LIBDIR=lib/x86_64 -DCMAKE_INSTALL_PREFIX=java-dist -DCMAKE_UNITY_BUILD=ON -arch -x86_64 cmake --build cpp-jni --target install --config Debug -if [ $? -ne 0 ] -then - echo "failed" - exit 1 -fi - -echo "====JAVA-JNI====" -arch -x86_64 cmake -S java -B java-jni -DARROW_JAVA_JNI_ENABLE_C=OFF -DARROW_JEMALLOC=OFF -DARROW_JAVA_JNI_ENABLE_DEFAULT=ON -DBUILD_TESTING=OFF -DCMAKE_BUILD_TYPE=Debug -DCMAKE_INSTALL_LIBDIR=lib/x86_64 -DCMAKE_INSTALL_PREFIX=java-dist -DCMAKE_PREFIX_PATH=$PWD/java-dist/lib/x86_64/cmake -DArrowTesting_DIR=$PWD/cpp/debug/src/arrow -arch -x86_64 cmake --build java-jni --target install --config Debug -if [ $? -ne 0 ] -then - echo "failed" - exit 1 -fi - -echo "====JARS====" -cd java -/opt/homebrew/bin/mvn -DskipTests -Darrow.c.jni.dist.dir=/Users/logan.riggs/github/arrow-fork/arrow/java-dist/lib -Darrow.cpp.build.dir=/Users/logan.riggs/github/arrow-fork/arrow/java-dist/lib -Parrow-jni clean install -cp java/gandiva/target/arrow-gandiva-12.0.1.jar /Users/logan.riggs/github/dremio/enterprise/distribution/server/target/dremio-enterprise-24.3.0-SNAPSHOT/dremio-enterprise-24.3.0-SNAPSHOT/jars/3rdparty/ diff --git a/cpp/src/arrow/type_fwd.h b/cpp/src/arrow/type_fwd.h index 66fd6c75f0ddb..657abbaecc42b 100644 --- a/cpp/src/arrow/type_fwd.h +++ b/cpp/src/arrow/type_fwd.h @@ -123,11 +123,6 @@ class StringArray; class StringBuilder; struct StringScalar; -class StructType; -class StructArray; -class StructBuilder; -struct StructScalar; - class LargeStringType; class LargeStringArray; class LargeStringBuilder; @@ -153,6 +148,11 @@ class FixedSizeListArray; class FixedSizeListBuilder; struct FixedSizeListScalar; +class StructType; +class StructArray; +class StructBuilder; +struct StructScalar; + class Decimal128; class Decimal256; class DecimalType; diff --git a/cpp/src/gandiva/array_ops.cc b/cpp/src/gandiva/array_ops.cc index 96052f58c92de..802b4e20947bf 100644 --- a/cpp/src/gandiva/array_ops.cc +++ b/cpp/src/gandiva/array_ops.cc @@ -27,45 +27,73 @@ #include "gandiva/engine.h" #include "gandiva/exported_funcs.h" -//LR TODO -namespace { - bool floatsEqual(float l, float r) { - return (l - r < 0.001 && r - l < 0.001); - } - - bool doublesEqual(double l, double r) { - return (l - r < 0.001 && r - l < 0.001); - } -} /// Stub functions that can be accessed from LLVM or the pre-compiled library. -extern "C" { - -bool array_utf8_contains_utf8(int64_t context_ptr, const char* entry_buf, - int32_t* entry_child_offsets, int32_t entry_offsets_len, - const char* contains_data, int32_t contains_data_length, +template +Type* array_remove_template(int64_t context_ptr, const Type* entry_buf, + int32_t entry_len, const int32_t* entry_validity, bool combined_row_validity, + Type remove_data, bool entry_validWhat, int64_t loop_var, int64_t validity_index_var, - bool* valid_row) { - for (int i = 0; i < entry_offsets_len; i++) { - int32_t entry_len = *(entry_child_offsets + i + 1) - *(entry_child_offsets + i); - if (entry_len != contains_data_length) { - entry_buf = entry_buf + entry_len; - continue; - } - if (strncmp(entry_buf, contains_data, contains_data_length) == 0) { - return true; + bool* valid_row, int32_t* out_len, int32_t** valid_ptr) +{ + std::vector newInts; + + const int32_t* entry_validityAdjusted = entry_validity - (loop_var ); + int64_t validityBitIndex = 0; + //The validity index already has the current row length added to it, so decrement. + validityBitIndex = validity_index_var - entry_len; + entry_validWhat = true; + std::vector outValid; + for (int i = 0; i < entry_len; i++) { + Type entry_item = *(entry_buf + (i * 1)); + std::cout << "LR TODO checking " << entry_item << std::endl; + if (entry_item == remove_data) { + //Do not add the item to remove. + } else if (!arrow::bit_util::GetBit(reinterpret_cast(entry_validityAdjusted), validityBitIndex + i)) { + outValid.push_back(false); + newInts.push_back(0); + std::cout << "LR TODO not valid! " << i << std::endl; + } else { + outValid.push_back(true); + newInts.push_back(entry_item); + std::cout << "LR TODO valid " << i << std::endl; } - entry_buf = entry_buf + entry_len; } - return false; + + *out_len = (int)newInts.size(); + + //Since this function can remove values we don't know the length ahead of time. + //A fast way to compute Math.ceil(input / 8.0). + int validByteSize = (unsigned int)((*out_len) + 7) >> 3; + std::cout << "LR TODO out_len=" << *out_len << " valid byte length is " << validByteSize << std::endl; + + uint8_t* validRet = gdv_fn_context_arena_malloc(context_ptr, validByteSize); + for (int i = 0; i < outValid.size(); i++) { + std::cout << "LR TODO setting bit " << i << " to value " << outValid[i] << std::endl; + arrow::bit_util::SetBitTo(validRet, i, outValid[i]); + } + + int32_t outBufferLength = (int)*out_len * sizeof(Type); + //length is number of items, but buffers must account for byte size. + uint8_t* ret = gdv_fn_context_arena_malloc(context_ptr, outBufferLength); + memcpy(ret, newInts.data(), outBufferLength); + *valid_row = true; + if (!combined_row_validity) { + *out_len = 0; + *valid_row = false; //this one is what works for the top level validity. + entry_validWhat = false; + } + *valid_ptr = reinterpret_cast(validRet); + return reinterpret_cast(ret); } -bool array_int32_contains_int32(int64_t context_ptr, const int32_t* entry_buf, +template +bool array_contains_template(const Type* entry_buf, int32_t entry_len, const int32_t* entry_validity, bool combined_row_validity, - int32_t contains_data, bool entry_validWhat, + int32_t contains_data, int64_t loop_var, int64_t validity_index_var, bool* valid_row) { - if (!combined_row_validity) { + if (!combined_row_validity) { *valid_row = false; return false; } @@ -78,7 +106,7 @@ bool array_int32_contains_int32(int64_t context_ptr, const int32_t* entry_buf, if (!arrow::bit_util::GetBit(reinterpret_cast(entry_validityAdjusted), validityBitIndex + i)) { continue; } - int32_t entry_item = *(entry_buf + i); + Type entry_item = *(entry_buf + i); if (entry_item == contains_data) { return true; } @@ -86,30 +114,26 @@ bool array_int32_contains_int32(int64_t context_ptr, const int32_t* entry_buf, return false; } +extern "C" { + +bool array_int32_contains_int32(int64_t context_ptr, const int32_t* entry_buf, + int32_t entry_len, const int32_t* entry_validity, bool combined_row_validity, + int32_t contains_data, bool entry_validWhat, + int64_t loop_var, int64_t validity_index_var, + bool* valid_row) { + return array_contains_template(entry_buf, entry_len, entry_validity, + combined_row_validity, contains_data, + loop_var, validity_index_var, valid_row); +} + bool array_int64_contains_int64(int64_t context_ptr, const int64_t* entry_buf, int32_t entry_len, const int32_t* entry_validity, bool combined_row_validity, int64_t contains_data, bool entry_validWhat, int64_t loop_var, int64_t validity_index_var, bool* valid_row) { - if (!combined_row_validity) { - *valid_row = false; - return false; - } - *valid_row = true; - - const int32_t* entry_validityAdjusted = entry_validity - (loop_var ); - int64_t validityBitIndex = validity_index_var - entry_len; - - for (int i = 0; i < entry_len; i++) { - if (!arrow::bit_util::GetBit(reinterpret_cast(entry_validityAdjusted), validityBitIndex + i)) { - continue; - } - int64_t entry_item = *(entry_buf + (i)); - if (entry_item == contains_data) { - return true; - } - } - return false; + return array_contains_template(entry_buf, entry_len, entry_validity, + combined_row_validity, contains_data, + loop_var, validity_index_var, valid_row); } bool array_float32_contains_float32(int64_t context_ptr, const float* entry_buf, @@ -117,25 +141,9 @@ bool array_float32_contains_float32(int64_t context_ptr, const float* entry_buf, float contains_data, bool entry_validWhat, int64_t loop_var, int64_t validity_index_var, bool* valid_row) { - if (!combined_row_validity) { - *valid_row = false; - return false; - } - *valid_row = true; - - const int32_t* entry_validityAdjusted = entry_validity - (loop_var ); - int64_t validityBitIndex = validity_index_var - entry_len; - - for (int i = 0; i < entry_len; i++) { - if (!arrow::bit_util::GetBit(reinterpret_cast(entry_validityAdjusted), validityBitIndex + i)) { - continue; - } - float entry_item = *(entry_buf + (i)); - if (floatsEqual(entry_item, contains_data)) { - return true; - } - } - return false; + return array_contains_template(entry_buf, entry_len, entry_validity, + combined_row_validity, contains_data, + loop_var, validity_index_var, valid_row); } bool array_float64_contains_float64(int64_t context_ptr, const double* entry_buf, @@ -143,141 +151,35 @@ bool array_float64_contains_float64(int64_t context_ptr, const double* entry_buf double contains_data, bool entry_validWhat, int64_t loop_var, int64_t validity_index_var, bool* valid_row) { - if (!combined_row_validity) { - *valid_row = false; - return false; - } - *valid_row = true; + return array_contains_template(entry_buf, entry_len, entry_validity, + combined_row_validity, contains_data, + loop_var, validity_index_var, valid_row); +} - const int32_t* entry_validityAdjusted = entry_validity - (loop_var ); - int64_t validityBitIndex = validity_index_var - entry_len; - for (int i = 0; i < entry_len; i++) { - if (!arrow::bit_util::GetBit(reinterpret_cast(entry_validityAdjusted), validityBitIndex + i)) { - continue; - } - double entry_item = *(entry_buf + (i)); - if (doublesEqual(entry_item, contains_data)) { - return true; - } - } - return false; -} int32_t* array_int32_remove(int64_t context_ptr, const int32_t* entry_buf, int32_t entry_len, const int32_t* entry_validity, bool combined_row_validity, int32_t remove_data, bool entry_validWhat, int64_t loop_var, int64_t validity_index_var, bool* valid_row, int32_t* out_len, int32_t** valid_ptr) { - - std::vector newInts; - - //LR TODO not sure what entry_validWhat is. - //LR TODO I'm not sure why entry_validty increases for each loop. It starts as the pointer to the validity buffer, so adjust here. - const int32_t* entry_validityAdjusted = entry_validity - (loop_var ); - int64_t validityBitIndex = 0; - //The validity index already has the current row length added to it, so decrement. - validityBitIndex = validity_index_var - entry_len; - entry_validWhat = true; - std::vector outValid; - for (int i = 0; i < entry_len; i++) { - int32_t entry_item = *(entry_buf + (i * 1)); - if (entry_item == remove_data) { - //Do not add the item to remove. - } else if (!arrow::bit_util::GetBit(reinterpret_cast(entry_validityAdjusted), validityBitIndex + i)) { - outValid.push_back(false); - newInts.push_back(0); - } else { - outValid.push_back(true); - newInts.push_back(entry_item); - } - } - - *out_len = (int)newInts.size(); - - //Since this function can remove values we don't know the length ahead of time. - //LR TODO divide by 8 and ensure at least 1? - uint8_t* validRet = gdv_fn_context_arena_malloc(context_ptr, *out_len); - for (int i = 0; i < outValid.size(); i++) { - arrow::bit_util::SetBitTo(validRet, i, outValid[i]); - } - - int32_t outBufferLength = (int)*out_len * sizeof(int); - //length is number of items, but buffers must account for byte size. - uint8_t* ret = gdv_fn_context_arena_malloc(context_ptr, outBufferLength); - memcpy(ret, newInts.data(), outBufferLength); - *valid_row = true; - if (!combined_row_validity) { - *out_len = 0; - *valid_row = false; //this one is what works for the top level validity. - entry_validWhat = false; - } - *valid_ptr = reinterpret_cast(validRet); - return reinterpret_cast(ret); + return array_remove_template(context_ptr, entry_buf, + entry_len, entry_validity, combined_row_validity, + remove_data, entry_validWhat, + loop_var, validity_index_var, + valid_row, out_len, valid_ptr); } - - int64_t* array_int64_remove(int64_t context_ptr, const int64_t* entry_buf, int32_t entry_len, const int32_t* entry_validity, bool combined_row_validity, int64_t remove_data, bool entry_validWhat, int64_t loop_var, int64_t validity_index_var, bool* valid_row, int32_t* out_len, int32_t** valid_ptr){ - - std::vector newInts; - - //LR TODO not sure what entry_validWhat is. - //LR TODO I'm not sure why entry_validty increases for each loop. It starts as the pointer to the validity buffer, so adjust here. - const int32_t* entry_validityAdjusted = entry_validity - (loop_var ); - int64_t validityBitIndex = 0; - //The validity index already has the current row length added to it, so decrement. - validityBitIndex = validity_index_var - entry_len; - entry_validWhat = true; - std::vector outValid; - std::cout << "LR TODO entry length is " << entry_len << std::endl; - for (int32_t i = 0; i < entry_len; i++) { - int64_t entry_item = *(entry_buf + (i)); - std::cout << "LR TODO checking entry item " << entry_item << std::endl; - if (entry_item == remove_data) { - //Do not add the item to remove. - } else if (!arrow::bit_util::GetBit(reinterpret_cast(entry_validityAdjusted), validityBitIndex + i)) { - outValid.push_back(false); - newInts.push_back(0); - std::cout << "LR TODO entry item is null" << std::endl; - } else { - outValid.push_back(true); - newInts.push_back(entry_item); - } - } - - *out_len = (int)newInts.size(); - - //Since this function can remove values we don't know the length ahead of time. - //LR TODO divide by 8 and ensure at least 1? - uint8_t* validRet = gdv_fn_context_arena_malloc(context_ptr, *out_len); - for (int i = 0; i < outValid.size(); i++) { - arrow::bit_util::SetBitTo(validRet, i, outValid[i]); - std::cout << "LR TODO Setting validty " << i << " to " << outValid[i] << std::endl; - } - - int32_t outBufferLength = (int)*out_len * sizeof(int64_t); - //length is number of items, but buffers must account for byte size. - uint8_t* ret = gdv_fn_context_arena_malloc(context_ptr, outBufferLength); - memcpy(ret, newInts.data(), outBufferLength); - - //LR TODO - for (int k = 0; k < *out_len; k++) { - std::cout << "LR TODO the 64 data is " << ((int64_t*)ret)[k] << std::endl; - } - - *valid_row = true; - if (!combined_row_validity) { - *out_len = 0; - *valid_row = false; //this one is what works for the top level validity. - entry_validWhat = false; - } - *valid_ptr = reinterpret_cast(validRet); - return reinterpret_cast(ret); + return array_remove_template(context_ptr, entry_buf, + entry_len, entry_validity, combined_row_validity, + remove_data, entry_validWhat, + loop_var, validity_index_var, + valid_row, out_len, valid_ptr); } float* array_float32_remove(int64_t context_ptr, const float* entry_buf, @@ -285,52 +187,11 @@ float* array_float32_remove(int64_t context_ptr, const float* entry_buf, float remove_data, bool entry_validWhat, int64_t loop_var, int64_t validity_index_var, bool* valid_row, int32_t* out_len, int32_t** valid_ptr){ - - std::vector newArray; - - //LR TODO not sure what entry_validWhat is. - //LR TODO I'm not sure why entry_validty increases for each loop. It starts as the pointer to the validity buffer, so adjust here. - const int32_t* entry_validityAdjusted = entry_validity - (loop_var ); - int64_t validityBitIndex = 0; - //The validity index already has the current row length added to it, so decrement. - validityBitIndex = validity_index_var - entry_len; - entry_validWhat = true; - std::vector outValid; - for (int i = 0; i < entry_len; i++) { - float entry_item = *(entry_buf + (i * 1)); - //LR TODO comparison tolerance? - if (floatsEqual(entry_item, remove_data)) { - //Do not add the item to remove. - } else if (!arrow::bit_util::GetBit(reinterpret_cast(entry_validityAdjusted), validityBitIndex + i)) { - outValid.push_back(false); - newArray.push_back(0); - } else { - outValid.push_back(true); - newArray.push_back(entry_item); - } - } - - *out_len = (int)newArray.size(); - - //Since this function can remove values we don't know the length ahead of time. - //LR TODO divide by 8 and ensure at least 1? - uint8_t* validRet = gdv_fn_context_arena_malloc(context_ptr, *out_len); - for (int i = 0; i < outValid.size(); i++) { - arrow::bit_util::SetBitTo(validRet, i, outValid[i]); - } - - int32_t outBufferLength = (int)*out_len * sizeof(float); - //length is number of items, but buffers must account for byte size. - uint8_t* ret = gdv_fn_context_arena_malloc(context_ptr, outBufferLength); - memcpy(ret, newArray.data(), outBufferLength); - *valid_row = true; - if (!combined_row_validity) { - *out_len = 0; - *valid_row = false; //this one is what works for the top level validity. - entry_validWhat = false; - } - *valid_ptr = reinterpret_cast(validRet); - return reinterpret_cast(ret); + return array_remove_template(context_ptr, entry_buf, + entry_len, entry_validity, combined_row_validity, + remove_data, entry_validWhat, + loop_var, validity_index_var, + valid_row, out_len, valid_ptr); } @@ -339,54 +200,12 @@ double* array_float64_remove(int64_t context_ptr, const double* entry_buf, double remove_data, bool entry_validWhat, int64_t loop_var, int64_t validity_index_var, bool* valid_row, int32_t* out_len, int32_t** valid_ptr){ - - std::vector newArray; - - //LR TODO not sure what entry_validWhat is. - //LR TODO I'm not sure why entry_validty increases for each loop. It starts as the pointer to the validity buffer, so adjust here. - const int32_t* entry_validityAdjusted = entry_validity - (loop_var ); - int64_t validityBitIndex = 0; - //The validity index already has the current row length added to it, so decrement. - validityBitIndex = validity_index_var - entry_len; - entry_validWhat = true; - std::vector outValid; - for (int32_t i = 0; i < entry_len; i++) { - double entry_item = *(entry_buf + (i * 1)); - //LR TODO comparison tolerance? - if (doublesEqual(entry_item, remove_data)) { - //Do not add the item to remove. - } else if (!arrow::bit_util::GetBit(reinterpret_cast(entry_validityAdjusted), validityBitIndex + i)) { - outValid.push_back(false); - newArray.push_back(0.0); - } else { - outValid.push_back(true); - newArray.push_back(entry_item); - } - } - - *out_len = (int)newArray.size(); - - //Since this function can remove values we don't know the length ahead of time. - //LR TODO divide by 8 and ensure at least 1? - uint8_t* validRet = gdv_fn_context_arena_malloc(context_ptr, *out_len); - for (int i = 0; i < outValid.size(); i++) { - arrow::bit_util::SetBitTo(validRet, i, outValid[i]); - } - - int32_t outBufferLength = (int)*out_len * sizeof(double); - //length is number of items, but buffers must account for byte size. - uint8_t* ret = gdv_fn_context_arena_malloc(context_ptr, outBufferLength); - memcpy(ret, newArray.data(), outBufferLength); - *valid_row = true; - if (!combined_row_validity) { - *out_len = 0; - *valid_row = false; //this one is what works for the top level validity. - entry_validWhat = false; - } - *valid_ptr = reinterpret_cast(validRet); - return reinterpret_cast(ret); + return array_remove_template(context_ptr, entry_buf, + entry_len, entry_validity, combined_row_validity, + remove_data, entry_validWhat, + loop_var, validity_index_var, + valid_row, out_len, valid_ptr); } - } namespace gandiva { @@ -394,26 +213,6 @@ void ExportedArrayFunctions::AddMappings(Engine* engine) const { std::vector args; auto types = engine->types(); - - //Array contains. - args = {types->i64_type(), // int64_t execution_context - types->i8_ptr_type(), // int8_t* data ptr - types->i32_ptr_type(), // int32_t* child offsets ptr - types->i32_type(), // int32_t child offsets length - types->i32_ptr_type(), // input validity buffer - types->i1_type(), // bool input row validity - types->i8_ptr_type(), // const char* contains data buf - types->i32_type(), // int32_t contains data length - types->i1_type(), // bool validity --Needed? - types->i64_type(), //in loop var --Needed? - types->i64_type(), //in validity_index_var index into the valdity vector for the current row. - types->i1_ptr_type() //output validity for the row - }; - - engine->AddGlobalMappingForFunc("array_utf8_contains_utf8", - types->i1_type() /*return_type*/, args, - reinterpret_cast(array_utf8_contains_utf8)); - args = {types->i64_type(), // int64_t execution_context types->i64_ptr_type(), // int8_t* data ptr types->i32_type(), // int32_t data length diff --git a/cpp/src/gandiva/array_ops.h b/cpp/src/gandiva/array_ops.h index 2a7d1448a9af4..c0de72a39472b 100644 --- a/cpp/src/gandiva/array_ops.h +++ b/cpp/src/gandiva/array_ops.h @@ -27,12 +27,7 @@ class VectorType; /// Array functions that can be accessed from LLVM. extern "C" { -GANDIVA_EXPORT -bool array_utf8_contains_utf8(int64_t context_ptr, const char* entry_buf, - int32_t* entry_child_offsets, int32_t entry_offsets_len, - const char* contains_data, int32_t contains_data_length, - int64_t loop_var, int64_t validity_index_var, - bool* valid_row); + GANDIVA_EXPORT bool array_int32_contains_int32(int64_t context_ptr, const int32_t* entry_buf, int32_t entry_len, const int32_t* entry_validity, bool combined_row_validity, diff --git a/cpp/src/gandiva/array_ops_test.cc b/cpp/src/gandiva/array_ops_test.cc index 12dd6f9c56d30..4d96b80dd4222 100644 --- a/cpp/src/gandiva/array_ops_test.cc +++ b/cpp/src/gandiva/array_ops_test.cc @@ -36,30 +36,4 @@ TEST(TestArrayOps, TestInt32ContainsInt32) { true); } -TEST(TestArrayOps, TestUtf8ContainsUtf8) { - gandiva::ExecutionContext ctx; - uint64_t ctx_ptr = reinterpret_cast(&ctx); - const char* entry_buf = "trianglecirclerectangle"; - int32_t entry_child_offsets[] = {0, 8, 14, 24}; - int32_t entry_offsets_len = 3; - const char* contains_data = "triangle"; - int32_t contains_data_length = 8; - - EXPECT_EQ( - array_utf8_contains_utf8(ctx_ptr, entry_buf, entry_child_offsets, entry_offsets_len, - contains_data, contains_data_length), - true); -} - -TEST(TestArrayOps, TestUtf8Length) { - gandiva::ExecutionContext ctx; - uint64_t ctx_ptr = reinterpret_cast(&ctx); - const char* entry_buf = "trianglecirclerectangle"; - int32_t entry_child_offsets[] = {0, 8, 14, 24}; - int32_t entry_offsets_len = 3; - - EXPECT_EQ(array_utf8_length(ctx_ptr, entry_buf, entry_child_offsets, entry_offsets_len), - 3); -} - } // namespace gandiva diff --git a/cpp/src/gandiva/bitmap_accumulator.h b/cpp/src/gandiva/bitmap_accumulator.h index 52d73696c788c..9eaec81763786 100644 --- a/cpp/src/gandiva/bitmap_accumulator.h +++ b/cpp/src/gandiva/bitmap_accumulator.h @@ -17,7 +17,6 @@ #pragma once -#include #include #include "arrow/util/macros.h" diff --git a/cpp/src/gandiva/compiled_expr.h b/cpp/src/gandiva/compiled_expr.h index b4244aae63380..4933e7f4922f6 100644 --- a/cpp/src/gandiva/compiled_expr.h +++ b/cpp/src/gandiva/compiled_expr.h @@ -36,8 +36,7 @@ class CompiledExpr { ValueValidityPairPtr value_validity() const { return value_validity_; } - FieldDescriptorPtr output() const { - return output_; } + FieldDescriptorPtr output() const { return output_; } void SetFunctionName(SelectionVector::Mode mode, std::string& name) { ir_functions_[static_cast(mode)] = name; diff --git a/cpp/src/gandiva/engine.cc b/cpp/src/gandiva/engine.cc index f8cfa8b54a60d..f5f9460ddd1f2 100644 --- a/cpp/src/gandiva/engine.cc +++ b/cpp/src/gandiva/engine.cc @@ -300,10 +300,7 @@ Status Engine::FinalizeModule() { if (!cached_) { ARROW_RETURN_NOT_OK(RemoveUnusedFunctions()); - //LR TODO - //LR Turning this off seems to provide better error messages with compilation/generation failures. - //if (optimize_) { - if (false) { + if (optimize_) { // misc passes to allow for inlining, vectorization, .. std::unique_ptr pass_manager( new llvm::legacy::PassManager()); diff --git a/cpp/src/gandiva/function_registry.cc b/cpp/src/gandiva/function_registry.cc index 616ef8530c02b..9180e8c33ca33 100644 --- a/cpp/src/gandiva/function_registry.cc +++ b/cpp/src/gandiva/function_registry.cc @@ -17,7 +17,6 @@ #include "gandiva/function_registry.h" -#include #include #include #include diff --git a/cpp/src/gandiva/function_registry_array.cc b/cpp/src/gandiva/function_registry_array.cc index 015c8e97bfb53..893ba6e3d2b04 100644 --- a/cpp/src/gandiva/function_registry_array.cc +++ b/cpp/src/gandiva/function_registry_array.cc @@ -22,34 +22,29 @@ namespace gandiva { std::vector GetArrayFunctionRegistry() { static std::vector array_fn_registry_ = { - NativeFunction("array_containsGandiva", {}, DataTypeVector{list(utf8()), utf8()}, - boolean(), kResultNullInternal, "array_utf8_contains_utf8", - NativeFunction::kNeedsContext | NativeFunction::kCanReturnErrors), - - - NativeFunction("array_containsGandiva", {}, DataTypeVector{list(int32()), int32()}, + NativeFunction("array_contains", {}, DataTypeVector{list(int32()), int32()}, boolean(), kResultNullInternal, "array_int32_contains_int32", NativeFunction::kNeedsContext), - NativeFunction("array_containsGandiva", {}, DataTypeVector{list(int64()), int64()}, + NativeFunction("array_contains", {}, DataTypeVector{list(int64()), int64()}, boolean(), kResultNullInternal, "array_int64_contains_int64", NativeFunction::kNeedsContext), - NativeFunction("array_containsGandiva", {}, DataTypeVector{list(float32()), float32()}, + NativeFunction("array_contains", {}, DataTypeVector{list(float32()), float32()}, boolean(), kResultNullInternal, "array_float32_contains_float32", NativeFunction::kNeedsContext), - NativeFunction("array_containsGandiva", {}, DataTypeVector{list(float64()), float64()}, + NativeFunction("array_contains", {}, DataTypeVector{list(float64()), float64()}, boolean(), kResultNullInternal, "array_float64_contains_float64", NativeFunction::kNeedsContext), - NativeFunction("array_removeGandiva", {}, DataTypeVector{list(int32()), int32()}, + NativeFunction("array_remove", {}, DataTypeVector{list(int32()), int32()}, list(int32()), kResultNullInternal, "array_int32_remove", NativeFunction::kNeedsContext), - NativeFunction("array_removeGandiva", {}, DataTypeVector{list(int64()), int64()}, + NativeFunction("array_remove", {}, DataTypeVector{list(int64()), int64()}, list(int64()), kResultNullInternal, "array_int64_remove", NativeFunction::kNeedsContext), - NativeFunction("array_removeGandiva", {}, DataTypeVector{list(float32()), float32()}, + NativeFunction("array_remove", {}, DataTypeVector{list(float32()), float32()}, list(float32()), kResultNullInternal, "array_float32_remove", NativeFunction::kNeedsContext), - NativeFunction("array_removeGandiva", {}, DataTypeVector{list(float64()), float64()}, + NativeFunction("array_remove", {}, DataTypeVector{list(float64()), float64()}, list(float64()), kResultNullInternal, "array_float64_remove", NativeFunction::kNeedsContext), }; diff --git a/cpp/src/gandiva/function_signature.cc b/cpp/src/gandiva/function_signature.cc index 8c086f5ee33a4..2498de39e1b3b 100644 --- a/cpp/src/gandiva/function_signature.cc +++ b/cpp/src/gandiva/function_signature.cc @@ -59,7 +59,6 @@ FunctionSignature::FunctionSignature(std::string base_name, DataTypeVector param : base_name_(std::move(base_name)), param_types_(std::move(param_types)), ret_type_(std::move(ret_type)) { - std::cout << "LR TODO creating FunctionSignature " << ret_type_->ToString() << std::endl; DCHECK_GT(base_name_.length(), 0); for (auto it = param_types_.begin(); it != param_types_.end(); it++) { DCHECK(*it); diff --git a/cpp/src/gandiva/llvm_generator.cc b/cpp/src/gandiva/llvm_generator.cc index f9d993403bf42..11fd2d3cb1947 100644 --- a/cpp/src/gandiva/llvm_generator.cc +++ b/cpp/src/gandiva/llvm_generator.cc @@ -17,7 +17,6 @@ #include "gandiva/llvm_generator.h" -#include #include #include #include @@ -36,27 +35,6 @@ namespace gandiva { AddTrace(__VA_ARGS__); \ } -namespace { - std::string printType(llvm::Type* t) { - if (t == nullptr) { - return std::string("null"); - } - std::string str; - llvm::raw_string_ostream output(str); - t->print(output); - return str; - } - std::string printType(llvm::Value* t) { - if (t == nullptr) { - return std::string("null"); - } - std::string str; - llvm::raw_string_ostream output(str); - t->print(output); - return str; - } -} - LLVMGenerator::LLVMGenerator(bool cached) : cached_(cached), enable_ir_traces_(false) {} Status LLVMGenerator::Make(std::shared_ptr config, bool cached, @@ -113,7 +91,6 @@ Status LLVMGenerator::Build(const ExpressionVector& exprs, SelectionVector::Mode ARROW_RETURN_NOT_OK(Add(expr, output)); } -std::cout << "LR TODO LLVMGenerator::Build 2 IR is " << engine_->DumpIR() << std::endl; // Compile and inject into the process' memory the generated function. ARROW_RETURN_NOT_OK(engine_->FinalizeModule()); @@ -300,7 +277,6 @@ Status LLVMGenerator::CodeGenExprValue(DexPtr value_expr, int buffer_count, FieldDescriptorPtr output, int suffix_idx, std::string& fn_name, SelectionVector::Mode selection_vector_mode) { - try { llvm::IRBuilder<>* builder = ir_builder(); // Create fn prototype : // int expr_1 (long **addrs, long *offsets, long **bitmaps, @@ -390,11 +366,6 @@ Status LLVMGenerator::CodeGenExprValue(DexPtr value_expr, int buffer_count, // define loop_var : start with 0, +1 after each iter llvm::PHINode* loop_var = builder->CreatePHI(types()->i64_type(), 2, "loop_var"); -//LR-VAR - //Define counter for index into list validity vector. - //llvm::PHINode* validity_index_var = builder->CreatePHI(types()->i64_type(), 2, "validity_index_var"); - - llvm::Value* position_var = loop_var; if (selection_vector_mode != SelectionVector::MODE_NONE) { @@ -500,10 +471,6 @@ Status LLVMGenerator::CodeGenExprValue(DexPtr value_expr, int buffer_count, builder->SetInsertPoint(loop_exit); builder->CreateRet(types()->i32_constant(0)); return Status::OK(); - } catch (std::exception& e) { - std::cout << e.what() << std::endl; - throw e; - } } /// Return value of a bit in bitMap. @@ -591,21 +558,16 @@ void LLVMGenerator::ComputeBitMapsForExpr(const CompiledExpr& compiled_expr, llvm::Value* LLVMGenerator::AddFunctionCall(const std::string& full_name, llvm::Type* ret_type, const std::vector& args) { - std::cout << "LR TODO AddFunctionCall " << full_name << " ret type is " << printType(ret_type) << std::endl; // find the llvm function. llvm::Function* fn = module()->getFunction(full_name); DCHECK_NE(fn, nullptr) << "missing function " << full_name; - if (!full_name.compare("printf") && + if (enable_ir_traces_ && !full_name.compare("printf") && !full_name.compare("printff")) { // Trace for debugging ADD_TRACE("invoke native fn " + full_name); } - std::cout << "LR TODO AddFunctionCall 2" << std::endl; - for (llvm::Value* lv : args) { - std::cout << "LR TODO arg is " << printType(lv) << std::endl; - } // build a call to the llvm function. llvm::Value* value; if (ret_type->isVoidTy()) { @@ -613,13 +575,7 @@ llvm::Value* LLVMGenerator::AddFunctionCall(const std::string& full_name, value = ir_builder()->CreateCall(fn, args); } else { value = ir_builder()->CreateCall(fn, args, full_name); -std::cout << "LR TODO AddFunctionCall 3" << std::endl; - std::string str; - llvm::raw_string_ostream output(str); - std::string str2; - llvm::raw_string_ostream output2(str2); - ret_type->print(output); - value->getType()->print(output2); + DCHECK(value->getType() == ret_type); } @@ -638,7 +594,9 @@ std::shared_ptr LLVMGenerator::BuildDecimalLValue(llvm::Value* va } #define ADD_VISITOR_TRACE(...) \ + if (generator_->enable_ir_traces_) { \ generator_->AddTrace(__VA_ARGS__); \ + } // Visitor for generating the code for a decomposed expression. LLVMGenerator::Visitor::Visitor(LLVMGenerator* generator, llvm::Function* function, @@ -705,17 +663,10 @@ void LLVMGenerator::Visitor::Visit(const VectorReadFixedLenValueListDex& dex) { auto types = generator_->types(); auto type = types->IRType(dex.FieldType()->id()); - std::cout << "LR VectorReadFixedLenValueListDex dex.FieldType()->id() " << dex.FieldType()->id() << " types->DataVecType( " << printType(types->DataVecType(dex.FieldType())) << std::endl; - auto dt = dex.FieldType(); if (dt->id() == arrow::Type::LIST) { - if (dt->num_fields() > 0) { - std::cout << "LR TODO creating listtype" << std::endl; - std::cout << "LR TODO listtype id=" << dt->fields()[0]->type()->id() << std::endl; type = types->IRType(dt->fields()[0]->type()->id() ); - } } - std::cout << "LR TODO using type " << printType(type) << std::endl; arrow::Type::type at32 = arrow::Type::INT32; auto type32 = types->IRType(at32); @@ -1075,7 +1026,6 @@ void LLVMGenerator::Visitor::Visit(const NullableInternalFuncDex& dex) { auto arrow_return_type = dex.func_descriptor()->return_type(); - auto arrow_return_type_id = arrow_return_type->id(); bool passLoopVars = false; for (auto& p : dex.func_descriptor()->params()) { @@ -1086,16 +1036,9 @@ void LLVMGenerator::Visitor::Visit(const NullableInternalFuncDex& dex) { } if (passLoopVars) { - std::string str32 = "loopvar:"; - if (loop_var_) { - llvm::raw_string_ostream output3(str32); - loop_var_->print(output3); - } - - params.push_back(loop_var_); - auto valid_var = builder->CreateLoad(types->i64_type(), validity_index_var_, "loaded_var"); - params.push_back(valid_var); - + params.push_back(loop_var_); + auto valid_var = builder->CreateLoad(types->i64_type(), validity_index_var_, "loaded_var"); + params.push_back(valid_var); } // add an extra arg for validity (allocated on stack). @@ -1103,7 +1046,6 @@ void LLVMGenerator::Visitor::Visit(const NullableInternalFuncDex& dex) { new llvm::AllocaInst(types->i8_type(), 0, "result_valid", entry_block_); params.push_back(result_valid_ptr); - //auto arrow_return_type = dex.func_descriptor()->return_type(); result_ = BuildFunctionCall(native_function, arrow_return_type, ¶ms); // load the result validity and truncate to i1. @@ -1731,7 +1673,6 @@ void LLVMGenerator::AddTrace(const std::string& msg, llvm::Value* value) { dmsg = ReplaceFormatInTrace(dmsg, value, &print_fn_name); } trace_strings_.push_back(dmsg); - std::cout << dmsg << std::endl; // cast this to an llvm pointer. const char* str = trace_strings_.back().c_str(); diff --git a/cpp/src/gandiva/llvm_generator_test.cc b/cpp/src/gandiva/llvm_generator_test.cc index 2c0c742eb79c2..0651614c816f6 100644 --- a/cpp/src/gandiva/llvm_generator_test.cc +++ b/cpp/src/gandiva/llvm_generator_test.cc @@ -114,72 +114,4 @@ TEST_F(TestLLVMGenerator, TestAdd) { EXPECT_THAT(out, testing::ElementsAre(6, 8, 10, 12)); EXPECT_EQ(out_bitmap, 0ULL); } -/* -TEST_F(TestLLVMGenerator, TestArrayRemove) { - // Setup LLVM generator to do an array remove. - std::unique_ptr generator; - ASSERT_OK(LLVMGenerator::Make(TestConfiguration(), false, &generator)); - Annotator annotator; - - std::shared_ptr listDt = std::make_shared(); - std::shared_ptr dt = std::make_shared(listDt); - auto field0 = std::make_shared("f0", dt); - auto desc0 = annotator.CheckAndAddInputFieldDescriptor(field0); - auto validity_dex0 = std::make_shared(desc0); - auto value_dex0 = std::make_shared(desc0); - auto pair0 = std::make_shared(validity_dex0, value_dex0); - - auto field1 = std::make_shared("f1", arrow::int32()); - auto desc1 = annotator.CheckAndAddInputFieldDescriptor(field1); - auto validity_dex1 = std::make_shared(desc1); - auto value_dex1 = std::make_shared(desc1); - auto pair1 = std::make_shared(validity_dex1, value_dex1); - - DataTypeVector params{dt, arrow::int32()}; - auto func_desc = std::make_shared("array_removeGandiva", params, arrow::int32()); - FunctionSignature signature(func_desc->name(), func_desc->params(), - func_desc->return_type()); - const NativeFunction* native_func = - generator->function_registry_.LookupSignature(signature); - - std::vector pairs{pair0, pair1}; - auto func_dex = std::make_shared( - func_desc, native_func, FunctionHolderPtr(nullptr), -1, pairs); - - auto field_sum = std::make_shared("out", arrow::int32()); - auto desc_sum = annotator.CheckAndAddInputFieldDescriptor(field_sum); - - std::string fn_name = "codegen"; - - ASSERT_OK(generator->engine_->LoadFunctionIRs()); - ASSERT_OK(generator->CodeGenExprValue(func_dex, 4, desc_sum, 0, fn_name, - SelectionVector::MODE_NONE)); - - ASSERT_OK(generator->engine_->FinalizeModule()); - auto ir = generator->engine_->DumpIR(); - EXPECT_THAT(ir, testing::HasSubstr("vector.body")); - - EvalFunc eval_func = (EvalFunc)generator->engine_->CompiledFunction(fn_name); - - constexpr size_t kNumRecords = 4; - std::array a0{1, 2, 3, 4}; - std::array a1{5, 6, 7, 8}; - uint64_t in_bitmap = 0xffffffffffffffffull; - - std::array out{0, 0, 0, 0}; - uint64_t out_bitmap = 0; - - std::array addrs{ - reinterpret_cast(a0.data()), reinterpret_cast(&in_bitmap), - reinterpret_cast(a1.data()), reinterpret_cast(&in_bitmap), - reinterpret_cast(out.data()), reinterpret_cast(&out_bitmap), - }; - std::array addr_offsets{0, 0, 0, 0, 0, 0}; - eval_func(addrs.data(), addr_offsets.data(), nullptr, nullptr, nullptr, - 0 /* dummy context ptr */, kNumRecords); - - EXPECT_THAT(out, testing::ElementsAre(6, 8, 10, 12)); - EXPECT_EQ(out_bitmap, 0ULL); -}*/ - } // namespace gandiva diff --git a/cpp/src/gandiva/llvm_types.h b/cpp/src/gandiva/llvm_types.h index be31954f0c7b1..7473f0c4d6ea7 100644 --- a/cpp/src/gandiva/llvm_types.h +++ b/cpp/src/gandiva/llvm_types.h @@ -17,7 +17,6 @@ #pragma once -#include #include #include @@ -47,10 +46,6 @@ class GANDIVA_EXPORT LLVMTypes { llvm::Type* i128_type() { return llvm::Type::getInt128Ty(context_); } - llvm::StructType* struct_type() { - return llvm::StructType::get(context_, {double_type(), double_type()}, false); - } - llvm::VectorType* list_type() { return llvm::ScalableVectorType::get(i8_type(), (unsigned int)0); } llvm::StructType* i128_split_type() { @@ -126,11 +121,7 @@ class GANDIVA_EXPORT LLVMTypes { // offsets buffer is to separate data into list // not support nested list if (data_type->id() == arrow::Type::LIST) { - //LR TODO - std::cout << "LR Returning list type as type " << data_type->field(0)->type()->id()<< " for IR " << std::endl; return IRType(data_type->field(0)->type()->id()); - //return IRType(data_type->id()); - //return i32_ptr_type(); } return IRType(data_type->id()); } diff --git a/cpp/src/gandiva/tests/list_test.cc b/cpp/src/gandiva/tests/list_test.cc index 249980abbab84..e065645bcff6a 100644 --- a/cpp/src/gandiva/tests/list_test.cc +++ b/cpp/src/gandiva/tests/list_test.cc @@ -199,28 +199,14 @@ TEST_F(TestList, TestConcatWS) { // prepare input record batch auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array_a, array_b, array_c}); - // build expressions. - // array_contains(a, b) - - //auto expr = TreeExprBuilder::MakeExpression("array_containsGandiva", {field_a, field_b}, res); - - //std::vector field_nodes; - //auto node2 = TreeExprBuilder::MakeLiteral(42); - //field_nodes.push_back(node2); - - //auto func_node = TreeExprBuilder::MakeFunction("array_makeGandiva", {field_b}, res->type()); - //auto expr = TreeExprBuilder::MakeExpression(func_node, res); - std::cout << "LR test is about to make expression " << std::endl; auto expr = TreeExprBuilder::MakeExpression("concat_ws", {field_a, field_b, field_c}, res); - //////// + // Build a projector for the expressions. std::shared_ptr projector; auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector); EXPECT_TRUE(status.ok()) << status.message(); - std::cout << "LR Test 2 " << std::endl; - //std::cout << "LR IR IS " << projector->DumpIR() << std::endl; // Evaluate expression arrow::ArrayVector outputs; status = projector->Evaluate(*in_batch, pool_, &outputs); @@ -255,194 +241,11 @@ TEST_F(TestList, TestArrayRemove) { {10, 30, 70, 80}, {2, 2}, {true, true}, {true, true, true, true}, pool_, &exp1); - // auto exp = MakeArrowArrayArray({ 42, 42, 44, 45, 46}, - // {true, true, true, true, true}); // prepare input record batch auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array_a, array_b}); - // build expressions. - // array_contains(a, b) - - //auto expr = TreeExprBuilder::MakeExpression("array_containsGandiva", {field_a, field_b}, res); - - //std::vector field_nodes; - //auto node2 = TreeExprBuilder::MakeLiteral(42); - //field_nodes.push_back(node2); - - //auto func_node = TreeExprBuilder::MakeFunction("array_makeGandiva", {field_b}, res->type()); - //auto expr = TreeExprBuilder::MakeExpression(func_node, res); - std::cout << "LR test is about to make expression " << std::endl; - auto expr = TreeExprBuilder::MakeExpression("array_removeGandiva", {field_a, field_b}, res); - //////// - - // Build a projector for the expressions. - std::shared_ptr projector; - auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector); - EXPECT_TRUE(status.ok()) << status.message(); - - std::cout << "LR Test 2 " << std::endl; - //std::cout << "LR IR IS " << projector->DumpIR() << std::endl; - // Evaluate expression - arrow::ArrayVector outputs; - status = projector->Evaluate(*in_batch, pool_, &outputs); - EXPECT_TRUE(status.ok()) << status.message(); - // Validate results - EXPECT_ARROW_ARRAY_EQUALS(exp1, outputs.at(0)); - - std::cout << "LR ==============================SECOND=WAY==================================================== " << std::endl; - - - - //Try the second method. - arrow::ArrayDataVector outputs2; - std::shared_ptr listDt = std::make_shared(); - std::shared_ptr dt = std::make_shared(listDt); - - - int num_records2 = 5; - std::vector> buffers; - - - - //int64_t size = arrow::bit_util::BytesForBits(num_records2); - int64_t size = 20; - auto bitmap_buffer = arrow::AllocateBuffer(size, pool_); - buffers.push_back(*std::move(bitmap_buffer)); - auto offsets_len = arrow::bit_util::BytesForBits((num_records2 + 1) * 32); - - auto offsets_buffer = arrow::AllocateBuffer(offsets_len*10, pool_); - buffers.push_back(*std::move(offsets_buffer)); - - std::cout << "LR Test buffers [0] is " << buffers[0] << std::endl; - //auto array_data = arrow::ArrayData::Make(dt, num_records2, buffers, 0, offsets_len); - //outputs2.push_back(array_data); - - - -std::vector> buffers2; -auto bitmap_buffer2 = arrow::AllocateBuffer(size, pool_); - buffers2.push_back(*std::move(bitmap_buffer2)); - - auto offsets_buffer2 = arrow::AllocateBuffer(offsets_len, pool_); - buffers2.push_back(*std::move(offsets_buffer2)); -std::shared_ptr dt2 = std::make_shared(); - - auto array_data_child = arrow::ArrayData::Make(dt2, num_records2, buffers2, 0, 0); - array_data_child->buffers = std::move(buffers2); - - std::vector> kids; - kids.push_back(array_data_child); - - -auto array_data = arrow::ArrayData::Make(dt, num_records2, buffers, kids, 0, 0); -array_data->buffers = std::move(buffers); -outputs2.push_back(array_data); - -std::cout << "LR Test " << array_data << " arra_data 0 is " << array_data->buffers[0] << std::endl; - //std::cout << "LR Test buffers [0] is " << buffers[0] << std::endl; - std::cout << "LR about to evaluate 2nd " << std::endl; - - status = projector->Evaluate(*(in_batch.get()), outputs2); - EXPECT_TRUE(status.ok()) << status.message(); - arrow::ArrayData ad = *outputs2.at(0); - arrow::ArraySpan sp(*ad.child_data.at(0)); - EXPECT_ARROW_ARRAY_EQUALS(exp1, sp.ToArray()); - - - - -for (auto& array_data : outputs2) { - auto child_data = array_data->child_data[0]; - int64_t child_data_size = 1; - if (arrow::is_binary_like(child_data->type->id())) { - /* when allocate array data, child data length is an initialized value, - * after calculating, child data offsets buffer has been resized for results, - * but array data length is unchanged. - * We should recalculate child data length and make ArrayData with new length - * - * Otherwise, child data offsets buffer length is data length + 1 - * and offset data is int32_t, need use buffer->size()/4 - 1 - */ - child_data_size = child_data->buffers[1]->size() / 4 - 1; - } else if (child_data->type->id() == arrow::Type::INT32) { - child_data_size = child_data->buffers[1]->size() / 4; - } else if (child_data->type->id() == arrow::Type::INT64) { - child_data_size = child_data->buffers[1]->size() / 8; - } else if (child_data->type->id() == arrow::Type::FLOAT) { - child_data_size = child_data->buffers[1]->size() / 4; - } else if (child_data->type->id() == arrow::Type::DOUBLE) { - child_data_size = child_data->buffers[1]->size() / 8; - } - auto new_child_data = arrow::ArrayData::Make( - child_data->type, child_data_size, child_data->buffers, child_data->offset); - array_data = arrow::ArrayData::Make(array_data->type, array_data->length, - array_data->buffers, {new_child_data}, - array_data->null_count, array_data->offset); - - - auto newArray = arrow::MakeArray(array_data); - //arrow::ArraySpan sp(newArray); - EXPECT_ARROW_ARRAY_EQUALS(exp1, newArray); -} - - - - std::cout << "LR ====================THIRD=WAY================================== " << std::endl; - { - std::shared_ptr listDt = std::make_shared(); - std::shared_ptr dt = std::make_shared(listDt); - -ArrayDataPtr output_data; - auto s = projector->AllocArrayData(dt, num_records2, pool_, &output_data); - ArrayDataVector output_data_vecs; - output_data_vecs.push_back(output_data); - - status = projector->Evaluate(*(in_batch.get()), output_data_vecs); - EXPECT_TRUE(status.ok()) << status.message(); - arrow::ArraySpan sp(*output_data_vecs.at(0)); - EXPECT_ARROW_ARRAY_EQUALS(exp1, sp.ToArray()); - } -} - - -TEST_F(TestList, TestMakeArray) { - // schema for input fields - auto field_b = field("b", int32()); - auto schema = arrow::schema({field_b}); - - // output fields - auto res = field("res", list(int32())); - - // Create a row-batch with some sample data - int num_records = 5; - auto array_b = - MakeArrowArrayInt32({42, 43, 44, 45, 46}, {true, true, true, true, true}); - - // expected output - auto exp1 = MakeArrowArrayInt32({ 1, 2, 3, 42, 5}, - {true, true, true, true, true}); - - // auto exp = MakeArrowArrayArray({ 42, 42, 44, 45, 46}, - // {true, true, true, true, true}); - - // prepare input record batch - auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array_b}); - - // build expressions. - // array_contains(a, b) - - //auto expr = TreeExprBuilder::MakeExpression("array_containsGandiva", {field_a, field_b}, res); - - //std::vector field_nodes; - //auto node2 = TreeExprBuilder::MakeLiteral(42); - //field_nodes.push_back(node2); - - //auto func_node = TreeExprBuilder::MakeFunction("array_makeGandiva", {field_b}, res->type()); - //auto expr = TreeExprBuilder::MakeExpression(func_node, res); - std::cout << "LR test is about to make expression " << std::endl; - auto expr = TreeExprBuilder::MakeExpression("array_makeGandiva", {field_b}, res); - //////// + auto expr = TreeExprBuilder::MakeExpression("array_remove", {field_a, field_b}, res); // Build a projector for the expressions. std::shared_ptr projector; @@ -450,7 +253,6 @@ TEST_F(TestList, TestMakeArray) { EXPECT_TRUE(status.ok()) << status.message(); std::cout << "LR Test 2 " << std::endl; - //std::cout << "LR IR IS " << projector->DumpIR() << std::endl; // Evaluate expression arrow::ArrayVector outputs; status = projector->Evaluate(*in_batch, pool_, &outputs); @@ -458,10 +260,6 @@ TEST_F(TestList, TestMakeArray) { // Validate results EXPECT_ARROW_ARRAY_EQUALS(exp1, outputs.at(0)); - std::cout << "LR ==============================SECOND=WAY==================================================== " << std::endl; - - - //Try the second method. arrow::ArrayDataVector outputs2; std::shared_ptr listDt = std::make_shared(); @@ -471,9 +269,6 @@ TEST_F(TestList, TestMakeArray) { int num_records2 = 5; std::vector> buffers; - - - //int64_t size = arrow::bit_util::BytesForBits(num_records2); int64_t size = 20; auto bitmap_buffer = arrow::AllocateBuffer(size, pool_); buffers.push_back(*std::move(bitmap_buffer)); @@ -482,12 +277,6 @@ TEST_F(TestList, TestMakeArray) { auto offsets_buffer = arrow::AllocateBuffer(offsets_len*10, pool_); buffers.push_back(*std::move(offsets_buffer)); - std::cout << "LR Test buffers [0] is " << buffers[0] << std::endl; - //auto array_data = arrow::ArrayData::Make(dt, num_records2, buffers, 0, offsets_len); - //outputs2.push_back(array_data); - - - std::vector> buffers2; auto bitmap_buffer2 = arrow::AllocateBuffer(size, pool_); buffers2.push_back(*std::move(bitmap_buffer2)); @@ -507,9 +296,6 @@ auto array_data = arrow::ArrayData::Make(dt, num_records2, buffers, kids, 0, 0); array_data->buffers = std::move(buffers); outputs2.push_back(array_data); -std::cout << "LR Test " << array_data << " arra_data 0 is " << array_data->buffers[0] << std::endl; - //std::cout << "LR Test buffers [0] is " << buffers[0] << std::endl; - std::cout << "LR about to evaluate 2nd " << std::endl; status = projector->Evaluate(*(in_batch.get()), outputs2); EXPECT_TRUE(status.ok()) << status.message(); @@ -573,8 +359,6 @@ ArrayDataPtr output_data; } } - -/* TEST_F(TestList, TestListArrayInt32) { gandiva::ExecutionContext ctx; uint64_t ctx_ptr = reinterpret_cast(&ctx); @@ -615,11 +399,6 @@ TEST_F(TestList, TestListInt32LiteralContains) { // prepare input record batch auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array_a, array_b}); - // build expressions. - // array_contains(a, b) - - //auto expr = TreeExprBuilder::MakeExpression("array_containsGandiva", {field_a, field_b}, res); - std::vector field_nodes; auto node = TreeExprBuilder::MakeField(field_a); field_nodes.push_back(node); @@ -627,7 +406,7 @@ TEST_F(TestList, TestListInt32LiteralContains) { auto node2 = TreeExprBuilder::MakeLiteral(42); field_nodes.push_back(node2); - auto func_node = TreeExprBuilder::MakeFunction("array_containsGandiva", field_nodes, res->type()); + auto func_node = TreeExprBuilder::MakeFunction("array_contains", field_nodes, res->type()); auto expr = TreeExprBuilder::MakeExpression(func_node, res); //////// @@ -673,7 +452,7 @@ TEST_F(TestList, TestListInt32Contains) { // build expressions. // array_contains(a, b) - auto expr = TreeExprBuilder::MakeExpression("array_containsGandiva", {field_a, field_b}, res); + auto expr = TreeExprBuilder::MakeExpression("array_contains", {field_a, field_b}, res); // Build a projector for the expressions. std::shared_ptr projector; @@ -707,129 +486,4 @@ TEST_F(TestList, TestListFloat64) { _test_list_type_field_alias(list(float64()), array, pool_); } - -TEST_F(TestList, TestListUtf8Length) { - // schema for input fields - auto field_a = field("a", list(utf8())); - auto schema = arrow::schema({field_a}); - - // output fields - auto res = field("res", int64()); - - // Create a row-batch with some sample data - int num_records = 5; - ArrayPtr array_a; - _build_list_array( - {"a", "b", "bb", "c", "cc", "ccc", "d", "dd", "ddd", "dddd", "e", "ee", "eee", - "eeee", "eeeee"}, - {1, 2, 3, 4, 5}, {true, true, true, true, true}, pool_, &array_a); - - // expected output - auto exp = MakeArrowArrayInt64({1, 2, 3, 4, 5}, {true, true, true, true, true}); - - // prepare input record batch - auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array_a}); - - // build expressions. - // array_length(a) - auto expr = TreeExprBuilder::MakeExpression("array_lengthGandiva", {field_a}, res); - - // Build a projector for the expressions. - std::shared_ptr projector; - auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector); - EXPECT_TRUE(status.ok()) << status.message(); - - // Evaluate expression - arrow::ArrayVector outputs; - status = projector->Evaluate(*in_batch, pool_, &outputs); - EXPECT_TRUE(status.ok()) << status.message(); - - // Validate results - EXPECT_ARROW_ARRAY_EQUALS(exp, outputs.at(0)); -} - -TEST_F(TestList, TestListUtf8LengthWithInvalidData) { - // schema for input fields - auto field_a = field("a", list(utf8())); - auto schema = arrow::schema({field_a}); - - // output fields - auto res = field("res", int64()); - - // Create a row-batch with some sample data - int num_records = 5; - ArrayPtr array_a; - _build_list_array( - {"a", "b", "bb", "cc", "cc", "ccc", "d", "dd", "ddd"}, {1, 2, 2, 3, 1}, - {true, false, true, false, true}, pool_, &array_a); - - // expected output - auto exp = MakeArrowArrayInt64({1, 2, 2, 3, 1}, {true, false, true, false, true}); - - // prepare input record batch - auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array_a}); - - // build expressions. - // array_length(a) - auto expr = TreeExprBuilder::MakeExpression("array_lengthGandiva", {field_a}, res); - - // Build a projector for the expressions. - std::shared_ptr projector; - auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector); - EXPECT_TRUE(status.ok()) << status.message(); - - // Evaluate expression - arrow::ArrayVector outputs; - status = projector->Evaluate(*in_batch, pool_, &outputs); - EXPECT_TRUE(status.ok()) << status.message(); - - // Validate results - EXPECT_ARROW_ARRAY_EQUALS(exp, outputs.at(0)); -} - - -TEST_F(TestList, TestListUtf8Contains) { - // schema for input fields - auto field_a = field("a", list(utf8())); - auto field_b = field("b", utf8()); - auto schema = arrow::schema({field_a, field_b}); - - // output fields - auto res = field("res", boolean()); - - // Create a row-batch with some sample data - int num_records = 5; - ArrayPtr array_a; - _build_list_array( - {"rectangle", "circle", "rectangle", "circle", "triangle", "triangle", "circle", - "rectangle"}, - {2, 3, 1, 1, 1}, {true, true, true, true, true}, pool_, &array_a); - auto array_b = - MakeArrowArrayUtf8({"rectangle", "circle", "circle", "circle", "rectangll"}); - - // expected output - auto exp = MakeArrowArrayBool({true, true, false, true, false}, - {true, true, true, true, true}); - - // prepare input record batch - auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array_a, array_b}); - - // build expressions. - // array_contains(a, b) - auto expr = TreeExprBuilder::MakeExpression("array_containsGandiva", {field_a, field_b}, res); - - // Build a projector for the expressions. - std::shared_ptr projector; - auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector); - EXPECT_TRUE(status.ok()) << status.message(); - - // Evaluate expression - arrow::ArrayVector outputs; - status = projector->Evaluate(*in_batch, pool_, &outputs); - EXPECT_TRUE(status.ok()) << status.message(); - - // Validate results - EXPECT_ARROW_ARRAY_EQUALS(exp, outputs.at(0)); -} -*/ } // namespace gandiva diff --git a/cpp/src/gandiva/tree_expr_builder.cc b/cpp/src/gandiva/tree_expr_builder.cc index 08ffb8c192cb9..461ed2a04d8ea 100644 --- a/cpp/src/gandiva/tree_expr_builder.cc +++ b/cpp/src/gandiva/tree_expr_builder.cc @@ -161,7 +161,7 @@ ExpressionPtr TreeExprBuilder::MakeExpression(const std::string& function, auto node = MakeField(field); field_nodes.push_back(node); } - std::cout << "LR TODO creating TreeExpression " << out_field->type()->ToString() << std::endl; + auto func_node = MakeFunction(function, field_nodes, out_field->type()); return MakeExpression(func_node, out_field); } diff --git a/java/gandiva/src/main/cpp/expression_registry_helper.cc b/java/gandiva/src/main/cpp/expression_registry_helper.cc index aba90a93fc87f..cc1ed04194861 100644 --- a/java/gandiva/src/main/cpp/expression_registry_helper.cc +++ b/java/gandiva/src/main/cpp/expression_registry_helper.cc @@ -138,38 +138,20 @@ void ArrowToProtobuf(DataTypePtr type, types::ExtGandivaType* gandiva_data_type) break; case arrow::Type::LIST: { gandiva_data_type->set_type(types::GandivaType::LIST); - //LR TODO make a helper function - std::cout << "LR TODO creating listtype" << std::endl; if (type->num_fields() <= 0) { break; } - std::cout << "LR TODO listtype id=" << type->fields()[0]->type()->id() << std::endl; - switch (type->fields()[0]->type()->id()) { - case arrow::Type::INT32: - gandiva_data_type->set_listtype(types::GandivaType::INT32); - break; - case arrow::Type::INT64: - gandiva_data_type->set_listtype(types::GandivaType::INT64); - break; - case arrow::Type::FLOAT: - gandiva_data_type->set_listtype(types::GandivaType::FLOAT); - break; - case arrow::Type::DOUBLE: - gandiva_data_type->set_listtype(types::GandivaType::DOUBLE); - break; - case arrow::Type::STRING: - gandiva_data_type->set_listtype(types::GandivaType::UTF8); - break; + if (type->fields()[0]->type()->id() != arrow::Type::LIST) { + types::ExtGandivaType gt; + ArrowToProtobuf(type->fields()[0]->type(), >); + gandiva_data_type->set_listtype(gt.type()); } break; } default: // un-supported types. test ensures that // when one of these are added build breaks. - //DCHECK(false); - //LR TODO - printf("LR Found unsupported type %d\n", type->id()); - fflush(stdout); + DCHECK(false); } } @@ -202,12 +184,6 @@ Java_org_apache_arrow_gandiva_evaluator_ExpressionRegistryJniHelper_getGandivaSu types::GandivaFunctions gandiva_functions; for (auto function = expr_registry.function_signature_begin(); function != expr_registry.function_signature_end(); function++) { - - //LR TODO - printf("LR getGandivaSupportedFunctions Functions: %s\n", (*function).base_name().c_str()); - printf("LR getGandivaSupportedFunctions Functions: %s\n", (*function).ToString().c_str()); - fflush(stdout); - types::FunctionSignature* function_signature = gandiva_functions.add_function(); function_signature->set_name((*function).base_name()); types::ExtGandivaType* return_type = function_signature->mutable_returntype(); diff --git a/java/gandiva/src/main/cpp/jni_common.cc b/java/gandiva/src/main/cpp/jni_common.cc index 4a4ccda035375..d85644d7831f3 100644 --- a/java/gandiva/src/main/cpp/jni_common.cc +++ b/java/gandiva/src/main/cpp/jni_common.cc @@ -253,7 +253,6 @@ DataTypePtr ProtoTypeToInterval(const types::ExtGandivaType& ext_type) { } DataTypePtr ProtoTypeToList(const types::ExtGandivaType& ext_type) { - std::cout << "LR TODO 2 checking a field type " << ext_type.type() << " and it has listType:" << ext_type.listtype() << std::endl; DataTypePtr childType = SimpleProtoTypeToDataType(ext_type.listtype()); return arrow::list(childType); } @@ -330,7 +329,6 @@ DataTypePtr ProtoTypeToDataType(const types::ExtGandivaType& ext_type) { DataTypePtr ProtoTypeToDataType(const types::Field& f) { const types::ExtGandivaType& ext_type = f.type(); - std::cout << "LR TODO checking a field type " << ext_type.type() << " and it has listType:" << ext_type.listtype() << std::endl; if (ext_type.type() == types::LIST) { if (f.children().size() > 0 && f.children()[0].type().type() != types::LIST) { DataTypePtr childType = ProtoTypeToDataType(f.children()[0].type()); @@ -689,7 +687,6 @@ Status make_record_batch_with_buf_addrs(SchemaPtr schema, int num_rows, auto type = field->type(); auto type_id = type->id(); -//num_rows = num_records or ?? if (type_id == arrow::Type::LIST) { if (buf_idx >= in_bufs_len) { @@ -888,7 +885,7 @@ JNIEXPORT jlong JNICALL Java_org_apache_arrow_gandiva_evaluator_JniWrapper_build status = Projector::Make(schema_ptr, expr_vector, mode, config, sec_cache, &projector); if (!status.ok()) { - ss << "Failed to make LLVM module [1]cdue to " << status.message() << "\n"; + ss << "Failed to make LLVM module due to " << status.message() << "\n"; releaseProjectorInput(schema_arr, schema_bytes, exprs_arr, exprs_bytes, env); goto err_out; } @@ -953,22 +950,12 @@ Status JavaResizableBuffer::Reserve(const int64_t new_capacity) { jlong ret_capacity = env_->GetLongField(ret, list_expander_ret_capacity_); jlong outer_valid_address = env_->GetLongField(ret, list_expander_outer_valid_address_); - std::cout << "Buffer expand: New capacity is " << new_capacity << - " vector id " << vector_idx_ << " expander method " << method_ << - " jexpander_ " << jexpander_ << " returned size is " << ret_capacity << - " and the original buffer ptr=" << reinterpret_cast(data_) << " and the new ptr=" << ret_address << std::endl; - data_ = reinterpret_cast(ret_address); capacity_ = ret_capacity; } else { jlong ret_address = env_->GetLongField(ret, vector_expander_ret_address_); jlong ret_capacity = env_->GetLongField(ret, vector_expander_ret_capacity_); - std::cout << "Buffer expand: New capacity is " << new_capacity << - " vector id " << vector_idx_ << " expander method " << method_ << - " jexpander_ " << jexpander_ << " returned size is " << ret_capacity << - " and the original buffer ptr=" << reinterpret_cast(data_) << " and the new ptr=" << ret_address << std::endl; - data_ = reinterpret_cast(ret_address); capacity_ = ret_capacity; } @@ -1135,17 +1122,12 @@ Java_org_apache_arrow_gandiva_evaluator_JniWrapper_evaluateProjector( outBufJava->offsetBuffer = reinterpret_cast(out_bufs[1]); outBufJava->offsetCapacity = out_sizes[1]; outBufJava->validityBuffer = reinterpret_cast(out_bufs[2]); - //outBufJava->outerValidityBuffer = reinterpret_cast(out_bufs[0]); child_buffers.push_back(outBufJava); - //LR TODO - - std::cout << "LR Creating array for type: " << field->type()->ToString() << std::endl; std::shared_ptr dt2 = std::make_shared(); if (field->type()->id() == arrow::Type::LIST && field->type()->num_fields() > 0) { dt2 = field->type()->fields()[0]->type(); } - std::cout << "LR using sub type: " << dt2->ToString() << std::endl; auto array_data_child = arrow::ArrayData::Make(dt2, output_row_count, child_buffers); std::vector> kids; diff --git a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/ExpressionRegistry.java b/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/ExpressionRegistry.java index 80b61332e62e9..c870cf8f9ab8e 100644 --- a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/ExpressionRegistry.java +++ b/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/ExpressionRegistry.java @@ -169,8 +169,6 @@ private static ArrowType getArrowTypeSimple(GandivaType type) { return new ArrowType.Null(); case GandivaType.DECIMAL_VALUE: return new ArrowType.Decimal(0, 0, 128); - case GandivaType.STRUCT_VALUE: - return new ArrowType.Struct(); case GandivaType.LIST_VALUE: return new ArrowType.List(); case GandivaType.FIXED_SIZE_BINARY_VALUE: diff --git a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/Projector.java b/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/Projector.java index fe82c25736aac..5485d46882336 100644 --- a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/Projector.java +++ b/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/Projector.java @@ -368,7 +368,6 @@ private void evaluate(int numRows, List buffers, List buf outAddrs[idx] = valueVector.getValidityBuffer().memoryAddress(); outSizes[idx++] = valueVector.getValidityBuffer().capacity(); if (isVarWidth) { - logger.error("LR Projector.java evaluate isVarWidth setting buffer=" + idx); outAddrs[idx] = valueVector.getOffsetBuffer().memoryAddress(); outSizes[idx++] = valueVector.getOffsetBuffer().capacity(); hasVariableWidthColumns = true; @@ -408,7 +407,6 @@ private void evaluate(int numRows, List buffers, List buf for (ValueVector valueVector : outColumns) { if (valueVector instanceof ListVector) { - //LR TODO check if this is necessary. ((ListVector) valueVector).setLastSet(selectionVectorRecordCount - 1); } } diff --git a/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/ArrowTypeHelper.java b/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/ArrowTypeHelper.java index 9e84bc6d05561..91bf5b633c590 100644 --- a/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/ArrowTypeHelper.java +++ b/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/ArrowTypeHelper.java @@ -309,7 +309,6 @@ public static GandivaTypes.ExtGandivaType arrowTypeToProtobuf(ArrowType arrowTyp break; } case Type.Struct_: { // 13 - ArrowTypeHelper.initArrowTypeStruct((ArrowType.Struct) arrowType, builder); break; } case Type.Union: { // 14 @@ -373,19 +372,15 @@ public static GandivaTypes.Field arrowFieldToProtobuf(Field field) throws Gandiv builder.setName(field.getName()); builder.setNullable(field.isNullable()); - //LR TODO ArrowType subType = null; if (field.getChildren().size() > 0 && field.getChildren().get(0) .getType().getTypeID().getFlatbufID() != Type.List) { - //builder.setListType(arrowTypeToProtobuf(f.getChildren().get(0).getType(), null)); subType = field.getChildren().get(0).getType(); } builder.setType(ArrowTypeHelper.arrowTypeToProtobuf(field.getType(), subType)); for (Field child : field.getChildren()) { - System.out.println("LR TODO arrowFieldToProtobuf child field id is " + child.getType().getTypeID() ); if (child.getType() != ArrowType.Null.INSTANCE) { - System.out.println("LR TODO adding child=" + child.getName() + " type=" + child.getType()); builder.addChildren(ArrowTypeHelper.arrowFieldToProtobuf(child)); } } diff --git a/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/FunctionNode.java b/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/FunctionNode.java index 14d6286a3282c..0097e2236fa07 100644 --- a/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/FunctionNode.java +++ b/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/FunctionNode.java @@ -67,7 +67,6 @@ class FunctionNode implements TreeNode { public GandivaTypes.TreeNode toProtobuf() throws GandivaException { GandivaTypes.FunctionNode.Builder fnNode = GandivaTypes.FunctionNode.newBuilder(); fnNode.setFunctionName(function); - System.out.println("LR TODO retType, retListType)=" + retType + "==" + retListType); fnNode.setReturnType(ArrowTypeHelper.arrowTypeToProtobuf(retType, retListType)); for (TreeNode arg : children) { diff --git a/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/TreeBuilder.java b/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/TreeBuilder.java index f9f2a4cd775b3..a020dcda38091 100644 --- a/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/TreeBuilder.java +++ b/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/TreeBuilder.java @@ -94,7 +94,6 @@ public static TreeNode makeField(Field field) { public static TreeNode makeFunction(String function, List children, ArrowType retType) { - System.out.println("LR TODO TreeNode makeFunction Type"); StackTraceElement[] elements = Thread.currentThread().getStackTrace(); for (int i = 1; i < elements.length; i++) { StackTraceElement s = elements[i]; @@ -116,13 +115,6 @@ public static TreeNode makeFunction(String function, public static TreeNode makeFunction(String function, List children, ArrowType retType, ArrowType listType) { - System.out.println("LR TODO TreeNode makeFunction Type2"); - StackTraceElement[] elements = Thread.currentThread().getStackTrace(); - for (int i = 1; i < elements.length; i++) { - StackTraceElement s = elements[i]; - System.out.println("\tat " + s.getClassName() + "." + s.getMethodName() + - "(" + s.getFileName() + ":" + s.getLineNumber() + ")"); - } return new FunctionNode(function, children, retType, listType); } @@ -137,13 +129,6 @@ public static TreeNode makeFunction(String function, public static TreeNode makeFunction(String function, List children, Field retType) { - System.out.println("LR TODO TreeNode makeFunction Field"); - StackTraceElement[] elements = Thread.currentThread().getStackTrace(); - for (int i = 1; i < elements.length; i++) { - StackTraceElement s = elements[i]; - System.out.println("\tat " + s.getClassName() + "." + s.getMethodName() + - "(" + s.getFileName() + ":" + s.getLineNumber() + ")"); - } return new FunctionNode(function, children, retType); } diff --git a/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/ProjectorTest.java b/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/ProjectorTest.java index df0fd8639b231..8dd759ee885d2 100644 --- a/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/ProjectorTest.java +++ b/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/ProjectorTest.java @@ -48,7 +48,6 @@ import org.apache.arrow.vector.IntervalYearVector; import org.apache.arrow.vector.ValueVector; import org.apache.arrow.vector.VarCharVector; -import org.apache.arrow.vector.complex.ListVector; import org.apache.arrow.vector.holders.NullableIntervalDayHolder; import org.apache.arrow.vector.holders.NullableIntervalYearHolder; import org.apache.arrow.vector.ipc.message.ArrowFieldNode; @@ -58,7 +57,6 @@ import org.apache.arrow.vector.types.TimeUnit; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.types.pojo.Schema; import org.junit.Assert; import org.junit.Ignore; @@ -290,54 +288,6 @@ public void testEvaluate() throws GandivaException, Exception { eval.close(); } - @Test - public void testEvaluateArray() throws GandivaException, Exception { - ArrowType int32 = new ArrowType.Int(32, true); - ArrowType listInt32 = new ArrowType.List(); - - Field a = Field.nullable("a", int32); - List args = Lists.newArrayList(a); - - Field retType = Field.nullable("c", listInt32); - ExpressionTree root = TreeBuilder.makeExpression("array_makeGandiva", args, retType); - - List exprs = Lists.newArrayList(root); - - Schema schema = new Schema(args); - Projector eval = Projector.make(schema, exprs); - - int numRows = 16; - byte[] validity = new byte[]{(byte) 255, 0}; - // second half is "undefined" - int[] aValues = new int[]{1, 2, 3, 42, 5}; - - - ArrowBuf validitya = buf(validity); - ArrowBuf valuesa = intBuf(aValues); - ArrowRecordBatch batch = - new ArrowRecordBatch( - numRows, - Lists.newArrayList(new ArrowFieldNode(numRows, 5)), - Lists.newArrayList(validitya, valuesa)); - - FieldType ft = new FieldType(true, int32, null); - ListVector intVector = new ListVector("result", allocator, ft, null); - //ListVector.allocateNew(numRows); - - List output = new ArrayList(); - output.add(intVector); - eval.evaluate(batch, output); - - System.out.println(intVector.getDataVector()); - - - - // free buffers - releaseRecordBatch(batch); - releaseValueVectors(output); - eval.close(); - } - @Test public void testEvaluateDivZero() throws GandivaException, Exception { Field a = Field.nullable("a", int32);