From 7d746085eaee88b9ed7c38325f1e3456679040b5 Mon Sep 17 00:00:00 2001 From: Logan Riggs Date: Wed, 25 Oct 2023 15:07:23 -0700 Subject: [PATCH] Cleanup, tested --- cpp/src/arrow/c/bridge.cc | 2 +- cpp/src/arrow/type.cc | 5 -- cpp/src/arrow/type.h | 5 +- cpp/src/arrow/type_fwd.h | 6 -- cpp/src/gandiva/array_ops.cc | 21 ------- cpp/src/gandiva/array_ops.h | 5 -- cpp/src/gandiva/expression_registry.cc | 8 +-- cpp/src/gandiva/function_registry_array.cc | 9 --- cpp/src/gandiva/function_registry_string.cc | 9 --- cpp/src/gandiva/llvm_types.cc | 1 - cpp/src/gandiva/precompiled/string_ops.cc | 59 ------------------- cpp/src/gandiva/precompiled/types.h | 12 ---- .../main/cpp/expression_registry_helper.cc | 4 -- java/gandiva/src/main/cpp/jni_common.cc | 3 +- .../arrow/gandiva/evaluator/Projector.java | 5 -- 15 files changed, 7 insertions(+), 147 deletions(-) diff --git a/cpp/src/arrow/c/bridge.cc b/cpp/src/arrow/c/bridge.cc index 32dbc088a7118..85a5156d11db2 100644 --- a/cpp/src/arrow/c/bridge.cc +++ b/cpp/src/arrow/c/bridge.cc @@ -195,7 +195,7 @@ struct SchemaExporter { } Status ExportSchema(const Schema& schema) { - static const StructType dummy_struct_type = StructType(); + static const StructType dummy_struct_type({}); flags_ = 0; RETURN_NOT_OK(ExportFormat(dummy_struct_type)); diff --git a/cpp/src/arrow/type.cc b/cpp/src/arrow/type.cc index 60b71cbb71df7..4804570bdf52f 100644 --- a/cpp/src/arrow/type.cc +++ b/cpp/src/arrow/type.cc @@ -856,10 +856,6 @@ StructType::StructType(const std::vector>& fields) children_ = fields; } -StructType::StructType() - : NestedType(Type::STRUCT) { -} - StructType::~StructType() {} std::string StructType::ToString() const { @@ -2531,7 +2527,6 @@ TYPE_FACTORY(float16, HalfFloatType) TYPE_FACTORY(float32, FloatType) TYPE_FACTORY(float64, DoubleType) TYPE_FACTORY(utf8, StringType) -TYPE_FACTORY(structType, StructType) TYPE_FACTORY(large_utf8, LargeStringType) TYPE_FACTORY(binary, BinaryType) TYPE_FACTORY(large_binary, LargeBinaryType) diff --git a/cpp/src/arrow/type.h b/cpp/src/arrow/type.h index ddeb45b721f89..29ac79037d508 100644 --- a/cpp/src/arrow/type.h +++ b/cpp/src/arrow/type.h @@ -1079,10 +1079,7 @@ class ARROW_EXPORT StructType : public NestedType { static constexpr const char* type_name() { return "struct"; } explicit StructType(const std::vector>& fields); - explicit StructType(); - StructType(const StructType& rhs) = delete; - StructType& operator=(const StructType& rhs) = delete; - + ~StructType() override; DataTypeLayout layout() const override { diff --git a/cpp/src/arrow/type_fwd.h b/cpp/src/arrow/type_fwd.h index 450ed9a136d26..66fd6c75f0ddb 100644 --- a/cpp/src/arrow/type_fwd.h +++ b/cpp/src/arrow/type_fwd.h @@ -153,11 +153,6 @@ class FixedSizeListArray; class FixedSizeListBuilder; struct FixedSizeListScalar; -class StructType; -class StructArray; -class StructBuilder; -struct StructScalar; - class Decimal128; class Decimal256; class DecimalType; @@ -459,7 +454,6 @@ ARROW_EXPORT const std::shared_ptr& float32(); ARROW_EXPORT const std::shared_ptr& float64(); /// \brief Return a StringType instance ARROW_EXPORT const std::shared_ptr& utf8(); -ARROW_EXPORT const std::shared_ptr& structType(); /// \brief Return a LargeStringType instance ARROW_EXPORT const std::shared_ptr& large_utf8(); /// \brief Return a BinaryType instance diff --git a/cpp/src/gandiva/array_ops.cc b/cpp/src/gandiva/array_ops.cc index b9ff34b4a8a4f..b5d1a57e5fe5f 100644 --- a/cpp/src/gandiva/array_ops.cc +++ b/cpp/src/gandiva/array_ops.cc @@ -74,18 +74,6 @@ bool array_int32_contains_int32(int64_t context_ptr, const int32_t* entry_buf, return false; } -//LR TODO -int32_t* array_int32_make_array(int64_t context_ptr, int32_t contains_data, int32_t* out_len) { - - int integers[] = { contains_data, 21, 3, contains_data, 5 }; - *out_len = 5;// * 4; - //length is number of items, but buffers must account for byte size. - uint8_t* ret = gdv_fn_context_arena_malloc(context_ptr, *out_len * 4); - memcpy(ret, integers, *out_len * 4); - - return reinterpret_cast(ret); -} - bool array_int64_contains_int64(int64_t context_ptr, const int64_t* entry_buf, int32_t entry_len, const int32_t* entry_validity, bool combined_row_validity, int64_t contains_data, bool entry_validWhat, @@ -227,15 +215,6 @@ void ExportedArrayFunctions::AddMappings(Engine* engine) const { types->i1_type() /*return_type*/, args, reinterpret_cast(array_int64_contains_int64)); - - args = {types->i64_type(), // int64_t execution_context - types->i32_type(), // array item input - types->i32_ptr_type()}; // out array length - - engine->AddGlobalMappingForFunc("array_int32_make_array", - types->i32_ptr_type(), args, - reinterpret_cast(array_int32_make_array)); - args = {types->i64_type(), // int64_t execution_context types->i32_ptr_type(), // int8_t* input data ptr types->i32_type(), // int32_t input length diff --git a/cpp/src/gandiva/array_ops.h b/cpp/src/gandiva/array_ops.h index 8fdf957f3d22c..8d51b6e09f7f7 100644 --- a/cpp/src/gandiva/array_ops.h +++ b/cpp/src/gandiva/array_ops.h @@ -47,11 +47,6 @@ bool array_int64_contains_int64(int64_t context_ptr, const int64_t* entry_buf, int64_t loop_var, int64_t validity_index_var, bool* valid_buf); -GANDIVA_EXPORT -int32_t* array_int32_make_array(int64_t context_ptr, - int32_t contains_data, - int32_t* out_len); - GANDIVA_EXPORT int32_t* array_int32_remove(int64_t context_ptr, const int32_t* entry_buf, int32_t entry_len, const int32_t* entry_validity, bool combined_row_validity, diff --git a/cpp/src/gandiva/expression_registry.cc b/cpp/src/gandiva/expression_registry.cc index 20be12548e0f9..12ac0d0b154e8 100644 --- a/cpp/src/gandiva/expression_registry.cc +++ b/cpp/src/gandiva/expression_registry.cc @@ -166,12 +166,12 @@ static void AddArrowTypesToVector(arrow::Type::type type, DataTypeVector& vector case arrow::Type::type::INTERVAL_DAY_TIME: vector.push_back(arrow::day_time_interval()); break; - case arrow::Type::type::STRUCT: - vector.push_back(arrow::struct_({field("lattitude", arrow::float64(), false), field("longitude", arrow::float64(), false)})); - break; case arrow::Type::type::LIST: - //vector.push_back(arrow::list(arrow::utf8())); vector.push_back(arrow::list(arrow::int32())); + vector.push_back(arrow::list(arrow::int64())); + vector.push_back(arrow::list(arrow::float32())); + vector.push_back(arrow::list(arrow::float64())); + vector.push_back(arrow::list(arrow::utf8())); break; default: // Unsupported types. test ensures that diff --git a/cpp/src/gandiva/function_registry_array.cc b/cpp/src/gandiva/function_registry_array.cc index f7c587a64b74d..439b275d0ace2 100644 --- a/cpp/src/gandiva/function_registry_array.cc +++ b/cpp/src/gandiva/function_registry_array.cc @@ -31,18 +31,9 @@ std::vector GetArrayFunctionRegistry() { NativeFunction("array_containsGandiva", {}, DataTypeVector{list(int32()), int32()}, boolean(), kResultNullInternal, "array_int32_contains_int32", NativeFunction::kNeedsContext), - NativeFunction("array_contains", {}, DataTypeVector{list(int32()), int32()}, - boolean(), kResultNullIfNull, "array_int32_contains_int32", - NativeFunction::kNeedsContext), - NativeFunction("array_makeGandiva", {}, DataTypeVector{int32()}, - list(int32()), kResultNullIfNull, "array_int32_make_array", - NativeFunction::kNeedsContext), NativeFunction("array_removeGandiva", {}, DataTypeVector{list(int32()), int32()}, list(int32()), kResultNullInternal, "array_int32_remove", NativeFunction::kNeedsContext), - /*NativeFunction("array_containsGandiva", {}, DataTypeVector{list(int64()), int64()}, - boolean(), kResultNullIfNull, "array_int64_contains_int64", - NativeFunction::kNeedsContext),*/ }; return array_fn_registry_; } diff --git a/cpp/src/gandiva/function_registry_string.cc b/cpp/src/gandiva/function_registry_string.cc index edb900e976c59..d93757b40cfd0 100644 --- a/cpp/src/gandiva/function_registry_string.cc +++ b/cpp/src/gandiva/function_registry_string.cc @@ -263,15 +263,6 @@ std::vector GetStringFunctionRegistry() { NativeFunction::kNeedsFunctionHolder | NativeFunction::kCanReturnErrors), - NativeFunction("st_geohash", {}, DataTypeVector{float64(), float64()}, - utf8(), kResultNullIfNull, "gdv_fn_geo_hash_encode_float64_float64", - NativeFunction::kNeedsContext), - - NativeFunction("st_fromgeohash", {}, DataTypeVector{utf8()}, - arrow::struct_({field("lattitude", arrow::float64(), false), field("longitude", arrow::float64(), false)}), kResultNullIfNull, "gdv_fn_geo_hash_decode_utf8", - //arrow::structType(), kResultNullIfNull, "gdv_fn_geo_hash_decode_utf8", - NativeFunction::kNeedsContext), - NativeFunction("concatOperator", {}, DataTypeVector{utf8(), utf8()}, utf8(), kResultNullIfNull, "concatOperator_utf8_utf8", NativeFunction::kNeedsContext), diff --git a/cpp/src/gandiva/llvm_types.cc b/cpp/src/gandiva/llvm_types.cc index 68be62816f60e..3eb49f39037f6 100644 --- a/cpp/src/gandiva/llvm_types.cc +++ b/cpp/src/gandiva/llvm_types.cc @@ -42,7 +42,6 @@ LLVMTypes::LLVMTypes(llvm::LLVMContext& context) : context_(context) { {arrow::Type::type::BINARY, i8_ptr_type()}, {arrow::Type::type::DECIMAL, i128_type()}, {arrow::Type::type::INTERVAL_MONTHS, i32_type()}, - {arrow::Type::type::STRUCT, struct_type()}, {arrow::Type::type::INTERVAL_DAY_TIME, i64_type()}, {arrow::Type::type::LIST, list_type()}}; } diff --git a/cpp/src/gandiva/precompiled/string_ops.cc b/cpp/src/gandiva/precompiled/string_ops.cc index 9c4458ea1b705..c255b9a11c084 100644 --- a/cpp/src/gandiva/precompiled/string_ops.cc +++ b/cpp/src/gandiva/precompiled/string_ops.cc @@ -827,65 +827,6 @@ const char* substr_utf8_int64(gdv_int64 context, const char* input, gdv_int32 in return substr_utf8_int64_int64(context, input, in_len, offset64, in_len, out_len); } -FORCE_INLINE -const char* gdv_fn_geo_hash_encode_float64_float64(gdv_int64 context, gdv_float64 lat, gdv_float64 lon, - gdv_int32* out_len) { - //if (repeat_number == 0 || in_len <= 0) { - // *out_len = 0; - // return ""; - //} - - - //Gandiva-blarg - *out_len = 14; - char* ret = reinterpret_cast(gdv_fn_context_arena_malloc(context, *out_len)); - if (ret == nullptr) { - gdv_fn_context_set_error_msg(context, "Could not allocate memory for output string"); - *out_len = 0; - return ""; - } - - std::string out_string = "Gandiva-blarg"; - memcpy(ret, out_string.c_str(), *out_len); - return ret; -} - -FORCE_INLINE -const gdv_struct gdv_fn_geo_hash_decode_utf8(gdv_int64 context, const char* input, gdv_int32 in_len) { - //gdv_struct* ret = reinterpret_cast(gdv_fn_context_arena_malloc(context, sizeof(gdv_struct))); - gdv_struct ret; - ret.lattitude = 42; - ret.longitude = 142; - return ret; - - //if (repeat_number == 0 || in_len <= 0) { - // *out_len = 0; - // return ""; - //} - - /*auto s = arrow::struct_({field("a", arrow::int32(), false), field("b", arrow::int32(), false)}); - - MemoryPool* pool_ = default_memory_pool(); - std::unique_ptr tmp; - MakeBuilder(pool_, s, &tmp); - - - -//std::vector list_lengths = {42, 43}; -//std::vector list_offsets = {142, 143}; -//410 ListBuilder* list_vb = checked_cast(builder_->field_builder(0)); - Int32Builder* int_vb = checked_cast(builder_->field_builder(0)); - Int32Builder* int_vb2 = checked_cast(builder_->field_builder(1)); -//420 ASSERT_OK(list_vb->AppendValues(list_offsets.data(), list_offsets.size(), -//421 list_is_valid.data())); - - int_vb->UnsafeAppend(42); - int_vb->UnsafeAppend(43); - int_vb2->UnsafeAppend(142); - int_vb2->UnsafeAppend(143); -*/ -} - FORCE_INLINE const char* repeat_utf8_int32(gdv_int64 context, const char* in, gdv_int32 in_len, gdv_int32 repeat_number, gdv_int32* out_len) { diff --git a/cpp/src/gandiva/precompiled/types.h b/cpp/src/gandiva/precompiled/types.h index 15fba4867e650..117b27b2808dd 100644 --- a/cpp/src/gandiva/precompiled/types.h +++ b/cpp/src/gandiva/precompiled/types.h @@ -43,13 +43,6 @@ using gdv_utf8 = char*; using gdv_binary = char*; using gdv_day_time_interval = int64_t; -struct GeoStruct { - double lattitude; - double longitude; -}; - -using gdv_struct = GeoStruct; - #ifdef GANDIVA_UNIT_TEST // unit tests may be compiled without O2, so inlining may not happen. #define FORCE_INLINE @@ -473,11 +466,6 @@ gdv_int64 truncate_int64_int32(gdv_int64 in, gdv_int32 out_scale); const char* repeat_utf8_int32(gdv_int64 context, const char* in, gdv_int32 in_len, gdv_int32 repeat_times, gdv_int32* out_len); -const char* gdv_fn_geo_hash_encode_float64_float64(gdv_int64 context, gdv_float64 lat, gdv_float64 lon, - gdv_int32* out_len); - -const gdv_struct gdv_fn_geo_hash_decode_utf8(gdv_int64 context, const char* input, gdv_int32 in_len); - const char* substr_utf8_int64_int64(gdv_int64 context, const char* input, gdv_int32 in_len, gdv_int64 offset64, gdv_int64 length, gdv_int32* out_len); diff --git a/java/gandiva/src/main/cpp/expression_registry_helper.cc b/java/gandiva/src/main/cpp/expression_registry_helper.cc index c74a1b7271788..9c135ea8065d4 100644 --- a/java/gandiva/src/main/cpp/expression_registry_helper.cc +++ b/java/gandiva/src/main/cpp/expression_registry_helper.cc @@ -136,10 +136,6 @@ void ArrowToProtobuf(DataTypePtr type, types::ExtGandivaType* gandiva_data_type) gandiva_data_type->set_type(types::GandivaType::INTERVAL); gandiva_data_type->set_intervaltype(types::IntervalType::DAY_TIME); break; - //LR TODO - case arrow::Type::STRUCT: - gandiva_data_type->set_type(types::GandivaType::STRUCT); - break; case arrow::Type::LIST: gandiva_data_type->set_type(types::GandivaType::LIST); break; diff --git a/java/gandiva/src/main/cpp/jni_common.cc b/java/gandiva/src/main/cpp/jni_common.cc index 41b2593d501cd..e6852e2198ec7 100644 --- a/java/gandiva/src/main/cpp/jni_common.cc +++ b/java/gandiva/src/main/cpp/jni_common.cc @@ -297,9 +297,8 @@ DataTypePtr ProtoTypeToDataType(const types::ExtGandivaType& ext_type) { return ProtoTypeToTimestamp(ext_type); case types::INTERVAL: return ProtoTypeToInterval(ext_type); - case types::STRUCT: - return arrow::struct_({field("lattitude", arrow::float64(), false), field("longitude", arrow::float64(), false)}); case types::LIST: + //LR TODO return arrow::list(arrow::int32()); //return arrow::list(arrow::utf8()); case types::FIXED_SIZE_BINARY: diff --git a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/Projector.java b/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/Projector.java index 7d677927f0ced..fe82c25736aac 100644 --- a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/Projector.java +++ b/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/Projector.java @@ -31,7 +31,6 @@ import org.apache.arrow.vector.ValueVector; import org.apache.arrow.vector.VariableWidthVector; import org.apache.arrow.vector.complex.ListVector; -import org.apache.arrow.vector.complex.StructVector; import org.apache.arrow.vector.ipc.message.ArrowBuffer; import org.apache.arrow.vector.ipc.message.ArrowRecordBatch; import org.apache.arrow.vector.types.pojo.Schema; @@ -377,10 +376,6 @@ private void evaluate(int numRows, List buffers, List buf // save vector to allow for resizing. resizableVectors[outColumnIdx] = (BaseVariableWidthVector) valueVector; } - if (valueVector instanceof StructVector) { - outAddrs[idx] = ((StructVector) valueVector).getChild("lattitude").getDataBuffer().memoryAddress(); - outSizes[idx++] = ((StructVector) valueVector).getChild("lattitude").getDataBuffer().capacity(); - } if (valueVector instanceof ListVector) { hasVariableWidthColumns = true; resizableListVectors[outColumnIdx] = (ListVector) valueVector;