Skip to content

Commit

Permalink
Removed logging and testing bigger size
Browse files (browse the repository at this point in the history)
  • Loading branch information
lriggs committed Sep 12, 2023
1 parent fafb35a commit 8aaa30b
Show file tree
Hide file tree
Showing 11 changed files with 207 additions and 179 deletions.
46 changes: 28 additions & 18 deletions cpp/src/gandiva/annotator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ FieldDescriptorPtr Annotator::MakeDesc(FieldPtr field, bool is_output) {
}

if (field->type()->id() == arrow::Type::LIST) {
std::cout << "LR Annotator::MakeDesc 1" << std::endl;
//std::cout << "LR Annotator::MakeDesc 1" << std::endl;
offsets_idx = buffer_count_++;
if (arrow::is_binary_like(field->type()->field(0)->type()->id())) {
child_offsets_idx = buffer_count_++;
Expand Down Expand Up @@ -81,37 +81,42 @@ void Annotator::PrepareBuffersForField(const FieldDescriptor& desc,
// The validity buffer is optional. Use nullptr if it does not have one.
if (array_data.buffers[buffer_idx]) {
uint8_t* validity_buf = const_cast<uint8_t*>(array_data.buffers[buffer_idx]->data());
std::cout << "LR Annotator::PrepareBuffersForField setting eval buffer -6 " << &validity_buf << std::endl;
eval_batch->SetBuffer(desc.validity_idx(), validity_buf, array_data.offset);
} else {
std::cout << "LR Annotator::PrepareBuffersForField setting eval buffer -5 null " << std::endl;
eval_batch->SetBuffer(desc.validity_idx(), nullptr, array_data.offset);
}
++buffer_idx;

if (desc.HasOffsetsIdx()) {
uint8_t* offsets_buf = const_cast<uint8_t*>(array_data.buffers[buffer_idx]->data());
std::cout << "LR Annotator::PrepareBuffersForField setting eval buffer -4 " << &offsets_buf << std::endl;
eval_batch->SetBuffer(desc.offsets_idx(), offsets_buf, array_data.offset);

if (desc.HasChildOffsetsIdx()) {
std::cout << "LR Annotator::PrepareBuffersForField 1 for field " << desc.Name() << " type is " << array_data.type->id() << std::endl;
//std::cout << "LR Annotator::PrepareBuffersForField 1 for field " << desc.Name() << " type is " << array_data.type->id() << std::endl;
if (is_output) {
// if list field is output field, we should put buffer pointer into eval batch
// for resizing
uint8_t* child_offsets_buf = reinterpret_cast<uint8_t*>(
array_data.child_data.at(0)->buffers[buffer_idx].get());
std::cout << "LR Annotator::PrepareBuffersForField setting eval buffer -3 " << &child_offsets_buf << std::endl;
eval_batch->SetBuffer(desc.child_data_offsets_idx(), child_offsets_buf,
array_data.child_data.at(0)->offset);
} else {
std::cout << "LR Annotator::PrepareBuffersForField 2" << std::endl;
//std::cout << "LR Annotator::PrepareBuffersForField 2" << std::endl;
// if list field is input field, just put buffer data into eval batch
uint8_t* child_offsets_buf = const_cast<uint8_t*>(
array_data.child_data.at(0)->buffers[buffer_idx]->data());
std::cout << "LR Annotator::PrepareBuffersForField setting eval buffer -2 " << &child_offsets_buf << std::endl;
eval_batch->SetBuffer(desc.child_data_offsets_idx(), child_offsets_buf,
array_data.child_data.at(0)->offset);
}
}
if (array_data.type->id() != arrow::Type::LIST ||
arrow::is_binary_like(array_data.type->field(0)->type()->id())) {
std::cout << "LR Annotator::PrepareBuffersForField 3" << std::endl;
//std::cout << "LR Annotator::PrepareBuffersForField 3" << std::endl;

// primitive type list data buffer index is 1
// binary like type list data buffer index is 2
Expand All @@ -120,21 +125,23 @@ void Annotator::PrepareBuffersForField(const FieldDescriptor& desc,
}

if (array_data.type->id() != arrow::Type::LIST) {
std::cout << "LR Annotator::PrepareBuffersForField 4" << std::endl;
//std::cout << "LR Annotator::PrepareBuffersForField 4" << std::endl;

std::cout << "LR Annotator::PrepareBuffersForField 4 buffer_idx " << buffer_idx << std::endl;
//std::cout << "LR Annotator::PrepareBuffersForField 4 buffer_idx " << buffer_idx << std::endl;
uint8_t* data_buf = const_cast<uint8_t*>(array_data.buffers[buffer_idx]->data());
std::cout << "LR Annotator::PrepareBuffersForField 4a" << std::endl;
//std::cout << "LR Annotator::PrepareBuffersForField 4a" << std::endl;
std::cout << "LR Annotator::PrepareBuffersForField setting eval buffer -1 " << &data_buf << std::endl;
eval_batch->SetBuffer(desc.data_idx(), data_buf, array_data.offset);
std::cout << "LR Annotator::PrepareBuffersForField 4b" << std::endl;
//std::cout << "LR Annotator::PrepareBuffersForField 4b" << std::endl;
} else {
std::cout << "LR Annotator::PrepareBuffersForField 5 " << desc.Name() << " buffer_idx " << buffer_idx << std::endl;
std::cout << "LR Annotator::PrepareBuffersForField 5 array_data child size " << array_data.child_data.size() << std::endl;
//std::cout << "LR Annotator::PrepareBuffersForField 5 " << desc.Name() << " buffer_idx " << buffer_idx << std::endl;
//std::cout << "LR Annotator::PrepareBuffersForField 5 array_data child size " << array_data.child_data.size() << std::endl;

uint8_t* data_buf =
const_cast<uint8_t*>(array_data.child_data.at(0)->buffers[buffer_idx]->data());
std::cout << "LR Annotator::PrepareBuffersForField setting eval buffer 0 " << &data_buf << std::endl;
eval_batch->SetBuffer(desc.data_idx(), data_buf, array_data.child_data.at(0)->offset);
std::cout << "LR Annotator::PrepareBuffersForField 5a" << std::endl;
//std::cout << "LR Annotator::PrepareBuffersForField 5a" << std::endl;
}

if (is_output) {
Expand All @@ -143,13 +150,16 @@ void Annotator::PrepareBuffersForField(const FieldDescriptor& desc,
if (array_data.type->id() != arrow::Type::LIST) {
uint8_t* data_buf_ptr =
reinterpret_cast<uint8_t*>(array_data.buffers[buffer_idx].get());
std::cout << "LR Annotator::PrepareBuffersForField setting eval buffer 1 " << &data_buf_ptr << std::endl;
eval_batch->SetBuffer(desc.data_buffer_ptr_idx(), data_buf_ptr, array_data.offset);
} else {
std::cout << "LR Annotator::PrepareBuffersForField is_output index " << desc.data_buffer_ptr_idx() << std::endl;
//std::cout << "LR Annotator::PrepareBuffersForField is_output index " << desc.data_buffer_ptr_idx() << std::endl;

// list data buffer is in child data buffer
uint8_t* data_buf_ptr = reinterpret_cast<uint8_t*>(
array_data.child_data.at(0)->buffers[buffer_idx].get());
std::cout << "LR Annotator::PrepareBuffersForField setting eval buffer 2 " << &data_buf_ptr << std::endl;

eval_batch->SetBuffer(desc.data_buffer_ptr_idx(), data_buf_ptr,
array_data.child_data.at(0)->offset);
}
Expand All @@ -162,7 +172,7 @@ EvalBatchPtr Annotator::PrepareEvalBatch(const arrow::RecordBatch& record_batch,
EvalBatchPtr eval_batch = std::make_shared<EvalBatch>(
record_batch.num_rows(), buffer_count_, local_bitmap_count_);

std::cout << "LR PrepareEvalBatch 1" << std::endl;
//std::cout << "LR PrepareEvalBatch 1" << std::endl;
// Fill in the entries for the input fields.
for (int i = 0; i < record_batch.num_columns(); ++i) {
const std::string& name = record_batch.column_name(i);
Expand All @@ -172,27 +182,27 @@ EvalBatchPtr Annotator::PrepareEvalBatch(const arrow::RecordBatch& record_batch,
continue;
}

std::cout << "LR PrepareEvalBatch 1a i=" << i << " record batch schema " << record_batch.schema()->ToString()
/*std::cout << "LR PrepareEvalBatch 1a i=" << i << " record batch schema " << record_batch.schema()->ToString()
<< " num rows " << record_batch.num_rows()
<< " num columns " << record_batch.num_columns()
<< " data size " << record_batch.column_data().size()
<< " col 1 " << record_batch.column(0)->ToString()
<< std::endl;
<< std::endl;*/

std::cout << "LR PrepareEvalBatch 1a i=" << i << " record batch data " << record_batch.ToString() << std::endl;
//std::cout << "LR PrepareEvalBatch 1a i=" << i << " record batch data " << record_batch.ToString() << std::endl;
PrepareBuffersForField(*(found->second), *(record_batch.column_data(i)),
eval_batch.get(), false /*is_output*/);
}

// Fill in the entries for the output fields.
std::cout << "LR PrepareEvalBatch preparing output fields" << std::endl;
//std::cout << "LR PrepareEvalBatch preparing output fields" << std::endl;
int idx = 0;
for (auto& arraydata : out_vector) {
const FieldDescriptorPtr& desc = out_descs_.at(idx);
PrepareBuffersForField(*desc, *arraydata, eval_batch.get(), true /*is_output*/);
++idx;
}
std::cout << "LR PrepareEvalBatch 2" << std::endl;
//std::cout << "LR PrepareEvalBatch 2" << std::endl;
return eval_batch;
}

Expand Down
24 changes: 12 additions & 12 deletions cpp/src/gandiva/array_ops.cc
Original file line number Diff line number Diff line change
Expand Up @@ -50,14 +50,14 @@ bool array_utf8_contains_utf8(int64_t context_ptr, const char* entry_buf,
bool array_int32_contains_int32(int64_t context_ptr, const int32_t* entry_buf,
int32_t entry_offsets_len,
int32_t contains_data) {
std::cout << "LR array_int32_contains_int32 offset length=" << entry_offsets_len << std::endl;
//std::cout << "LR array_int32_contains_int32 offset length=" << entry_offsets_len << std::endl;
for (int i = 0; i < entry_offsets_len; i++) {
std::cout << "LR going to check " << entry_buf + i << std::endl;
//std::cout << "LR going to check " << entry_buf + i << std::endl;
//LR TODO
//int32_t entry_len = *(entry_buf + i);
// Values appear to arrive in 64-bit slots (cause not yet understood), so step by two int32 elements (i * 2).
int32_t entry_len = *(entry_buf + (i * 2));
std::cout << "LR checking value " << entry_len << " against target " << contains_data << std::endl;
//std::cout << "LR checking value " << entry_len << " against target " << contains_data << std::endl;
if (entry_len == contains_data) {
return true;
}
Expand All @@ -68,11 +68,11 @@ bool array_int32_contains_int32(int64_t context_ptr, const int32_t* entry_buf,
bool array_int64_contains_int64(int64_t context_ptr, const int64_t* entry_buf,
int32_t entry_offsets_len,
int64_t contains_data) {
std::cout << "LR array_int64_contains_int64 offset length=" << entry_offsets_len << std::endl;
//std::cout << "LR array_int64_contains_int64 offset length=" << entry_offsets_len << std::endl;
for (int i = 0; i < entry_offsets_len; i++) {
std::cout << "LR going to check " << entry_buf + i << std::endl;
//std::cout << "LR going to check " << entry_buf + i << std::endl;
int64_t entry_len = *(entry_buf + (i*2)); //LR TODO sizeof int64?
std::cout << "LR checking value " << entry_len << " against target " << contains_data << std::endl;
//std::cout << "LR checking value " << entry_len << " against target " << contains_data << std::endl;
if (entry_len == contains_data) {
return true;
}
Expand All @@ -82,14 +82,14 @@ bool array_int64_contains_int64(int64_t context_ptr, const int64_t* entry_buf,


int32_t* array_int32_make_array(int64_t context_ptr, int32_t contains_data, int32_t* out_len) {
std::cout << "LR array_int32_make_array offset data=" << contains_data << std::endl;
//std::cout << "LR array_int32_make_array offset data=" << contains_data << std::endl;

int integers[] = { contains_data, 21, 3, contains_data, 5 };
*out_len = 5;// * 4;
//length is number of items, but buffers must account for byte size.
uint8_t* ret = gdv_fn_context_arena_malloc(context_ptr, *out_len * 4);
memcpy(ret, integers, *out_len * 4);
std::cout << "LR made a buffer length" << *out_len * 4 << " item 3 is = " << int32_t(ret[3*4]) << std::endl;
//std::cout << "LR made a buffer length" << *out_len * 4 << " item 3 is = " << int32_t(ret[3*4]) << std::endl;


//return reinterpret_cast<int32_t*>(ret);
Expand All @@ -98,15 +98,15 @@ int32_t* array_int32_make_array(int64_t context_ptr, int32_t contains_data, int3

int32_t* array_int32_remove(int64_t context_ptr, const int32_t* entry_buf,
int32_t entry_offsets_len, int32_t remove_data, int32_t* out_len) {
std::cout << "LR array_int32_remove offset data=" << remove_data << std::endl;
//std::cout << "LR array_int32_remove offset data=" << remove_data << std::endl;

//LR sizes are HACK
int* integers = new int[5];
int j = 0;
for (int i = 0; i < entry_offsets_len; i++) {
std::cout << "LR going to check " << entry_buf + i << std::endl;
//std::cout << "LR going to check " << entry_buf + i << std::endl;
int32_t entry_len = *(entry_buf + (i * 2));
std::cout << "LR checking value " << entry_len << " against target " << remove_data << std::endl;
//std::cout << "LR checking value " << entry_len << " against target " << remove_data << std::endl;
if (entry_len == remove_data) {
continue;
} else {
Expand All @@ -118,7 +118,7 @@ int32_t* array_int32_remove(int64_t context_ptr, const int32_t* entry_buf,
//length is number of items, but buffers must account for byte size.
uint8_t* ret = gdv_fn_context_arena_malloc(context_ptr, *out_len * 4);
memcpy(ret, integers, *out_len * 4);
std::cout << "LR made a buffer length" << *out_len * 4 << " item 3 is = " << int32_t(ret[3*4]) << std::endl;
//std::cout << "LR made a buffer length" << *out_len * 4 << " item 3 is = " << int32_t(ret[3*4]) << std::endl;

delete [] integers;
//return reinterpret_cast<int32_t*>(ret);
Expand Down
17 changes: 10 additions & 7 deletions cpp/src/gandiva/expr_decomposer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -38,25 +38,28 @@ namespace gandiva {
Status ExprDecomposer::Visit(const FieldNode& node) {
auto desc = annotator_.CheckAndAddInputFieldDescriptor(node.field());

std::cout << "LR ExprDecomposer" << std::endl;
//std::cout << "LR ExprDecomposer" << std::endl;
DexPtr validity_dex = std::make_shared<VectorReadValidityDex>(desc);
DexPtr value_dex;
if (desc->HasChildOffsetsIdx()) {
std::cout << "LR ExprDecomposer 1" << std::endl;
//std::cout << "LR ExprDecomposer 1" << std::endl;
// handle list<binary> type
value_dex = std::make_shared<VectorReadVarLenValueListDex>(desc);
} else if (desc->HasOffsetsIdx()) {
std::cout << "LR ExprDecomposer 2" << std::endl;
//std::cout << "LR ExprDecomposer 2" << std::endl;
if (desc->field()->type()->id() == arrow::Type::LIST) {
// handle list<primitive> type
std::cout << "LR ExprDecomposer 3" << std::endl;
value_dex = std::make_shared<VectorReadFixedLenValueListDex>(desc);
//std::cout << "LR ExprDecomposer 3" << std::endl;
auto p = std::make_shared<VectorReadFixedLenValueListDex>(desc);
value_dex = p;
int v = p->DataIdx();
//std::cout << "LR primitive list type " v << " " <<
} else {
std::cout << "LR ExprDecomposer 4" << std::endl;
//std::cout << "LR ExprDecomposer 4" << std::endl;
value_dex = std::make_shared<VectorReadVarLenValueDex>(desc);
}
} else {
std::cout << "LR ExprDecomposer 5" << std::endl;
//std::cout << "LR ExprDecomposer 5" << std::endl;
value_dex = std::make_shared<VectorReadFixedLenValueDex>(desc);
}
result_ = std::make_shared<ValueValidityPair>(validity_dex, value_dex);
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/gandiva/function_registry.cc
Original file line number Diff line number Diff line change
Expand Up @@ -72,9 +72,9 @@ SignatureMap FunctionRegistry::InitPCMap() {
pc_registry_.insert(std::end(pc_registry_), v7.begin(), v7.end());

for (auto& elem : pc_registry_) {
std::cout << "LR pc_registry_ item " << elem.pc_name() << " first signature name " << elem.signatures()[0].base_name() << std::endl;
//std::cout << "LR pc_registry_ item " << elem.pc_name() << " first signature name " << elem.signatures()[0].base_name() << std::endl;
for (auto& func_signature : elem.signatures()) {
std::cout << "LR Adding function to map " << func_signature.base_name() << std::endl;
//std::cout << "LR Adding function to map " << func_signature.base_name() << std::endl;
//std::cout << " LR args " << func_signature.param_types
map.insert(std::make_pair(&(func_signature), &elem));
}
Expand Down
9 changes: 5 additions & 4 deletions cpp/src/gandiva/gdv_function_stubs.cc
Original file line number Diff line number Diff line change
Expand Up @@ -164,18 +164,19 @@ int32_t gdv_fn_populate_varlen_vector(int64_t context_ptr, int8_t* data_ptr,
int32_t gdv_fn_populate_list_##TYPE##_vector(int64_t context_ptr, int8_t* data_ptr, \
int32_t* offsets, int64_t slot, \
TYPE* entry_buf, int32_t entry_len) { \
std::cout << "gdv_fn_populate 1" << std::endl; \
std::cout << "gdv_fn_populate 1 data_ptr is " << data_ptr << std::endl; \
auto buffer = reinterpret_cast<arrow::ResizableBuffer*>(data_ptr); \
int32_t offset = static_cast<int32_t>(buffer->size()); \
std::cout << "gdv_fn_populate 2 data_ptr" << data_ptr << " buffer " << buffer << \
" offset " << offset << " entry_len " << entry_len << " scale " << SCALE << std::endl; \
auto status = buffer->Resize(offset + entry_len * SCALE, false /*shrink*/); \
" offset " << offset << " entry_len " << entry_len << " scale " << SCALE << \
" want to resize to " << (offset + entry_len * SCALE) << std::endl; \
/*auto status = buffer->Resize(offset + entry_len * SCALE, false); \
if (!status.ok()) { \
gandiva::ExecutionContext* context = \
reinterpret_cast<gandiva::ExecutionContext*>(context_ptr); \
context->set_error_msg(status.message().c_str()); \
return -1; \
} \
} */ \
std::cout << "gdv_fn_populate resized buffer to =" << offset + entry_len * SCALE << std::endl; \
std::cout << "gdv_fn_populate copying bytes =" << entry_len * SCALE << std::endl; \
std::cout << "gdv_fn_populate buffer =" << buffer->ToString() << " offeset " << offset << std::endl; \
Expand Down
Loading

0 comments on commit 8aaa30b

Please sign in to comment.