From eba5044687372787c88ba800d580b2183eae3325 Mon Sep 17 00:00:00 2001 From: mwish Date: Tue, 24 Oct 2023 22:01:27 +0800 Subject: [PATCH] Prepare GH-38432 --- cpp/src/parquet/encoding.cc | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/cpp/src/parquet/encoding.cc b/cpp/src/parquet/encoding.cc index 5221f2588c0d3..8ad1112738e30 100644 --- a/cpp/src/parquet/encoding.cc +++ b/cpp/src/parquet/encoding.cc @@ -1205,16 +1205,22 @@ struct ArrowBinaryHelper { return Status::OK(); } - Status PrepareNextInput(int64_t next_value_length, - std::optional estimated_remaining_data_length = {}) { + Status PrepareNextInput(int64_t next_value_length) { + if (ARROW_PREDICT_FALSE(!CanFit(next_value_length))) { + // This element would exceed the capacity of a chunk + return PushChunk(); + } + return Status::OK(); + } + + Status PrepareNextInputWithEstimatedLength(int64_t next_value_length, + int64_t estimated_remaining_data_length) { if (ARROW_PREDICT_FALSE(!CanFit(next_value_length))) { // This element would exceed the capacity of a chunk RETURN_NOT_OK(PushChunk()); RETURN_NOT_OK(acc_->builder->Reserve(entries_remaining_)); - if (estimated_remaining_data_length.has_value()) { - RETURN_NOT_OK(acc_->builder->ReserveData( - std::min(*estimated_remaining_data_length, chunk_space_remaining_))); - } + RETURN_NOT_OK(acc_->builder->ReserveData( + std::min(estimated_remaining_data_length, chunk_space_remaining_))); } return Status::OK(); } @@ -1271,8 +1277,10 @@ struct ArrowBinaryHelper { return acc_->Reserve(entries_remaining_); } - Status PrepareNextInput(int64_t next_value_length, - std::optional estimated_remaining_data_length = {}) { + Status PrepareNextInput(int64_t next_value_length) { return Status::OK(); } + + Status PrepareNextInputWithEstimatedLength(int64_t next_value_length, + int64_t estimated_remaining_data_length) { return Status::OK(); } @@ -1421,7 +1429,7 @@ class PlainByteArrayDecoder : public PlainDecoder, if (ARROW_PREDICT_FALSE(len_ < increment)) { ParquetException::EofException(); } - RETURN_NOT_OK(helper.PrepareNextInput(value_len, len_)); + RETURN_NOT_OK(helper.PrepareNextInputWithEstimatedLength(value_len, len_)); helper.UnsafeAppend(data_ + 4, value_len); data_ += increment; len_ -= increment; @@ -1915,7 +1923,7 @@ class DictByteArrayDecoderImpl : public DictDecoderImpl, int32_t indices[kBufferSize]; ArrowBinaryHelper helper(out, num_values); - RETURN_NOT_OK(helper.Prepare()); + // RETURN_NOT_OK(helper.Prepare()); auto dict_values = reinterpret_cast(dictionary_->data()); int values_decoded = 0; @@ -1983,7 +1991,7 @@ class DictByteArrayDecoderImpl : public DictDecoderImpl, int values_decoded = 0; ArrowBinaryHelper helper(out, num_values); - RETURN_NOT_OK(helper.Prepare(len_)); + // RETURN_NOT_OK(helper.Prepare(len_)); auto dict_values = reinterpret_cast(dictionary_->data());