diff --git a/cpp/src/arrow/array/array_dict.cc b/cpp/src/arrow/array/array_dict.cc index 0b714ddc2b9ee..ee0d14c24b41b 100644 --- a/cpp/src/arrow/array/array_dict.cc +++ b/cpp/src/arrow/array/array_dict.cc @@ -212,12 +212,12 @@ Result> TransposeDictIndices( return out_data; } -struct CompactDictionaryNullValuesVistor { +struct CountDictionaryNullValuesVistor { const std::shared_ptr& data; int64_t& out_null_count; template - Status CompactDictionaryNullValuesImpl() { + Status CountDictionaryNullValuesImpl() { int64_t index_length = data->length; int64_t dict_length = data->dictionary->length; const uint8_t* dictionary_null_bit_map = data->dictionary->GetValues(0); @@ -245,7 +245,7 @@ struct CompactDictionaryNullValuesVistor { template enable_if_integer Visit(const Type&) { - return CompactDictionaryNullValuesImpl(); + return CountDictionaryNullValuesImpl(); } Status Visit(const DataType& type) { @@ -253,10 +253,10 @@ struct CompactDictionaryNullValuesVistor { } }; -Result CompactDictionaryNullValues(const std::shared_ptr& data) { +Result CountDictionaryNullValues(const std::shared_ptr& data) { int64_t out_null_count = 0; const auto& dict_type = checked_cast(*data->type); - CompactDictionaryNullValuesVistor vistor{data, out_null_count}; + CountDictionaryNullValuesVistor vistor{data, out_null_count}; RETURN_NOT_OK(VisitTypeInline(*dict_type.index_type(), &vistor)); return out_null_count; @@ -374,12 +374,11 @@ Result> DictionaryArray::Transpose( } Result DictionaryArray::CountNullValues() const { - if (this->dictionary()->null_count() == 0 || this->indices()->null_count() == 0) { + if (this->dictionary()->null_count() == 0 || this->indices()->length() == 0) { return this->indices()->null_count(); } - ARROW_ASSIGN_OR_RAISE(int64_t dictionary_null_count, - CompactDictionaryNullValues(data_)); + ARROW_ASSIGN_OR_RAISE(int64_t dictionary_null_count, CountDictionaryNullValues(data_)); return dictionary_null_count + this->indices()->null_count(); } diff --git a/cpp/src/arrow/array/array_dict_test.cc b/cpp/src/arrow/array/array_dict_test.cc index 2f3ee6e2d49a5..87821a52c7637 100644 --- a/cpp/src/arrow/array/array_dict_test.cc +++ b/cpp/src/arrow/array/array_dict_test.cc @@ -1428,6 +1428,47 @@ TEST(TestDictionary, IndicesArray) { ASSERT_OK(arr->indices()->ValidateFull()); } +void CheckDictionaryComputeNullValues(const std::shared_ptr& dict_type, + const std::string& input_dictionary_json, + const std::string& input_index_json, + const int64_t& expected_null_count) { + auto input = DictArrayFromJSON(dict_type, input_index_json, input_dictionary_json); + const DictionaryArray& input_ref = checked_cast(*input); + + ASSERT_OK_AND_ASSIGN(int64_t actual, input_ref.CountNullValues()); + ASSERT_EQ(expected_null_count, actual); +} + +TEST(TestDictionary, ComputeNullValues) { + std::shared_ptr type; + std::shared_ptr dict_type; + + for (const auto& index_type : all_dictionary_index_types()) { + ARROW_SCOPED_TRACE("index_type = ", index_type->ToString()); + + type = boolean(); + dict_type = dictionary(index_type, type); + + // no null value + CheckDictionaryComputeNullValues(dict_type, "[]", "[]", 0); + CheckDictionaryComputeNullValues(dict_type, "[true, false]", "[0, 1, 0]", 0); + + // only indices contain null value + CheckDictionaryComputeNullValues(dict_type, "[true, false]", "[null, 0, 1]", 1); + CheckDictionaryComputeNullValues(dict_type, "[true, false]", "[null, null]", 2); + + // only dictionary contains null value + CheckDictionaryComputeNullValues(dict_type, "[null, true]", "[]", 0); + CheckDictionaryComputeNullValues(dict_type, "[null, true, false]", "[0, 1, 0]", 2); + + // both indices and dictionary contain null value + CheckDictionaryComputeNullValues(dict_type, "[null, true, false]", "[0, 1, 0, null]", + 3); + CheckDictionaryComputeNullValues(dict_type, "[null, true, null, false]", + "[null, 1, 0, 2, 3]", 3); + } +} + void CheckDictionaryCompact(const std::shared_ptr& dict_type, const std::string& input_dictionary_json, const std::string& input_index_json,