Skip to content

Commit

Permalink
add test
Browse files Browse the repository at this point in the history
  • Loading branch information
R-JunmingChen committed Nov 12, 2023
1 parent 66e1c1f commit f6e8958
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 8 deletions.
15 changes: 7 additions & 8 deletions cpp/src/arrow/array/array_dict.cc
Original file line number Diff line number Diff line change
Expand Up @@ -212,12 +212,12 @@ Result<std::shared_ptr<ArrayData>> TransposeDictIndices(
return out_data;
}

struct CompactDictionaryNullValuesVistor {
struct CountDictionaryNullValuesVistor {
const std::shared_ptr<ArrayData>& data;
int64_t& out_null_count;

template <typename IndexArrowType>
Status CompactDictionaryNullValuesImpl() {
Status CountDictionaryNullValuesImpl() {
int64_t index_length = data->length;
int64_t dict_length = data->dictionary->length;
const uint8_t* dictionary_null_bit_map = data->dictionary->GetValues<uint8_t>(0);
Expand Down Expand Up @@ -245,18 +245,18 @@ struct CompactDictionaryNullValuesVistor {

template <typename Type>
enable_if_integer<Type, Status> Visit(const Type&) {
return CompactDictionaryNullValuesImpl<Type>();
return CountDictionaryNullValuesImpl<Type>();
}

Status Visit(const DataType& type) {
return Status::TypeError("Expected an Index Type of Int or UInt");
}
};

Result<int64_t> CompactDictionaryNullValues(const std::shared_ptr<ArrayData>& data) {
Result<int64_t> CountDictionaryNullValues(const std::shared_ptr<ArrayData>& data) {
int64_t out_null_count = 0;
const auto& dict_type = checked_cast<const DictionaryType&>(*data->type);
CompactDictionaryNullValuesVistor vistor{data, out_null_count};
CountDictionaryNullValuesVistor vistor{data, out_null_count};
RETURN_NOT_OK(VisitTypeInline(*dict_type.index_type(), &vistor));

return out_null_count;
Expand Down Expand Up @@ -374,12 +374,11 @@ Result<std::shared_ptr<Array>> DictionaryArray::Transpose(
}

// Counts the logically-null slots of this dictionary array. Going by the expectations in
// TestDictionary.ComputeNullValues, that is the null indices plus the valid indices that
// reference a null dictionary entry.
// NOTE(review): this listing is a rendered diff without +/- markers, so a changed
// statement shows up as two consecutive lines (apparently the pre-commit line first,
// then the post-commit line) -- confirm against the repository before reading it as a
// single function body.
Result<int64_t> DictionaryArray::CountNullValues() const {
if (this->dictionary()->null_count() == 0 || this->indices()->null_count() == 0) {
if (this->dictionary()->null_count() == 0 || this->indices()->length() == 0) {
// Early exit: only the indices' own null count is returned on this path.
return this->indices()->null_count();
}

// Otherwise the count produced by the dictionary-aware helper is added to the indices'
// own null count.
ARROW_ASSIGN_OR_RAISE(int64_t dictionary_null_count,
CompactDictionaryNullValues(data_));
ARROW_ASSIGN_OR_RAISE(int64_t dictionary_null_count, CountDictionaryNullValues(data_));
return dictionary_null_count + this->indices()->null_count();
}

Expand Down
41 changes: 41 additions & 0 deletions cpp/src/arrow/array/array_dict_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1428,6 +1428,47 @@ TEST(TestDictionary, IndicesArray) {
ASSERT_OK(arr->indices()->ValidateFull());
}

void CheckDictionaryComputeNullValues(const std::shared_ptr<DataType>& dict_type,
const std::string& input_dictionary_json,
const std::string& input_index_json,
const int64_t& expected_null_count) {
auto input = DictArrayFromJSON(dict_type, input_index_json, input_dictionary_json);
const DictionaryArray& input_ref = checked_cast<const DictionaryArray&>(*input);

ASSERT_OK_AND_ASSIGN(int64_t actual, input_ref.CountNullValues());
ASSERT_EQ(expected_null_count, actual);
}

// Exercises DictionaryArray::CountNullValues() for every dictionary index type, with
// nulls absent, present only in the indices, only in the dictionary, or in both.
TEST(TestDictionary, ComputeNullValues) {
  // One scenario: JSON for the (boolean) dictionary, JSON for the indices, and the
  // null count the array is expected to report.
  struct NullCountCase {
    const char* dictionary_json;
    const char* index_json;
    int64_t expected_null_count;
  };

  const NullCountCase kCases[] = {
      // no null value
      {"[]", "[]", 0},
      {"[true, false]", "[0, 1, 0]", 0},

      // only indices contain null value
      {"[true, false]", "[null, 0, 1]", 1},
      {"[true, false]", "[null, null]", 2},

      // only dictionary contains null value
      {"[null, true]", "[]", 0},
      {"[null, true, false]", "[0, 1, 0]", 2},

      // both indices and dictionary contain null value
      {"[null, true, false]", "[0, 1, 0, null]", 3},
      {"[null, true, null, false]", "[null, 1, 0, 2, 3]", 3},
  };

  for (const auto& index_type : all_dictionary_index_types()) {
    ARROW_SCOPED_TRACE("index_type = ", index_type->ToString());

    const std::shared_ptr<arrow::DataType> dict_type =
        dictionary(index_type, boolean());

    // Same scenarios, in the same order, for each index type.
    for (const NullCountCase& scenario : kCases) {
      CheckDictionaryComputeNullValues(dict_type, scenario.dictionary_json,
                                       scenario.index_json,
                                       scenario.expected_null_count);
    }
  }
}

void CheckDictionaryCompact(const std::shared_ptr<DataType>& dict_type,
const std::string& input_dictionary_json,
const std::string& input_index_json,
Expand Down

0 comments on commit f6e8958

Please sign in to comment.