Skip to content

Commit

Permalink
Add stream parameter to external dict APIs (#14115)
Browse files Browse the repository at this point in the history
This PR adds a `stream` parameter to the public dictionary APIs, which include:

1. `cudf::dictionary::encode`
2. `cudf::dictionary::decode`
3. `cudf::dictionary::get_index`
4. `cudf::dictionary::add_keys`
5. `cudf::dictionary::remove_keys`
6. `cudf::dictionary::remove_unused_keys`
7. `cudf::dictionary::set_keys` 
8. `cudf::dictionary::match_dictionaries`

Reference: [#13744](#13744)

Authors:
  - Suraj Aralihalli (https://github.com/SurajAralihalli)
  - Yunsong Wang (https://github.com/PointKernel)

Approvers:
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: #14115
  • Loading branch information
SurajAralihalli authored Sep 25, 2023
1 parent 1b925bf commit f3402c4
Show file tree
Hide file tree
Showing 12 changed files with 164 additions and 27 deletions.
6 changes: 5 additions & 1 deletion cpp/include/cudf/dictionary/encode.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2022, NVIDIA CORPORATION.
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -53,12 +53,14 @@ namespace dictionary {
*
* @param column The column to dictionary encode
* @param indices_type The integer type to use for the indices
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned column's device memory
* @return Returns a dictionary column
*/
std::unique_ptr<column> encode(
column_view const& column,
data_type indices_type = data_type{type_id::UINT32},
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
Expand All @@ -72,11 +74,13 @@ std::unique_ptr<column> encode(
* @endcode
*
* @param dictionary_column Existing dictionary column
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned column's device memory
* @return New column with type matching the dictionary_column's keys
*/
std::unique_ptr<column> decode(
dictionary_column_view const& dictionary_column,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/** @} */ // end of group
Expand Down
6 changes: 4 additions & 2 deletions cpp/include/cudf/dictionary/search.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2022, NVIDIA CORPORATION.
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -37,12 +37,14 @@ namespace dictionary {
*
* @param dictionary The dictionary to search for the key.
* @param key The value to search for in the dictionary keyset.
* @param stream CUDA stream used for device memory operations and kernel launches.
* @param mr Device memory resource used to allocate the returned scalar's device memory.
* @return Numeric scalar index value of the key within the dictionary
* @return Numeric scalar index value of the key within the dictionary.
*/
std::unique_ptr<scalar> get_index(
dictionary_column_view const& dictionary,
scalar const& key,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/** @} */ // end of group
Expand Down
16 changes: 13 additions & 3 deletions cpp/include/cudf/dictionary/update_keys.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2022, NVIDIA CORPORATION.
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -51,13 +51,15 @@ namespace dictionary {
* @throw cudf_logic_error if the new_keys contain nulls.
*
* @param dictionary_column Existing dictionary column.
* @param new_keys New keys to incorporate into the dictionary_column
* @param new_keys New keys to incorporate into the dictionary_column.
* @param stream CUDA stream used for device memory operations and kernel launches.
* @param mr Device memory resource used to allocate the returned column's device memory.
* @return New dictionary column.
*/
std::unique_ptr<column> add_keys(
dictionary_column_view const& dictionary_column,
column_view const& new_keys,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
Expand All @@ -81,13 +83,15 @@ std::unique_ptr<column> add_keys(
* @throw cudf_logic_error if the keys_to_remove contain nulls.
*
* @param dictionary_column Existing dictionary column.
* @param keys_to_remove The keys to remove from the dictionary_column
* @param keys_to_remove The keys to remove from the dictionary_column.
* @param stream CUDA stream used for device memory operations and kernel launches.
* @param mr Device memory resource used to allocate the returned column's device memory.
* @return New dictionary column.
*/
std::unique_ptr<column> remove_keys(
dictionary_column_view const& dictionary_column,
column_view const& keys_to_remove,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
Expand All @@ -103,11 +107,13 @@ std::unique_ptr<column> remove_keys(
* @endcode
*
* @param dictionary_column Existing dictionary column.
* @param stream CUDA stream used for device memory operations and kernel launches.
* @param mr Device memory resource used to allocate the returned column's device memory.
* @return New dictionary column.
*/
std::unique_ptr<column> remove_unused_keys(
dictionary_column_view const& dictionary_column,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
Expand All @@ -134,12 +140,14 @@ std::unique_ptr<column> remove_unused_keys(
*
* @param dictionary_column Existing dictionary column.
* @param keys New keys to use for the output column. Must not contain nulls.
* @param stream CUDA stream used for device memory operations and kernel launches.
* @param mr Device memory resource used to allocate the returned column's device memory.
* @return New dictionary column.
*/
std::unique_ptr<column> set_keys(
dictionary_column_view const& dictionary_column,
column_view const& keys,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
Expand All @@ -149,11 +157,13 @@ std::unique_ptr<column> set_keys(
* The result is a vector of new dictionaries with a common set of keys.
*
* @param input Dictionary columns to match keys.
* @param stream CUDA stream used for device memory operations and kernel launches.
* @param mr Device memory resource used to allocate the returned column's device memory.
* @return New dictionary columns.
*/
std::vector<std::unique_ptr<column>> match_dictionaries(
cudf::host_span<dictionary_column_view const> input,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/** @} */ // end of group
Expand Down
18 changes: 13 additions & 5 deletions cpp/include/cudf_test/column_wrapper.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -944,8 +944,10 @@ class dictionary_column_wrapper : public detail::column_wrapper {
template <typename InputIterator>
dictionary_column_wrapper(InputIterator begin, InputIterator end) : column_wrapper{}
{
wrapped = cudf::dictionary::encode(
fixed_width_column_wrapper<KeyElementTo, SourceElementT>(begin, end));
wrapped =
cudf::dictionary::encode(fixed_width_column_wrapper<KeyElementTo, SourceElementT>(begin, end),
cudf::data_type{type_id::UINT32},
cudf::test::get_default_stream());
}

/**
Expand Down Expand Up @@ -978,7 +980,9 @@ class dictionary_column_wrapper : public detail::column_wrapper {
: column_wrapper{}
{
wrapped = cudf::dictionary::encode(
fixed_width_column_wrapper<KeyElementTo, SourceElementT>(begin, end, v));
fixed_width_column_wrapper<KeyElementTo, SourceElementT>(begin, end, v),
cudf::data_type{type_id::UINT32},
cudf::test::get_default_stream());
}

/**
Expand Down Expand Up @@ -1134,7 +1138,9 @@ class dictionary_column_wrapper<std::string> : public detail::column_wrapper {
template <typename StringsIterator>
dictionary_column_wrapper(StringsIterator begin, StringsIterator end) : column_wrapper{}
{
wrapped = cudf::dictionary::encode(strings_column_wrapper(begin, end));
wrapped = cudf::dictionary::encode(strings_column_wrapper(begin, end),
cudf::data_type{type_id::UINT32},
cudf::test::get_default_stream());
}

/**
Expand Down Expand Up @@ -1169,7 +1175,9 @@ class dictionary_column_wrapper<std::string> : public detail::column_wrapper {
dictionary_column_wrapper(StringsIterator begin, StringsIterator end, ValidityIterator v)
: column_wrapper{}
{
wrapped = cudf::dictionary::encode(strings_column_wrapper(begin, end, v));
wrapped = cudf::dictionary::encode(strings_column_wrapper(begin, end, v),
cudf::data_type{type_id::UINT32},
cudf::test::get_default_stream());
}

/**
Expand Down
3 changes: 2 additions & 1 deletion cpp/src/dictionary/add_keys.cu
Original file line number Diff line number Diff line change
Expand Up @@ -130,10 +130,11 @@ std::unique_ptr<column> add_keys(dictionary_column_view const& dictionary_column

std::unique_ptr<column> add_keys(dictionary_column_view const& dictionary_column,
column_view const& keys,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
return detail::add_keys(dictionary_column, keys, cudf::get_default_stream(), mr);
return detail::add_keys(dictionary_column, keys, stream, mr);
}

} // namespace dictionary
Expand Down
5 changes: 3 additions & 2 deletions cpp/src/dictionary/decode.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2022, NVIDIA CORPORATION.
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -65,10 +65,11 @@ std::unique_ptr<column> decode(dictionary_column_view const& source,
} // namespace detail

std::unique_ptr<column> decode(dictionary_column_view const& source,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
return detail::decode(source, cudf::get_default_stream(), mr);
return detail::decode(source, stream, mr);
}

} // namespace dictionary
Expand Down
5 changes: 3 additions & 2 deletions cpp/src/dictionary/encode.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2022, NVIDIA CORPORATION.
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -89,10 +89,11 @@ data_type get_indices_type_for_size(size_type keys_size)

std::unique_ptr<column> encode(column_view const& input_column,
data_type indices_type,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
return detail::encode(input_column, indices_type, cudf::get_default_stream(), mr);
return detail::encode(input_column, indices_type, stream, mr);
}

} // namespace dictionary
Expand Down
6 changes: 4 additions & 2 deletions cpp/src/dictionary/remove_keys.cu
Original file line number Diff line number Diff line change
Expand Up @@ -195,17 +195,19 @@ std::unique_ptr<column> remove_unused_keys(dictionary_column_view const& diction

std::unique_ptr<column> remove_keys(dictionary_column_view const& dictionary_column,
column_view const& keys_to_remove,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
return detail::remove_keys(dictionary_column, keys_to_remove, cudf::get_default_stream(), mr);
return detail::remove_keys(dictionary_column, keys_to_remove, stream, mr);
}

std::unique_ptr<column> remove_unused_keys(dictionary_column_view const& dictionary_column,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
return detail::remove_unused_keys(dictionary_column, cudf::get_default_stream(), mr);
return detail::remove_unused_keys(dictionary_column, stream, mr);
}

} // namespace dictionary
Expand Down
11 changes: 5 additions & 6 deletions cpp/src/dictionary/search.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2022, NVIDIA CORPORATION.
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -79,10 +79,8 @@ struct find_index_fn {
using ScalarType = cudf::scalar_type_t<Element>;
auto find_key = static_cast<ScalarType const&>(key).value(stream);
auto keys_view = column_device_view::create(input.keys(), stream);
auto iter = thrust::equal_range(rmm::exec_policy(cudf::get_default_stream()),
keys_view->begin<Element>(),
keys_view->end<Element>(),
find_key);
auto iter = thrust::equal_range(
rmm::exec_policy(stream), keys_view->begin<Element>(), keys_view->end<Element>(), find_key);
return type_dispatcher(input.indices().type(),
dispatch_scalar_index{},
thrust::distance(keys_view->begin<Element>(), iter.first),
Expand Down Expand Up @@ -176,10 +174,11 @@ std::unique_ptr<scalar> get_insert_index(dictionary_column_view const& dictionar

std::unique_ptr<scalar> get_index(dictionary_column_view const& dictionary,
scalar const& key,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
return detail::get_index(dictionary, key, cudf::get_default_stream(), mr);
return detail::get_index(dictionary, key, stream, mr);
}

} // namespace dictionary
Expand Down
9 changes: 6 additions & 3 deletions cpp/src/dictionary/set_keys.cu
Original file line number Diff line number Diff line change
Expand Up @@ -241,17 +241,20 @@ std::pair<std::vector<std::unique_ptr<column>>, std::vector<table_view>> match_d

std::unique_ptr<column> set_keys(dictionary_column_view const& dictionary_column,
column_view const& keys,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
return detail::set_keys(dictionary_column, keys, cudf::get_default_stream(), mr);
return detail::set_keys(dictionary_column, keys, stream, mr);
}

std::vector<std::unique_ptr<column>> match_dictionaries(
cudf::host_span<dictionary_column_view const> input, rmm::mr::device_memory_resource* mr)
cudf::host_span<dictionary_column_view const> input,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
return detail::match_dictionaries(input, cudf::get_default_stream(), mr);
return detail::match_dictionaries(input, stream, mr);
}

} // namespace dictionary
Expand Down
1 change: 1 addition & 0 deletions cpp/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -629,6 +629,7 @@ ConfigureTest(STREAM_HASHING_TEST streams/hash_test.cpp STREAM_MODE testing)
ConfigureTest(STREAM_INTEROP_TEST streams/interop_test.cpp STREAM_MODE testing)
ConfigureTest(STREAM_REPLACE_TEST streams/replace_test.cpp STREAM_MODE testing)
ConfigureTest(STREAM_SEARCH_TEST streams/search_test.cpp STREAM_MODE testing)
ConfigureTest(STREAM_DICTIONARY_TEST streams/dictionary_test.cpp STREAM_MODE testing)
ConfigureTest(
STREAM_STRINGS_TEST streams/strings/case_test.cpp streams/strings/find_test.cpp STREAM_MODE
testing
Expand Down
Loading

0 comments on commit f3402c4

Please sign in to comment.