Skip to content

Commit

Permalink
Expose stream parameter in public strings find APIs (#14060)
Browse files Browse the repository at this point in the history
Add stream parameter to public APIs:

- `cudf::strings::find()`
- `cudf::strings::rfind()`
- `cudf::strings::contains()`
- `cudf::strings::starts_with()`
- `cudf::strings::ends_with()`
- `cudf::strings::findall()`
- `cudf::strings::find_multiple()`

Also cleaned up some of the doxygen comments. 

Reference #13744

Authors:
  - David Wendt (https://github.com/davidwendt)

Approvers:
  - Vyas Ramasubramani (https://github.com/vyasr)
  - Vukasin Milovanovic (https://github.com/vuule)

URL: #14060
  • Loading branch information
davidwendt authored Sep 21, 2023
1 parent e87d2fc commit fe99e4b
Show file tree
Hide file tree
Showing 8 changed files with 143 additions and 61 deletions.
102 changes: 59 additions & 43 deletions cpp/include/cudf/strings/find.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,19 +43,21 @@ namespace strings {
*
* @throw cudf::logic_error if start position is greater than stop position.
*
* @param strings Strings instance for this operation.
* @param target UTF-8 encoded string to search for in each string.
* @param start First character position to include in the search.
* @param input Strings instance for this operation
* @param target UTF-8 encoded string to search for in each string
* @param start First character position to include in the search
* @param stop Last position (exclusive) to include in the search.
* Default of -1 will search to the end of the string.
* @param mr Device memory resource used to allocate the returned column's device memory.
* @return New integer column with character position values.
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned column's device memory
* @return New integer column with character position values
*/
std::unique_ptr<column> find(
strings_column_view const& strings,
strings_column_view const& input,
string_scalar const& target,
size_type start = 0,
size_type stop = -1,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
Expand All @@ -72,19 +74,21 @@ std::unique_ptr<column> find(
*
* @throw cudf::logic_error if start position is greater than stop position.
*
* @param strings Strings instance for this operation.
* @param target UTF-8 encoded string to search for in each string.
* @param start First position to include in the search.
* @param input Strings instance for this operation
* @param target UTF-8 encoded string to search for in each string
* @param start First position to include in the search
* @param stop Last position (exclusive) to include in the search.
* Default of -1 will search starting at the end of the string.
* @param mr Device memory resource used to allocate the returned column's device memory.
* @return New integer column with character position values.
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned column's device memory
* @return New integer column with character position values
*/
std::unique_ptr<column> rfind(
strings_column_view const& strings,
strings_column_view const& input,
string_scalar const& target,
size_type start = 0,
size_type stop = -1,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
Expand Down Expand Up @@ -123,37 +127,41 @@ std::unique_ptr<column> find(
*
* Any null string entries return corresponding null entries in the output columns.
*
* @param strings Strings instance for this operation.
* @param target UTF-8 encoded string to search for in each string.
* @param mr Device memory resource used to allocate the returned column's device memory.
* @return New type_id::BOOL8 column.
* @param input Strings instance for this operation
* @param target UTF-8 encoded string to search for in each string
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned column's device memory
* @return New BOOL8 column
*/
std::unique_ptr<column> contains(
strings_column_view const& strings,
strings_column_view const& input,
string_scalar const& target,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Returns a column of boolean values for each string where true indicates
* the corresponding target string was found within that string in the provided column.
*
* The 'output[i] = true` if string `targets[i]` is found inside `strings[i]` otherwise
* The `output[i] = true` if string `targets[i]` is found inside `input[i]` otherwise
* `output[i] = false`.
* If `targets[i]` is an empty string, true is returned for `output[i]`.
* If `targets[i]` is null, false is returned for `output[i]`.
*
* Any null `strings[i]` row results in a null `output[i]` row.
* Any null string entries return corresponding null entries in the output columns.
*
* @throw cudf::logic_error if `input.size() != targets.size()`.
*
* @param strings Strings instance for this operation.
* @param targets Strings column of targets to check row-wise in `strings`.
* @param mr Device memory resource used to allocate the returned column's device memory.
* @return New type_id::BOOL8 column.
* @param input Strings instance for this operation
* @param targets Strings column of targets to check row-wise in `input`
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned column's device memory
* @return New BOOL8 column
*/
std::unique_ptr<column> contains(
strings_column_view const& strings,
strings_column_view const& input,
strings_column_view const& targets,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
Expand All @@ -166,14 +174,16 @@ std::unique_ptr<column> contains(
*
* Any null string entries return corresponding null entries in the output columns.
*
* @param strings Strings instance for this operation.
* @param target UTF-8 encoded string to search for in each string.
* @param mr Device memory resource used to allocate the returned column's device memory.
* @param input Strings instance for this operation
* @param target UTF-8 encoded string to search for in each string
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned column's device memory
* @return New BOOL8 column
*/
std::unique_ptr<column> starts_with(
strings_column_view const& strings,
strings_column_view const& input,
string_scalar const& target,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
Expand All @@ -190,14 +200,16 @@ std::unique_ptr<column> starts_with(
*
* @throw cudf::logic_error if `input.size() != targets.size()`.
*
* @param strings Strings instance for this operation.
* @param targets Strings instance for this operation.
* @param mr Device memory resource used to allocate the returned column's device memory.
* @return New type_id::BOOL8 column.
* @param input Strings instance for this operation
* @param targets Strings column of targets to check row-wise against `input`
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned column's device memory
* @return New BOOL8 column
*/
std::unique_ptr<column> starts_with(
strings_column_view const& strings,
strings_column_view const& input,
strings_column_view const& targets,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
Expand All @@ -210,14 +222,16 @@ std::unique_ptr<column> starts_with(
*
* Any null string entries return corresponding null entries in the output columns.
*
* @param strings Strings instance for this operation.
* @param target UTF-8 encoded string to search for in each string.
* @param mr Device memory resource used to allocate the returned column's device memory.
* @return New type_id::BOOL8 column.
* @param input Strings instance for this operation
* @param target UTF-8 encoded string to search for in each string
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned column's device memory
* @return New BOOL8 column
*/
std::unique_ptr<column> ends_with(
strings_column_view const& strings,
strings_column_view const& input,
string_scalar const& target,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
Expand All @@ -234,14 +248,16 @@ std::unique_ptr<column> ends_with(
*
* @throw cudf::logic_error if `input.size() != targets.size()`.
*
* @param strings Strings instance for this operation.
* @param targets Strings instance for this operation.
* @param mr Device memory resource used to allocate the returned column's device memory.
* @return New type_id::BOOL8 column.
* @param input Strings instance for this operation
* @param targets Strings column of targets to check row-wise against `input`
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned column's device memory
* @return New BOOL8 column
*/
std::unique_ptr<column> ends_with(
strings_column_view const& strings,
strings_column_view const& input,
strings_column_view const& targets,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
/** @} */ // end of doxygen group
} // namespace strings
Expand Down
12 changes: 7 additions & 5 deletions cpp/include/cudf/strings/find_multiple.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2022, NVIDIA CORPORATION.
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -48,14 +48,16 @@ namespace strings {
*
* @throw cudf::logic_error if `targets` is empty or contains nulls
*
* @param input Strings instance for this operation.
* @param targets Strings to search for in each string.
* @param mr Device memory resource used to allocate the returned column's device memory.
* @return Lists column with character position values.
* @param input Strings instance for this operation
* @param targets Strings to search for in each string
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned column's device memory
* @return Lists column with character position values
*/
std::unique_ptr<column> find_multiple(
strings_column_view const& input,
strings_column_view const& targets,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/** @} */ // end of doxygen group
Expand Down
2 changes: 2 additions & 0 deletions cpp/include/cudf/strings/findall.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,12 +57,14 @@ struct regex_program;
*
* @param input Strings instance for this operation
* @param prog Regex program instance
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned column's device memory
* @return New lists column of strings
*/
std::unique_ptr<column> findall(
strings_column_view const& input,
regex_program const& prog,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/** @} */ // end of doxygen group
Expand Down
24 changes: 16 additions & 8 deletions cpp/src/strings/search/find.cu
Original file line number Diff line number Diff line change
Expand Up @@ -305,20 +305,22 @@ std::unique_ptr<column> find(strings_column_view const& strings,
string_scalar const& target,
size_type start,
size_type stop,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
return detail::find(strings, target, start, stop, cudf::get_default_stream(), mr);
return detail::find(strings, target, start, stop, stream, mr);
}

std::unique_ptr<column> rfind(strings_column_view const& strings,
string_scalar const& target,
size_type start,
size_type stop,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
return detail::rfind(strings, target, start, stop, cudf::get_default_stream(), mr);
return detail::rfind(strings, target, start, stop, stream, mr);
}

std::unique_ptr<column> find(strings_column_view const& input,
Expand Down Expand Up @@ -618,50 +620,56 @@ std::unique_ptr<column> ends_with(strings_column_view const& strings,

std::unique_ptr<column> contains(strings_column_view const& strings,
string_scalar const& target,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
return detail::contains(strings, target, cudf::get_default_stream(), mr);
return detail::contains(strings, target, stream, mr);
}

std::unique_ptr<column> contains(strings_column_view const& strings,
strings_column_view const& targets,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
return detail::contains(strings, targets, cudf::get_default_stream(), mr);
return detail::contains(strings, targets, stream, mr);
}

std::unique_ptr<column> starts_with(strings_column_view const& strings,
string_scalar const& target,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
return detail::starts_with(strings, target, cudf::get_default_stream(), mr);
return detail::starts_with(strings, target, stream, mr);
}

std::unique_ptr<column> starts_with(strings_column_view const& strings,
strings_column_view const& targets,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
return detail::starts_with(strings, targets, cudf::get_default_stream(), mr);
return detail::starts_with(strings, targets, stream, mr);
}

std::unique_ptr<column> ends_with(strings_column_view const& strings,
string_scalar const& target,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
return detail::ends_with(strings, target, cudf::get_default_stream(), mr);
return detail::ends_with(strings, target, stream, mr);
}

std::unique_ptr<column> ends_with(strings_column_view const& strings,
strings_column_view const& targets,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
return detail::ends_with(strings, targets, cudf::get_default_stream(), mr);
return detail::ends_with(strings, targets, stream, mr);
}

} // namespace strings
Expand Down
7 changes: 4 additions & 3 deletions cpp/src/strings/search/find_multiple.cu
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,8 @@ std::unique_ptr<column> find_multiple(strings_column_view const& input,
results->set_null_count(0);

auto offsets = cudf::detail::sequence(strings_count + 1,
numeric_scalar<size_type>(0),
numeric_scalar<size_type>(targets_count),
numeric_scalar<size_type>(0, true, stream),
numeric_scalar<size_type>(targets_count, true, stream),
stream,
mr);
return make_lists_column(strings_count,
Expand All @@ -88,10 +88,11 @@ std::unique_ptr<column> find_multiple(strings_column_view const& input,
// external API
std::unique_ptr<column> find_multiple(strings_column_view const& input,
strings_column_view const& targets,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
return detail::find_multiple(input, targets, cudf::get_default_stream(), mr);
return detail::find_multiple(input, targets, stream, mr);
}

} // namespace strings
Expand Down
3 changes: 2 additions & 1 deletion cpp/src/strings/search/findall.cu
Original file line number Diff line number Diff line change
Expand Up @@ -134,10 +134,11 @@ std::unique_ptr<column> findall(strings_column_view const& input,

std::unique_ptr<column> findall(strings_column_view const& input,
regex_program const& prog,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
return detail::findall(input, prog, cudf::get_default_stream(), mr);
return detail::findall(input, prog, stream, mr);
}

} // namespace strings
Expand Down
5 changes: 4 additions & 1 deletion cpp/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -627,7 +627,10 @@ ConfigureTest(STREAM_CONCATENATE_TEST streams/concatenate_test.cpp STREAM_MODE t
ConfigureTest(STREAM_FILLING_TEST streams/filling_test.cpp STREAM_MODE testing)
ConfigureTest(STREAM_REPLACE_TEST streams/replace_test.cpp STREAM_MODE testing)
ConfigureTest(STREAM_SEARCH_TEST streams/search_test.cpp STREAM_MODE testing)
ConfigureTest(STREAM_STRINGS_TEST streams/strings/case_test.cpp STREAM_MODE testing)
ConfigureTest(
STREAM_STRINGS_TEST streams/strings/case_test.cpp streams/strings/find_test.cpp STREAM_MODE
testing
)

# ##################################################################################################
# Install tests ####################################################################################
Expand Down
Loading

0 comments on commit fe99e4b

Please sign in to comment.