diff --git a/.devcontainer/cuda12.5-conda/devcontainer.json b/.devcontainer/cuda12.8-conda/devcontainer.json similarity index 91% rename from .devcontainer/cuda12.5-conda/devcontainer.json rename to .devcontainer/cuda12.8-conda/devcontainer.json index 3ed6fa9c37b..ad36130c6a0 100644 --- a/.devcontainer/cuda12.5-conda/devcontainer.json +++ b/.devcontainer/cuda12.8-conda/devcontainer.json @@ -3,7 +3,7 @@ "context": "${localWorkspaceFolder}/.devcontainer", "dockerfile": "${localWorkspaceFolder}/.devcontainer/Dockerfile", "args": { - "CUDA": "12.5", + "CUDA": "12.8", "PYTHON_PACKAGE_MANAGER": "conda", "BASE": "rapidsai/devcontainers:25.02-cpp-mambaforge-ubuntu22.04" } @@ -11,7 +11,7 @@ "runArgs": [ "--rm", "--name", - "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-25.02-cuda12.5-conda" + "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-25.02-cuda12.8-conda" ], "hostRequirements": {"gpu": "optional"}, "features": { @@ -20,7 +20,7 @@ "overrideFeatureInstallOrder": [ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" ], - "initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config,conda/pkgs,conda/${localWorkspaceFolderBasename}-cuda12.5-envs}"], + "initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config,conda/pkgs,conda/${localWorkspaceFolderBasename}-cuda12.8-envs}"], "postAttachCommand": ["/bin/bash", "-c", "if [ ${CODESPACES:-false} = 'true' ]; then . devcontainer-utils-post-attach-command; . 
rapids-post-attach-command; fi"], "workspaceFolder": "/home/coder", "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/cugraph,type=bind,consistency=consistent", @@ -29,7 +29,7 @@ "source=${localWorkspaceFolder}/../.cache,target=/home/coder/.cache,type=bind,consistency=consistent", "source=${localWorkspaceFolder}/../.config,target=/home/coder/.config,type=bind,consistency=consistent", "source=${localWorkspaceFolder}/../.conda/pkgs,target=/home/coder/.conda/pkgs,type=bind,consistency=consistent", - "source=${localWorkspaceFolder}/../.conda/${localWorkspaceFolderBasename}-cuda12.5-envs,target=/home/coder/.conda/envs,type=bind,consistency=consistent" + "source=${localWorkspaceFolder}/../.conda/${localWorkspaceFolderBasename}-cuda12.8-envs,target=/home/coder/.conda/envs,type=bind,consistency=consistent" ], "customizations": { "vscode": { diff --git a/.devcontainer/cuda12.5-pip/devcontainer.json b/.devcontainer/cuda12.8-pip/devcontainer.json similarity index 88% rename from .devcontainer/cuda12.5-pip/devcontainer.json rename to .devcontainer/cuda12.8-pip/devcontainer.json index fe402024e29..a2955b81a60 100644 --- a/.devcontainer/cuda12.5-pip/devcontainer.json +++ b/.devcontainer/cuda12.8-pip/devcontainer.json @@ -3,20 +3,20 @@ "context": "${localWorkspaceFolder}/.devcontainer", "dockerfile": "${localWorkspaceFolder}/.devcontainer/Dockerfile", "args": { - "CUDA": "12.5", + "CUDA": "12.8", "PYTHON_PACKAGE_MANAGER": "pip", - "BASE": "rapidsai/devcontainers:25.02-cpp-cuda12.5-ucx1.18.0-openmpi-ubuntu22.04" + "BASE": "rapidsai/devcontainers:25.02-cpp-cuda12.8-ucx1.18.0-openmpi-ubuntu22.04" } }, "runArgs": [ "--rm", "--name", - "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-25.02-cuda12.5-pip" + "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-25.02-cuda12.8-pip" ], "hostRequirements": {"gpu": "optional"}, "features": { "ghcr.io/rapidsai/devcontainers/features/cuda:25.2": { - "version": "12.5", + "version": "12.8", 
"installcuBLAS": true, "installcuSOLVER": true, "installcuRAND": true, @@ -28,7 +28,7 @@ "ghcr.io/rapidsai/devcontainers/features/cuda", "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" ], - "initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config/pip,local/share/${localWorkspaceFolderBasename}-cuda12.5-venvs}"], + "initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config/pip,local/share/${localWorkspaceFolderBasename}-cuda12.8-venvs}"], "postAttachCommand": ["/bin/bash", "-c", "if [ ${CODESPACES:-false} = 'true' ]; then . devcontainer-utils-post-attach-command; . rapids-post-attach-command; fi"], "workspaceFolder": "/home/coder", "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/cugraph,type=bind,consistency=consistent", @@ -36,7 +36,7 @@ "source=${localWorkspaceFolder}/../.aws,target=/home/coder/.aws,type=bind,consistency=consistent", "source=${localWorkspaceFolder}/../.cache,target=/home/coder/.cache,type=bind,consistency=consistent", "source=${localWorkspaceFolder}/../.config,target=/home/coder/.config,type=bind,consistency=consistent", - "source=${localWorkspaceFolder}/../.local/share/${localWorkspaceFolderBasename}-cuda12.5-venvs,target=/home/coder/.local/share/venvs,type=bind,consistency=consistent" + "source=${localWorkspaceFolder}/../.local/share/${localWorkspaceFolderBasename}-cuda12.8-venvs,target=/home/coder/.local/share/venvs,type=bind,consistency=consistent" ], "customizations": { "vscode": { diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index e48f2e11acd..8e3134b896e 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -195,7 +195,7 @@ jobs: uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-25.02 with: arch: '["amd64"]' - cuda: '["12.5"]' + cuda: '["12.8"]' node_type: cpu32 build_command: | sccache -z; diff --git 
a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-128_arch-x86_64.yaml similarity index 96% rename from conda/environments/all_cuda-125_arch-x86_64.yaml rename to conda/environments/all_cuda-128_arch-x86_64.yaml index 83126df9194..22c5f594a42 100644 --- a/conda/environments/all_cuda-125_arch-x86_64.yaml +++ b/conda/environments/all_cuda-128_arch-x86_64.yaml @@ -17,7 +17,7 @@ dependencies: - cuda-nvtx-dev - cuda-profiler-api - cuda-python>=12.6.2,<13.0a0 -- cuda-version=12.5 +- cuda-version=12.8 - cudf==25.2.*,>=0.0.0a0 - cupy>=12.0.0 - cxx-compiler @@ -77,4 +77,4 @@ dependencies: - torchmetrics - ucx-py==0.42.*,>=0.0.0a0 - wheel -name: all_cuda-125_arch-x86_64 +name: all_cuda-128_arch-x86_64 diff --git a/cpp/include/cugraph/detail/decompress_edge_partition.cuh b/cpp/include/cugraph/detail/decompress_edge_partition.cuh index c94f456f215..b31624da840 100644 --- a/cpp/include/cugraph/detail/decompress_edge_partition.cuh +++ b/cpp/include/cugraph/detail/decompress_edge_partition.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2024, NVIDIA CORPORATION. + * Copyright (c) 2020-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -29,7 +29,6 @@ #include #include #include -#include #include #include diff --git a/cpp/include/cugraph/edge_partition_device_view.cuh b/cpp/include/cugraph/edge_partition_device_view.cuh index 628c3cc10cc..b887309bf6f 100644 --- a/cpp/include/cugraph/edge_partition_device_view.cuh +++ b/cpp/include/cugraph/edge_partition_device_view.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2024, NVIDIA CORPORATION. + * Copyright (c) 2020-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -26,10 +26,10 @@ #include #include +#include #include #include #include -#include #include #include #include @@ -43,7 +43,7 @@ namespace cugraph { namespace detail { template -__device__ thrust::optional major_hypersparse_idx_from_major_nocheck_impl( +__device__ cuda::std::optional major_hypersparse_idx_from_major_nocheck_impl( raft::device_span dcs_nzd_vertices, vertex_t major) { // we can avoid binary search (and potentially improve performance) if we add an auxiliary array @@ -51,10 +51,10 @@ __device__ thrust::optional major_hypersparse_idx_from_major_nocheck_i auto it = thrust::lower_bound(thrust::seq, dcs_nzd_vertices.begin(), dcs_nzd_vertices.end(), major); return it != dcs_nzd_vertices.end() - ? (*it == major ? thrust::optional{static_cast( + ? (*it == major ? cuda::std::optional{static_cast( thrust::distance(dcs_nzd_vertices.begin(), it))} - : thrust::nullopt) - : thrust::nullopt; + : cuda::std::nullopt) + : cuda::std::nullopt; } template @@ -490,7 +490,7 @@ class edge_partition_device_view_t major_hypersparse_first() const noexcept + __host__ __device__ cuda::std::optional major_hypersparse_first() const noexcept { return major_hypersparse_first_; } @@ -528,15 +528,16 @@ class edge_partition_device_view_t major_idx_from_major_nocheck(vertex_t major) const noexcept + __device__ cuda::std::optional major_idx_from_major_nocheck( + vertex_t major) const noexcept { if (major_hypersparse_first_ && (major >= *major_hypersparse_first_)) { auto major_hypersparse_idx = detail::major_hypersparse_idx_from_major_nocheck_impl(*dcs_nzd_vertices_, major); return major_hypersparse_idx - ? thrust::make_optional((*major_hypersparse_first_ - major_range_first_) + - *major_hypersparse_idx) - : thrust::nullopt; + ? 
cuda::std::make_optional((*major_hypersparse_first_ - major_range_first_) + + *major_hypersparse_idx) + : cuda::std::nullopt; } else { return major - major_range_first_; } @@ -554,23 +555,23 @@ class edge_partition_device_view_t major_hypersparse_idx_from_major_nocheck( + __device__ cuda::std::optional major_hypersparse_idx_from_major_nocheck( vertex_t major) const noexcept { if (dcs_nzd_vertices_) { return detail::major_hypersparse_idx_from_major_nocheck_impl(*dcs_nzd_vertices_, major); } else { - return thrust::nullopt; + return cuda::std::nullopt; } } // major_hypersparse_idx: index within the hypersparse segment - __device__ thrust::optional major_from_major_hypersparse_idx_nocheck( + __device__ cuda::std::optional major_from_major_hypersparse_idx_nocheck( vertex_t major_hypersparse_idx) const noexcept { return dcs_nzd_vertices_ - ? thrust::optional{(*dcs_nzd_vertices_)[major_hypersparse_idx]} - : thrust::nullopt; + ? cuda::std::optional{(*dcs_nzd_vertices_)[major_hypersparse_idx]} + : cuda::std::nullopt; } __host__ __device__ vertex_t minor_from_minor_offset_nocheck(vertex_t minor_offset) const noexcept @@ -578,36 +579,36 @@ class edge_partition_device_view_t> for consistency (see - // dcs_nzd_range_bitmap()) - __host__ __device__ thrust::optional dcs_nzd_vertices() const + // FIXME: better return cuda::std::optional> for consistency + // (see dcs_nzd_range_bitmap()) + __host__ __device__ cuda::std::optional dcs_nzd_vertices() const { - return dcs_nzd_vertices_ ? thrust::optional{(*dcs_nzd_vertices_).data()} - : thrust::nullopt; + return dcs_nzd_vertices_ ? cuda::std::optional{(*dcs_nzd_vertices_).data()} + : cuda::std::nullopt; } - __host__ __device__ thrust::optional dcs_nzd_vertex_count() const + __host__ __device__ cuda::std::optional dcs_nzd_vertex_count() const { return dcs_nzd_vertices_ - ? thrust::optional{static_cast((*dcs_nzd_vertices_).size())} - : thrust::nullopt; + ? 
cuda::std::optional{static_cast((*dcs_nzd_vertices_).size())} + : cuda::std::nullopt; } - __host__ __device__ thrust::optional> dcs_nzd_range_bitmap() + __host__ __device__ cuda::std::optional> dcs_nzd_range_bitmap() const { return dcs_nzd_range_bitmap_ - ? thrust::make_optional>( + ? cuda::std::make_optional>( (*dcs_nzd_range_bitmap_).data(), (*dcs_nzd_range_bitmap_).size()) - : thrust::nullopt; + : cuda::std::nullopt; } private: // should be trivially copyable to device - thrust::optional> dcs_nzd_vertices_{thrust::nullopt}; - thrust::optional> dcs_nzd_range_bitmap_{thrust::nullopt}; - thrust::optional major_hypersparse_first_{thrust::nullopt}; + cuda::std::optional> dcs_nzd_vertices_{cuda::std::nullopt}; + cuda::std::optional> dcs_nzd_range_bitmap_{cuda::std::nullopt}; + cuda::std::optional major_hypersparse_first_{cuda::std::nullopt}; vertex_t major_range_first_{0}; vertex_t major_range_last_{0}; @@ -790,10 +791,10 @@ class edge_partition_device_view_t major_hypersparse_first() const noexcept + __host__ __device__ cuda::std::optional major_hypersparse_first() const noexcept { assert(false); - return thrust::nullopt; + return cuda::std::nullopt; } __host__ __device__ constexpr vertex_t major_range_first() const noexcept { return vertex_t{0}; } @@ -823,7 +824,8 @@ class edge_partition_device_view_t major_idx_from_major_nocheck(vertex_t major) const noexcept + __device__ cuda::std::optional major_idx_from_major_nocheck( + vertex_t major) const noexcept { return major_offset_from_major_nocheck(major); } @@ -834,19 +836,19 @@ class edge_partition_device_view_t major_hypersparse_idx_from_major_nocheck( + __device__ cuda::std::optional major_hypersparse_idx_from_major_nocheck( vertex_t major) const noexcept { assert(false); - return thrust::nullopt; + return cuda::std::nullopt; } // major_hypersparse_idx: index within the hypersparse segment - __device__ thrust::optional major_from_major_hypersparse_idx_nocheck( + __device__ cuda::std::optional 
major_from_major_hypersparse_idx_nocheck( vertex_t major_hypersparse_idx) const noexcept { assert(false); - return thrust::nullopt; + return cuda::std::nullopt; } __host__ __device__ vertex_t minor_from_minor_offset_nocheck(vertex_t minor_offset) const noexcept @@ -854,14 +856,14 @@ class edge_partition_device_view_t dcs_nzd_vertices() const + __host__ __device__ cuda::std::optional dcs_nzd_vertices() const { - return thrust::nullopt; + return cuda::std::nullopt; } - __host__ __device__ thrust::optional dcs_nzd_vertex_count() const + __host__ __device__ cuda::std::optional dcs_nzd_vertex_count() const { - return thrust::nullopt; + return cuda::std::nullopt; } private: diff --git a/cpp/include/cugraph/edge_partition_edge_property_device_view.cuh b/cpp/include/cugraph/edge_partition_edge_property_device_view.cuh index 4b324bcf348..061c4108f4d 100644 --- a/cpp/include/cugraph/edge_partition_edge_property_device_view.cuh +++ b/cpp/include/cugraph/edge_partition_edge_property_device_view.cuh @@ -21,8 +21,8 @@ #include #include +#include #include -#include namespace cugraph { @@ -182,7 +182,7 @@ template class edge_partition_edge_dummy_property_device_view_t { public: using edge_type = edge_t; - using value_type = thrust::nullopt_t; + using value_type = cuda::std::nullopt_t; static constexpr bool is_packed_bool = false; static constexpr bool has_packed_bool_element = false; @@ -194,7 +194,7 @@ class edge_partition_edge_dummy_property_device_view_t { { } - __device__ auto get(edge_t offset) const { return thrust::nullopt; } + __device__ auto get(edge_t offset) const { return cuda::std::nullopt; } }; } // namespace detail diff --git a/cpp/include/cugraph/edge_partition_endpoint_property_device_view.cuh b/cpp/include/cugraph/edge_partition_endpoint_property_device_view.cuh index f86675e5572..e5600848db6 100644 --- a/cpp/include/cugraph/edge_partition_endpoint_property_device_view.cuh +++ b/cpp/include/cugraph/edge_partition_endpoint_property_device_view.cuh @@ -23,12 +23,12 
@@ #include +#include #include #include #include #include #include -#include namespace cugraph { @@ -184,9 +184,10 @@ class edge_partition_endpoint_property_device_view_t { } private: - thrust::optional> keys_{thrust::nullopt}; - thrust::optional> key_chunk_start_offsets_{thrust::nullopt}; - thrust::optional key_chunk_size_{thrust::nullopt}; + cuda::std::optional> keys_{cuda::std::nullopt}; + cuda::std::optional> key_chunk_start_offsets_{ + cuda::std::nullopt}; + cuda::std::optional key_chunk_size_{cuda::std::nullopt}; ValueIterator value_first_{}; vertex_t range_first_{}; @@ -214,7 +215,7 @@ template class edge_partition_endpoint_dummy_property_device_view_t { public: using vertex_type = vertex_t; - using value_type = thrust::nullopt_t; + using value_type = cuda::std::nullopt_t; static constexpr bool is_packed_bool = false; static constexpr bool has_packed_bool_element = false; @@ -230,7 +231,7 @@ class edge_partition_endpoint_dummy_property_device_view_t { { } - __device__ auto get(vertex_t offset) const { return thrust::nullopt; } + __device__ auto get(vertex_t offset) const { return cuda::std::nullopt; } }; } // namespace detail diff --git a/cpp/include/cugraph/edge_property.hpp b/cpp/include/cugraph/edge_property.hpp index d46d4e52fd4..11041f504f3 100644 --- a/cpp/include/cugraph/edge_property.hpp +++ b/cpp/include/cugraph/edge_property.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -22,8 +22,8 @@ #include +#include #include -#include #include #include @@ -63,7 +63,7 @@ class edge_property_view_t { class edge_dummy_property_view_t { public: - using value_type = thrust::nullopt_t; + using value_type = cuda::std::nullopt_t; using value_iterator = void*; }; @@ -155,7 +155,7 @@ class edge_property_t { class edge_dummy_property_t { public: - using value_type = thrust::nullopt_t; + using value_type = cuda::std::nullopt_t; auto view() const { return edge_dummy_property_view_t{}; } }; diff --git a/cpp/include/cugraph/edge_src_dst_property.hpp b/cpp/include/cugraph/edge_src_dst_property.hpp index d27f6856428..f7096ce32fa 100644 --- a/cpp/include/cugraph/edge_src_dst_property.hpp +++ b/cpp/include/cugraph/edge_src_dst_property.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -24,9 +24,9 @@ #include #include +#include #include #include -#include #include #include @@ -365,7 +365,7 @@ class edge_minor_property_t { class edge_endpoint_dummy_property_view_t { public: - using value_type = thrust::nullopt_t; + using value_type = cuda::std::nullopt_t; using value_iterator = void*; }; @@ -557,14 +557,14 @@ class edge_dst_property_t { class edge_src_dummy_property_t { public: - using value_type = thrust::nullopt_t; + using value_type = cuda::std::nullopt_t; auto view() const { return detail::edge_endpoint_dummy_property_view_t{}; } }; class edge_dst_dummy_property_t { public: - using value_type = thrust::nullopt_t; + using value_type = cuda::std::nullopt_t; auto view() const { return detail::edge_endpoint_dummy_property_view_t{}; } }; diff --git a/cpp/include/cugraph/sampling_functions.hpp b/cpp/include/cugraph/sampling_functions.hpp index 35c51c1ea6d..0ccf49ddfb6 100644 --- a/cpp/include/cugraph/sampling_functions.hpp +++ b/cpp/include/cugraph/sampling_functions.hpp @@ -1152,7 +1152,10 @@ lookup_endpoints_from_edge_ids_and_types( * @param dst_biases Optional bias for randomly selecting destination vertices. If std::nullopt * vertices will be selected uniformly. In multi-GPU environment the biases should be partitioned * based on the vertex partitions. - * @param num_samples Number of negative samples to generate + * @param num_samples Number of negative samples to generate. In SG mode this represents the total + * number of samples to generate. In MG mode, each GPU will provide the number of samples desired + * on that GPU. The total number of samples in MG mode will be the aggregation of these values; the + * resulting samples will be randomly distributed across the ranks. 
* @param remove_duplicates If true, remove duplicate samples * @param remove_existing_edges If true, remove samples that are actually edges in the graph * @param exact_number_of_samples If true, repeat generation until we get the exact number of diff --git a/cpp/include/cugraph/utilities/misc_utils.cuh b/cpp/include/cugraph/utilities/misc_utils.cuh index 91a349007da..b25dd9a41f4 100644 --- a/cpp/include/cugraph/utilities/misc_utils.cuh +++ b/cpp/include/cugraph/utilities/misc_utils.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2024, NVIDIA CORPORATION. + * Copyright (c) 2021-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,11 +23,11 @@ #include #include +#include #include #include #include #include -#include #include #include @@ -87,15 +87,15 @@ std::tuple, std::vector> compute_offset_aligned_ } template -thrust::optional to_thrust_optional(std::optional val) +cuda::std::optional to_thrust_optional(std::optional val) { - thrust::optional ret{thrust::nullopt}; + cuda::std::optional ret{cuda::std::nullopt}; if (val) { ret = *val; } return ret; } template -std::optional to_std_optional(thrust::optional val) +std::optional to_std_optional(cuda::std::optional val) { std::optional ret{std::nullopt}; if (val) { ret = *val; } diff --git a/cpp/include/cugraph/utilities/shuffle_comm.cuh b/cpp/include/cugraph/utilities/shuffle_comm.cuh index 98fa2cb1706..d173cc08a1c 100644 --- a/cpp/include/cugraph/utilities/shuffle_comm.cuh +++ b/cpp/include/cugraph/utilities/shuffle_comm.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2024, NVIDIA CORPORATION. + * Copyright (c) 2020-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -145,21 +145,21 @@ compute_tx_rx_counts_offsets_ranks(raft::comms::comms_t const& comm, template struct key_group_id_less_t { - KeyToGroupIdOp key_to_group_id_op{}; + KeyToGroupIdOp key_to_group_id_op; int pivot{}; __device__ bool operator()(key_type k) const { return key_to_group_id_op(k) < pivot; } }; template struct value_group_id_less_t { - ValueToGroupIdOp value_to_group_id_op{}; + ValueToGroupIdOp value_to_group_id_op; int pivot{}; __device__ bool operator()(value_type v) const { return value_to_group_id_op(v) < pivot; } }; template struct kv_pair_group_id_less_t { - KeyToGroupIdOp key_to_group_id_op{}; + KeyToGroupIdOp key_to_group_id_op; int pivot{}; __device__ bool operator()(thrust::tuple t) const { @@ -169,14 +169,14 @@ struct kv_pair_group_id_less_t { template struct value_group_id_greater_equal_t { - ValueToGroupIdOp value_to_group_id_op{}; + ValueToGroupIdOp value_to_group_id_op; int pivot{}; __device__ bool operator()(value_type v) const { return value_to_group_id_op(v) >= pivot; } }; template struct kv_pair_group_id_greater_equal_t { - KeyToGroupIdOp key_to_group_id_op{}; + KeyToGroupIdOp key_to_group_id_op; int pivot{}; __device__ bool operator()(thrust::tuple t) const { diff --git a/cpp/src/centrality/betweenness_centrality_impl.cuh b/cpp/src/centrality/betweenness_centrality_impl.cuh index 88ef3987a03..4eb57b621ea 100644 --- a/cpp/src/centrality/betweenness_centrality_impl.cuh +++ b/cpp/src/centrality/betweenness_centrality_impl.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2024, NVIDIA CORPORATION. + * Copyright (c) 2022-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -36,8 +36,8 @@ #include +#include #include -#include #include // @@ -52,10 +52,11 @@ struct brandes_e_op_t { const vertex_t invalid_distance_{std::numeric_limits::max()}; template - __device__ thrust::optional operator()( + __device__ cuda::std::optional operator()( vertex_t, vertex_t, value_t src_sigma, vertex_t dst_distance, ignore_t) const { - return (dst_distance == invalid_distance_) ? thrust::make_optional(src_sigma) : thrust::nullopt; + return (dst_distance == invalid_distance_) ? cuda::std::make_optional(src_sigma) + : cuda::std::nullopt; } }; @@ -64,7 +65,7 @@ struct extract_edge_e_op_t { vertex_t d{}; template - __device__ thrust::optional> operator()( + __device__ cuda::std::optional> operator()( vertex_t src, vertex_t dst, thrust::tuple src_props, @@ -72,8 +73,8 @@ struct extract_edge_e_op_t { weight_t edge_centrality) const { return ((thrust::get<0>(dst_props) == d) && (thrust::get<0>(src_props) == (d - 1))) - ? thrust::optional>{thrust::make_tuple(src, dst)} - : thrust::nullopt; + ? cuda::std::optional>{thrust::make_tuple(src, dst)} + : cuda::std::nullopt; } }; @@ -153,8 +154,8 @@ std::tuple, rmm::device_uvector> brandes_b thrust::make_zip_iterator(distances.begin(), sigmas.begin()), [hop] __device__(auto v, auto old_values, auto v_sigma) { return thrust::make_tuple( - thrust::make_optional(bucket_idx_next), - thrust::make_optional(thrust::make_tuple(hop + 1, v_sigma))); + cuda::std::make_optional(bucket_idx_next), + cuda::std::make_optional(thrust::make_tuple(hop + 1, v_sigma))); }); vertex_frontier.bucket(bucket_idx_cur).clear(); diff --git a/cpp/src/community/approx_weighted_matching_impl.cuh b/cpp/src/community/approx_weighted_matching_impl.cuh index 869ed4e7ae6..57bf9d50b9a 100644 --- a/cpp/src/community/approx_weighted_matching_impl.cuh +++ b/cpp/src/community/approx_weighted_matching_impl.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, NVIDIA CORPORATION. + * Copyright (c) 2024-2025, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -66,7 +66,8 @@ std::tuple, weight_t> approximate_weighted_matchin cugraph::edge_src_dummy_property_t{}.view(), cugraph::edge_dst_dummy_property_t{}.view(), cugraph::edge_dummy_property_t{}.view(), - [] __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, thrust::nullopt_t) { + [] __device__( + auto src, auto dst, cuda::std::nullopt_t, cuda::std::nullopt_t, cuda::std::nullopt_t) { return !(src == dst); }, edge_masks_even.mutable_view()); @@ -130,7 +131,7 @@ std::tuple, weight_t> approximate_weighted_matchin graph_view_t::is_multi_gpu ? src_key_cache.view() : detail::edge_major_property_view_t(local_vertices.begin()), - [] __device__(auto, auto dst, thrust::nullopt_t, thrust::nullopt_t, auto wt) { + [] __device__(auto, auto dst, cuda::std::nullopt_t, cuda::std::nullopt_t, auto wt) { return thrust::make_tuple(wt, dst); }, thrust::make_tuple(weight_t{0.0}, invalid_partner), @@ -314,7 +315,7 @@ std::tuple, weight_t> approximate_weighted_matchin dst_match_flags.view(), cugraph::edge_dummy_property_t{}.view(), [] __device__( - auto src, auto dst, auto is_src_matched, auto is_dst_matched, thrust::nullopt_t) { + auto src, auto dst, auto is_src_matched, auto is_dst_matched, cuda::std::nullopt_t) { return !((is_src_matched == true) || (is_dst_matched == true)); }, edge_masks_odd.mutable_view()); @@ -327,7 +328,7 @@ std::tuple, weight_t> approximate_weighted_matchin vertex_t{0}), cugraph::edge_dummy_property_t{}.view(), [] __device__( - auto src, auto dst, auto is_src_matched, auto is_dst_matched, thrust::nullopt_t) { + auto src, auto dst, auto is_src_matched, auto is_dst_matched, cuda::std::nullopt_t) { return !((is_src_matched == true) || (is_dst_matched == true)); }, edge_masks_odd.mutable_view()); @@ -346,7 +347,7 @@ std::tuple, weight_t> approximate_weighted_matchin dst_match_flags.view(), 
cugraph::edge_dummy_property_t{}.view(), [] __device__( - auto src, auto dst, auto is_src_matched, auto is_dst_matched, thrust::nullopt_t) { + auto src, auto dst, auto is_src_matched, auto is_dst_matched, cuda::std::nullopt_t) { return !((is_src_matched == true) || (is_dst_matched == true)); }, edge_masks_even.mutable_view()); @@ -359,7 +360,7 @@ std::tuple, weight_t> approximate_weighted_matchin vertex_t{0}), cugraph::edge_dummy_property_t{}.view(), [] __device__( - auto src, auto dst, auto is_src_matched, auto is_dst_matched, thrust::nullopt_t) { + auto src, auto dst, auto is_src_matched, auto is_dst_matched, cuda::std::nullopt_t) { return !((is_src_matched == true) || (is_dst_matched == true)); }, edge_masks_even.mutable_view()); diff --git a/cpp/src/community/detail/common_methods.cuh b/cpp/src/community/detail/common_methods.cuh index 18fb3fdb251..d37a8864e68 100644 --- a/cpp/src/community/detail/common_methods.cuh +++ b/cpp/src/community/detail/common_methods.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2024, NVIDIA CORPORATION. + * Copyright (c) 2020-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -30,11 +30,11 @@ #include #include +#include #include #include #include #include -#include #include #include #include @@ -140,7 +140,7 @@ struct cluster_update_op_t { template struct return_edge_weight_t { __device__ auto operator()( - vertex_t, vertex_t, thrust::nullopt_t, thrust::nullopt_t, weight_t w) const + vertex_t, vertex_t, cuda::std::nullopt_t, cuda::std::nullopt_t, weight_t w) const { return w; } @@ -150,7 +150,7 @@ struct return_edge_weight_t { template struct return_one_t { __device__ auto operator()( - vertex_t, vertex_t, thrust::nullopt_t, thrust::nullopt_t, thrust::nullopt_t) const + vertex_t, vertex_t, cuda::std::nullopt_t, cuda::std::nullopt_t, cuda::std::nullopt_t) const { return 1.0; } diff --git a/cpp/src/community/detail/maximal_independent_moves.cuh b/cpp/src/community/detail/maximal_independent_moves.cuh index 85892f711ba..70b812d687a 100644 --- a/cpp/src/community/detail/maximal_independent_moves.cuh +++ b/cpp/src/community/detail/maximal_independent_moves.cuh @@ -1,6 +1,6 @@ /* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. + * Copyright (c) 2023-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -32,13 +32,13 @@ #include #include #include -#include #include #include #include #include #include +#include namespace cugraph { diff --git a/cpp/src/community/detail/refine_impl.cuh b/cpp/src/community/detail/refine_impl.cuh index 01a68a3a0d7..6038fcc6f27 100644 --- a/cpp/src/community/detail/refine_impl.cuh +++ b/cpp/src/community/detail/refine_impl.cuh @@ -38,7 +38,6 @@ #include #include #include -#include #include #include #include @@ -48,6 +47,8 @@ #include #include +#include + CUCO_DECLARE_BITWISE_COMPARABLE(float) CUCO_DECLARE_BITWISE_COMPARABLE(double) // FIXME: a temporary workaround for a compiler error, should be deleted once cuco gets patched. 
diff --git a/cpp/src/community/ecg_impl.cuh b/cpp/src/community/ecg_impl.cuh index d01b13f0b35..100efdb025d 100644 --- a/cpp/src/community/ecg_impl.cuh +++ b/cpp/src/community/ecg_impl.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. + * Copyright (c) 2023-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -31,6 +31,8 @@ #include +#include + namespace cugraph { namespace detail { @@ -106,7 +108,7 @@ std::tuple, size_t, weight_t> ecg( edge_dst_dummy_property_t{}.view(), view_concat(*edge_weight_view, modified_edge_weights.view()), [min_weight, ensemble_size = static_cast(ensemble_size)] __device__( - auto, auto, thrust::nullopt_t, thrust::nullopt_t, auto edge_properties) { + auto, auto, cuda::std::nullopt_t, cuda::std::nullopt_t, auto edge_properties) { auto e_weight = thrust::get<0>(edge_properties); auto e_frequency = thrust::get<1>(edge_properties); return min_weight + (e_weight - min_weight) * e_frequency / ensemble_size; diff --git a/cpp/src/community/edge_triangle_count_impl.cuh b/cpp/src/community/edge_triangle_count_impl.cuh index fbf47615dbe..b210bcacf35 100644 --- a/cpp/src/community/edge_triangle_count_impl.cuh +++ b/cpp/src/community/edge_triangle_count_impl.cuh @@ -28,6 +28,7 @@ #include +#include #include #include #include @@ -353,9 +354,9 @@ edge_property_t, edge_t> edge_t num_edges = edgelist_srcs.size(), num_triangles = num_triangles.data()] __device__(auto src, auto dst, - thrust::nullopt_t, - thrust::nullopt_t, - thrust::nullopt_t) { + cuda::std::nullopt_t, + cuda::std::nullopt_t, + cuda::std::nullopt_t) { auto pair = thrust::make_tuple(src, dst); // Find its position in 'edges' diff --git a/cpp/src/community/k_truss_impl.cuh b/cpp/src/community/k_truss_impl.cuh index 2b712a6de77..25a1cf63f4d 100644 --- a/cpp/src/community/k_truss_impl.cuh +++ b/cpp/src/community/k_truss_impl.cuh @@ -19,6 +19,7 @@ #include 
"prims/extract_transform_e.cuh" #include "prims/extract_transform_v_frontier_outgoing_e.cuh" #include "prims/fill_edge_property.cuh" +#include "prims/per_v_pair_dst_nbr_intersection.cuh" #include "prims/transform_e.cuh" #include "prims/transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cuh" #include "prims/update_edge_src_dst_property.cuh" @@ -31,63 +32,131 @@ #include +#include +#include #include #include #include #include #include -#include #include #include #include namespace cugraph { -namespace { +template +struct extract_weak_edges { + edge_t k{}; + __device__ cuda::std::optional> operator()( + vertex_t src, vertex_t dst, cuda::std::nullopt_t, cuda::std::nullopt_t, edge_t count) const + { + // No need to process edges with count == 0 + return ((count < k - 2) && (count != 0)) + ? cuda::std::optional>{thrust::make_tuple(src, dst)} + : cuda::std::nullopt; + } +}; -template -struct exclude_self_loop_t { - __device__ thrust::optional> operator()( - vertex_t src, vertex_t dst, thrust::nullopt_t, thrust::nullopt_t, thrust::nullopt_t) const +template +struct is_k_or_greater_t { + edge_t k{}; + __device__ bool operator()(edge_t core_number) const { return core_number >= edge_t{k}; } +}; + +template +struct extract_triangles_endpoints { + size_t chunk_start{}; + raft::device_span intersection_offsets{}; + raft::device_span intersection_indices{}; + raft::device_span weak_srcs{}; + raft::device_span weak_dsts{}; + + __device__ thrust::tuple operator()(edge_t i) const { - return src != dst - ? 
thrust::optional>{thrust::make_tuple(src, dst)} - : thrust::nullopt; + auto itr = thrust::upper_bound( + thrust::seq, intersection_offsets.begin() + 1, intersection_offsets.end(), i); + auto idx = thrust::distance(intersection_offsets.begin() + 1, itr); + + auto endpoints = thrust::make_tuple(weak_srcs[chunk_start + idx], // p + weak_dsts[chunk_start + idx], // q + intersection_indices[i] // r + ); + + auto p = weak_srcs[chunk_start + idx]; + auto q = weak_dsts[chunk_start + idx]; + auto r = intersection_indices[i]; + // Re-order the endpoints such that p < q < r in order to identify duplicate triangles + // which will cause overcompensation. comparing the vertex IDs is cheaper than comparing the + // degrees (d(p) < d(q) < d(r)) which will be done once in the latter stage to retrieve the + // direction of the edges once the triplet dependency is broken. + if (p > q) cuda::std::swap(p, q); + if (p > r) cuda::std::swap(p, r); + if (q > r) cuda::std::swap(q, r); + + return thrust::make_tuple(p, q, r); } }; -template -struct extract_low_to_high_degree_weighted_edges_t { - __device__ thrust::optional> operator()( - vertex_t src, vertex_t dst, edge_t src_out_degree, edge_t dst_out_degree, weight_t wgt) const +namespace { + +template +struct exclude_self_loop_t { + __device__ cuda::std::optional> operator()( + vertex_t src, + vertex_t dst, + cuda::std::nullopt_t, + cuda::std::nullopt_t, + cuda::std::nullopt_t) const { - return (src_out_degree < dst_out_degree) - ? thrust::optional>{thrust::make_tuple( - src, dst, wgt)} - : (((src_out_degree == dst_out_degree) && - (src < dst) /* tie-breaking using vertex ID */) - ? thrust::optional< - thrust::tuple>{thrust::make_tuple( - src, dst, wgt)} - : thrust::nullopt); + return src != dst + ? 
cuda::std::optional>{thrust::make_tuple(src, dst)} + : cuda::std::nullopt; } }; template -struct extract_low_to_high_degree_edges_t { - __device__ thrust::optional> operator()(vertex_t src, - vertex_t dst, - edge_t src_out_degree, - edge_t dst_out_degree, - thrust::nullopt_t) const +struct extract_low_to_high_degree_edges_from_endpoints_t { + raft::device_span srcs{}; + raft::device_span dsts{}; + raft::device_span count{}; + __device__ cuda::std::optional> operator()( + vertex_t src, + vertex_t dst, + edge_t src_out_degree, + edge_t dst_out_degree, + cuda::std::nullopt_t) const { - return (src_out_degree < dst_out_degree) - ? thrust::optional>{thrust::make_tuple(src, dst)} - : (((src_out_degree == dst_out_degree) && - (src < dst) /* tie-breaking using vertex ID */) - ? thrust::optional>{thrust::make_tuple(src, - dst)} - : thrust::nullopt); + // FIXME: Not the most efficient way because the entire edgelist is scan just to find + // the direction of the edges + auto itr = thrust::lower_bound(thrust::seq, + thrust::make_zip_iterator(srcs.begin(), dsts.begin()), + thrust::make_zip_iterator(srcs.end(), dsts.end()), + thrust::make_tuple(src, dst)); + + if ((itr != thrust::make_zip_iterator(srcs.end(), dsts.end())) && + (*itr == thrust::make_tuple(src, dst))) { + auto idx = thrust::distance(thrust::make_zip_iterator(srcs.begin(), dsts.begin()), itr); + + if (src_out_degree < dst_out_degree) { + return cuda::std::optional>{ + thrust::make_tuple(src, dst, count[idx])}; + } else if (dst_out_degree < src_out_degree) { + return cuda::std::optional>{ + thrust::make_tuple(dst, src, count[idx])}; + } else { + if ((src_out_degree == dst_out_degree) && (src < dst) /* tie-breaking using vertex ID */) { + return cuda::std::optional>{ + thrust::make_tuple(src, dst, count[idx])}; + } else if ((src_out_degree == dst_out_degree) && + (src > dst) /* tie-breaking using vertex ID */) { + return cuda::std::optional>{ + thrust::make_tuple(dst, src, count[idx])}; + } + } + } else { + return 
cuda::std::nullopt; + } } }; @@ -114,246 +183,543 @@ k_truss(raft::handle_t const& handle, // nothing to do } - std::optional> modified_graph{std::nullopt}; - std::optional> modified_graph_view{std::nullopt}; - std::optional> renumber_map{std::nullopt}; - std::optional, weight_t>> - edge_weight{std::nullopt}; - std::optional> wgts{std::nullopt}; - - if (graph_view.count_self_loops(handle) > edge_t{0}) { - auto [srcs, dsts] = extract_transform_e(handle, - graph_view, - edge_src_dummy_property_t{}.view(), - edge_dst_dummy_property_t{}.view(), - edge_dummy_property_t{}.view(), - exclude_self_loop_t{}); - - if constexpr (multi_gpu) { - std::tie( - srcs, dsts, std::ignore, std::ignore, std::ignore, std::ignore, std::ignore, std::ignore) = - detail::shuffle_ext_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( - handle, - std::move(srcs), - std::move(dsts), - std::nullopt, - std::nullopt, - std::nullopt, - std::nullopt, - std::nullopt); - } + // 2. Exclude self-loops and edges that do not belong to (k-1)-core - std::tie(*modified_graph, std::ignore, std::ignore, std::ignore, renumber_map) = - create_graph_from_edgelist( - handle, - std::nullopt, - std::move(srcs), - std::move(dsts), - std::nullopt, - std::nullopt, - std::nullopt, - cugraph::graph_properties_t{true, graph_view.is_multigraph()}, - true); + auto cur_graph_view = graph_view; + auto unmasked_cur_graph_view = cur_graph_view; - modified_graph_view = (*modified_graph).view(); - } + if (unmasked_cur_graph_view.has_edge_mask()) { unmasked_cur_graph_view.clear_edge_mask(); } + // mask for self-loops and edges not part of k-1 core + cugraph::edge_property_t undirected_mask(handle); + { + // 2.1 Exclude self-loops - // 2. Find (k-1)-core and exclude edges that do not belong to (k-1)-core + if (cur_graph_view.count_self_loops(handle) > edge_t{0}) { + // 2.1. Exclude self-loops - { - auto cur_graph_view = modified_graph_view ? 
*modified_graph_view : graph_view; - - auto vertex_partition_range_lasts = - renumber_map - ? std::make_optional>(cur_graph_view.vertex_partition_range_lasts()) - : std::nullopt; - - rmm::device_uvector core_numbers(cur_graph_view.number_of_vertices(), - handle.get_stream()); - core_number( - handle, cur_graph_view, core_numbers.data(), k_core_degree_type_t::OUT, size_t{2}, size_t{2}); - - raft::device_span core_number_span{core_numbers.data(), core_numbers.size()}; - - auto [srcs, dsts, wgts] = k_core(handle, - cur_graph_view, - edge_weight_view, - k - 1, - std::make_optional(k_core_degree_type_t::OUT), - std::make_optional(core_number_span)); - - if constexpr (multi_gpu) { - std::tie(srcs, dsts, wgts, std::ignore, std::ignore, std::ignore, std::ignore, std::ignore) = - detail::shuffle_ext_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( - handle, - std::move(srcs), - std::move(dsts), - std::move(wgts), - std::nullopt, - std::nullopt, - std::nullopt, - std::nullopt); - } + cugraph::edge_property_t self_loop_edge_mask(handle, + cur_graph_view); + cugraph::fill_edge_property( + handle, unmasked_cur_graph_view, self_loop_edge_mask.mutable_view(), false); - std::optional> tmp_renumber_map{std::nullopt}; - std::tie(*modified_graph, edge_weight, std::ignore, std::ignore, tmp_renumber_map) = - create_graph_from_edgelist( + transform_e( handle, - std::nullopt, - std::move(srcs), - std::move(dsts), - std::move(wgts), - std::nullopt, - std::nullopt, - cugraph::graph_properties_t{true, graph_view.is_multigraph()}, - true); + cur_graph_view, + edge_src_dummy_property_t{}.view(), + edge_dst_dummy_property_t{}.view(), + edge_dummy_property_t{}.view(), + [] __device__(auto src, auto dst, auto, auto, auto) { return src != dst; }, + self_loop_edge_mask.mutable_view()); + + undirected_mask = std::move(self_loop_edge_mask); + if (cur_graph_view.has_edge_mask()) { cur_graph_view.clear_edge_mask(); } + cur_graph_view.attach_edge_mask(undirected_mask.view()); + } - 
modified_graph_view = (*modified_graph).view(); + // 2.2 Find (k-1)-core and exclude edges that do not belong to (k-1)-core + { + rmm::device_uvector core_numbers(cur_graph_view.number_of_vertices(), + handle.get_stream()); + core_number(handle, + cur_graph_view, + core_numbers.data(), + k_core_degree_type_t::OUT, + size_t{2}, + size_t{2}); + + edge_src_property_t edge_src_in_k_minus_1_cores( + handle, cur_graph_view); + edge_dst_property_t edge_dst_in_k_minus_1_cores( + handle, cur_graph_view); + auto in_k_minus_1_core_first = + thrust::make_transform_iterator(core_numbers.begin(), is_k_or_greater_t{k - 1}); + rmm::device_uvector in_k_minus_1_core_flags(core_numbers.size(), handle.get_stream()); + thrust::copy(handle.get_thrust_policy(), + in_k_minus_1_core_first, + in_k_minus_1_core_first + core_numbers.size(), + in_k_minus_1_core_flags.begin()); + update_edge_src_property(handle, + cur_graph_view, + in_k_minus_1_core_flags.begin(), + edge_src_in_k_minus_1_cores.mutable_view()); + update_edge_dst_property(handle, + cur_graph_view, + in_k_minus_1_core_flags.begin(), + edge_dst_in_k_minus_1_cores.mutable_view()); + + cugraph::edge_property_t in_k_minus_1_core_edge_mask( + handle, cur_graph_view); + cugraph::fill_edge_property( + handle, unmasked_cur_graph_view, in_k_minus_1_core_edge_mask.mutable_view(), false); + + transform_e( + handle, + cur_graph_view, + edge_src_in_k_minus_1_cores.view(), + edge_dst_in_k_minus_1_cores.view(), + edge_dummy_property_t{}.view(), + [] __device__(auto, auto, auto src_in_k_minus_1_core, auto dst_in_k_minus_1_core, auto) { + return src_in_k_minus_1_core && dst_in_k_minus_1_core; + }, + in_k_minus_1_core_edge_mask.mutable_view()); - if (renumber_map) { // collapse renumber_map - unrenumber_int_vertices(handle, - (*tmp_renumber_map).data(), - (*tmp_renumber_map).size(), - (*renumber_map).data(), - *vertex_partition_range_lasts); + undirected_mask = std::move(in_k_minus_1_core_edge_mask); + if (cur_graph_view.has_edge_mask()) { 
cur_graph_view.clear_edge_mask(); } + cur_graph_view.attach_edge_mask(undirected_mask.view()); } - - renumber_map = std::move(tmp_renumber_map); } // 3. Keep only the edges from a low-degree vertex to a high-degree vertex. - { - auto cur_graph_view = modified_graph_view ? *modified_graph_view : graph_view; - - auto vertex_partition_range_lasts = - renumber_map - ? std::make_optional>(cur_graph_view.vertex_partition_range_lasts()) - : std::nullopt; + edge_src_property_t edge_src_out_degrees(handle, + cur_graph_view); + edge_dst_property_t edge_dst_out_degrees(handle, + cur_graph_view); + cugraph::edge_property_t, bool> dodg_mask( + handle, cur_graph_view); + { auto out_degrees = cur_graph_view.compute_out_degrees(handle); - edge_src_property_t edge_src_out_degrees(handle, - cur_graph_view); - edge_dst_property_t edge_dst_out_degrees(handle, - cur_graph_view); update_edge_src_property( handle, cur_graph_view, out_degrees.begin(), edge_src_out_degrees.mutable_view()); update_edge_dst_property( handle, cur_graph_view, out_degrees.begin(), edge_dst_out_degrees.mutable_view()); - rmm::device_uvector srcs(0, handle.get_stream()); - rmm::device_uvector dsts(0, handle.get_stream()); - - edge_weight_view = - edge_weight ? 
std::make_optional((*edge_weight).view()) - : std::optional>{std::nullopt}; - if (edge_weight_view) { - std::tie(srcs, dsts, wgts) = extract_transform_e( - handle, - cur_graph_view, - edge_src_out_degrees.view(), - edge_dst_out_degrees.view(), - *edge_weight_view, - extract_low_to_high_degree_weighted_edges_t{}); - } else { - std::tie(srcs, dsts) = - extract_transform_e(handle, - cur_graph_view, - edge_src_out_degrees.view(), - edge_dst_out_degrees.view(), - edge_dummy_property_t{}.view(), - extract_low_to_high_degree_edges_t{}); - } - - if constexpr (multi_gpu) { - std::tie(srcs, dsts, wgts, std::ignore, std::ignore, std::ignore, std::ignore, std::ignore) = - detail::shuffle_ext_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( - handle, - std::move(srcs), - std::move(dsts), - std::move(wgts), - std::nullopt, - std::nullopt, - std::nullopt, - std::nullopt); - } - - std::optional> tmp_renumber_map{std::nullopt}; - - std::tie(*modified_graph, edge_weight, std::ignore, std::ignore, tmp_renumber_map) = - create_graph_from_edgelist( - handle, - std::nullopt, - std::move(srcs), - std::move(dsts), - std::move(wgts), - std::nullopt, - std::nullopt, - cugraph::graph_properties_t{false /* now asymmetric */, cur_graph_view.is_multigraph()}, - true); - - modified_graph_view = (*modified_graph).view(); - if (renumber_map) { // collapse renumber_map - unrenumber_int_vertices(handle, - (*tmp_renumber_map).data(), - (*tmp_renumber_map).size(), - (*renumber_map).data(), - *vertex_partition_range_lasts); - } - renumber_map = std::move(tmp_renumber_map); + cugraph::fill_edge_property( + handle, unmasked_cur_graph_view, dodg_mask.mutable_view(), bool{false}); + + cugraph::transform_e( + handle, + cur_graph_view, + edge_src_out_degrees.view(), + edge_dst_out_degrees.view(), + edge_dummy_property_t{}.view(), + [] __device__(auto src, auto dst, auto src_out_degree, auto dst_out_degree, auto) { + return (src_out_degree < dst_out_degree) ? 
true + : ((src_out_degree == dst_out_degree) && + (src < dst) /* tie-breaking using vertex ID */) + ? true + : false; + }, + dodg_mask.mutable_view(), + do_expensive_check); + + if (cur_graph_view.has_edge_mask()) { cur_graph_view.clear_edge_mask(); } + cur_graph_view.attach_edge_mask(dodg_mask.view()); } // 4. Compute triangle count using nbr_intersection and unroll weak edges { - auto cur_graph_view = modified_graph_view ? *modified_graph_view : graph_view; + // Mask self loops and edges not being part of k-1 core + auto weak_edges_mask = std::move(undirected_mask); - edge_weight_view = - edge_weight ? std::make_optional((*edge_weight).view()) - : std::optional>{std::nullopt}; + auto edge_triangle_counts = + edge_triangle_count(handle, cur_graph_view, false); - cugraph::edge_property_t edge_mask(handle, cur_graph_view); - cugraph::fill_edge_property(handle, cur_graph_view, edge_mask.mutable_view(), bool{true}); + cugraph::edge_bucket_t edgelist_weak(handle); + cugraph::edge_bucket_t edges_to_decrement_count(handle); + size_t prev_chunk_size = 0; // FIXME: Add support for chunking while (true) { - // FIXME: This approach is very expensive when invalidating only few edges per iteration - // and should be address. 
- auto edge_triangle_counts = - edge_triangle_count(handle, cur_graph_view); + // Extract weak edges + auto [weak_edgelist_srcs, weak_edgelist_dsts] = + extract_transform_e(handle, + cur_graph_view, + edge_src_dummy_property_t{}.view(), + edge_dst_dummy_property_t{}.view(), + edge_triangle_counts.view(), + extract_weak_edges{k}); + + auto weak_edgelist_first = + thrust::make_zip_iterator(weak_edgelist_srcs.begin(), weak_edgelist_dsts.begin()); + auto weak_edgelist_last = + thrust::make_zip_iterator(weak_edgelist_srcs.end(), weak_edgelist_dsts.end()); + + thrust::sort(handle.get_thrust_policy(), weak_edgelist_first, weak_edgelist_last); + + // Perform nbr_intersection of the weak edges from the undirected + // graph view + cur_graph_view.clear_edge_mask(); + + // Attach the weak edge mask + cur_graph_view.attach_edge_mask(weak_edges_mask.view()); + + auto [intersection_offsets, intersection_indices] = per_v_pair_dst_nbr_intersection( + handle, cur_graph_view, weak_edgelist_first, weak_edgelist_last, do_expensive_check); + + // This array stores (p, q, r) which are endpoints for the triangles with weak edges + + auto triangles_endpoints = + allocate_dataframe_buffer>( + intersection_indices.size(), handle.get_stream()); + + // Extract endpoints for triangles with weak edges + thrust::tabulate( + handle.get_thrust_policy(), + get_dataframe_buffer_begin(triangles_endpoints), + get_dataframe_buffer_end(triangles_endpoints), + extract_triangles_endpoints{ + prev_chunk_size, + raft::device_span(intersection_offsets.data(), intersection_offsets.size()), + raft::device_span(intersection_indices.data(), + intersection_indices.size()), + raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), + raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size())}); + + thrust::sort(handle.get_thrust_policy(), + get_dataframe_buffer_begin(triangles_endpoints), + get_dataframe_buffer_end(triangles_endpoints)); + + auto unique_triangle_end = 
thrust::unique(handle.get_thrust_policy(), + get_dataframe_buffer_begin(triangles_endpoints), + get_dataframe_buffer_end(triangles_endpoints)); + + auto num_unique_triangles = thrust::distance( // Triangles are represented by their endpoints + get_dataframe_buffer_begin(triangles_endpoints), + unique_triangle_end); + + resize_dataframe_buffer(triangles_endpoints, num_unique_triangles, handle.get_stream()); + + if constexpr (multi_gpu) { + auto& comm = handle.get_comms(); + auto const comm_size = comm.get_size(); + auto& major_comm = handle.get_subcomm(cugraph::partition_manager::major_comm_name()); + auto const major_comm_size = major_comm.get_size(); + auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); + auto const minor_comm_size = minor_comm.get_size(); + + auto vertex_partition_range_lasts = cur_graph_view.vertex_partition_range_lasts(); + + rmm::device_uvector d_vertex_partition_range_lasts( + vertex_partition_range_lasts.size(), handle.get_stream()); + + raft::update_device(d_vertex_partition_range_lasts.data(), + vertex_partition_range_lasts.data(), + vertex_partition_range_lasts.size(), + handle.get_stream()); + + // Shuffle the edges with respect to the undirected graph view to the GPU + // owning edge (p, q). 
Remember that the triplet (p, q, r) is ordered based on the + // vertex ID and not the degree so (p, q) might not be an edge in the DODG but is + // surely an edge in the undirected graph + std::tie(triangles_endpoints, std::ignore) = groupby_gpu_id_and_shuffle_values( + handle.get_comms(), + get_dataframe_buffer_begin(triangles_endpoints), + get_dataframe_buffer_end(triangles_endpoints), + + [key_func = + cugraph::detail::compute_gpu_id_from_int_edge_endpoints_t{ + raft::device_span(d_vertex_partition_range_lasts.data(), + d_vertex_partition_range_lasts.size()), + comm_size, + major_comm_size, + minor_comm_size}] __device__(auto val) { + return key_func(thrust::get<0>(val), thrust::get<1>(val)); + }, + handle.get_stream()); + + thrust::sort(handle.get_thrust_policy(), + get_dataframe_buffer_begin(triangles_endpoints), + get_dataframe_buffer_end(triangles_endpoints)); + + unique_triangle_end = thrust::unique(handle.get_thrust_policy(), + get_dataframe_buffer_begin(triangles_endpoints), + get_dataframe_buffer_end(triangles_endpoints)); + + num_unique_triangles = + thrust::distance(get_dataframe_buffer_begin(triangles_endpoints), unique_triangle_end); + resize_dataframe_buffer(triangles_endpoints, num_unique_triangles, handle.get_stream()); + } + + auto edgelist_to_update_count = allocate_dataframe_buffer>( + 3 * num_unique_triangles, handle.get_stream()); + + // The order no longer matters since duplicated triangles have been removed + // Flatten the endpoints to a list of egdes. 
+ thrust::transform( + handle.get_thrust_policy(), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(size_dataframe_buffer(edgelist_to_update_count)), + get_dataframe_buffer_begin(edgelist_to_update_count), + [num_unique_triangles, + triangles_endpoints = + get_dataframe_buffer_begin(triangles_endpoints)] __device__(auto idx) { + auto idx_triangle = idx % num_unique_triangles; + auto idx_vertex_in_triangle = idx / num_unique_triangles; + auto triangle = (triangles_endpoints + idx_triangle).get_iterator_tuple(); + vertex_t src; + vertex_t dst; + + if (idx_vertex_in_triangle == 0) { + src = *(thrust::get<0>(triangle)); + dst = *(thrust::get<1>(triangle)); + } + + if (idx_vertex_in_triangle == 1) { + src = *(thrust::get<0>(triangle)); + dst = *(thrust::get<2>(triangle)); + } + + if (idx_vertex_in_triangle == 2) { + src = *(thrust::get<1>(triangle)); + dst = *(thrust::get<2>(triangle)); + } + + return thrust::make_tuple(src, dst); + }); + + if constexpr (multi_gpu) { + std::tie(std::get<0>(edgelist_to_update_count), + std::get<1>(edgelist_to_update_count), + std::ignore, + std::ignore, + std::ignore, + std::ignore, + std::ignore, + std::ignore) = + detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( + handle, + std::move(std::get<0>(edgelist_to_update_count)), + std::move(std::get<1>(edgelist_to_update_count)), + std::nullopt, + std::nullopt, + std::nullopt, + std::nullopt, + std::nullopt, + cur_graph_view.vertex_partition_range_lasts()); + } + + thrust::sort(handle.get_thrust_policy(), + get_dataframe_buffer_begin(edgelist_to_update_count), + get_dataframe_buffer_end(edgelist_to_update_count)); + + auto unique_pair_count = + thrust::unique_count(handle.get_thrust_policy(), + get_dataframe_buffer_begin(edgelist_to_update_count), + get_dataframe_buffer_end(edgelist_to_update_count)); + + auto vertex_pair_buffer_unique = allocate_dataframe_buffer>( + unique_pair_count, handle.get_stream()); + + rmm::device_uvector 
decrease_count(unique_pair_count, handle.get_stream()); + + thrust::reduce_by_key(handle.get_thrust_policy(), + get_dataframe_buffer_begin(edgelist_to_update_count), + get_dataframe_buffer_end(edgelist_to_update_count), + thrust::make_constant_iterator(size_t{1}), + get_dataframe_buffer_begin(vertex_pair_buffer_unique), + decrease_count.begin(), + thrust::equal_to>{}); + + std::tie(std::get<0>(vertex_pair_buffer_unique), + std::get<1>(vertex_pair_buffer_unique), + decrease_count) = + extract_transform_e( + handle, + cur_graph_view, + edge_src_out_degrees.view(), + edge_dst_out_degrees.view(), + edge_dummy_property_t{}.view(), + extract_low_to_high_degree_edges_from_endpoints_t{ + raft::device_span(std::get<0>(vertex_pair_buffer_unique).data(), + std::get<0>(vertex_pair_buffer_unique).size()), + raft::device_span(std::get<1>(vertex_pair_buffer_unique).data(), + std::get<1>(vertex_pair_buffer_unique).size()), + raft::device_span(decrease_count.data(), decrease_count.size())}); + + if constexpr (multi_gpu) { + auto& comm = handle.get_comms(); + auto const comm_size = comm.get_size(); + auto& major_comm = handle.get_subcomm(cugraph::partition_manager::major_comm_name()); + auto const major_comm_size = major_comm.get_size(); + auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); + auto const minor_comm_size = minor_comm.get_size(); + auto vertex_partition_range_lasts = cur_graph_view.vertex_partition_range_lasts(); + + rmm::device_uvector d_vertex_partition_range_lasts( + vertex_partition_range_lasts.size(), handle.get_stream()); + raft::update_device(d_vertex_partition_range_lasts.data(), + vertex_partition_range_lasts.data(), + vertex_partition_range_lasts.size(), + handle.get_stream()); + + std::forward_as_tuple(std::tie(std::get<0>(vertex_pair_buffer_unique), + std::get<1>(vertex_pair_buffer_unique), + decrease_count), + std::ignore) = + groupby_gpu_id_and_shuffle_values( + handle.get_comms(), + 
thrust::make_zip_iterator(std::get<0>(vertex_pair_buffer_unique).begin(), + std::get<1>(vertex_pair_buffer_unique).begin(), + decrease_count.begin()), + thrust::make_zip_iterator(std::get<0>(vertex_pair_buffer_unique).end(), + std::get<1>(vertex_pair_buffer_unique).end(), + decrease_count.end()), + [key_func = + cugraph::detail::compute_gpu_id_from_int_edge_endpoints_t{ + raft::device_span(d_vertex_partition_range_lasts.data(), + d_vertex_partition_range_lasts.size()), + comm_size, + major_comm_size, + minor_comm_size}] __device__(auto val) { + return key_func(thrust::get<0>(val), thrust::get<1>(val)); + }, + handle.get_stream()); + } + + thrust::sort_by_key(handle.get_thrust_policy(), + get_dataframe_buffer_begin(vertex_pair_buffer_unique), + get_dataframe_buffer_end(vertex_pair_buffer_unique), + decrease_count.begin()); + + // Update count of weak edges + edges_to_decrement_count.clear(); + + edges_to_decrement_count.insert(std::get<0>(vertex_pair_buffer_unique).begin(), + std::get<0>(vertex_pair_buffer_unique).end(), + std::get<1>(vertex_pair_buffer_unique).begin()); + + cur_graph_view.clear_edge_mask(); + // Check for edge existance on the directed graph view + cur_graph_view.attach_edge_mask(dodg_mask.view()); + + // Update count of weak edges from the DODG view + cugraph::transform_e( + handle, + cur_graph_view, + edges_to_decrement_count, + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + edge_triangle_counts.view(), + [edge_buffer_first = + thrust::make_zip_iterator(std::get<0>(vertex_pair_buffer_unique).begin(), + std::get<1>(vertex_pair_buffer_unique).begin()), + edge_buffer_last = thrust::make_zip_iterator(std::get<0>(vertex_pair_buffer_unique).end(), + std::get<1>(vertex_pair_buffer_unique).end()), + decrease_count = raft::device_span( + decrease_count.data(), decrease_count.size())] __device__(auto src, + auto dst, + cuda::std::nullopt_t, + cuda::std::nullopt_t, + edge_t count) { + auto itr_pair = 
thrust::lower_bound( + thrust::seq, edge_buffer_first, edge_buffer_last, thrust::make_tuple(src, dst)); + auto idx_pair = thrust::distance(edge_buffer_first, itr_pair); + count -= decrease_count[idx_pair]; + + return count; + }, + edge_triangle_counts.mutable_view(), + do_expensive_check); + + edgelist_weak.clear(); + + thrust::sort( + handle.get_thrust_policy(), + thrust::make_zip_iterator(weak_edgelist_srcs.begin(), weak_edgelist_dsts.begin()), + thrust::make_zip_iterator(weak_edgelist_srcs.end(), weak_edgelist_dsts.end())); - // Mask all the edges that have k - 2 count + edgelist_weak.insert( + weak_edgelist_srcs.begin(), weak_edgelist_srcs.end(), weak_edgelist_dsts.begin()); + + // Get undirected graph view + cur_graph_view.clear_edge_mask(); + cur_graph_view.attach_edge_mask(weak_edges_mask.view()); auto prev_number_of_edges = cur_graph_view.compute_number_of_edges(handle); cugraph::transform_e( handle, cur_graph_view, + edgelist_weak, cugraph::edge_src_dummy_property_t{}.view(), cugraph::edge_dst_dummy_property_t{}.view(), - edge_triangle_counts.view(), - [k] __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, auto count) { - return count >= k - 2; + cugraph::edge_dummy_property_t{}.view(), + [] __device__( + auto src, auto dst, cuda::std::nullopt_t, cuda::std::nullopt_t, cuda::std::nullopt_t) { + return false; }, - edge_mask.mutable_view(), - false); + weak_edges_mask.mutable_view(), + do_expensive_check); + + edgelist_weak.clear(); + + // shuffle the edges if multi_gpu + if constexpr (multi_gpu) { + std::tie(weak_edgelist_dsts, + weak_edgelist_srcs, + std::ignore, + std::ignore, + std::ignore, + std::ignore, + std::ignore, + std::ignore) = + detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( + handle, + std::move(weak_edgelist_dsts), + std::move(weak_edgelist_srcs), + std::nullopt, + std::nullopt, + std::nullopt, + std::nullopt, + std::nullopt, + cur_graph_view.vertex_partition_range_lasts()); + } + + 
thrust::sort( + handle.get_thrust_policy(), + thrust::make_zip_iterator(weak_edgelist_dsts.begin(), weak_edgelist_srcs.begin()), + thrust::make_zip_iterator(weak_edgelist_dsts.end(), weak_edgelist_srcs.end())); + + edgelist_weak.insert( + weak_edgelist_dsts.begin(), weak_edgelist_dsts.end(), weak_edgelist_srcs.begin()); + + cugraph::transform_e( + handle, + cur_graph_view, + edgelist_weak, + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + cugraph::edge_dummy_property_t{}.view(), + [] __device__( + auto src, auto dst, cuda::std::nullopt_t, cuda::std::nullopt_t, cuda::std::nullopt_t) { + return false; + }, + weak_edges_mask.mutable_view(), + do_expensive_check); - cur_graph_view.attach_edge_mask(edge_mask.view()); + cur_graph_view.attach_edge_mask(weak_edges_mask.view()); if (prev_number_of_edges == cur_graph_view.compute_number_of_edges(handle)) { break; } + + cur_graph_view.clear_edge_mask(); + cur_graph_view.attach_edge_mask(dodg_mask.view()); } + cur_graph_view.clear_edge_mask(); + cur_graph_view.attach_edge_mask(dodg_mask.view()); + + cugraph::transform_e( + handle, + cur_graph_view, + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + edge_triangle_counts.view(), + [] __device__(auto src, auto dst, cuda::std::nullopt_t, cuda::std::nullopt_t, auto count) { + return count == 0 ? false : true; + }, + dodg_mask.mutable_view(), + do_expensive_check); + rmm::device_uvector edgelist_srcs(0, handle.get_stream()); rmm::device_uvector edgelist_dsts(0, handle.get_stream()); std::optional> edgelist_wgts{std::nullopt}; @@ -362,11 +728,10 @@ k_truss(raft::handle_t const& handle, decompress_to_edgelist( handle, cur_graph_view, - edge_weight_view ? 
std::make_optional(*edge_weight_view) : std::nullopt, + edge_weight_view, std::optional>{std::nullopt}, std::optional>{std::nullopt}, - std::make_optional( - raft::device_span((*renumber_map).data(), (*renumber_map).size()))); + std::optional>{std::nullopt}); std::tie(edgelist_srcs, edgelist_dsts, diff --git a/cpp/src/community/triangle_count_impl.cuh b/cpp/src/community/triangle_count_impl.cuh index 100451f06f3..6718d08cdd7 100644 --- a/cpp/src/community/triangle_count_impl.cuh +++ b/cpp/src/community/triangle_count_impl.cuh @@ -27,6 +27,7 @@ #include #include +#include #include #include #include @@ -34,7 +35,6 @@ #include #include #include -#include #include #include #include @@ -64,19 +64,20 @@ struct is_two_or_greater_t { template struct extract_low_to_high_degree_edges_t { - __device__ thrust::optional> operator()(vertex_t src, - vertex_t dst, - edge_t src_out_degree, - edge_t dst_out_degree, - thrust::nullopt_t) const + __device__ cuda::std::optional> operator()( + vertex_t src, + vertex_t dst, + edge_t src_out_degree, + edge_t dst_out_degree, + cuda::std::nullopt_t) const { return (src_out_degree < dst_out_degree) - ? thrust::optional>{thrust::make_tuple(src, dst)} + ? cuda::std::optional>{thrust::make_tuple(src, dst)} : (((src_out_degree == dst_out_degree) && (src < dst) /* tie-breaking using vertex ID */) - ? thrust::optional>{thrust::make_tuple(src, - dst)} - : thrust::nullopt); + ? 
cuda::std::optional>{thrust::make_tuple(src, + dst)} + : cuda::std::nullopt); } }; @@ -85,8 +86,8 @@ struct intersection_op_t { __device__ thrust::tuple operator()( vertex_t, vertex_t, - thrust::nullopt_t, - thrust::nullopt_t, + cuda::std::nullopt_t, + cuda::std::nullopt_t, raft::device_span intersection) const { return thrust::make_tuple(static_cast(intersection.size()), diff --git a/cpp/src/components/mis_impl.cuh b/cpp/src/components/mis_impl.cuh index b593c639946..2714d7e3d63 100644 --- a/cpp/src/components/mis_impl.cuh +++ b/cpp/src/components/mis_impl.cuh @@ -1,6 +1,6 @@ /* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. + * Copyright (c) 2023-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -32,13 +32,13 @@ #include #include #include -#include #include #include #include #include #include +#include namespace cugraph { diff --git a/cpp/src/components/vertex_coloring_impl.cuh b/cpp/src/components/vertex_coloring_impl.cuh index fa7fb1f6099..7ad06d12027 100644 --- a/cpp/src/components/vertex_coloring_impl.cuh +++ b/cpp/src/components/vertex_coloring_impl.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, NVIDIA CORPORATION. + * Copyright (c) 2024-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -24,6 +24,8 @@ #include #include +#include + namespace cugraph { namespace detail { @@ -52,7 +54,8 @@ rmm::device_uvector vertex_coloring( edge_src_dummy_property_t{}.view(), edge_dst_dummy_property_t{}.view(), cugraph::edge_dummy_property_t{}.view(), - [] __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, thrust::nullopt_t) { + [] __device__( + auto src, auto dst, cuda::std::nullopt_t, cuda::std::nullopt_t, cuda::std::nullopt_t) { return !(src == dst); // mask out self-loop }, edge_masks_even.mutable_view()); @@ -119,7 +122,7 @@ rmm::device_uvector vertex_coloring( is_vertex_in_mis.begin(), vertex_t{0}), cugraph::edge_dummy_property_t{}.view(), [color_id] __device__( - auto src, auto dst, auto is_src_in_mis, auto is_dst_in_mis, thrust::nullopt_t) { + auto src, auto dst, auto is_src_in_mis, auto is_dst_in_mis, cuda::std::nullopt_t) { return !((is_src_in_mis == uint8_t{true}) || (is_dst_in_mis == uint8_t{true})); }, edge_masks_odd.mutable_view()); @@ -140,7 +143,7 @@ rmm::device_uvector vertex_coloring( is_vertex_in_mis.begin(), vertex_t{0}), cugraph::edge_dummy_property_t{}.view(), [color_id] __device__( - auto src, auto dst, auto is_src_in_mis, auto is_dst_in_mis, thrust::nullopt_t) { + auto src, auto dst, auto is_src_in_mis, auto is_dst_in_mis, cuda::std::nullopt_t) { return !((is_src_in_mis == uint8_t{true}) || (is_dst_in_mis == uint8_t{true})); }, edge_masks_even.mutable_view()); diff --git a/cpp/src/components/weakly_connected_components_impl.cuh b/cpp/src/components/weakly_connected_components_impl.cuh index e791f4dcad3..46db347e0bc 100644 --- a/cpp/src/components/weakly_connected_components_impl.cuh +++ b/cpp/src/components/weakly_connected_components_impl.cuh @@ -35,6 +35,7 @@ #include #include +#include #include #include #include @@ -45,7 +46,6 @@ #include #include #include -#include #include #include #include @@ -189,11 +189,11 @@ struct e_op_t { EdgeIterator edge_buffer_first{}; size_t* num_edge_inserts{}; - __device__ 
thrust::optional operator()(thrust::tuple tagged_src, - vertex_t dst, - thrust::nullopt_t, - thrust::nullopt_t, - thrust::nullopt_t) const + __device__ cuda::std::optional operator()(thrust::tuple tagged_src, + vertex_t dst, + cuda::std::nullopt_t, + cuda::std::nullopt_t, + cuda::std::nullopt_t) const { auto tag = thrust::get<1>(tagged_src); auto dst_offset = dst - dst_first; @@ -207,8 +207,8 @@ struct e_op_t { *(edge_buffer_first + edge_idx) = tag >= old ? thrust::make_tuple(tag, old) : thrust::make_tuple(old, tag); } - return old == invalid_component_id::value ? thrust::optional{tag} - : thrust::nullopt; + return old == invalid_component_id::value ? cuda::std::optional{tag} + : cuda::std::nullopt; } }; @@ -231,9 +231,10 @@ struct v_op_t { size_t bucket_idx_conflict{}; // relevant only if GraphViewType::is_multi_gpu is true template - __device__ std::enable_if_t, thrust::optional>> - operator()(thrust::tuple tagged_v, int /* v_val */) const + __device__ + std::enable_if_t, cuda::std::optional>> + operator()(thrust::tuple tagged_v, int /* v_val */) const { auto tag = thrust::get<1>(tagged_v); auto v_offset = @@ -242,22 +243,23 @@ struct v_op_t { auto old = invalid_component_id::value; bool success = v_component.compare_exchange_strong(old, tag, cuda::std::memory_order_relaxed); if (!success && (old != tag)) { // conflict - return thrust::make_tuple(thrust::optional{bucket_idx_conflict}, - thrust::optional{std::byte{0}} /* dummy */); + return thrust::make_tuple(cuda::std::optional{bucket_idx_conflict}, + cuda::std::optional{std::byte{0}} /* dummy */); } else { return thrust::make_tuple( - success ? thrust::optional{bucket_idx_next} : thrust::nullopt, - success ? thrust::optional{std::byte{0}} /* dummy */ : thrust::nullopt); + success ? cuda::std::optional{bucket_idx_next} : cuda::std::nullopt, + success ? 
cuda::std::optional{std::byte{0}} /* dummy */ : cuda::std::nullopt); } } template - __device__ std::enable_if_t, thrust::optional>> - operator()(thrust::tuple /* tagged_v */, int /* v_val */) const + __device__ + std::enable_if_t, cuda::std::optional>> + operator()(thrust::tuple /* tagged_v */, int /* v_val */) const { - return thrust::make_tuple(thrust::optional{bucket_idx_next}, - thrust::optional{std::byte{0}} /* dummy */); + return thrust::make_tuple(cuda::std::optional{bucket_idx_next}, + cuda::std::optional{std::byte{0}} /* dummy */); } }; diff --git a/cpp/src/cores/core_number_impl.cuh b/cpp/src/cores/core_number_impl.cuh index a2b6f6430f0..f1ff0912002 100644 --- a/cpp/src/cores/core_number_impl.cuh +++ b/cpp/src/cores/core_number_impl.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2024, NVIDIA CORPORATION. + * Copyright (c) 2020-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -28,6 +28,7 @@ #include +#include #include #include #include @@ -35,7 +36,6 @@ #include #include #include -#include #include #include #include @@ -53,10 +53,10 @@ struct e_op_t { size_t k{}; edge_t delta{}; - __device__ thrust::optional operator()( - vertex_t, vertex_t, thrust::nullopt_t, edge_t dst_val, thrust::nullopt_t) const + __device__ cuda::std::optional operator()( + vertex_t, vertex_t, cuda::std::nullopt_t, edge_t dst_val, cuda::std::nullopt_t) const { - return dst_val >= k ? thrust::optional{delta} : thrust::nullopt; + return dst_val >= k ? cuda::std::optional{delta} : cuda::std::nullopt; } }; @@ -251,8 +251,8 @@ void core_number(raft::handle_t const& handle, auto new_core_number = v_val >= pushed_val ? v_val - pushed_val : edge_t{0}; new_core_number = new_core_number < (k - delta) ? (k - delta) : new_core_number; new_core_number = new_core_number < k_first ? 
edge_t{0} : new_core_number; - return thrust::make_tuple(thrust::optional{bucket_idx_next}, - thrust::optional{new_core_number}); + return thrust::make_tuple(cuda::std::optional{bucket_idx_next}, + cuda::std::optional{new_core_number}); }); } diff --git a/cpp/src/detail/permute_range.cuh b/cpp/src/detail/permute_range.cuh index c7cd57c2048..a9d1b27f52d 100644 --- a/cpp/src/detail/permute_range.cuh +++ b/cpp/src/detail/permute_range.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, NVIDIA CORPORATION. + * Copyright (c) 2024-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -58,7 +58,7 @@ rmm::device_uvector permute_range(raft::handle_t const& handle, sub_range_sizes.begin(), sub_range_sizes.end(), sub_range_sizes.begin(), global_start); CUGRAPH_EXPECTS( sub_range_sizes[comm_rank] == local_range_start, - "Invalid input arguments: a rage must have contiguous and non-overlapping values"); + "Invalid input arguments: a range must have contiguous and non-overlapping values"); } rmm::device_uvector permuted_integers(local_range_size, handle.get_stream()); diff --git a/cpp/src/detail/permute_range_v32.cu b/cpp/src/detail/permute_range_v32.cu index 6a7bc059901..91d23487f03 100644 --- a/cpp/src/detail/permute_range_v32.cu +++ b/cpp/src/detail/permute_range_v32.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, NVIDIA CORPORATION. + * Copyright (c) 2024-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -16,23 +16,6 @@ #include "detail/permute_range.cuh" -#include -#include -#include -#include -#include - -#include -#include -#include - -#include -#include - -#include -#include -#include - namespace cugraph { namespace detail { diff --git a/cpp/src/detail/permute_range_v64.cu b/cpp/src/detail/permute_range_v64.cu index ad7daf16419..a6dbc9a72ae 100644 --- a/cpp/src/detail/permute_range_v64.cu +++ b/cpp/src/detail/permute_range_v64.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, NVIDIA CORPORATION. + * Copyright (c) 2024-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,23 +16,6 @@ #include "detail/permute_range.cuh" -#include -#include -#include -#include -#include - -#include -#include -#include - -#include -#include - -#include -#include -#include - namespace cugraph { namespace detail { diff --git a/cpp/src/lookup/lookup_src_dst_impl.cuh b/cpp/src/lookup/lookup_src_dst_impl.cuh index 45bbf870d80..dd03e621022 100644 --- a/cpp/src/lookup/lookup_src_dst_impl.cuh +++ b/cpp/src/lookup/lookup_src_dst_impl.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, NVIDIA CORPORATION. + * Copyright (c) 2024-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -29,6 +29,8 @@ #include +#include + namespace cugraph { template @@ -370,7 +372,7 @@ EdgeTypeAndIdToSrcDstLookupContainerType build_edge_id_and_type_to_src_dst_looku EdgeIdInputWrapper edge_id_view, EdgeTypeInputWrapper edge_type_view) { - static_assert(!std::is_same_v, + static_assert(!std::is_same_v, "Can not create edge id lookup table without edge ids"); using vertex_t = typename GraphViewType::vertex_type; @@ -411,17 +413,17 @@ EdgeTypeAndIdToSrcDstLookupContainerType build_edge_id_and_type_to_src_dst_looku cugraph::edge_src_dummy_property_t{}.view(), cugraph::edge_dst_dummy_property_t{}.view(), view_concat(edge_id_view, edge_type_view), - cuda::proclaim_return_type>>( + cuda::proclaim_return_type>>( [key_func = cugraph::detail::compute_gpu_id_from_ext_edge_id_t{ comm_size, major_comm_size, minor_comm_size}] __device__(auto, auto, - thrust::nullopt_t, - thrust::nullopt_t, + cuda::std::nullopt_t, + cuda::std::nullopt_t, thrust::tuple id_and_type) { - return thrust::optional>{thrust::make_tuple( + return cuda::std::optional>{thrust::make_tuple( key_func(thrust::get<0>(id_and_type)), thrust::get<1>(id_and_type))}; })); @@ -518,9 +520,9 @@ EdgeTypeAndIdToSrcDstLookupContainerType build_edge_id_and_type_to_src_dst_looku cugraph::edge_src_dummy_property_t{}.view(), cugraph::edge_dst_dummy_property_t{}.view(), edge_type_view, - cuda::proclaim_return_type>( - [] __device__(auto, auto, thrust::nullopt_t, thrust::nullopt_t, edge_type_t et) { - return thrust::optional{et}; + cuda::proclaim_return_type>( + [] __device__(auto, auto, cuda::std::nullopt_t, cuda::std::nullopt_t, edge_type_t et) { + return cuda::std::optional{et}; })); thrust::sort(handle.get_thrust_policy(), edge_types.begin(), edge_types.end()); diff --git a/cpp/src/prims/detail/extract_transform_v_frontier_e.cuh b/cpp/src/prims/detail/extract_transform_v_frontier_e.cuh index 2b89d214fd7..9816753852f 100644 --- a/cpp/src/prims/detail/extract_transform_v_frontier_e.cuh +++ 
b/cpp/src/prims/detail/extract_transform_v_frontier_e.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2024, NVIDIA CORPORATION. + * Copyright (c) 2020-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -40,6 +40,7 @@ #include #include +#include #include #include #include @@ -47,7 +48,6 @@ #include #include #include -#include #include #include @@ -138,7 +138,7 @@ __global__ static void extract_transform_v_frontier_e_hypersparse_or_low_degree( EdgePartitionSrcValueInputWrapper edge_partition_src_value_input, EdgePartitionDstValueInputWrapper edge_partition_dst_value_input, EdgePartitionEdgeValueInputWrapper edge_partition_e_value_input, - thrust::optional edge_partition_e_mask, + cuda::std::optional edge_partition_e_mask, BufferKeyOutputIterator buffer_key_output_first, BufferValueOutputIterator buffer_value_output_first, size_t* buffer_idx_ptr, @@ -238,7 +238,7 @@ __global__ static void extract_transform_v_frontier_e_hypersparse_or_low_degree( if (edge_partition_e_mask) { for (size_t i = lane_id; i < rounded_up_num_edges_this_warp; i += raft::warp_size()) { - e_op_result_t e_op_result{thrust::nullopt}; + e_op_result_t e_op_result{cuda::std::nullopt}; if (i < static_cast(num_edges_this_warp)) { auto key_idx_this_warp = static_cast(thrust::distance( @@ -261,7 +261,7 @@ __global__ static void extract_transform_v_frontier_e_hypersparse_or_low_degree( } } else { for (size_t i = lane_id; i < rounded_up_num_edges_this_warp; i += raft::warp_size()) { - e_op_result_t e_op_result{thrust::nullopt}; + e_op_result_t e_op_result{cuda::std::nullopt}; if (i < static_cast(num_edges_this_warp)) { auto key_idx_this_warp = static_cast(thrust::distance( @@ -304,7 +304,7 @@ __global__ static void extract_transform_v_frontier_e_mid_degree( EdgePartitionSrcValueInputWrapper edge_partition_src_value_input, EdgePartitionDstValueInputWrapper edge_partition_dst_value_input, 
EdgePartitionEdgeValueInputWrapper edge_partition_e_value_input, - thrust::optional edge_partition_e_mask, + cuda::std::optional edge_partition_e_mask, BufferKeyOutputIterator buffer_key_output_first, BufferValueOutputIterator buffer_value_output_first, size_t* buffer_idx_ptr, @@ -359,7 +359,7 @@ __global__ static void extract_transform_v_frontier_e_mid_degree( if (edge_partition_e_mask) { for (size_t i = lane_id; i < rounded_up_local_degree; i += raft::warp_size()) { - e_op_result_t e_op_result{thrust::nullopt}; + e_op_result_t e_op_result{cuda::std::nullopt}; if ((i < static_cast(local_degree)) && ((*edge_partition_e_mask).get(local_edge_offset + i))) { e_op_result = call_e_op(i); @@ -370,7 +370,7 @@ __global__ static void extract_transform_v_frontier_e_mid_degree( } } else { for (size_t i = lane_id; i < rounded_up_local_degree; i += raft::warp_size()) { - e_op_result_t e_op_result{thrust::nullopt}; + e_op_result_t e_op_result{cuda::std::nullopt}; if (i < static_cast(local_degree)) { e_op_result = call_e_op(i); } warp_push_buffer_elements( @@ -400,7 +400,7 @@ __global__ static void extract_transform_v_frontier_e_high_degree( EdgePartitionSrcValueInputWrapper edge_partition_src_value_input, EdgePartitionDstValueInputWrapper edge_partition_dst_value_input, EdgePartitionEdgeValueInputWrapper edge_partition_e_value_input, - thrust::optional edge_partition_e_mask, + cuda::std::optional edge_partition_e_mask, BufferKeyOutputIterator buffer_key_output_first, BufferValueOutputIterator buffer_value_output_first, size_t* buffer_idx_ptr, @@ -429,7 +429,7 @@ __global__ static void extract_transform_v_frontier_e_high_degree( ((static_cast(num_edges) + (raft::warp_size() - 1)) / raft::warp_size()) * raft::warp_size(); while (idx < rounded_up_num_edges) { - e_op_result_t e_op_result{thrust::nullopt}; + e_op_result_t e_op_result{cuda::std::nullopt}; if (idx < num_edges) { auto key_idx = thrust::distance( key_local_degree_offsets.begin() + 1, @@ -494,7 +494,7 @@ void 
extract_transform_v_frontier_e_edge_partition( EdgePartitionSrcValueInputWrapper edge_partition_src_value_input, EdgePartitionDstValueInputWrapper edge_partition_dst_value_input, EdgePartitionValueInputWrapper edge_partition_e_value_input, - thrust::optional edge_partition_e_mask, + cuda::std::optional edge_partition_e_mask, OptionalOutputKeyIterator output_key_first, OptionalOutputValueIterator output_value_first, raft::device_span count /* size = 1 */, @@ -665,21 +665,21 @@ extract_transform_v_frontier_e(raft::handle_t const& handle, EdgeOp>::type; using edge_partition_src_input_device_view_t = std::conditional_t< - std::is_same_v, + std::is_same_v, edge_partition_endpoint_dummy_property_device_view_t, edge_partition_endpoint_property_device_view_t< vertex_t, typename EdgeSrcValueInputWrapper::value_iterator, typename EdgeSrcValueInputWrapper::value_type>>; using edge_partition_dst_input_device_view_t = std::conditional_t< - std::is_same_v, + std::is_same_v, edge_partition_endpoint_dummy_property_device_view_t, edge_partition_endpoint_property_device_view_t< vertex_t, typename EdgeDstValueInputWrapper::value_iterator, typename EdgeDstValueInputWrapper::value_type>>; using edge_partition_e_input_device_view_t = std::conditional_t< - std::is_same_v, + std::is_same_v, detail::edge_partition_edge_dummy_property_device_view_t, detail::edge_partition_edge_property_device_view_t< edge_t, @@ -692,12 +692,12 @@ extract_transform_v_frontier_e(raft::handle_t const& handle, static_assert(!std::is_same_v); static_assert( std::is_same_v && - !std::is_same_v, - thrust::optional>, - std::conditional_t, - thrust::optional, - thrust::optional>>>); + std::conditional_t< + !std::is_same_v && !std::is_same_v, + cuda::std::optional>, + std::conditional_t, + cuda::std::optional, + cuda::std::optional>>>); constexpr bool try_bitmap = GraphViewType::is_multi_gpu && std::is_same_v && KeyBucketType::is_sorted_unique; @@ -1401,10 +1401,10 @@ extract_transform_v_frontier_e(raft::handle_t 
const& handle, graph_view.local_edge_partition_view(partition_idx)); auto edge_partition_e_mask = edge_mask_view - ? thrust::make_optional< + ? cuda::std::make_optional< detail::edge_partition_edge_property_device_view_t>( *edge_mask_view, partition_idx) - : thrust::nullopt; + : cuda::std::nullopt; size_t num_streams_per_loop{1}; if (stream_pool_indices) { assert((*stream_pool_indices).size() >= num_concurrent_loops); diff --git a/cpp/src/prims/detail/nbr_intersection.cuh b/cpp/src/prims/detail/nbr_intersection.cuh index 847c1db6937..d21a8153dc6 100644 --- a/cpp/src/prims/detail/nbr_intersection.cuh +++ b/cpp/src/prims/detail/nbr_intersection.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2024, NVIDIA CORPORATION. + * Copyright (c) 2022-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -36,6 +36,7 @@ #include #include +#include #include #include #include @@ -49,7 +50,6 @@ #include #include #include -#include #include #include #include @@ -106,7 +106,7 @@ struct update_rx_major_local_degree_t { int minor_comm_size{}; edge_partition_device_view_t edge_partition{}; - thrust::optional> + cuda::std::optional> edge_partition_e_mask{}; size_t reordered_idx_first{}; @@ -155,7 +155,7 @@ struct update_rx_major_local_nbrs_t { edge_partition_device_view_t edge_partition{}; edge_partition_e_input_device_view_t edge_partition_e_value_input{}; - thrust::optional> + cuda::std::optional> edge_partition_e_mask{}; size_t reordered_idx_first{}; @@ -214,7 +214,7 @@ struct update_rx_major_local_nbrs_t { if (local_degree > 0) { if (edge_partition_e_mask) { auto mask_first = (*edge_partition_e_mask).value_first(); - if constexpr (!std::is_same_v) { + if constexpr (!std::is_same_v) { auto input_first = thrust::make_zip_iterator(indices, edge_partition_e_value_input.value_first()); copy_if_mask_set(input_first, @@ -233,7 +233,7 @@ struct update_rx_major_local_nbrs_t { 
local_degree); } } else { - if constexpr (!std::is_same_v) { + if constexpr (!std::is_same_v) { auto input_first = thrust::make_zip_iterator(indices, edge_partition_e_value_input.value_first()) + edge_offset; @@ -278,7 +278,7 @@ struct pick_min_degree_t { raft::device_span second_element_offsets{}; edge_partition_device_view_t edge_partition{}; - thrust::optional> + cuda::std::optional> edge_partition_e_mask{}; __device__ edge_t operator()(thrust::tuple pair) const @@ -413,7 +413,7 @@ struct copy_intersecting_nbrs_and_update_intersection_size_t { edge_partition_device_view_t edge_partition{}; edge_partition_e_input_device_view_t edge_partition_e_value_input{}; - thrust::optional> + cuda::std::optional> edge_partition_e_mask{}; VertexPairIterator vertex_pair_first; @@ -430,7 +430,7 @@ struct copy_intersecting_nbrs_and_update_intersection_size_t { auto pair = *(vertex_pair_first + i); vertex_t const* indices0{}; - std::conditional_t, + std::conditional_t, edge_property_value_t const*, void*> edge_property_values0{}; @@ -439,7 +439,7 @@ struct copy_intersecting_nbrs_and_update_intersection_size_t { edge_t local_degree0{0}; if constexpr (std::is_same_v) { indices0 = edge_partition.indices(); - if constexpr (!std::is_same_v) { + if constexpr (!std::is_same_v) { edge_property_values0 = edge_partition_e_value_input.value_first(); } @@ -468,7 +468,7 @@ struct copy_intersecting_nbrs_and_update_intersection_size_t { } } else { indices0 = first_element_indices.begin(); - if constexpr (!std::is_same_v) { + if constexpr (!std::is_same_v) { edge_property_values0 = first_element_edge_property_values; } @@ -478,7 +478,7 @@ struct copy_intersecting_nbrs_and_update_intersection_size_t { } vertex_t const* indices1{}; - std::conditional_t, + std::conditional_t, edge_property_value_t const*, void*> edge_property_values1{}; @@ -487,7 +487,7 @@ struct copy_intersecting_nbrs_and_update_intersection_size_t { edge_t local_degree1{0}; if constexpr (std::is_same_v) { indices1 = 
edge_partition.indices(); - if constexpr (!std::is_same_v) { + if constexpr (!std::is_same_v) { edge_property_values1 = edge_partition_e_value_input.value_first(); } @@ -516,7 +516,7 @@ struct copy_intersecting_nbrs_and_update_intersection_size_t { } } else { indices1 = second_element_indices.begin(); - if constexpr (!std::is_same_v) { + if constexpr (!std::is_same_v) { edge_property_values1 = second_element_edge_property_values; } @@ -618,7 +618,7 @@ struct gatherv_indices_t { // in a single warp (better optimize if this becomes a performance bottleneck) for (int j = 0; j < minor_comm_size; ++j) { - if constexpr (!std::is_same_v) { + if constexpr (!std::is_same_v) { auto zipped_gathered_begin = thrust::make_zip_iterator( thrust::make_tuple(gathered_intersection_indices.begin(), gathered_nbr_intersection_e_property_values0, @@ -664,7 +664,7 @@ struct gatherv_indices_t { // number of groups" is recommended for load-balancing. template std::conditional_t< - !std::is_same_v, + !std::is_same_v, std::tuple, rmm::device_uvector, rmm::device_uvector, @@ -684,7 +684,7 @@ nbr_intersection(raft::handle_t const& handle, using edge_property_value_t = typename EdgeValueInputIterator::value_type; using edge_partition_e_input_device_view_t = - std::conditional_t, + std::conditional_t, detail::edge_partition_edge_dummy_property_device_view_t, detail::edge_partition_edge_property_device_view_t< edge_t, @@ -692,16 +692,16 @@ nbr_intersection(raft::handle_t const& handle, edge_property_value_t>>; using optional_property_buffer_value_type = - std::conditional_t, + std::conditional_t, edge_property_value_t, void>; using optional_property_buffer_view_t = - std::conditional_t, + std::conditional_t, edge_property_value_t const*, void*>; using optional_property_buffer_mutable_view_t = - std::conditional_t, + std::conditional_t, edge_property_value_t*, void*>; @@ -907,11 +907,11 @@ nbr_intersection(raft::handle_t const& handle, graph_view.local_edge_partition_view(i)); auto 
edge_partition_e_mask = edge_mask_view - ? thrust::make_optional< + ? cuda::std::make_optional< detail:: edge_partition_edge_property_device_view_t>( *edge_mask_view, i) - : thrust::nullopt; + : cuda::std::nullopt; auto segment_offsets = graph_view.local_edge_partition_segment_offsets(i); auto reordered_idx_first = (i == size_t{0}) ? size_t{0} : h_rx_reordered_group_lasts[i * major_comm_size - 1]; @@ -950,7 +950,7 @@ nbr_intersection(raft::handle_t const& handle, optional_property_buffer_mutable_view_t optional_local_e_property_values{}; - if constexpr (!std::is_same_v) { + if constexpr (!std::is_same_v) { local_e_property_values_for_rx_majors.resize(local_nbrs_for_rx_majors.size(), handle.get_stream()); optional_local_e_property_values = local_e_property_values_for_rx_majors.data(); @@ -964,11 +964,11 @@ nbr_intersection(raft::handle_t const& handle, edge_partition_e_input_device_view_t(edge_value_input, i); auto edge_partition_e_mask = edge_mask_view - ? thrust::make_optional< + ? cuda::std::make_optional< detail:: edge_partition_edge_property_device_view_t>( *edge_mask_view, i) - : thrust::nullopt; + : cuda::std::nullopt; auto segment_offsets = graph_view.local_edge_partition_segment_offsets(i); auto reordered_idx_first = @@ -1045,7 +1045,7 @@ nbr_intersection(raft::handle_t const& handle, std::tie(major_nbr_indices, std::ignore) = shuffle_values( major_comm, local_nbrs_for_rx_majors.begin(), local_nbr_counts, handle.get_stream()); - if constexpr (!std::is_same_v) { + if constexpr (!std::is_same_v) { std::tie(major_e_property_values, std::ignore) = shuffle_values(major_comm, local_e_property_values_for_rx_majors.begin(), @@ -1132,16 +1132,18 @@ nbr_intersection(raft::handle_t const& handle, edge_partition_nbr_intersection_sizes.reserve(graph_view.number_of_local_edge_partitions()); edge_partition_nbr_intersection_indices.reserve(graph_view.number_of_local_edge_partitions()); - [[maybe_unused]] std::conditional_t, - std::vector>, - std::byte /* dummy */> + 
[[maybe_unused]] std::conditional_t< + !std::is_same_v, + std::vector>, + std::byte /* dummy */> edge_partition_nbr_intersection_e_property_values0{}; - [[maybe_unused]] std::conditional_t, - std::vector>, - std::byte /* dummy */> + [[maybe_unused]] std::conditional_t< + !std::is_same_v, + std::vector>, + std::byte /* dummy */> edge_partition_nbr_intersection_e_property_values1{}; - if constexpr (!std::is_same_v) { + if constexpr (!std::is_same_v) { edge_partition_nbr_intersection_e_property_values0.reserve( graph_view.number_of_local_edge_partitions()); edge_partition_nbr_intersection_e_property_values1.reserve( @@ -1198,10 +1200,10 @@ nbr_intersection(raft::handle_t const& handle, edge_partition_e_input_device_view_t(edge_value_input, i); auto edge_partition_e_mask = edge_mask_view - ? thrust::make_optional< + ? cuda::std::make_optional< detail::edge_partition_edge_property_device_view_t>( *edge_mask_view, i) - : thrust::nullopt; + : cuda::std::nullopt; auto segment_offsets = graph_view.local_edge_partition_segment_offsets(i); @@ -1249,7 +1251,7 @@ nbr_intersection(raft::handle_t const& handle, optional_property_buffer_mutable_view_t rx_v_pair_optional_nbr_intersection_e_property_values1{}; - if constexpr (!std::is_same_v) { + if constexpr (!std::is_same_v) { rx_v_pair_nbr_intersection_e_property_values0.resize( rx_v_pair_nbr_intersection_indices.size(), handle.get_stream()); rx_v_pair_nbr_intersection_e_property_values1.resize( @@ -1264,7 +1266,7 @@ nbr_intersection(raft::handle_t const& handle, if (intersect_minor_nbr[0] && intersect_minor_nbr[1]) { optional_property_buffer_view_t optional_major_e_property_values{}; - if constexpr (!std::is_same_v) { + if constexpr (!std::is_same_v) { optional_major_e_property_values = major_e_property_values.data(); } @@ -1309,7 +1311,7 @@ nbr_intersection(raft::handle_t const& handle, CUGRAPH_FAIL("unimplemented."); } - if constexpr (std::is_same_v) { + if constexpr (std::is_same_v) { 
rx_v_pair_nbr_intersection_indices.resize( thrust::distance(rx_v_pair_nbr_intersection_indices.begin(), thrust::remove(handle.get_thrust_policy(), @@ -1515,7 +1517,7 @@ nbr_intersection(raft::handle_t const& handle, rx_displacements.back() + gathered_nbr_intersection_index_rx_counts.back(), handle.get_stream()); - if constexpr (!std::is_same_v) { + if constexpr (!std::is_same_v) { device_multicast_sendrecv(minor_comm, rx_v_pair_nbr_intersection_e_property_values0.begin(), rx_v_pair_nbr_intersection_index_tx_counts, @@ -1548,7 +1550,7 @@ nbr_intersection(raft::handle_t const& handle, gathered_nbr_intersection_e_property_values1.size(), handle.get_stream()); } - if constexpr (!std::is_same_v) { + if constexpr (!std::is_same_v) { thrust::for_each( handle.get_thrust_policy(), thrust::make_counting_iterator(size_t{0}), @@ -1598,7 +1600,7 @@ nbr_intersection(raft::handle_t const& handle, edge_partition_nbr_intersection_sizes.push_back(std::move(combined_nbr_intersection_sizes)); edge_partition_nbr_intersection_indices.push_back( std::move(combined_nbr_intersection_indices)); - if constexpr (!std::is_same_v) { + if constexpr (!std::is_same_v) { edge_partition_nbr_intersection_e_property_values0.push_back( std::move(combined_nbr_intersection_e_property_values0)); edge_partition_nbr_intersection_e_property_values1.push_back( @@ -1612,7 +1614,7 @@ nbr_intersection(raft::handle_t const& handle, num_nbr_intersection_indices += edge_partition_nbr_intersection_indices[i].size(); } nbr_intersection_indices.resize(num_nbr_intersection_indices, handle.get_stream()); - if constexpr (!std::is_same_v) { + if constexpr (!std::is_same_v) { nbr_intersection_e_property_values0.resize(nbr_intersection_indices.size(), handle.get_stream()); nbr_intersection_e_property_values1.resize(nbr_intersection_indices.size(), @@ -1631,7 +1633,7 @@ nbr_intersection(raft::handle_t const& handle, edge_partition_nbr_intersection_indices[i].end(), nbr_intersection_indices.begin() + index_offset); - if 
constexpr (!std::is_same_v) { + if constexpr (!std::is_same_v) { thrust::copy(handle.get_thrust_policy(), edge_partition_nbr_intersection_e_property_values0[i].begin(), edge_partition_nbr_intersection_e_property_values0[i].end(), @@ -1660,10 +1662,10 @@ nbr_intersection(raft::handle_t const& handle, auto edge_partition_e_value_input = edge_partition_e_input_device_view_t(edge_value_input, 0); auto edge_partition_e_mask = edge_mask_view - ? thrust::make_optional< + ? cuda::std::make_optional< detail::edge_partition_edge_property_device_view_t>( *edge_mask_view, 0) - : thrust::nullopt; + : cuda::std::nullopt; rmm::device_uvector nbr_intersection_sizes( input_size, @@ -1699,7 +1701,7 @@ nbr_intersection(raft::handle_t const& handle, optional_property_buffer_mutable_view_t optional_nbr_intersection_e_property_values0{}; optional_property_buffer_mutable_view_t optional_nbr_intersection_e_property_values1{}; - if constexpr (!std::is_same_v) { + if constexpr (!std::is_same_v) { nbr_intersection_e_property_values0.resize(nbr_intersection_indices.size(), handle.get_stream()); nbr_intersection_e_property_values1.resize(nbr_intersection_indices.size(), @@ -1770,7 +1772,7 @@ nbr_intersection(raft::handle_t const& handle, size_t{1} << 27, static_cast(thrust::distance(nbr_intersection_indices.begin() + num_scanned, nbr_intersection_indices.end()))); - if constexpr (std::is_same_v) { + if constexpr (std::is_same_v) { num_copied += static_cast(thrust::distance( tmp_indices.begin() + num_copied, thrust::copy_if(handle.get_thrust_policy(), @@ -1804,12 +1806,12 @@ nbr_intersection(raft::handle_t const& handle, num_scanned += this_scan_size; } nbr_intersection_indices = std::move(tmp_indices); - if constexpr (!std::is_same_v) { + if constexpr (!std::is_same_v) { nbr_intersection_e_property_values0 = std::move(tmp_property_values0); nbr_intersection_e_property_values1 = std::move(tmp_property_values1); } #else - if constexpr (std::is_same_v) { + if constexpr (std::is_same_v) { 
nbr_intersection_indices.resize( thrust::distance(nbr_intersection_indices.begin(), thrust::remove(handle.get_thrust_policy(), @@ -1845,7 +1847,7 @@ nbr_intersection(raft::handle_t const& handle, // 5. Return - if constexpr (std::is_same_v) { + if constexpr (std::is_same_v) { return std::make_tuple(std::move(nbr_intersection_offsets), std::move(nbr_intersection_indices)); diff --git a/cpp/src/prims/detail/partition_v_frontier.cuh b/cpp/src/prims/detail/partition_v_frontier.cuh index 018960d9a54..f5249aec304 100644 --- a/cpp/src/prims/detail/partition_v_frontier.cuh +++ b/cpp/src/prims/detail/partition_v_frontier.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, NVIDIA CORPORATION. + * Copyright (c) 2024-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -35,14 +35,12 @@ #include #include #include -#include #include #include #include #include #include -#include #include namespace cugraph { diff --git a/cpp/src/prims/detail/per_v_transform_reduce_e.cuh b/cpp/src/prims/detail/per_v_transform_reduce_e.cuh index c521774a50d..1e47bb53a9e 100644 --- a/cpp/src/prims/detail/per_v_transform_reduce_e.cuh +++ b/cpp/src/prims/detail/per_v_transform_reduce_e.cuh @@ -46,6 +46,7 @@ #include #include +#include #include #include #include @@ -54,7 +55,6 @@ #include #include #include -#include #include #include #include @@ -265,7 +265,7 @@ __global__ static void per_v_transform_reduce_e_hypersparse( EdgePartitionSrcValueInputWrapper edge_partition_src_value_input, EdgePartitionDstValueInputWrapper edge_partition_dst_value_input, EdgePartitionEdgeValueInputWrapper edge_partition_e_value_input, - thrust::optional edge_partition_e_mask, + cuda::std::optional edge_partition_e_mask, ResultValueOutputIteratorOrWrapper result_value_output, EdgeOp e_op, T init /* relevant only if update_major == true */, @@ -296,7 +296,7 @@ __global__ static void per_v_transform_reduce_e_hypersparse( 
while (idx < key_count) { key_t key{}; vertex_t major{}; - thrust::optional major_idx{}; + cuda::std::optional major_idx{}; if constexpr (use_input_key) { key = *(key_first + idx); major = thrust_tuple_get_or_identity(key); @@ -402,7 +402,7 @@ __global__ static void per_v_transform_reduce_e_low_degree( EdgePartitionSrcValueInputWrapper edge_partition_src_value_input, EdgePartitionDstValueInputWrapper edge_partition_dst_value_input, EdgePartitionEdgeValueInputWrapper edge_partition_e_value_input, - thrust::optional edge_partition_e_mask, + cuda::std::optional edge_partition_e_mask, ResultValueOutputIteratorOrWrapper result_value_output, EdgeOp e_op, T init /* relevant only if update_major == true */, @@ -512,7 +512,7 @@ __global__ static void per_v_transform_reduce_e_mid_degree( EdgePartitionSrcValueInputWrapper edge_partition_src_value_input, EdgePartitionDstValueInputWrapper edge_partition_dst_value_input, EdgePartitionEdgeValueInputWrapper edge_partition_e_value_input, - thrust::optional edge_partition_e_mask, + cuda::std::optional edge_partition_e_mask, ResultValueOutputIteratorOrWrapper result_value_output, EdgeOp e_op, T init /* relevant only if update_major == true */, @@ -596,7 +596,7 @@ __global__ static void per_v_transform_reduce_e_mid_degree( ((static_cast(local_degree) + (raft::warp_size() - 1)) / raft::warp_size()) * raft::warp_size(); for (size_t i = lane_id; i < rounded_up_local_degree; i += raft::warp_size()) { - thrust::optional e_op_result{thrust::nullopt}; + cuda::std::optional e_op_result{cuda::std::nullopt}; if ((i < static_cast(local_degree)) && (*edge_partition_e_mask).get(edge_offset + i) && call_pred_op(i)) { e_op_result = call_e_op(i); @@ -630,7 +630,7 @@ __global__ static void per_v_transform_reduce_e_mid_degree( ((static_cast(local_degree) + (raft::warp_size() - 1)) / raft::warp_size()) * raft::warp_size(); for (size_t i = lane_id; i < rounded_up_local_degree; i += raft::warp_size()) { - thrust::optional e_op_result{thrust::nullopt}; + 
cuda::std::optional e_op_result{cuda::std::nullopt}; if (i < static_cast(local_degree) && call_pred_op(i)) { e_op_result = call_e_op(i); } @@ -699,7 +699,7 @@ __global__ static void per_v_transform_reduce_e_high_degree( EdgePartitionSrcValueInputWrapper edge_partition_src_value_input, EdgePartitionDstValueInputWrapper edge_partition_dst_value_input, EdgePartitionEdgeValueInputWrapper edge_partition_e_value_input, - thrust::optional edge_partition_e_mask, + cuda::std::optional edge_partition_e_mask, ResultValueOutputIteratorOrWrapper result_value_output, EdgeOp e_op, T init /* relevant only if update_major == true */, @@ -790,7 +790,7 @@ __global__ static void per_v_transform_reduce_e_high_degree( per_v_transform_reduce_e_kernel_high_degree_reduce_any_block_size) * per_v_transform_reduce_e_kernel_high_degree_reduce_any_block_size; for (size_t i = threadIdx.x; i < rounded_up_local_degree; i += blockDim.x) { - thrust::optional e_op_result{thrust::nullopt}; + cuda::std::optional e_op_result{cuda::std::nullopt}; if ((i < static_cast(local_degree)) && (*edge_partition_e_mask).get(edge_offset + i) && call_pred_op(i)) { e_op_result = call_e_op(i); @@ -835,7 +835,7 @@ __global__ static void per_v_transform_reduce_e_high_degree( per_v_transform_reduce_e_kernel_high_degree_reduce_any_block_size) * per_v_transform_reduce_e_kernel_high_degree_reduce_any_block_size; for (size_t i = threadIdx.x; i < rounded_up_local_degree; i += blockDim.x) { - thrust::optional e_op_result{thrust::nullopt}; + cuda::std::optional e_op_result{cuda::std::nullopt}; if ((i < static_cast(local_degree)) && call_pred_op(i)) { e_op_result = call_e_op(i); } @@ -1141,7 +1141,7 @@ void per_v_transform_reduce_e_edge_partition( EdgePartitionSrcValueInputWrapper edge_partition_src_value_input, EdgePartitionDstValueInputWrapper edge_partition_dst_value_input, EdgePartitionValueInputWrapper edge_partition_e_value_input, - thrust::optional edge_partition_e_mask, + cuda::std::optional edge_partition_e_mask, 
ResultValueOutputIteratorOrWrapper output_buffer, EdgeOp e_op, T major_init, @@ -1415,21 +1415,21 @@ void per_v_transform_reduce_e(raft::handle_t const& handle, typename iterator_value_type_or_default_t::value_type; using edge_partition_src_input_device_view_t = std::conditional_t< - std::is_same_v, + std::is_same_v, detail::edge_partition_endpoint_dummy_property_device_view_t, detail::edge_partition_endpoint_property_device_view_t< vertex_t, typename EdgeSrcValueInputWrapper::value_iterator, typename EdgeSrcValueInputWrapper::value_type>>; using edge_partition_dst_input_device_view_t = std::conditional_t< - std::is_same_v, + std::is_same_v, detail::edge_partition_endpoint_dummy_property_device_view_t, detail::edge_partition_endpoint_property_device_view_t< vertex_t, typename EdgeDstValueInputWrapper::value_iterator, typename EdgeDstValueInputWrapper::value_type>>; using edge_partition_e_input_device_view_t = std::conditional_t< - std::is_same_v, + std::is_same_v, detail::edge_partition_edge_dummy_property_device_view_t, detail::edge_partition_edge_property_device_view_t< edge_t, @@ -1519,10 +1519,10 @@ void per_v_transform_reduce_e(raft::handle_t const& handle, graph_view.local_edge_partition_view(static_cast(minor_comm_rank))); auto edge_partition_e_mask = edge_mask_view - ? thrust::make_optional< + ? cuda::std::make_optional< detail::edge_partition_edge_property_device_view_t>( *edge_mask_view, static_cast(minor_comm_rank)) - : thrust::nullopt; + : cuda::std::nullopt; std::optional> edge_partition_stream_pool_indices{std::nullopt}; if (local_vertex_partition_segment_offsets && (handle.get_stream_pool_size() >= max_segments)) { @@ -1737,10 +1737,10 @@ void per_v_transform_reduce_e(raft::handle_t const& handle, sorted_unique_key_first, sorted_unique_nzd_key_last, deg1_v_first = (filter_input_key && graph_view.use_dcs()) - ? 
thrust::make_optional(graph_view.local_vertex_partition_range_first() + - (*local_vertex_partition_segment_offsets)[3] + - *((*hypersparse_degree_offsets).rbegin() + 1)) - : thrust::nullopt, + ? cuda::std::make_optional(graph_view.local_vertex_partition_range_first() + + (*local_vertex_partition_segment_offsets)[3] + + *((*hypersparse_degree_offsets).rbegin() + 1)) + : cuda::std::nullopt, vertex_partition_range_first = graph_view.local_vertex_partition_range_first()] __device__(size_t i) { if (i == 0) { @@ -3102,10 +3102,10 @@ void per_v_transform_reduce_e(raft::handle_t const& handle, graph_view.local_edge_partition_view(partition_idx)); auto edge_partition_e_mask = edge_mask_view - ? thrust::make_optional< + ? cuda::std::make_optional< detail::edge_partition_edge_property_device_view_t>( *edge_mask_view, partition_idx) - : thrust::nullopt; + : cuda::std::nullopt; size_t num_streams_per_loop{1}; if (stream_pool_indices) { assert((*stream_pool_indices).size() >= num_concurrent_loops); diff --git a/cpp/src/prims/detail/sample_and_compute_local_nbr_indices.cuh b/cpp/src/prims/detail/sample_and_compute_local_nbr_indices.cuh index dd0da77851b..3e38b85f105 100644 --- a/cpp/src/prims/detail/sample_and_compute_local_nbr_indices.cuh +++ b/cpp/src/prims/detail/sample_and_compute_local_nbr_indices.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, NVIDIA CORPORATION. + * Copyright (c) 2024-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -38,12 +38,12 @@ #include #include #include +#include #include #include #include #include #include -#include #include #include #include @@ -474,10 +474,10 @@ compute_valid_local_nbr_count_inclusive_sums( graph_view.local_edge_partition_view(i)); auto edge_partition_e_mask = edge_mask_view - ? thrust::make_optional< + ? 
cuda::std::make_optional< detail::edge_partition_edge_property_device_view_t>( *edge_mask_view, i) - : thrust::nullopt; + : cuda::std::nullopt; auto edge_partition_frontier_major_first = aggregate_local_frontier_major_first + local_frontier_displacements[i]; @@ -1255,10 +1255,10 @@ compute_aggregate_local_frontier_local_degrees( graph_view.local_edge_partition_view(i)); auto edge_partition_e_mask = edge_mask_view - ? thrust::make_optional< + ? cuda::std::make_optional< detail::edge_partition_edge_property_device_view_t>( *edge_mask_view, i) - : thrust::nullopt; + : cuda::std::nullopt; auto edge_partition_frontier_major_first = aggregate_local_frontier_major_first + local_frontier_displacements[i]; @@ -1501,9 +1501,9 @@ rmm::device_uvector convert_to_unmasked_local thrust::make_counting_iterator(size_t{0}), cuda::proclaim_return_type( [K, - key_indices = key_indices ? thrust::make_optional>( + key_indices = key_indices ? cuda::std::make_optional>( (*key_indices).data(), (*key_indices).size()) - : thrust::nullopt] __device__(size_t i) { + : cuda::std::nullopt] __device__(size_t i) { return key_indices ? (*key_indices)[i] : i / K; })); auto pair_first = thrust::make_zip_iterator(local_nbr_indices.begin(), sample_major_idx_first); @@ -1513,10 +1513,10 @@ rmm::device_uvector convert_to_unmasked_local graph_view.local_edge_partition_view(i)); auto edge_partition_e_mask = edge_mask_view - ? thrust::make_optional< + ? cuda::std::make_optional< detail::edge_partition_edge_property_device_view_t>( *edge_mask_view, i) - : thrust::nullopt; + : cuda::std::nullopt; auto edge_partition_frontier_major_first = aggregate_local_frontier_major_first + local_frontier_displacements[i]; @@ -1863,10 +1863,10 @@ biased_sample_and_compute_local_nbr_indices( sample_local_random_numbers.data() + local_frontier_sample_offsets[i], local_frontier_sample_offsets[i + 1] - local_frontier_sample_offsets[i]), key_indices = - key_indices ? thrust::make_optional>( + key_indices ? 
cuda::std::make_optional>( (*key_indices).data() + local_frontier_sample_offsets[i], local_frontier_sample_offsets[i + 1] - local_frontier_sample_offsets[i]) - : thrust::nullopt, + : cuda::std::nullopt, key_idx_to_unique_key_idx = raft::device_span(aggregate_local_frontier_key_idx_to_unique_key_idx.data() + local_frontier_displacements[i], diff --git a/cpp/src/prims/detail/transform_v_frontier_e.cuh b/cpp/src/prims/detail/transform_v_frontier_e.cuh index 5ebcddfe8da..8f414391596 100644 --- a/cpp/src/prims/detail/transform_v_frontier_e.cuh +++ b/cpp/src/prims/detail/transform_v_frontier_e.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, NVIDIA CORPORATION. + * Copyright (c) 2024-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -29,9 +29,9 @@ #include +#include #include #include -#include #include #include @@ -389,21 +389,21 @@ auto transform_v_frontier_e(raft::handle_t const& handle, static_assert(is_arithmetic_or_thrust_tuple_of_arithmetic::value); using edge_partition_src_input_device_view_t = std::conditional_t< - std::is_same_v, + std::is_same_v, detail::edge_partition_endpoint_dummy_property_device_view_t, detail::edge_partition_endpoint_property_device_view_t< vertex_t, typename EdgeSrcValueInputWrapper::value_iterator, typename EdgeSrcValueInputWrapper::value_type>>; using edge_partition_dst_input_device_view_t = std::conditional_t< - std::is_same_v, + std::is_same_v, detail::edge_partition_endpoint_dummy_property_device_view_t, detail::edge_partition_endpoint_property_device_view_t< vertex_t, typename EdgeDstValueInputWrapper::value_iterator, typename EdgeDstValueInputWrapper::value_type>>; using edge_partition_e_input_device_view_t = std::conditional_t< - std::is_same_v, + std::is_same_v, detail::edge_partition_edge_dummy_property_device_view_t, detail::edge_partition_edge_property_device_view_t< edge_t, @@ -424,10 +424,10 @@ auto 
transform_v_frontier_e(raft::handle_t const& handle, graph_view.local_edge_partition_view(i)); auto edge_partition_e_mask = edge_mask_view - ? thrust::make_optional< + ? cuda::std::make_optional< detail::edge_partition_edge_property_device_view_t>( *edge_mask_view, i) - : thrust::nullopt; + : cuda::std::nullopt; auto edge_partition_frontier_key_first = aggregate_local_frontier_key_first + local_frontier_displacements[i]; @@ -470,10 +470,10 @@ auto transform_v_frontier_e(raft::handle_t const& handle, graph_view.local_edge_partition_view(i)); auto edge_partition_e_mask = edge_mask_view - ? thrust::make_optional< + ? cuda::std::make_optional< detail::edge_partition_edge_property_device_view_t>( *edge_mask_view, i) - : thrust::nullopt; + : cuda::std::nullopt; auto edge_partition_frontier_key_first = aggregate_local_frontier_key_first + local_frontier_displacements[i]; diff --git a/cpp/src/prims/extract_transform_e.cuh b/cpp/src/prims/extract_transform_e.cuh index 5741c98d90e..bb003aa8747 100644 --- a/cpp/src/prims/extract_transform_e.cuh +++ b/cpp/src/prims/extract_transform_e.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2024, NVIDIA CORPORATION. + * Copyright (c) 2020-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -69,9 +69,9 @@ namespace cugraph { * access edge property values) or cugraph::edge_dummy_property_t::view() (if @p e_op does not * access edge property values). * @param e_op Quinary operator takes edge source, edge destination, property values for the source, - * property values for the destination, and property values for the edge and returns thrust::nullopt - * (if the return value is to be discarded) or a valid @p e_op output to be extracted and - * accumulated. 
+ * property values for the destination, and property values for the edge and returns + * cuda::std::nullopt (if the return value is to be discarded) or a valid @p e_op output to be + * extracted and accumulated. * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). * @return Dataframe buffer object storing extracted and accumulated valid @p e_op return values. */ diff --git a/cpp/src/prims/extract_transform_v_frontier_outgoing_e.cuh b/cpp/src/prims/extract_transform_v_frontier_outgoing_e.cuh index ba227b263bc..46984d6b4e5 100644 --- a/cpp/src/prims/extract_transform_v_frontier_outgoing_e.cuh +++ b/cpp/src/prims/extract_transform_v_frontier_outgoing_e.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2024, NVIDIA CORPORATION. + * Copyright (c) 2022-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -57,9 +57,9 @@ namespace cugraph { * access edge property values) or cugraph::edge_dummy_property_t::view() (if @p e_op does not * access edge property values). * @param e_op Quinary operator takes edge source, edge destination, property values for the source, - * property values for the destination, and property values for the edge and returns thrust::nullopt - * (if the return value is to be discarded) or a valid @p e_op output to be extracted and - * accumulated. + * property values for the destination, and property values for the edge and returns + * cuda::std::nullopt (if the return value is to be discarded) or a valid @p e_op output to be + * extracted and accumulated. * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). * @return Dataframe buffer object storing extracted and accumulated valid @p e_op return values. 
*/ diff --git a/cpp/src/prims/fill_edge_property.cuh b/cpp/src/prims/fill_edge_property.cuh index 54d0c454ec2..3e1383707a2 100644 --- a/cpp/src/prims/fill_edge_property.cuh +++ b/cpp/src/prims/fill_edge_property.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. + * Copyright (c) 2023-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,6 +24,7 @@ #include +#include #include #include @@ -50,10 +51,10 @@ void fill_edge_property(raft::handle_t const& handle, for (size_t i = 0; i < graph_view.number_of_local_edge_partitions(); ++i) { auto edge_partition_e_mask = edge_mask_view - ? thrust::make_optional< + ? cuda::std::make_optional< detail::edge_partition_edge_property_device_view_t>( *edge_mask_view, i) - : thrust::nullopt; + : cuda::std::nullopt; if constexpr (cugraph::has_packed_bool_element< std::remove_reference_t, diff --git a/cpp/src/prims/per_v_pair_transform_dst_nbr_intersection.cuh b/cpp/src/prims/per_v_pair_transform_dst_nbr_intersection.cuh index f03e8f54fb2..728c1eac2bd 100644 --- a/cpp/src/prims/per_v_pair_transform_dst_nbr_intersection.cuh +++ b/cpp/src/prims/per_v_pair_transform_dst_nbr_intersection.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2024, NVIDIA CORPORATION. + * Copyright (c) 2022-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -32,6 +32,7 @@ #include +#include #include #include #include @@ -43,7 +44,6 @@ #include #include #include -#include #include #include #include @@ -109,7 +109,7 @@ struct call_intersection_op_t { typename GraphViewType::edge_type, GraphViewType::is_multi_gpu> edge_partition{}; - thrust::optional> unique_vertices; + cuda::std::optional> unique_vertices; VertexValueInputIterator vertex_property_first; IntersectionOp intersection_op{}; size_t const* nbr_offsets{nullptr}; @@ -135,17 +135,17 @@ struct call_intersection_op_t { auto intersection = raft::device_span( nbr_indices + nbr_offsets[i], nbr_indices + nbr_offsets[i + 1]); - std::conditional_t, + std::conditional_t, raft::device_span, std::byte /* dummy */> property_values0{}; - std::conditional_t, + std::conditional_t, raft::device_span, std::byte /* dummy */> property_values1{}; - if constexpr (!std::is_same_v) { + if constexpr (!std::is_same_v) { property_values0 = raft::device_span( nbr_intersection_property_values0 + nbr_offsets[i], nbr_intersection_property_values0 + +nbr_offsets[i + 1]); @@ -392,7 +392,7 @@ void per_v_pair_transform_dst_nbr_intersection( [[maybe_unused]] rmm::device_uvector r_nbr_intersection_property_values1(size_t{0}, handle.get_stream()); - if constexpr (!std::is_same_v) { + if constexpr (!std::is_same_v) { std::tie(intersection_offsets, intersection_indices, r_nbr_intersection_property_values0, @@ -430,7 +430,7 @@ void per_v_pair_transform_dst_nbr_intersection( VertexPairIterator, VertexPairValueOutputIterator>{ edge_partition, - thrust::make_optional>( + cuda::std::make_optional>( (*sorted_unique_vertices).data(), (*sorted_unique_vertices).size()), vertex_value_input_for_sorted_unique_vertices_first, intersection_op, @@ -442,28 +442,29 @@ void per_v_pair_transform_dst_nbr_intersection( vertex_pair_first, vertex_pair_value_output_first}); } else { - thrust::for_each(handle.get_thrust_policy(), - thrust::make_counting_iterator(size_t{0}), - 
thrust::make_counting_iterator(this_chunk_size), - detail::call_intersection_op_t< - GraphViewType, - VertexValueInputIterator, - typename decltype(r_nbr_intersection_property_values0)::const_pointer, - IntersectionOp, - decltype(chunk_vertex_pair_index_first), - VertexPairIterator, - VertexPairValueOutputIterator>{ - edge_partition, - thrust::optional>{thrust::nullopt}, - vertex_value_input_first, - intersection_op, - intersection_offsets.data(), - intersection_indices.data(), - r_nbr_intersection_property_values0.data(), - r_nbr_intersection_property_values1.data(), - chunk_vertex_pair_index_first, - vertex_pair_first, - vertex_pair_value_output_first}); + thrust::for_each( + handle.get_thrust_policy(), + thrust::make_counting_iterator(size_t{0}), + thrust::make_counting_iterator(this_chunk_size), + detail::call_intersection_op_t< + GraphViewType, + VertexValueInputIterator, + typename decltype(r_nbr_intersection_property_values0)::const_pointer, + IntersectionOp, + decltype(chunk_vertex_pair_index_first), + VertexPairIterator, + VertexPairValueOutputIterator>{ + edge_partition, + cuda::std::optional>{cuda::std::nullopt}, + vertex_value_input_first, + intersection_op, + intersection_offsets.data(), + intersection_indices.data(), + r_nbr_intersection_property_values0.data(), + r_nbr_intersection_property_values1.data(), + chunk_vertex_pair_index_first, + vertex_pair_first, + vertex_pair_value_output_first}); } chunk_vertex_pair_index_first += this_chunk_size; diff --git a/cpp/src/prims/per_v_random_select_transform_outgoing_e.cuh b/cpp/src/prims/per_v_random_select_transform_outgoing_e.cuh index 30706632ad2..812e0a9b926 100644 --- a/cpp/src/prims/per_v_random_select_transform_outgoing_e.cuh +++ b/cpp/src/prims/per_v_random_select_transform_outgoing_e.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2024, NVIDIA CORPORATION. + * Copyright (c) 2022-2025, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -35,11 +35,11 @@ #include #include #include +#include #include #include #include #include -#include #include #include #include @@ -93,7 +93,7 @@ struct transform_local_nbr_indices_t { using edge_t = typename GraphViewType::edge_type; edge_partition_device_view_t edge_partition{}; - thrust::optional local_key_indices{thrust::nullopt}; + cuda::std::optional local_key_indices{cuda::std::nullopt}; KeyIterator key_first{}; LocalNbrIdxIterator local_nbr_idx_first{}; EdgePartitionSrcValueInputWrapper edge_partition_src_value_input; @@ -101,7 +101,7 @@ struct transform_local_nbr_indices_t { EdgePartitionEdgeValueInputWrapper edge_partition_e_value_input; EdgeOp e_op{}; edge_t invalid_idx{}; - thrust::optional invalid_value{thrust::nullopt}; + cuda::std::optional invalid_value{cuda::std::nullopt}; size_t K{}; __device__ T operator()(size_t i) const @@ -241,21 +241,21 @@ per_v_random_select_transform_e(raft::handle_t const& handle, using key_buffer_t = dataframe_buffer_type_t; using edge_partition_src_input_device_view_t = std::conditional_t< - std::is_same_v, + std::is_same_v, edge_partition_endpoint_dummy_property_device_view_t, edge_partition_endpoint_property_device_view_t< vertex_t, typename EdgeSrcValueInputWrapper::value_iterator, typename EdgeSrcValueInputWrapper::value_type>>; using edge_partition_dst_input_device_view_t = std::conditional_t< - std::is_same_v, + std::is_same_v, edge_partition_endpoint_dummy_property_device_view_t, edge_partition_endpoint_property_device_view_t< vertex_t, typename EdgeDstValueInputWrapper::value_iterator, typename EdgeDstValueInputWrapper::value_type>>; using edge_partition_e_input_device_view_t = std::conditional_t< - std::is_same_v, + std::is_same_v, detail::edge_partition_edge_dummy_property_device_view_t, detail::edge_partition_edge_property_device_view_t< edge_t, @@ -430,7 +430,7 @@ 
per_v_random_select_transform_e(raft::handle_t const& handle, EdgeOp, T>{ edge_partition, - thrust::make_optional(edge_partition_sample_key_index_first), + cuda::std::make_optional(edge_partition_sample_key_index_first), edge_partition_key_list_first, edge_partition_sample_local_nbr_index_first, edge_partition_src_value_input, @@ -454,7 +454,7 @@ per_v_random_select_transform_e(raft::handle_t const& handle, edge_partition_e_input_device_view_t, EdgeOp, T>{edge_partition, - thrust::nullopt, + cuda::std::nullopt, edge_partition_key_list_first, edge_partition_sample_local_nbr_index_first, edge_partition_src_value_input, diff --git a/cpp/src/prims/per_v_transform_reduce_dst_key_aggregated_outgoing_e.cuh b/cpp/src/prims/per_v_transform_reduce_dst_key_aggregated_outgoing_e.cuh index c13816242bc..4ebda9d42c4 100644 --- a/cpp/src/prims/per_v_transform_reduce_dst_key_aggregated_outgoing_e.cuh +++ b/cpp/src/prims/per_v_transform_reduce_dst_key_aggregated_outgoing_e.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2024, NVIDIA CORPORATION. + * Copyright (c) 2020-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -40,6 +40,7 @@ #include #include +#include #include #include #include @@ -49,7 +50,6 @@ #include #include #include -#include #include #include #include @@ -89,15 +89,16 @@ struct tuple_to_minor_comm_rank_t { int minor_comm_size{}; template - __device__ std::enable_if_t, int> operator()( + __device__ std::enable_if_t, int> + operator()( thrust::tuple val /* major, minor key, edge value */) const { return key_func(thrust::get<1>(val)) % minor_comm_size; } template - __device__ std::enable_if_t, int> operator()( - thrust::tuple val /* major, minor key */) const + __device__ std::enable_if_t, int> + operator()(thrust::tuple val /* major, minor key */) const { return key_func(thrust::get<1>(val)) % minor_comm_size; } @@ -123,13 +124,13 @@ template struct call_key_aggregated_e_op_t { EdgePartitionDeviceView edge_partition{}; - thrust::optional edge_major_value_map{}; + cuda::std::optional edge_major_value_map{}; EdgePartitionMajorValueInputWrapper edge_partition_major_value_input{}; EdgeMinorKeyValueMap edge_minor_key_value_map{}; KeyAggregatedEdgeOp key_aggregated_e_op{}; template - __device__ std::enable_if_t, e_op_result_t> + __device__ std::enable_if_t, e_op_result_t> operator()(thrust::tuple val /* major, minor key, aggregated edge value */) const { @@ -145,7 +146,7 @@ struct call_key_aggregated_e_op_t { } template - __device__ std::enable_if_t, e_op_result_t> + __device__ std::enable_if_t, e_op_result_t> operator()(thrust::tuple val /* major, minor key */) const { auto major = thrust::get<0>(val); @@ -155,7 +156,7 @@ struct call_key_aggregated_e_op_t { : edge_partition_major_value_input.get( edge_partition.major_offset_from_major_nocheck(major)); return key_aggregated_e_op( - major, minor_key, major_val, edge_minor_key_value_map.find(minor_key), thrust::nullopt); + major, minor_key, major_val, edge_minor_key_value_map.find(minor_key), cuda::std::nullopt); } }; @@ -284,16 +285,16 @@ void per_v_transform_reduce_dst_key_aggregated_outgoing_e( using edge_value_t = 
typename EdgeValueInputWrapper::value_type; using kv_pair_value_t = typename KVStoreViewType::value_type; using optional_edge_value_buffer_value_type = - std::conditional_t, edge_value_t, void>; + std::conditional_t, edge_value_t, void>; static_assert( - std::is_same_v || std::is_arithmetic_v, + std::is_same_v || std::is_arithmetic_v, "Currently only scalar values are supported, should be extended to support thrust::tuple of " "arithmetic types and void (for dummy property values) to be consistent with other " "primitives."); // this will also require a custom edge value aggregation op. using edge_partition_src_input_device_view_t = std::conditional_t< - std::is_same_v, + std::is_same_v, detail::edge_partition_endpoint_dummy_property_device_view_t, detail::edge_partition_endpoint_property_device_view_t< vertex_t, @@ -303,7 +304,7 @@ void per_v_transform_reduce_dst_key_aggregated_outgoing_e( vertex_t, typename EdgeDstKeyInputWrapper::value_iterator>; using edge_partition_e_input_device_view_t = std::conditional_t< - std::is_same_v, + std::is_same_v, detail::edge_partition_edge_dummy_property_device_view_t, detail::edge_partition_edge_property_device_view_t< edge_t, @@ -315,7 +316,7 @@ void per_v_transform_reduce_dst_key_aggregated_outgoing_e( auto total_global_mem = handle.get_device_properties().totalGlobalMem; size_t element_size = sizeof(vertex_t) * 2; // major + minor keys - if constexpr (!std::is_same_v) { + if constexpr (!std::is_same_v) { static_assert(is_arithmetic_or_thrust_tuple_of_arithmetic::value); if constexpr (is_thrust_tuple_of_arithmetic::value) { element_size += sum_thrust_tuple_element_sizes(); @@ -323,7 +324,7 @@ void per_v_transform_reduce_dst_key_aggregated_outgoing_e( element_size += sizeof(edge_value_t); } } - if constexpr (!std::is_same_v) { + if constexpr (!std::is_same_v) { static_assert(is_arithmetic_or_thrust_tuple_of_arithmetic::value); if constexpr (is_thrust_tuple_of_arithmetic::value) { element_size += 
sum_thrust_tuple_element_sizes(); @@ -350,10 +351,10 @@ void per_v_transform_reduce_dst_key_aggregated_outgoing_e( graph_view.local_edge_partition_view(i)); auto edge_partition_e_mask = edge_mask_view - ? thrust::make_optional< + ? cuda::std::make_optional< detail::edge_partition_edge_property_device_view_t>( *edge_mask_view, i) - : thrust::nullopt; + : cuda::std::nullopt; auto edge_partition_src_value_input = edge_partition_src_input_device_view_t(edge_src_value_input, i); @@ -472,7 +473,7 @@ void per_v_transform_reduce_dst_key_aggregated_outgoing_e( 1, handle.get_stream()); handle.sync_stream(); - if constexpr (!std::is_same_v) { + if constexpr (!std::is_same_v) { detail::copy_if_mask_set( handle, thrust::make_zip_iterator(minor_key_first, @@ -505,7 +506,7 @@ void per_v_transform_reduce_dst_key_aggregated_outgoing_e( (offsets_with_mask ? (*offsets_with_mask).data() : edge_partition.offsets()) + h_vertex_offsets[j], detail::rebase_offset_t{h_edge_offsets[j]}); - if constexpr (!std::is_same_v) { + if constexpr (!std::is_same_v) { cub::DeviceSegmentedSort::SortPairs( static_cast(nullptr), tmp_storage_bytes, @@ -536,7 +537,7 @@ void per_v_transform_reduce_dst_key_aggregated_outgoing_e( if (tmp_storage_bytes > d_tmp_storage.size()) { d_tmp_storage = rmm::device_uvector(tmp_storage_bytes, handle.get_stream()); } - if constexpr (!std::is_same_v) { + if constexpr (!std::is_same_v) { cub::DeviceSegmentedSort::SortPairs( d_tmp_storage.data(), tmp_storage_bytes, @@ -573,7 +574,7 @@ void per_v_transform_reduce_dst_key_aggregated_outgoing_e( thrust::make_zip_iterator(unreduced_majors.begin(), unreduced_minor_keys.begin()); auto output_key_first = thrust::make_zip_iterator(tmp_majors.begin(), tmp_minor_keys.begin()); - if constexpr (!std::is_same_v) { + if constexpr (!std::is_same_v) { reduced_size += thrust::distance(output_key_first + reduced_size, thrust::get<0>(thrust::reduce_by_key( @@ -626,7 +627,7 @@ void per_v_transform_reduce_dst_key_aggregated_outgoing_e( auto const 
minor_comm_size = minor_comm.get_size(); rmm::device_uvector d_tx_value_counts(0, handle.get_stream()); - if constexpr (!std::is_same_v) { + if constexpr (!std::is_same_v) { auto triplet_first = thrust::make_zip_iterator(tmp_majors.begin(), tmp_minor_keys.begin(), @@ -782,7 +783,7 @@ void per_v_transform_reduce_dst_key_aggregated_outgoing_e( tmp_minor_keys.resize(0, handle.get_stream()); tmp_minor_keys.shrink_to_fit(handle.get_stream()); - if constexpr (!std::is_same_v) { + if constexpr (!std::is_same_v) { std::tie(rx_key_aggregated_edge_values, std::ignore) = shuffle_values(minor_comm, detail::get_optional_dataframe_buffer_begin( @@ -795,7 +796,7 @@ void per_v_transform_reduce_dst_key_aggregated_outgoing_e( detail::shrink_to_fit_optional_dataframe_buffer( tmp_key_aggregated_edge_values, handle.get_stream()); } else { - if constexpr (!std::is_same_v) { + if constexpr (!std::is_same_v) { auto triplet_first = thrust::make_zip_iterator(tmp_majors.begin(), tmp_minor_keys.begin(), @@ -820,7 +821,7 @@ void per_v_transform_reduce_dst_key_aggregated_outgoing_e( } auto key_pair_first = thrust::make_zip_iterator(rx_majors.begin(), rx_minor_keys.begin()); - if constexpr (!std::is_same_v) { + if constexpr (!std::is_same_v) { if (rx_majors.size() > mem_frugal_threshold) { // trade-off parallelism to lower peak memory auto second_first = @@ -956,15 +957,15 @@ void per_v_transform_reduce_dst_key_aggregated_outgoing_e( auto major_value_map_device_view = (GraphViewType::is_multi_gpu && edge_src_value_input.keys()) - ? thrust::make_optionalview())>>(multi_gpu_major_value_map_ptr->view()) - : thrust::nullopt; + : cuda::std::nullopt; std::conditional_t, detail::kv_cuco_store_find_device_view_t> dst_key_value_map_device_view( GraphViewType::is_multi_gpu ? 
multi_gpu_minor_key_value_map_ptr->view() : kv_store_view); - if constexpr (!std::is_same_v) { + if constexpr (!std::is_same_v) { auto triplet_first = thrust::make_zip_iterator( tmp_majors.begin(), tmp_minor_keys.begin(), diff --git a/cpp/src/prims/property_op_utils.cuh b/cpp/src/prims/property_op_utils.cuh index 04ad22cbf71..2cab42c1dc8 100644 --- a/cpp/src/prims/property_op_utils.cuh +++ b/cpp/src/prims/property_op_utils.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2024, NVIDIA CORPORATION. + * Copyright (c) 2020-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -127,9 +127,7 @@ template typename Op> struct property_op : public Op {}; template typename Op> -struct property_op, Op> - : public thrust:: - binary_function, thrust::tuple, thrust::tuple> { +struct property_op, Op> { using Type = thrust::tuple; private: diff --git a/cpp/src/prims/transform_e.cuh b/cpp/src/prims/transform_e.cuh index aaa2703f1ae..d5d64f708ba 100644 --- a/cpp/src/prims/transform_e.cuh +++ b/cpp/src/prims/transform_e.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. + * Copyright (c) 2023-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -28,6 +28,7 @@ #include +#include #include #include #include @@ -273,21 +274,21 @@ void transform_e(raft::handle_t const& handle, using edge_t = typename GraphViewType::edge_type; using edge_partition_src_input_device_view_t = std::conditional_t< - std::is_same_v, + std::is_same_v, detail::edge_partition_endpoint_dummy_property_device_view_t, detail::edge_partition_endpoint_property_device_view_t< vertex_t, typename EdgeSrcValueInputWrapper::value_iterator, typename EdgeSrcValueInputWrapper::value_type>>; using edge_partition_dst_input_device_view_t = std::conditional_t< - std::is_same_v, + std::is_same_v, detail::edge_partition_endpoint_dummy_property_device_view_t, detail::edge_partition_endpoint_property_device_view_t< vertex_t, typename EdgeDstValueInputWrapper::value_iterator, typename EdgeDstValueInputWrapper::value_type>>; using edge_partition_e_input_device_view_t = std::conditional_t< - std::is_same_v, + std::is_same_v, detail::edge_partition_edge_dummy_property_device_view_t, detail::edge_partition_edge_property_device_view_t< edge_t, @@ -306,10 +307,10 @@ void transform_e(raft::handle_t const& handle, graph_view.local_edge_partition_view(i)); auto edge_partition_e_mask = edge_mask_view - ? thrust::make_optional< + ? 
cuda::std::make_optional< detail::edge_partition_edge_property_device_view_t>( *edge_mask_view, i) - : thrust::nullopt; + : cuda::std::nullopt; edge_partition_src_input_device_view_t edge_partition_src_value_input{}; edge_partition_dst_input_device_view_t edge_partition_dst_value_input{}; @@ -464,21 +465,21 @@ void transform_e(raft::handle_t const& handle, std::is_same_v>); using edge_partition_src_input_device_view_t = std::conditional_t< - std::is_same_v, + std::is_same_v, detail::edge_partition_endpoint_dummy_property_device_view_t, detail::edge_partition_endpoint_property_device_view_t< vertex_t, typename EdgeSrcValueInputWrapper::value_iterator, typename EdgeSrcValueInputWrapper::value_type>>; using edge_partition_dst_input_device_view_t = std::conditional_t< - std::is_same_v, + std::is_same_v, detail::edge_partition_endpoint_dummy_property_device_view_t, detail::edge_partition_endpoint_property_device_view_t< vertex_t, typename EdgeDstValueInputWrapper::value_iterator, typename EdgeDstValueInputWrapper::value_type>>; using edge_partition_e_input_device_view_t = std::conditional_t< - std::is_same_v, + std::is_same_v, detail::edge_partition_edge_dummy_property_device_view_t, detail::edge_partition_edge_property_device_view_t< edge_t, @@ -541,10 +542,10 @@ void transform_e(raft::handle_t const& handle, graph_view.local_edge_partition_view(i)); auto edge_partition_e_mask = edge_mask_view - ? thrust::make_optional< + ? 
cuda::std::make_optional< detail::edge_partition_edge_property_device_view_t>( *edge_mask_view, i) - : thrust::nullopt; + : cuda::std::nullopt; if (do_expensive_check) { CUGRAPH_EXPECTS( diff --git a/cpp/src/prims/transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cuh b/cpp/src/prims/transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cuh index c938b10fbbb..eef34938c57 100644 --- a/cpp/src/prims/transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cuh +++ b/cpp/src/prims/transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2024, NVIDIA CORPORATION. + * Copyright (c) 2022-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -32,6 +32,7 @@ #include +#include #include #include #include @@ -43,7 +44,6 @@ #include #include #include -#include #include #include #include @@ -249,14 +249,14 @@ void transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v( using weight_t = float; // dummy using edge_partition_src_input_device_view_t = std::conditional_t< - std::is_same_v, + std::is_same_v, detail::edge_partition_endpoint_dummy_property_device_view_t, detail::edge_partition_endpoint_property_device_view_t< vertex_t, typename EdgeSrcValueInputWrapper::value_iterator, typename EdgeSrcValueInputWrapper::value_type>>; using edge_partition_dst_input_device_view_t = std::conditional_t< - std::is_same_v, + std::is_same_v, detail::edge_partition_endpoint_dummy_property_device_view_t, detail::edge_partition_endpoint_property_device_view_t< vertex_t, diff --git a/cpp/src/prims/transform_reduce_e.cuh b/cpp/src/prims/transform_reduce_e.cuh index 43722550c58..1e45fea0608 100644 --- a/cpp/src/prims/transform_reduce_e.cuh +++ b/cpp/src/prims/transform_reduce_e.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2024, NVIDIA CORPORATION. + * Copyright (c) 2020-2025, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -35,11 +35,11 @@ #include +#include #include #include #include #include -#include #include #include #include @@ -68,7 +68,7 @@ __global__ static void transform_reduce_e_hypersparse( EdgePartitionSrcValueInputWrapper edge_partition_src_value_input, EdgePartitionDstValueInputWrapper edge_partition_dst_value_input, EdgePartitionEdgeValueInputWrapper edge_partition_e_value_input, - thrust::optional edge_partition_e_mask, + cuda::std::optional edge_partition_e_mask, ResultIterator result_iter /* size 1 */, EdgeOp e_op) { @@ -162,7 +162,7 @@ __global__ static void transform_reduce_e_low_degree( EdgePartitionSrcValueInputWrapper edge_partition_src_value_input, EdgePartitionDstValueInputWrapper edge_partition_dst_value_input, EdgePartitionEdgeValueInputWrapper edge_partition_e_value_input, - thrust::optional edge_partition_e_mask, + cuda::std::optional edge_partition_e_mask, ResultIterator result_iter /* size 1 */, EdgeOp e_op) { @@ -251,7 +251,7 @@ __global__ static void transform_reduce_e_mid_degree( EdgePartitionSrcValueInputWrapper edge_partition_src_value_input, EdgePartitionDstValueInputWrapper edge_partition_dst_value_input, EdgePartitionEdgeValueInputWrapper edge_partition_e_value_input, - thrust::optional edge_partition_e_mask, + cuda::std::optional edge_partition_e_mask, ResultIterator result_iter /* size 1 */, EdgeOp e_op) { @@ -329,7 +329,7 @@ __global__ static void transform_reduce_e_high_degree( EdgePartitionSrcValueInputWrapper edge_partition_src_value_input, EdgePartitionDstValueInputWrapper edge_partition_dst_value_input, EdgePartitionEdgeValueInputWrapper edge_partition_e_value_input, - thrust::optional edge_partition_e_mask, + cuda::std::optional edge_partition_e_mask, ResultIterator result_iter /* size 1 */, EdgeOp e_op) { @@ -445,21 +445,21 @@ T transform_reduce_e(raft::handle_t const& handle, using edge_t = typename 
GraphViewType::edge_type; using edge_partition_src_input_device_view_t = std::conditional_t< - std::is_same_v, + std::is_same_v, detail::edge_partition_endpoint_dummy_property_device_view_t, detail::edge_partition_endpoint_property_device_view_t< vertex_t, typename EdgeSrcValueInputWrapper::value_iterator, typename EdgeSrcValueInputWrapper::value_type>>; using edge_partition_dst_input_device_view_t = std::conditional_t< - std::is_same_v, + std::is_same_v, detail::edge_partition_endpoint_dummy_property_device_view_t, detail::edge_partition_endpoint_property_device_view_t< vertex_t, typename EdgeDstValueInputWrapper::value_iterator, typename EdgeDstValueInputWrapper::value_type>>; using edge_partition_e_input_device_view_t = std::conditional_t< - std::is_same_v, + std::is_same_v, detail::edge_partition_edge_dummy_property_device_view_t, detail::edge_partition_edge_property_device_view_t< edge_t, @@ -486,10 +486,10 @@ T transform_reduce_e(raft::handle_t const& handle, graph_view.local_edge_partition_view(i)); auto edge_partition_e_mask = edge_mask_view - ? thrust::make_optional< + ? cuda::std::make_optional< detail::edge_partition_edge_property_device_view_t>( *edge_mask_view, i) - : thrust::nullopt; + : cuda::std::nullopt; edge_partition_src_input_device_view_t edge_partition_src_value_input{}; edge_partition_dst_input_device_view_t edge_partition_dst_value_input{}; diff --git a/cpp/src/prims/transform_reduce_e_by_src_dst_key.cuh b/cpp/src/prims/transform_reduce_e_by_src_dst_key.cuh index 3abce6f8bd5..8786336bd10 100644 --- a/cpp/src/prims/transform_reduce_e_by_src_dst_key.cuh +++ b/cpp/src/prims/transform_reduce_e_by_src_dst_key.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2024, NVIDIA CORPORATION. + * Copyright (c) 2020-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -31,10 +31,10 @@ #include +#include #include #include #include -#include #include #include #include @@ -108,7 +108,7 @@ __global__ static void transform_reduce_by_src_dst_key_hypersparse( EdgePartitionEdgeValueInputWrapper edge_partition_e_value_input, EdgePartitionSrcDstKeyInputWrapper edge_partition_src_dst_key_input, EdgePartitionEdgeMaskWrapper edge_partition_e_mask, - thrust::optional> + cuda::std::optional> edge_offsets_with_mask, EdgeOp e_op, typename GraphViewType::vertex_type* keys, @@ -198,7 +198,7 @@ __global__ static void transform_reduce_by_src_dst_key_low_degree( EdgePartitionEdgeValueInputWrapper edge_partition_e_value_input, EdgePartitionSrcDstKeyInputWrapper edge_partition_src_dst_key_input, EdgePartitionEdgeMaskWrapper edge_partition_e_mask, - thrust::optional> + cuda::std::optional> edge_offsets_with_mask, EdgeOp e_op, typename GraphViewType::vertex_type* keys, @@ -284,7 +284,7 @@ __global__ static void transform_reduce_by_src_dst_key_mid_degree( EdgePartitionEdgeValueInputWrapper edge_partition_e_value_input, EdgePartitionSrcDstKeyInputWrapper edge_partition_src_dst_key_input, EdgePartitionEdgeMaskWrapper edge_partition_e_mask, - thrust::optional> + cuda::std::optional> edge_offsets_with_mask, EdgeOp e_op, typename GraphViewType::vertex_type* keys, @@ -383,7 +383,7 @@ __global__ static void transform_reduce_by_src_dst_key_high_degree( EdgePartitionEdgeValueInputWrapper edge_partition_e_value_input, EdgePartitionSrcDstKeyInputWrapper edge_partition_src_dst_key_input, EdgePartitionEdgeMaskWrapper edge_partition_e_mask, - thrust::optional> + cuda::std::optional> edge_offsets_with_mask, EdgeOp e_op, typename GraphViewType::vertex_type* keys, @@ -520,21 +520,21 @@ transform_reduce_e_by_src_dst_key(raft::handle_t const& handle, using edge_t = typename GraphViewType::edge_type; using edge_partition_src_input_device_view_t = std::conditional_t< - std::is_same_v, + std::is_same_v, detail::edge_partition_endpoint_dummy_property_device_view_t, 
detail::edge_partition_endpoint_property_device_view_t< vertex_t, typename EdgeSrcValueInputWrapper::value_iterator, typename EdgeSrcValueInputWrapper::value_type>>; using edge_partition_dst_input_device_view_t = std::conditional_t< - std::is_same_v, + std::is_same_v, detail::edge_partition_endpoint_dummy_property_device_view_t, detail::edge_partition_endpoint_property_device_view_t< vertex_t, typename EdgeDstValueInputWrapper::value_iterator, typename EdgeDstValueInputWrapper::value_type>>; using edge_partition_e_input_device_view_t = std::conditional_t< - std::is_same_v, + std::is_same_v, detail::edge_partition_edge_dummy_property_device_view_t, detail::edge_partition_edge_property_device_view_t< edge_t, @@ -556,10 +556,10 @@ transform_reduce_e_by_src_dst_key(raft::handle_t const& handle, graph_view.local_edge_partition_view(i)); auto edge_partition_e_mask = edge_mask_view - ? thrust::make_optional< + ? cuda::std::make_optional< detail::edge_partition_edge_property_device_view_t>( *edge_mask_view, i) - : thrust::nullopt; + : cuda::std::nullopt; rmm::device_uvector tmp_keys(0, handle.get_stream()); std::optional> edge_offsets_with_mask{std::nullopt}; @@ -627,9 +627,9 @@ transform_reduce_e_by_src_dst_key(raft::handle_t const& handle, edge_partition_src_dst_key_input, edge_partition_e_mask, edge_offsets_with_mask - ? thrust::make_optional>( + ? cuda::std::make_optional>( (*edge_offsets_with_mask).data(), (*edge_offsets_with_mask).size()) - : thrust::nullopt, + : cuda::std::nullopt, e_op, tmp_keys.data(), get_dataframe_buffer_begin(tmp_value_buffer)); @@ -650,9 +650,9 @@ transform_reduce_e_by_src_dst_key(raft::handle_t const& handle, edge_partition_src_dst_key_input, edge_partition_e_mask, edge_offsets_with_mask - ? thrust::make_optional>( + ? 
cuda::std::make_optional>( (*edge_offsets_with_mask).data(), (*edge_offsets_with_mask).size()) - : thrust::nullopt, + : cuda::std::nullopt, e_op, tmp_keys.data(), get_dataframe_buffer_begin(tmp_value_buffer)); @@ -673,9 +673,9 @@ transform_reduce_e_by_src_dst_key(raft::handle_t const& handle, edge_partition_src_dst_key_input, edge_partition_e_mask, edge_offsets_with_mask - ? thrust::make_optional>( + ? cuda::std::make_optional>( (*edge_offsets_with_mask).data(), (*edge_offsets_with_mask).size()) - : thrust::nullopt, + : cuda::std::nullopt, e_op, tmp_keys.data(), get_dataframe_buffer_begin(tmp_value_buffer)); @@ -695,9 +695,9 @@ transform_reduce_e_by_src_dst_key(raft::handle_t const& handle, edge_partition_src_dst_key_input, edge_partition_e_mask, edge_offsets_with_mask - ? thrust::make_optional>( + ? cuda::std::make_optional>( (*edge_offsets_with_mask).data(), (*edge_offsets_with_mask).size()) - : thrust::nullopt, + : cuda::std::nullopt, e_op, tmp_keys.data(), get_dataframe_buffer_begin(tmp_value_buffer)); @@ -719,9 +719,9 @@ transform_reduce_e_by_src_dst_key(raft::handle_t const& handle, edge_partition_src_dst_key_input, edge_partition_e_mask, edge_offsets_with_mask - ? thrust::make_optional>( + ? cuda::std::make_optional>( (*edge_offsets_with_mask).data(), (*edge_offsets_with_mask).size()) - : thrust::nullopt, + : cuda::std::nullopt, e_op, tmp_keys.data(), get_dataframe_buffer_begin(tmp_value_buffer)); diff --git a/cpp/src/prims/transform_reduce_v_frontier_outgoing_e_by_dst.cuh b/cpp/src/prims/transform_reduce_v_frontier_outgoing_e_by_dst.cuh index 87f590f571f..884079d103d 100644 --- a/cpp/src/prims/transform_reduce_v_frontier_outgoing_e_by_dst.cuh +++ b/cpp/src/prims/transform_reduce_v_frontier_outgoing_e_by_dst.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2024, NVIDIA CORPORATION. + * Copyright (c) 2020-2025, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -42,6 +42,7 @@ #include #include +#include #include #include #include @@ -52,7 +53,6 @@ #include #include #include -#include #include #include #include @@ -86,7 +86,7 @@ template && !std::is_same_v, thrust::tuple, std::conditional_t, key_t, payload_t>>> @@ -106,7 +106,7 @@ struct transform_reduce_v_frontier_call_e_op_t { thrust::get<1>(*e_op_result)); } } else { - return thrust::nullopt; + return cuda::std::nullopt; } } }; @@ -121,7 +121,7 @@ struct update_keep_flag_t { raft::device_span keep_flags{}; key_t v_range_first{}; InputKeyIterator input_key_first{}; - thrust::optional invalid_input_key{}; + cuda::std::optional invalid_input_key{}; __device__ void operator()(size_t i) const { @@ -1058,10 +1058,10 @@ size_t compute_num_out_nbrs_from_frontier(raft::handle_t const& handle, graph_view.local_edge_partition_view(i)); auto edge_partition_e_mask = edge_mask_view - ? thrust::make_optional< + ? cuda::std::make_optional< detail::edge_partition_edge_property_device_view_t>( *edge_mask_view, i) - : thrust::nullopt; + : cuda::std::nullopt; if constexpr (GraphViewType::is_multi_gpu) { auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); @@ -1110,9 +1110,9 @@ size_t compute_num_out_nbrs_from_frontier(raft::handle_t const& handle, * @brief Iterate over outgoing edges from the current vertex frontier and reduce valid edge functor * outputs by (tagged-)destination ID. * - * Edge functor outputs are thrust::optional objects and invalid if thrust::nullopt. Vertices are - * assumed to be tagged if KeyBucketType::key_type is a tuple of a vertex type and a tag - * type (KeyBucketType::key_type is identical to a vertex type otherwise). + * Edge functor outputs are cuda::std::optional objects and invalid if cuda::std::nullopt. 
Vertices + * are assumed to be tagged if KeyBucketType::key_type is a tuple of a vertex type and a tag type + * (KeyBucketType::key_type is identical to a vertex type otherwise). * * @tparam GraphViewType Type of the passed non-owning graph object. * @tparam KeyBucketType Type of the vertex frontier bucket class which abstracts the @@ -1141,10 +1141,10 @@ size_t compute_num_out_nbrs_from_frontier(raft::handle_t const& handle, * access edge property values) or cugraph::edge_dummy_property_t::view() (if @p e_op does not * access edge property values). * @param e_op Quinary operator takes edge (tagged-)source, edge destination, property values for - * the source, destination, and edge and returns 1) thrust::nullopt (if invalid and to be - * discarded); 2) dummy (but valid) thrust::optional object (e.g. - * thrust::optional{std::byte{0}}, if vertices are not tagged and ReduceOp::value_type is - * void); 3) a tag (if vertices are tagged and ReduceOp::value_type is void); 4) a value to be + * the source, destination, and edge and returns 1) cuda::std::nullopt (if invalid and to be + * discarded); 2) dummy (but valid) cuda::std::optional object (e.g. + * cuda::std::optional{std::byte{0}}, if vertices are not tagged and ReduceOp::value_type + * is void); 3) a tag (if vertices are tagged and ReduceOp::value_type is void); 4) a value to be * reduced using the @p reduce_op (if vertices are not tagged and ReduceOp::value_type is not void); * or 5) a tuple of a tag and a value to be reduced (if vertices are tagged and ReduceOp::value_type * is not void). diff --git a/cpp/src/prims/update_v_frontier.cuh b/cpp/src/prims/update_v_frontier.cuh index a9b0a6b823b..0516ebc7d31 100644 --- a/cpp/src/prims/update_v_frontier.cuh +++ b/cpp/src/prims/update_v_frontier.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2024, NVIDIA CORPORATION. + * Copyright (c) 2020-2025, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -157,10 +157,10 @@ struct check_invalid_bucket_idx_t { * graph_view.local_vertex_partition_range_size(). * @param v_op Ternary operator that takes (tagged-)vertex ID, *(@p vertex_value_input_first + i) * (where i is [0, @p graph_view.local_vertex_partition_range_size())) and the payload value for the - * (tagged-)vertex ID and returns a tuple of 1) a thrust::optional object optionally storing a - * bucket index and 2) a thrust::optional object optionally storing a new vertex property value. If - * the first element of the returned tuple is thrust::nullopt, this (tagged-)vertex won't be - * inserted to the vertex frontier. If the second element is thrust::nullopt, the vertex property + * (tagged-)vertex ID and returns a tuple of 1) a cuda::std::optional object optionally storing a + * bucket index and 2) a cuda::std::optional object optionally storing a new vertex property value. + * If the first element of the returned tuple is cuda::std::nullopt, this (tagged-)vertex won't be + * inserted to the vertex frontier. If the second element is cuda::std::nullopt, the vertex property * value for this vertex won't be updated. Note that it is currently undefined behavior if there are * multiple tagged-vertices with the same vertex ID (but with different tags) AND @p v_op results on * the tagged-vertices with the same vertex ID have more than one valid new vertex property values. @@ -286,13 +286,13 @@ void update_v_frontier(raft::handle_t const& handle, * graph_view.local_vertex_partition_range_size(). 
* @param v_op Binary operator that takes (tagged-)vertex ID, and *(@p vertex_value_input_first + i) * (where i is [0, @p graph_view.local_vertex_partition_range_size())) and returns a tuple of 1) a - * thrust::optional object optionally storing a bucket index and 2) a thrust::optional object + * cuda::std::optional object optionally storing a bucket index and 2) a cuda::std::optional object * optionally storing a new vertex property value. If the first element of the returned tuple is - * thrust::nullopt, this (tagged-)vertex won't be inserted to the vertex frontier. If the second - * element is thrust::nullopt, the vertex property value for this vertex won't be updated. Note that - * it is currently undefined behavior if there are multiple tagged-vertices with the same vertex ID - * (but with different tags) AND @p v_op results on the tagged-vertices with the same vertex ID have - * more than one valid new vertex property values. + * cuda::std::nullopt, this (tagged-)vertex won't be inserted to the vertex frontier. If the second + * element is cuda::std::nullopt, the vertex property value for this vertex won't be updated. Note + * that it is currently undefined behavior if there are multiple tagged-vertices with the same + * vertex ID (but with different tags) AND @p v_op results on the tagged-vertices with the same + * vertex ID have more than one valid new vertex property values. 
*/ template -#include +#include #include namespace cugraph { @@ -40,8 +40,8 @@ struct return_edges_with_properties_e_op { template auto __host__ __device__ operator()(key_t optionally_tagged_src, vertex_t dst, - thrust::nullopt_t, - thrust::nullopt_t, + cuda::std::nullopt_t, + cuda::std::nullopt_t, EdgeProperties edge_properties) const { static_assert(std::is_same_v || @@ -51,43 +51,43 @@ struct return_edges_with_properties_e_op { if constexpr (std::is_same_v) { vertex_t src{optionally_tagged_src}; - if constexpr (std::is_same_v) { - return thrust::make_optional(thrust::make_tuple(src, dst)); + if constexpr (std::is_same_v) { + return cuda::std::make_optional(thrust::make_tuple(src, dst)); } else if constexpr (std::is_arithmetic::value) { - return thrust::make_optional(thrust::make_tuple(src, dst, edge_properties)); + return cuda::std::make_optional(thrust::make_tuple(src, dst, edge_properties)); } else if constexpr (cugraph::is_thrust_tuple_of_arithmetic::value && (thrust::tuple_size::value == 2)) { - return thrust::make_optional(thrust::make_tuple( + return cuda::std::make_optional(thrust::make_tuple( src, dst, thrust::get<0>(edge_properties), thrust::get<1>(edge_properties))); } else if constexpr (cugraph::is_thrust_tuple_of_arithmetic::value && (thrust::tuple_size::value == 3)) { - return thrust::make_optional(thrust::make_tuple(src, - dst, - thrust::get<0>(edge_properties), - thrust::get<1>(edge_properties), - thrust::get<2>(edge_properties))); + return cuda::std::make_optional(thrust::make_tuple(src, + dst, + thrust::get<0>(edge_properties), + thrust::get<1>(edge_properties), + thrust::get<2>(edge_properties))); } } else if constexpr (std::is_same_v>) { vertex_t src{thrust::get<0>(optionally_tagged_src)}; int32_t label{thrust::get<1>(optionally_tagged_src)}; src = thrust::get<0>(optionally_tagged_src); - if constexpr (std::is_same_v) { - return thrust::make_optional(thrust::make_tuple(src, dst, label)); + if constexpr (std::is_same_v) { + return 
cuda::std::make_optional(thrust::make_tuple(src, dst, label)); } else if constexpr (std::is_arithmetic::value) { - return thrust::make_optional(thrust::make_tuple(src, dst, edge_properties, label)); + return cuda::std::make_optional(thrust::make_tuple(src, dst, edge_properties, label)); } else if constexpr (cugraph::is_thrust_tuple_of_arithmetic::value && (thrust::tuple_size::value == 2)) { - return thrust::make_optional(thrust::make_tuple( + return cuda::std::make_optional(thrust::make_tuple( src, dst, thrust::get<0>(edge_properties), thrust::get<1>(edge_properties), label)); } else if constexpr (cugraph::is_thrust_tuple_of_arithmetic::value && (thrust::tuple_size::value == 3)) { - return thrust::make_optional(thrust::make_tuple(src, - dst, - thrust::get<0>(edge_properties), - thrust::get<1>(edge_properties), - thrust::get<2>(edge_properties), - label)); + return cuda::std::make_optional(thrust::make_tuple(src, + dst, + thrust::get<0>(edge_properties), + thrust::get<1>(edge_properties), + thrust::get<2>(edge_properties), + label)); } } } diff --git a/cpp/src/sampling/detail/prepare_next_frontier_impl.cuh b/cpp/src/sampling/detail/prepare_next_frontier_impl.cuh index 5c04d628f09..2a6136fb96a 100644 --- a/cpp/src/sampling/detail/prepare_next_frontier_impl.cuh +++ b/cpp/src/sampling/detail/prepare_next_frontier_impl.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2024, NVIDIA CORPORATION. + * Copyright (c) 2022-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -29,11 +29,12 @@ #include -#include #include #include #include +#include + namespace cugraph { namespace detail { diff --git a/cpp/src/sampling/detail/remove_visited_vertices_from_frontier.cuh b/cpp/src/sampling/detail/remove_visited_vertices_from_frontier.cuh index f6793c4a157..9ce8edbb9c1 100644 --- a/cpp/src/sampling/detail/remove_visited_vertices_from_frontier.cuh +++ b/cpp/src/sampling/detail/remove_visited_vertices_from_frontier.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. + * Copyright (c) 2023-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,10 +20,11 @@ #include #include -#include #include #include +#include + namespace cugraph { namespace detail { diff --git a/cpp/src/sampling/detail/remove_visited_vertices_from_frontier_sg_v32_e32.cu b/cpp/src/sampling/detail/remove_visited_vertices_from_frontier_sg_v32_e32.cu index 89634253ee7..35c2fc5abb1 100644 --- a/cpp/src/sampling/detail/remove_visited_vertices_from_frontier_sg_v32_e32.cu +++ b/cpp/src/sampling/detail/remove_visited_vertices_from_frontier_sg_v32_e32.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. + * Copyright (c) 2023-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,10 +21,11 @@ #include #include -#include #include #include +#include + namespace cugraph { namespace detail { diff --git a/cpp/src/sampling/detail/remove_visited_vertices_from_frontier_sg_v64_e64.cu b/cpp/src/sampling/detail/remove_visited_vertices_from_frontier_sg_v64_e64.cu index 41cb7413bc4..b627431d53f 100644 --- a/cpp/src/sampling/detail/remove_visited_vertices_from_frontier_sg_v64_e64.cu +++ b/cpp/src/sampling/detail/remove_visited_vertices_from_frontier_sg_v64_e64.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. 
+ * Copyright (c) 2023-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,10 +21,11 @@ #include #include -#include #include #include +#include + namespace cugraph { namespace detail { diff --git a/cpp/src/sampling/detail/sample_edges.cuh b/cpp/src/sampling/detail/sample_edges.cuh index 0c670c6507e..a4e228522aa 100644 --- a/cpp/src/sampling/detail/sample_edges.cuh +++ b/cpp/src/sampling/detail/sample_edges.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2024, NVIDIA CORPORATION. + * Copyright (c) 2022-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -29,7 +29,7 @@ #include -#include +#include #include #include @@ -41,12 +41,12 @@ struct sample_edges_op_t { template auto __host__ __device__ operator()(vertex_t src, vertex_t dst, - thrust::nullopt_t, - thrust::nullopt_t, + cuda::std::nullopt_t, + cuda::std::nullopt_t, EdgeProperties edge_properties) const { // FIXME: A solution using thrust_tuple_cat would be more flexible here - if constexpr (std::is_same_v) { + if constexpr (std::is_same_v) { return thrust::make_tuple(src, dst); } else if constexpr (std::is_arithmetic::value) { return thrust::make_tuple(src, dst, edge_properties); @@ -68,7 +68,7 @@ struct sample_edges_op_t { template struct sample_edge_biases_op_t { auto __host__ __device__ - operator()(vertex_t, vertex_t, thrust::nullopt_t, thrust::nullopt_t, bias_t bias) const + operator()(vertex_t, vertex_t, cuda::std::nullopt_t, cuda::std::nullopt_t, bias_t bias) const { return bias; } diff --git a/cpp/src/sampling/detail/sampling_utils.hpp b/cpp/src/sampling/detail/sampling_utils.hpp index 17eb8dd0873..71387eb0e63 100644 --- a/cpp/src/sampling/detail/sampling_utils.hpp +++ b/cpp/src/sampling/detail/sampling_utils.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2024, NVIDIA CORPORATION. 
+ * Copyright (c) 2022-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -22,7 +22,7 @@ #include -#include +#include namespace cugraph { namespace detail { diff --git a/cpp/src/sampling/detail/shuffle_and_organize_output_impl.cuh b/cpp/src/sampling/detail/shuffle_and_organize_output_impl.cuh index 391dd99b1df..ce4888e3359 100644 --- a/cpp/src/sampling/detail/shuffle_and_organize_output_impl.cuh +++ b/cpp/src/sampling/detail/shuffle_and_organize_output_impl.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2024, NVIDIA CORPORATION. + * Copyright (c) 2022-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -32,10 +32,11 @@ #include -#include #include #include +#include + namespace cugraph { namespace detail { diff --git a/cpp/src/sampling/negative_sampling_impl.cuh b/cpp/src/sampling/negative_sampling_impl.cuh index 541eda67860..9aedc5dfc35 100644 --- a/cpp/src/sampling/negative_sampling_impl.cuh +++ b/cpp/src/sampling/negative_sampling_impl.cuh @@ -16,8 +16,11 @@ #pragma once +#include "cugraph/detail/collect_comm_wrapper.hpp" +#include "cugraph/utilities/device_comm.hpp" #include "prims/reduce_v.cuh" #include "prims/update_edge_src_dst_property.cuh" +#include "thrust/iterator/zip_iterator.h" #include "utilities/collect_comm.cuh" #include @@ -26,6 +29,10 @@ #include #include +#include +#include +#include + #include #include @@ -37,6 +44,8 @@ #include #include +#include + namespace cugraph { namespace detail { @@ -265,11 +274,19 @@ std::tuple, rmm::device_uvector> negativ bool exact_number_of_samples, bool do_expensive_check) { - rmm::device_uvector src(0, handle.get_stream()); - rmm::device_uvector dst(0, handle.get_stream()); + rmm::device_uvector srcs(0, handle.get_stream()); + rmm::device_uvector dsts(0, handle.get_stream()); // Optimistically 
assume we can do this in one pass - size_t samples_in_this_batch = num_samples; + size_t total_samples{num_samples}; + std::vector samples_per_gpu; + + if constexpr (multi_gpu) { + samples_per_gpu = host_scalar_allgather(handle.get_comms(), num_samples, handle.get_stream()); + total_samples = std::reduce(samples_per_gpu.begin(), samples_per_gpu.end()); + } + + size_t samples_in_this_batch = total_samples; // Normalize the biases and (for MG) determine how the biases are // distributed across the GPUs. @@ -298,16 +315,16 @@ std::tuple, rmm::device_uvector> negativ : 0); } - auto batch_src = create_local_samples( + auto batch_srcs = create_local_samples( handle, rng_state, graph_view, normalized_src_biases, gpu_src_biases, samples_in_this_batch); - auto batch_dst = create_local_samples( + auto batch_dsts = create_local_samples( handle, rng_state, graph_view, normalized_dst_biases, gpu_dst_biases, samples_in_this_batch); if constexpr (multi_gpu) { auto vertex_partition_range_lasts = graph_view.vertex_partition_range_lasts(); - std::tie(batch_src, - batch_dst, + std::tie(batch_srcs, + batch_dsts, std::ignore, std::ignore, std::ignore, @@ -320,8 +337,8 @@ std::tuple, rmm::device_uvector> negativ int32_t, int32_t>( handle, - std::move(batch_src), - std::move(batch_dst), + std::move(batch_srcs), + std::move(batch_dsts), std::nullopt, std::nullopt, std::nullopt, @@ -333,42 +350,43 @@ std::tuple, rmm::device_uvector> negativ if (remove_existing_edges) { auto has_edge_flags = graph_view.has_edge(handle, - raft::device_span{batch_src.data(), batch_src.size()}, - raft::device_span{batch_dst.data(), batch_dst.size()}, + raft::device_span{batch_srcs.data(), batch_srcs.size()}, + raft::device_span{batch_dsts.data(), batch_dsts.size()}, do_expensive_check); - auto begin_iter = thrust::make_zip_iterator(batch_src.begin(), batch_dst.begin()); + auto begin_iter = thrust::make_zip_iterator(batch_srcs.begin(), batch_dsts.begin()); auto new_end = 
thrust::remove_if(handle.get_thrust_policy(), begin_iter, - begin_iter + batch_src.size(), + begin_iter + batch_srcs.size(), has_edge_flags.begin(), thrust::identity()); - batch_src.resize(thrust::distance(begin_iter, new_end), handle.get_stream()); - batch_dst.resize(thrust::distance(begin_iter, new_end), handle.get_stream()); + batch_srcs.resize(thrust::distance(begin_iter, new_end), handle.get_stream()); + batch_dsts.resize(thrust::distance(begin_iter, new_end), handle.get_stream()); } if (remove_duplicates) { thrust::sort(handle.get_thrust_policy(), - thrust::make_zip_iterator(batch_src.begin(), batch_dst.begin()), - thrust::make_zip_iterator(batch_src.end(), batch_dst.end())); + thrust::make_zip_iterator(batch_srcs.begin(), batch_dsts.begin()), + thrust::make_zip_iterator(batch_srcs.end(), batch_dsts.end())); - auto new_end = thrust::unique(handle.get_thrust_policy(), - thrust::make_zip_iterator(batch_src.begin(), batch_dst.begin()), - thrust::make_zip_iterator(batch_src.end(), batch_dst.end())); + auto new_end = + thrust::unique(handle.get_thrust_policy(), + thrust::make_zip_iterator(batch_srcs.begin(), batch_dsts.begin()), + thrust::make_zip_iterator(batch_srcs.end(), batch_dsts.end())); - size_t new_size = - thrust::distance(thrust::make_zip_iterator(batch_src.begin(), batch_dst.begin()), new_end); + size_t new_size = thrust::distance( + thrust::make_zip_iterator(batch_srcs.begin(), batch_dsts.begin()), new_end); - if (src.size() > 0) { - rmm::device_uvector new_src(src.size() + new_size, handle.get_stream()); - rmm::device_uvector new_dst(dst.size() + new_size, handle.get_stream()); + if (srcs.size() > 0) { + rmm::device_uvector new_src(srcs.size() + new_size, handle.get_stream()); + rmm::device_uvector new_dst(dsts.size() + new_size, handle.get_stream()); thrust::merge(handle.get_thrust_policy(), - thrust::make_zip_iterator(batch_src.begin(), batch_dst.begin()), + thrust::make_zip_iterator(batch_srcs.begin(), batch_dsts.begin()), new_end, - 
thrust::make_zip_iterator(src.begin(), dst.begin()), - thrust::make_zip_iterator(src.end(), dst.end()), + thrust::make_zip_iterator(srcs.begin(), dsts.begin()), + thrust::make_zip_iterator(srcs.end(), dsts.end()), thrust::make_zip_iterator(new_src.begin(), new_dst.begin())); new_end = thrust::unique(handle.get_thrust_policy(), @@ -378,32 +396,32 @@ std::tuple, rmm::device_uvector> negativ new_size = thrust::distance(thrust::make_zip_iterator(new_src.begin(), new_dst.begin()), new_end); - src = std::move(new_src); - dst = std::move(new_dst); + srcs = std::move(new_src); + dsts = std::move(new_dst); } else { - src = std::move(batch_src); - dst = std::move(batch_dst); + srcs = std::move(batch_srcs); + dsts = std::move(batch_dsts); } - src.resize(new_size, handle.get_stream()); - dst.resize(new_size, handle.get_stream()); - } else if (src.size() > 0) { - size_t current_end = src.size(); + srcs.resize(new_size, handle.get_stream()); + dsts.resize(new_size, handle.get_stream()); + } else if (srcs.size() > 0) { + size_t current_end = srcs.size(); - src.resize(src.size() + batch_src.size(), handle.get_stream()); - dst.resize(dst.size() + batch_dst.size(), handle.get_stream()); + srcs.resize(srcs.size() + batch_srcs.size(), handle.get_stream()); + dsts.resize(dsts.size() + batch_dsts.size(), handle.get_stream()); thrust::copy(handle.get_thrust_policy(), - thrust::make_zip_iterator(batch_src.begin(), batch_dst.begin()), - thrust::make_zip_iterator(batch_src.end(), batch_dst.end()), - thrust::make_zip_iterator(src.begin(), dst.begin()) + current_end); + thrust::make_zip_iterator(batch_srcs.begin(), batch_dsts.begin()), + thrust::make_zip_iterator(batch_srcs.end(), batch_dsts.end()), + thrust::make_zip_iterator(srcs.begin(), dsts.begin()) + current_end); } else { - src = std::move(batch_src); - dst = std::move(batch_dst); + srcs = std::move(batch_srcs); + dsts = std::move(batch_dsts); } if (exact_number_of_samples) { - size_t current_sample_size = src.size(); + size_t 
current_sample_size = srcs.size(); if constexpr (multi_gpu) { current_sample_size = cugraph::host_scalar_allreduce( handle.get_comms(), current_sample_size, raft::comms::op_t::SUM, handle.get_stream()); @@ -412,16 +430,142 @@ std::tuple, rmm::device_uvector> negativ // FIXME: We could oversample and discard the unnecessary samples // to reduce the number of iterations in the outer loop, but it seems like // exact_number_of_samples is an edge case not worth optimizing for at this time. - samples_in_this_batch = num_samples - current_sample_size; + samples_in_this_batch = total_samples - current_sample_size; } else { samples_in_this_batch = 0; } } - src.shrink_to_fit(handle.get_stream()); - dst.shrink_to_fit(handle.get_stream()); + srcs.shrink_to_fit(handle.get_stream()); + dsts.shrink_to_fit(handle.get_stream()); + + if constexpr (multi_gpu) { + auto const& comm = handle.get_comms(); + auto const comm_size = comm.get_size(); + auto const comm_rank = comm.get_rank(); + + // Randomly shuffle the samples so that each gpu gets their + // desired number of samples + + if (!exact_number_of_samples) { + // If we didn't force generating the exact number of samples, + // we might have fewer samples than requested. We need to + // accommodate this situation. For now we'll just + // uniformly(-ish) reduce the requested size. 
+ size_t total_extracted = host_scalar_allreduce( + handle.get_comms(), srcs.size(), raft::comms::op_t::SUM, handle.get_stream()); + size_t reduction = total_samples - total_extracted; + + while (reduction > 0) { + size_t est_reduction_per_gpu = (reduction + comm_size - 1) / comm_size; + for (size_t i = 0; i < samples_per_gpu.size(); ++i) { + if (samples_per_gpu[i] > est_reduction_per_gpu) { + samples_per_gpu[i] -= est_reduction_per_gpu; + reduction -= est_reduction_per_gpu; + } else { + reduction -= samples_per_gpu[i]; + samples_per_gpu[i] = 0; + } + + if (reduction < est_reduction_per_gpu) est_reduction_per_gpu = reduction; + } + } + num_samples = samples_per_gpu[comm_rank]; + } + + // Mimic the logic of permute_range... + // + // 1) Randomly assign each entry to a GPU + // 2) Count how many are assigned to each GPU + // 3) Allgatherv (allgather?) to give each GPU a count for how many entries are destined for + // that GPU 4) Identify extras/deficits for each GPU, arbitrarily adjust counts to make correct + // 5) Shuffle accordingly + // + rmm::device_uvector gpu_assignment(srcs.size(), handle.get_stream()); + + cugraph::detail::uniform_random_fill(handle.get_stream(), + gpu_assignment.data(), + gpu_assignment.size(), + int{0}, + int{comm_size}, + rng_state); + + thrust::sort_by_key(handle.get_thrust_policy(), + gpu_assignment.begin(), + gpu_assignment.end(), + thrust::make_zip_iterator(srcs.begin(), dsts.begin())); + + rmm::device_uvector d_send_counts(comm_size, handle.get_stream()); + thrust::tabulate( + handle.get_thrust_policy(), + d_send_counts.begin(), + d_send_counts.end(), + [gpu_assignment_span = raft::device_span{ + gpu_assignment.data(), gpu_assignment.size()}] __device__(size_t i) { + auto begin = thrust::lower_bound( + thrust::seq, gpu_assignment_span.begin(), gpu_assignment_span.end(), static_cast(i)); + auto end = + thrust::upper_bound(thrust::seq, begin, gpu_assignment_span.end(), static_cast(i)); + return thrust::distance(begin, end); + }); + + 
std::vector tx_value_counts(comm_size, 0); + raft::update_host( + tx_value_counts.data(), d_send_counts.data(), d_send_counts.size(), handle.get_stream()); + + std::forward_as_tuple(std::tie(srcs, dsts), std::ignore) = + cugraph::shuffle_values(handle.get_comms(), + thrust::make_zip_iterator(srcs.begin(), dsts.begin()), + tx_value_counts, + handle.get_stream()); + + rmm::device_uvector fractional_random_numbers(srcs.size(), handle.get_stream()); + + cugraph::detail::uniform_random_fill(handle.get_stream(), + fractional_random_numbers.data(), + fractional_random_numbers.size(), + float{0.0}, + float{1.0}, + rng_state); + thrust::sort_by_key(handle.get_thrust_policy(), + fractional_random_numbers.begin(), + fractional_random_numbers.end(), + thrust::make_zip_iterator(srcs.begin(), dsts.begin())); + + size_t nr_extras{0}; + size_t nr_deficits{0}; + if (srcs.size() > num_samples) { + nr_extras = srcs.size() - static_cast(num_samples); + } else { + nr_deficits = static_cast(num_samples) - srcs.size(); + } + + auto extra_srcs = cugraph::detail::device_allgatherv( + handle, comm, raft::device_span(srcs.data() + num_samples, nr_extras)); + // nr_extras > 0 ? nr_extras : 0)); + auto extra_dsts = cugraph::detail::device_allgatherv( + handle, comm, raft::device_span(dsts.data() + num_samples, nr_extras)); + // nr_extras > 0 ? 
nr_extras : 0)); + + srcs.resize(num_samples, handle.get_stream()); + dsts.resize(num_samples, handle.get_stream()); + auto deficits = + cugraph::host_scalar_allgather(handle.get_comms(), nr_deficits, handle.get_stream()); + + std::exclusive_scan(deficits.begin(), deficits.end(), deficits.begin(), vertex_t{0}); + + raft::copy(srcs.data() + num_samples - nr_deficits, + extra_srcs.begin() + deficits[comm_rank], + nr_deficits, + handle.get_stream()); + + raft::copy(dsts.data() + num_samples - nr_deficits, + extra_dsts.begin() + deficits[comm_rank], + nr_deficits, + handle.get_stream()); + } - return std::make_tuple(std::move(src), std::move(dst)); + return std::make_tuple(std::move(srcs), std::move(dsts)); } } // namespace cugraph diff --git a/cpp/src/sampling/neighbor_sampling_impl.hpp b/cpp/src/sampling/neighbor_sampling_impl.hpp index bbc0fbc17af..b759e479bc6 100644 --- a/cpp/src/sampling/neighbor_sampling_impl.hpp +++ b/cpp/src/sampling/neighbor_sampling_impl.hpp @@ -31,6 +31,7 @@ #include +#include #include namespace cugraph { @@ -123,9 +124,9 @@ neighbor_sample_impl(raft::handle_t const& handle, *edge_type_view, [valid_edge_type = i] __device__(auto src, auto dst, - thrust::nullopt_t, - thrust::nullopt_t, - /*thrust::nullopt_t*/ auto edge_type) { + cuda::std::nullopt_t, + cuda::std::nullopt_t, + /*cuda::std::nullopt_t*/ auto edge_type) { return edge_type == valid_edge_type; }, edge_mask.mutable_view(), diff --git a/cpp/src/sampling/random_walks.cuh b/cpp/src/sampling/random_walks.cuh index 0b1d9dcdb56..440c0c7a6ec 100644 --- a/cpp/src/sampling/random_walks.cuh +++ b/cpp/src/sampling/random_walks.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2024, NVIDIA CORPORATION. + * Copyright (c) 2021-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -29,6 +29,7 @@ #include #include +#include #include #include #include @@ -43,7 +44,6 @@ #include #include #include -#include #include #include #include @@ -216,8 +216,8 @@ struct col_indx_extract_t { ptr_d_coalesced_v = original::raw_const_ptr(d_coalesced_src_v), row_offsets = row_offsets_, col_indices = col_indices_, - values = values_ ? thrust::optional{*values_} - : thrust::nullopt] __device__(auto indx, auto col_indx) { + values = values_ ? cuda::std::optional{*values_} + : cuda::std::nullopt] __device__(auto indx, auto col_indx) { auto delta = ptr_d_sizes[indx] - 1; auto v_indx = ptr_d_coalesced_v[indx * max_depth + delta]; auto start_row = row_offsets[v_indx]; diff --git a/cpp/src/sampling/random_walks_impl.cuh b/cpp/src/sampling/random_walks_impl.cuh index 6c10fc473f3..fbf0836dac5 100644 --- a/cpp/src/sampling/random_walks_impl.cuh +++ b/cpp/src/sampling/random_walks_impl.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2024, NVIDIA CORPORATION. + * Copyright (c) 2022-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -39,7 +39,7 @@ #include -#include +#include #include #include @@ -54,14 +54,14 @@ template struct sample_edges_op_t { template __device__ std::enable_if_t, vertex_t> operator()( - vertex_t, vertex_t dst, thrust::nullopt_t, thrust::nullopt_t, thrust::nullopt_t) const + vertex_t, vertex_t dst, cuda::std::nullopt_t, cuda::std::nullopt_t, cuda::std::nullopt_t) const { return dst; } template __device__ std::enable_if_t, thrust::tuple> operator()( - vertex_t, vertex_t dst, thrust::nullopt_t, thrust::nullopt_t, W w) const + vertex_t, vertex_t dst, cuda::std::nullopt_t, cuda::std::nullopt_t, W w) const { return thrust::make_tuple(dst, w); } @@ -69,8 +69,8 @@ struct sample_edges_op_t { template struct biased_random_walk_e_bias_op_t { - __device__ bias_t - operator()(vertex_t, vertex_t, bias_t src_out_weight_sum, thrust::nullopt_t, bias_t weight) const + __device__ bias_t operator()( + vertex_t, vertex_t, bias_t src_out_weight_sum, cuda::std::nullopt_t, bias_t weight) const { return weight / src_out_weight_sum; } @@ -79,7 +79,7 @@ struct biased_random_walk_e_bias_op_t { template struct biased_sample_edges_op_t { __device__ thrust::tuple operator()( - vertex_t, vertex_t dst, weight_t, thrust::nullopt_t, weight_t weight) const + vertex_t, vertex_t dst, weight_t, cuda::std::nullopt_t, weight_t weight) const { return thrust::make_tuple(dst, weight); } @@ -99,9 +99,9 @@ struct node2vec_random_walk_e_bias_op_t { __device__ std::enable_if_t, bias_t> operator()( thrust::tuple tagged_src, vertex_t dst, - thrust::nullopt_t, - thrust::nullopt_t, - thrust::nullopt_t) const + cuda::std::nullopt_t, + cuda::std::nullopt_t, + cuda::std::nullopt_t) const { // Check tag (prev vert) for destination if (dst == thrust::get<1>(tagged_src)) { return 1.0 / p_; } @@ -126,8 +126,8 @@ struct node2vec_random_walk_e_bias_op_t { __device__ std::enable_if_t, bias_t> operator()( thrust::tuple tagged_src, vertex_t dst, - thrust::nullopt_t, - thrust::nullopt_t, + cuda::std::nullopt_t, + 
cuda::std::nullopt_t, W) const { // Check tag (prev vert) for destination @@ -155,9 +155,9 @@ struct node2vec_sample_edges_op_t { __device__ std::enable_if_t, vertex_t> operator()( thrust::tuple tagged_src, vertex_t dst, - thrust::nullopt_t, - thrust::nullopt_t, - thrust::nullopt_t) const + cuda::std::nullopt_t, + cuda::std::nullopt_t, + cuda::std::nullopt_t) const { return dst; } @@ -166,8 +166,8 @@ struct node2vec_sample_edges_op_t { __device__ std::enable_if_t, thrust::tuple> operator()( thrust::tuple tagged_src, vertex_t dst, - thrust::nullopt_t, - thrust::nullopt_t, + cuda::std::nullopt_t, + cuda::std::nullopt_t, W w) const { return thrust::make_tuple(dst, w); diff --git a/cpp/src/sampling/rw_traversals.hpp b/cpp/src/sampling/rw_traversals.hpp index 2c5658b32a5..d371b4141e0 100644 --- a/cpp/src/sampling/rw_traversals.hpp +++ b/cpp/src/sampling/rw_traversals.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2024, NVIDIA CORPORATION. + * Copyright (c) 2021-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -27,12 +27,12 @@ #include #include +#include #include #include #include #include #include -#include #include #include @@ -138,7 +138,7 @@ struct uniform_selector_t { { } - __device__ thrust::optional> operator()( + __device__ cuda::std::optional> operator()( vertex_t src_v, real_t rnd_val, vertex_t = 0 /* not used*/, @@ -146,7 +146,7 @@ struct uniform_selector_t { bool = false /* not used*/) const { auto crt_out_deg = ptr_d_cache_out_degs_[src_v]; - if (crt_out_deg == 0) return thrust::nullopt; // src_v is a sink + if (crt_out_deg == 0) return cuda::std::nullopt; // src_v is a sink vertex_t v_indx = static_cast(rnd_val >= 1.0 ? crt_out_deg - 1 : rnd_val * crt_out_deg); @@ -156,7 +156,8 @@ struct uniform_selector_t { auto weight_value = (values_ == nullptr ? 
weight_t{1} : values_[start_row + col_indx]); // account for un-weighted graphs - return thrust::optional{thrust::make_tuple(col_indices_[start_row + col_indx], weight_value)}; + return cuda::std::optional{ + thrust::make_tuple(col_indices_[start_row + col_indx], weight_value)}; } private: @@ -211,7 +212,7 @@ struct biased_selector_t { // Sum(weights(neighborhood(src_v))) are pre-computed and // stored in ptr_d_sum_weights_ (too expensive to check, here); // - __device__ thrust::optional> operator()( + __device__ cuda::std::optional> operator()( vertex_t src_v, real_t rnd_val, vertex_t = 0 /* not used*/, @@ -223,7 +224,7 @@ struct biased_selector_t { auto col_indx_begin = row_offsets_[src_v]; auto col_indx_end = row_offsets_[src_v + 1]; - if (col_indx_begin == col_indx_end) return thrust::nullopt; // src_v is a sink + if (col_indx_begin == col_indx_end) return cuda::std::nullopt; // src_v is a sink auto col_indx = col_indx_begin; auto prev_col_indx = col_indx; @@ -234,7 +235,7 @@ struct biased_selector_t { run_sum_w += values_[col_indx]; prev_col_indx = col_indx; } - return thrust::optional{ + return cuda::std::optional{ thrust::make_tuple(col_indices_[prev_col_indx], values_[prev_col_indx])}; } @@ -293,9 +294,9 @@ struct node2vec_selector_t { q_(q), coalesced_alpha_{ (max_degree > 0) && (num_paths > 0) && (ptr_alpha != nullptr) - ? thrust::optional>{thrust::make_tuple( + ? 
cuda::std::optional>{thrust::make_tuple( max_degree, num_paths, ptr_alpha)} - : thrust::nullopt} + : cuda::std::nullopt} { } @@ -324,7 +325,7 @@ struct node2vec_selector_t { } } - __device__ thrust::optional> operator()( + __device__ cuda::std::optional> operator()( vertex_t src_v, real_t rnd_val, vertex_t prev_v, edge_t path_index, bool start_path) const { auto const offset_indx_begin = row_offsets_[src_v]; @@ -333,7 +334,7 @@ struct node2vec_selector_t { weight_t sum_scaled_weights{0}; auto offset_indx = offset_indx_begin; - if (offset_indx_begin == offset_indx_end) return thrust::nullopt; // src_v is a sink + if (offset_indx_begin == offset_indx_end) return cuda::std::nullopt; // src_v is a sink // for 1st vertex in path just use biased random selection: // @@ -359,7 +360,7 @@ struct node2vec_selector_t { run_sum_w += crt_weight; prev_offset_indx = offset_indx; } - return thrust::optional{ + return cuda::std::optional{ thrust::make_tuple(col_indices_[prev_offset_indx], values_ == nullptr ? weight_t{1} : values_[prev_offset_indx])}; } @@ -402,7 +403,7 @@ struct node2vec_selector_t { run_sum_w += ptr_d_scaled_weights[start_alpha_offset + nghbr_indx]; prev_offset_indx = offset_indx; } - return thrust::optional{ + return cuda::std::optional{ thrust::make_tuple(col_indices_[prev_offset_indx], values_ == nullptr ? weight_t{1} : values_[prev_offset_indx])}; @@ -435,7 +436,7 @@ struct node2vec_selector_t { run_sum_w += scaled_weight; prev_offset_indx = offset_indx; } - return thrust::optional{ + return cuda::std::optional{ thrust::make_tuple(col_indices_[prev_offset_indx], values_ == nullptr ? 
weight_t{1} : values_[prev_offset_indx])}; } @@ -459,7 +460,7 @@ struct node2vec_selector_t { // this is information related to a scratchpad buffer, used as cache, hence mutable; // (necessary, because get_strategy() is const) // - mutable thrust::optional> + mutable cuda::std::optional> coalesced_alpha_; // tuple }; diff --git a/cpp/src/sampling/sampling_post_processing_impl.cuh b/cpp/src/sampling/sampling_post_processing_impl.cuh index 151350dad6d..ef1a31400f7 100644 --- a/cpp/src/sampling/sampling_post_processing_impl.cuh +++ b/cpp/src/sampling/sampling_post_processing_impl.cuh @@ -29,6 +29,7 @@ #include #include +#include #include #include #include @@ -51,9 +52,9 @@ namespace { template struct edge_order_t { - thrust::optional> edgelist_label_offsets{thrust::nullopt}; - thrust::optional> edgelist_edge_types{thrust::nullopt}; - thrust::optional> edgelist_hops{thrust::nullopt}; + cuda::std::optional> edgelist_label_offsets{cuda::std::nullopt}; + cuda::std::optional> edgelist_edge_types{cuda::std::nullopt}; + cuda::std::optional> edgelist_hops{cuda::std::nullopt}; raft::device_span edgelist_majors{}; raft::device_span edgelist_minors{}; @@ -99,8 +100,8 @@ struct edge_order_t { template struct is_first_triplet_in_run_t { - thrust::optional> edgelist_label_offsets{thrust::nullopt}; - thrust::optional> edgelist_hops{thrust::nullopt}; + cuda::std::optional> edgelist_label_offsets{cuda::std::nullopt}; + cuda::std::optional> edgelist_hops{cuda::std::nullopt}; raft::device_span edgelist_majors{}; __device__ bool operator()(size_t i) const @@ -142,7 +143,7 @@ struct compute_label_index_t { template struct optionally_compute_label_index_t { - thrust::optional> edgelist_label_offsets{thrust::nullopt}; + cuda::std::optional> edgelist_label_offsets{cuda::std::nullopt}; __device__ label_index_t operator()(size_t i) const { @@ -2370,9 +2371,9 @@ heterogeneous_renumber_sampled_edgelist( cuda::proclaim_return_type( [edgelist_label_offsets = 
detail::to_thrust_optional(edgelist_label_offsets), edge_types = edgelist_edge_types - ? thrust::make_optional>( + ? cuda::std::make_optional>( (*edgelist_edge_types).data(), (*edgelist_edge_types).size()) - : thrust::nullopt, + : cuda::std::nullopt, renumber_map = raft::device_span(segment_sorted_edge_id_renumber_map.data(), segment_sorted_edge_id_renumber_map.size()), @@ -2499,17 +2500,17 @@ sort_sampled_edge_tuples(raft::handle_t const& handle, handle.get_stream()); thrust::sequence(handle.get_thrust_policy(), indices.begin(), indices.end(), size_t{0}); edge_order_t edge_order_comp{ - edgelist_label_offsets ? thrust::make_optional>( + edgelist_label_offsets ? cuda::std::make_optional>( (*edgelist_label_offsets).data() + h_label_offsets[i], (h_label_offsets[i + 1] - h_label_offsets[i]) + 1) - : thrust::nullopt, + : cuda::std::nullopt, edgelist_edge_types && use_edge_type_as_sort_key - ? thrust::make_optional>( + ? cuda::std::make_optional>( (*edgelist_edge_types).data() + h_edge_offsets[i], indices.size()) - : thrust::nullopt, - edgelist_hops ? thrust::make_optional>( + : cuda::std::nullopt, + edgelist_hops ? cuda::std::make_optional>( (*edgelist_hops).data() + h_edge_offsets[i], indices.size()) - : thrust::nullopt, + : cuda::std::nullopt, raft::device_span(edgelist_majors.data() + h_edge_offsets[i], indices.size()), raft::device_span(edgelist_minors.data() + h_edge_offsets[i], indices.size())}; @@ -2686,8 +2687,8 @@ renumber_and_compress_sampled_edgelist( auto label_index_first = thrust::make_transform_iterator( thrust::make_counting_iterator(size_t{0}), optionally_compute_label_index_t{ - edgelist_label_offsets ? thrust::make_optional(*edgelist_label_offsets) - : thrust::nullopt}); + edgelist_label_offsets ? 
cuda::std::make_optional(*edgelist_label_offsets) + : cuda::std::nullopt}); auto input_key_first = thrust::make_zip_iterator(label_index_first, (*edgelist_hops).begin()); rmm::device_uvector unique_key_label_indices(min_vertices.size(), handle.get_stream()); @@ -2781,9 +2782,9 @@ renumber_and_compress_sampled_edgelist( thrust::make_counting_iterator(edgelist_majors.size()), is_first_triplet_in_run_t{ detail::to_thrust_optional(edgelist_label_offsets), - edgelist_hops ? thrust::make_optional>( + edgelist_hops ? cuda::std::make_optional>( (*edgelist_hops).data(), (*edgelist_hops).size()) - : thrust::nullopt, + : cuda::std::nullopt, raft::device_span( edgelist_majors.data(), edgelist_majors.size())}); // number of unique ((label), (hop), major) triplets @@ -2910,15 +2911,15 @@ renumber_and_compress_sampled_edgelist( major_vertex_counts.begin(), major_vertex_counts.end(), [edgelist_label_offsets = detail::to_thrust_optional(edgelist_label_offsets), - edgelist_hops = edgelist_hops ? thrust::make_optional>( + edgelist_hops = edgelist_hops ? cuda::std::make_optional>( (*edgelist_hops).data(), (*edgelist_hops).size()) - : thrust::nullopt, + : cuda::std::nullopt, edgelist_majors = raft::device_span(edgelist_majors.data(), edgelist_majors.size()), seed_vertices = renumbered_seed_vertices - ? thrust::make_optional>( + ? cuda::std::make_optional>( (*renumbered_seed_vertices).data(), (*renumbered_seed_vertices).size()) - : thrust::nullopt, + : cuda::std::nullopt, seed_vertex_label_offsets = detail::to_thrust_optional(seed_vertex_label_offsets), num_hops, compress_per_hop] __device__(size_t i) { @@ -3045,9 +3046,9 @@ renumber_and_compress_sampled_edgelist( [major_vertex_counts = raft::device_span(major_vertex_counts.data(), major_vertex_counts.size()), minor_vertex_counts = minor_vertex_counts - ? thrust::make_optional>( + ? 
cuda::std::make_optional>( (*minor_vertex_counts).data(), (*minor_vertex_counts).size()) - : thrust::nullopt, + : cuda::std::nullopt, num_hops, compress_per_hop] __device__(size_t i) { auto vertex_count = major_vertex_counts[i]; @@ -3310,9 +3311,9 @@ renumber_and_sort_sampled_edgelist( (*edgelist_label_hop_offsets).begin(), cuda::proclaim_return_type( [edgelist_label_offsets = detail::to_thrust_optional(edgelist_label_offsets), - edgelist_hops = edgelist_hops ? thrust::make_optional>( + edgelist_hops = edgelist_hops ? cuda::std::make_optional>( (*edgelist_hops).data(), (*edgelist_hops).size()) - : thrust::nullopt, + : cuda::std::nullopt, num_hops, num_edges = edgelist_majors.size()] __device__(size_t i) { size_t start_offset{0}; @@ -3500,12 +3501,12 @@ heterogeneous_renumber_and_sort_sampled_edgelist( cuda::proclaim_return_type( [edgelist_label_offsets = detail::to_thrust_optional(edgelist_label_offsets), edgelist_edge_types = edgelist_edge_types - ? thrust::make_optional>( + ? cuda::std::make_optional>( (*edgelist_edge_types).data(), (*edgelist_edge_types).size()) - : thrust::nullopt, - edgelist_hops = edgelist_hops ? thrust::make_optional>( + : cuda::std::nullopt, + edgelist_hops = edgelist_hops ? cuda::std::make_optional>( (*edgelist_hops).data(), (*edgelist_hops).size()) - : thrust::nullopt, + : cuda::std::nullopt, num_edge_types, num_hops, num_edges = edgelist_majors.size()] __device__(size_t i) { @@ -3653,9 +3654,9 @@ sort_sampled_edgelist(raft::handle_t const& handle, (*edgelist_label_hop_offsets).begin(), cuda::proclaim_return_type( [edgelist_label_offsets = detail::to_thrust_optional(edgelist_label_offsets), - edgelist_hops = edgelist_hops ? thrust::make_optional>( + edgelist_hops = edgelist_hops ? 
cuda::std::make_optional>( (*edgelist_hops).data(), (*edgelist_hops).size()) - : thrust::nullopt, + : cuda::std::nullopt, num_hops, num_edges = edgelist_majors.size()] __device__(size_t i) { size_t start_offset{0}; diff --git a/cpp/src/structure/graph_view_impl.cuh b/cpp/src/structure/graph_view_impl.cuh index 31de9b1e5d3..f526a6788e1 100644 --- a/cpp/src/structure/graph_view_impl.cuh +++ b/cpp/src/structure/graph_view_impl.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2024, NVIDIA CORPORATION. + * Copyright (c) 2020-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -38,6 +38,7 @@ #include #include +#include #include #include #include @@ -47,7 +48,6 @@ #include #include #include -#include #include #include #include @@ -126,8 +126,8 @@ rmm::device_uvector compute_major_degrees( partition.vertex_partition_range_first(major_range_vertex_partition_id); auto offsets = edge_partition_offsets[i]; - auto masks = - edge_partition_masks ? thrust::make_optional((*edge_partition_masks)[i]) : thrust::nullopt; + auto masks = edge_partition_masks ? cuda::std::make_optional((*edge_partition_masks)[i]) + : cuda::std::nullopt; auto segment_offset_size_per_partition = edge_partition_segment_offsets.size() / static_cast(minor_comm_size); auto num_local_degrees = @@ -202,7 +202,8 @@ rmm::device_uvector compute_major_degrees( handle.get_thrust_policy(), degrees.begin(), degrees.end(), - [offsets, masks = masks ? thrust::make_optional(*masks) : thrust::nullopt] __device__(auto i) { + [offsets, + masks = masks ? cuda::std::make_optional(*masks) : cuda::std::nullopt] __device__(auto i) { auto local_degree = offsets[i + 1] - offsets[i]; if (masks) { local_degree = @@ -842,10 +843,10 @@ graph_view_t(this->local_edge_partition_view(i)); auto edge_partition_e_mask = edge_mask_view - ? thrust::make_optional< + ? 
cuda::std::make_optional< detail::edge_partition_edge_property_device_view_t>( *edge_mask_view, i) - : thrust::nullopt; + : cuda::std::nullopt; thrust::transform(handle.get_thrust_policy(), sorted_edge_first + edge_partition_offsets[i], sorted_edge_first + edge_partition_offsets[i + 1], @@ -913,10 +914,10 @@ graph_view_t(this->local_edge_partition_view()); auto edge_partition_e_mask = edge_mask_view - ? thrust::make_optional< + ? cuda::std::make_optional< detail::edge_partition_edge_property_device_view_t>( *edge_mask_view, 0) - : thrust::nullopt; + : cuda::std::nullopt; thrust::transform( handle.get_thrust_policy(), edge_first, @@ -987,10 +988,10 @@ graph_view_t(this->local_edge_partition_view(i)); auto edge_partition_e_mask = edge_mask_view - ? thrust::make_optional< + ? cuda::std::make_optional< detail::edge_partition_edge_property_device_view_t>( *edge_mask_view, i) - : thrust::nullopt; + : cuda::std::nullopt; thrust::transform( handle.get_thrust_policy(), sorted_edge_first + edge_partition_offsets[i], @@ -1058,10 +1059,10 @@ graph_view_t(this->local_edge_partition_view()); auto edge_partition_e_mask = edge_mask_view - ? thrust::make_optional< + ? cuda::std::make_optional< detail::edge_partition_edge_property_device_view_t>( *edge_mask_view, 0) - : thrust::nullopt; + : cuda::std::nullopt; thrust::transform( handle.get_thrust_policy(), edge_first, diff --git a/cpp/src/structure/induced_subgraph_impl.cuh b/cpp/src/structure/induced_subgraph_impl.cuh index 3822055b037..e319dc03bbb 100644 --- a/cpp/src/structure/induced_subgraph_impl.cuh +++ b/cpp/src/structure/induced_subgraph_impl.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2024, NVIDIA CORPORATION. + * Copyright (c) 2021-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -34,6 +34,7 @@ #include +#include #include #include #include @@ -44,7 +45,6 @@ #include #include #include -#include #include #include #include @@ -58,7 +58,7 @@ namespace detail { template struct induced_subgraph_weighted_edge_op { - using return_type = thrust::optional>; + using return_type = cuda::std::optional>; raft::device_span dst_subgraph_offsets; raft::device_span dst_subgraph_vertices; @@ -74,15 +74,15 @@ struct induced_subgraph_weighted_edge_op { dst_subgraph_vertices.data() + dst_subgraph_offsets[subgraph], dst_subgraph_vertices.data() + dst_subgraph_offsets[subgraph + 1], dst) - ? thrust::make_optional( + ? cuda::std::make_optional( thrust::make_tuple(thrust::get<0>(tagged_src), dst, wgt, subgraph)) - : thrust::nullopt; + : cuda::std::nullopt; } }; template struct induced_subgraph_unweighted_edge_op { - using return_type = thrust::optional>; + using return_type = cuda::std::optional>; raft::device_span dst_subgraph_offsets; raft::device_span dst_subgraph_vertices; @@ -91,15 +91,16 @@ struct induced_subgraph_unweighted_edge_op { vertex_t dst, property_t sv, property_t dv, - thrust::nullopt_t) const + cuda::std::nullopt_t) const { size_t subgraph = thrust::get<1>(tagged_src); return thrust::binary_search(thrust::seq, dst_subgraph_vertices.data() + dst_subgraph_offsets[subgraph], dst_subgraph_vertices.data() + dst_subgraph_offsets[subgraph + 1], dst) - ? thrust::make_optional(thrust::make_tuple(thrust::get<0>(tagged_src), dst, subgraph)) - : thrust::nullopt; + ? cuda::std::make_optional( + thrust::make_tuple(thrust::get<0>(tagged_src), dst, subgraph)) + : cuda::std::nullopt; } }; @@ -203,8 +204,8 @@ extract_induced_subgraphs( dst_subgraph_vertices = raft::device_span(dst_subgraph_vertices_v.data(), dst_subgraph_vertices_v.size()); - // 3. Call extract_transform_v_frontier_outgoing_e with a functor that returns thrust::nullopt if - // the destination vertex has a property of 0, return the edge if the destination vertex has a + // 3. 
Call extract_transform_v_frontier_outgoing_e with a functor that returns cuda::std::nullopt + // if the destination vertex has a property of 0, return the edge if the destination vertex has a // property of 1 vertex_frontier_t vertex_frontier(handle, 1); @@ -233,7 +234,7 @@ extract_induced_subgraphs( edge_src_dummy_property_t{}.view(), edge_dst_dummy_property_t{}.view(), *edge_weight_view, - detail::induced_subgraph_weighted_edge_op{ + detail::induced_subgraph_weighted_edge_op{ dst_subgraph_offsets, dst_subgraph_vertices}, do_expensive_check); @@ -253,7 +254,7 @@ extract_induced_subgraphs( edge_src_dummy_property_t{}.view(), edge_dst_dummy_property_t{}.view(), edge_dummy_property_t{}.view(), - detail::induced_subgraph_unweighted_edge_op{ + detail::induced_subgraph_unweighted_edge_op{ dst_subgraph_offsets, dst_subgraph_vertices}, do_expensive_check); diff --git a/cpp/src/traversal/bfs_impl.cuh b/cpp/src/traversal/bfs_impl.cuh index ba40db1f085..2c2674f5bbe 100644 --- a/cpp/src/traversal/bfs_impl.cuh +++ b/cpp/src/traversal/bfs_impl.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2024, NVIDIA CORPORATION. + * Copyright (c) 2020-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -31,6 +31,7 @@ #include +#include #include #include #include @@ -40,7 +41,6 @@ #include #include #include -#include #include #include #include @@ -78,20 +78,26 @@ struct topdown_e_op_t { detail::edge_partition_endpoint_property_device_view_t visited_flags{}; vertex_t dst_first{}; - __device__ thrust::optional operator()( - vertex_t src, vertex_t dst, thrust::nullopt_t, thrust::nullopt_t, thrust::nullopt_t) const + __device__ cuda::std::optional operator()(vertex_t src, + vertex_t dst, + cuda::std::nullopt_t, + cuda::std::nullopt_t, + cuda::std::nullopt_t) const { auto dst_offset = dst - dst_first; auto old = prev_visited_flags.get(dst_offset); if (!old) { old = visited_flags.atomic_or(dst_offset, true); } - return old ? thrust::nullopt : thrust::optional{src}; + return old ? cuda::std::nullopt : cuda::std::optional{src}; } }; template struct bottomup_e_op_t { - __device__ vertex_t operator()( - vertex_t src, vertex_t dst, thrust::nullopt_t, thrust::nullopt_t, thrust::nullopt_t) const + __device__ vertex_t operator()(vertex_t src, + vertex_t dst, + cuda::std::nullopt_t, + cuda::std::nullopt_t, + cuda::std::nullopt_t) const { return dst; } @@ -103,8 +109,11 @@ struct bottomup_pred_op_t { prev_visited_flags{}; // visited in the previous iterations vertex_t dst_first{}; - __device__ bool operator()( - vertex_t src, vertex_t dst, thrust::nullopt_t, thrust::nullopt_t, thrust::nullopt_t) const + __device__ bool operator()(vertex_t src, + vertex_t dst, + cuda::std::nullopt_t, + cuda::std::nullopt_t, + cuda::std::nullopt_t) const { return prev_visited_flags.get(dst - dst_first); } @@ -260,10 +269,10 @@ void bfs(raft::handle_t const& handle, auto edge_mask_view = graph_view.edge_mask_view(); auto edge_partition_e_mask = edge_mask_view - ? thrust::make_optional< + ? 
cuda::std::make_optional< detail::edge_partition_edge_property_device_view_t>( *edge_mask_view, partition_idx) - : thrust::nullopt; + : cuda::std::nullopt; auto high_and_mid_degree_segment_size = (*segment_offsets)[2]; // compute local degrees for high & mid degree segments only, for // low & hypersparse segments, use low_degree_threshold * diff --git a/cpp/src/traversal/extract_bfs_paths_impl.cuh b/cpp/src/traversal/extract_bfs_paths_impl.cuh index d228460bec3..d0a7979d14b 100644 --- a/cpp/src/traversal/extract_bfs_paths_impl.cuh +++ b/cpp/src/traversal/extract_bfs_paths_impl.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2024, NVIDIA CORPORATION. + * Copyright (c) 2021-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -30,6 +30,7 @@ #include +#include #include #include #include @@ -52,7 +53,7 @@ template struct compute_max { vertex_t __device__ operator()(vertex_t lhs, vertex_t rhs) { - return thrust::max(lhs, rhs); + return cuda::std::max(lhs, rhs); } }; diff --git a/cpp/src/traversal/k_hop_nbrs_impl.cuh b/cpp/src/traversal/k_hop_nbrs_impl.cuh index 44fa21a5252..be462720e51 100644 --- a/cpp/src/traversal/k_hop_nbrs_impl.cuh +++ b/cpp/src/traversal/k_hop_nbrs_impl.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2024, NVIDIA CORPORATION. + * Copyright (c) 2022-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -30,13 +30,13 @@ #include +#include #include #include #include #include #include #include -#include #include #include @@ -48,11 +48,11 @@ namespace { template struct e_op_t { - __device__ thrust::optional operator()(thrust::tuple tagged_src, - vertex_t, - thrust::nullopt_t, - thrust::nullopt_t, - thrust::nullopt_t) const + __device__ cuda::std::optional operator()(thrust::tuple tagged_src, + vertex_t, + cuda::std::nullopt_t, + cuda::std::nullopt_t, + cuda::std::nullopt_t) const { return thrust::get<1>(tagged_src); } diff --git a/cpp/src/traversal/od_shortest_distances_impl.cuh b/cpp/src/traversal/od_shortest_distances_impl.cuh index b3cd0d57c67..ffe706ca45d 100644 --- a/cpp/src/traversal/od_shortest_distances_impl.cuh +++ b/cpp/src/traversal/od_shortest_distances_impl.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, NVIDIA CORPORATION. + * Copyright (c) 2024-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -37,12 +37,12 @@ #include #include +#include #include #include #include #include #include -#include #include #include #include @@ -133,11 +133,11 @@ struct e_op_t { weight_t cutoff{}; weight_t invalid_distance{}; - __device__ thrust::optional> operator()( + __device__ cuda::std::optional> operator()( thrust::tuple tagged_src, vertex_t dst, - thrust::nullopt_t, - thrust::nullopt_t, + cuda::std::nullopt_t, + cuda::std::nullopt_t, weight_t w) const { aggregate_vi_t aggregator{num_origins}; @@ -150,9 +150,9 @@ struct e_op_t { auto dst_val = key_to_dist_map.find(aggregator(thrust::make_tuple(dst, origin_idx))); if (dst_val != invalid_distance) { threshold = dst_val < threshold ? dst_val : threshold; } return (new_distance < threshold) - ? thrust::optional>{thrust::make_tuple(origin_idx, - new_distance)} - : thrust::nullopt; + ? 
cuda::std::optional>{thrust::make_tuple(origin_idx, + new_distance)} + : cuda::std::nullopt; } }; @@ -644,8 +644,8 @@ rmm::device_uvector od_shortest_distances( thrust::tuple, weight_t, vertex_t, - thrust::nullopt_t, - thrust::nullopt_t, + cuda::std::nullopt_t, + cuda::std::nullopt_t, weight_t, e_op_t> e_op_wrapper{e_op}; diff --git a/cpp/src/traversal/sssp_impl.cuh b/cpp/src/traversal/sssp_impl.cuh index 3429672b151..8006bbf4063 100644 --- a/cpp/src/traversal/sssp_impl.cuh +++ b/cpp/src/traversal/sssp_impl.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2024, NVIDIA CORPORATION. + * Copyright (c) 2020-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -32,11 +32,11 @@ #include +#include #include #include #include #include -#include #include #include @@ -52,8 +52,8 @@ struct e_op_t { weight_t const* distances{}; weight_t cutoff{}; - __device__ thrust::optional> operator()( - vertex_t src, vertex_t dst, weight_t src_val, thrust::nullopt_t, weight_t w) const + __device__ cuda::std::optional> operator()( + vertex_t src, vertex_t dst, weight_t src_val, cuda::std::nullopt_t, weight_t w) const { auto push = true; auto new_distance = src_val + w; @@ -65,9 +65,9 @@ struct e_op_t { threshold = old_distance < threshold ? old_distance : threshold; } if (new_distance >= threshold) { push = false; } - return push ? thrust::optional>{thrust::make_tuple( + return push ? cuda::std::optional>{thrust::make_tuple( new_distance, src)} - : thrust::nullopt; + : cuda::std::nullopt; } }; @@ -223,11 +223,11 @@ void sssp(raft::handle_t const& handle, auto new_dist = thrust::get<0>(pushed_val); auto update = (new_dist < v_val); return thrust::make_tuple( - update ? thrust::optional{new_dist < near_far_threshold ? bucket_idx_next_near - : bucket_idx_far} - : thrust::nullopt, - update ? thrust::optional>{pushed_val} - : thrust::nullopt); + update ? 
cuda::std::optional{new_dist < near_far_threshold ? bucket_idx_next_near + : bucket_idx_far} + : cuda::std::nullopt, + update ? cuda::std::optional>{pushed_val} + : cuda::std::nullopt); }); vertex_frontier.bucket(bucket_idx_cur_near).clear(); @@ -250,9 +250,9 @@ void sssp(raft::handle_t const& handle, auto dist = *(distances + vertex_partition.local_vertex_partition_offset_from_vertex_nocheck(v)); return dist >= old_near_far_threshold - ? thrust::optional{dist < near_far_threshold ? bucket_idx_cur_near - : bucket_idx_far} - : thrust::nullopt; + ? cuda::std::optional{dist < near_far_threshold ? bucket_idx_cur_near + : bucket_idx_far} + : cuda::std::nullopt; }); near_size = vertex_frontier.bucket(bucket_idx_cur_near).aggregate_size(); far_size = vertex_frontier.bucket(bucket_idx_far).aggregate_size(); diff --git a/cpp/tests/components/mg_vertex_coloring_test.cu b/cpp/tests/components/mg_vertex_coloring_test.cu index 17327e35c97..89b1df9264e 100644 --- a/cpp/tests/components/mg_vertex_coloring_test.cu +++ b/cpp/tests/components/mg_vertex_coloring_test.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, NVIDIA CORPORATION. + * Copyright (c) 2024-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -34,6 +34,8 @@ #include +#include + #include #include @@ -130,7 +132,7 @@ class Tests_MGGraphColoring : cugraph::detail::edge_minor_property_view_t( d_colors.data(), vertex_t{0}), cugraph::edge_dummy_property_t{}.view(), - [] __device__(auto src, auto dst, auto src_color, auto dst_color, thrust::nullopt_t) { + [] __device__(auto src, auto dst, auto src_color, auto dst_color, cuda::std::nullopt_t) { if ((src != dst) && (src_color == dst_color)) { return uint8_t{1}; } else { @@ -168,7 +170,7 @@ class Tests_MGGraphColoring d_colors.begin(), vertex_t{0}), cugraph::edge_dummy_property_t{}.view(), [renumber_map = (*mg_renumber_map).data()] __device__( - auto src, auto dst, auto src_color, auto dst_color, thrust::nullopt_t) { + auto src, auto dst, auto src_color, auto dst_color, cuda::std::nullopt_t) { if ((src != dst) && (src_color == dst_color)) { return vertex_t{1}; } else { diff --git a/cpp/tests/components/vertex_coloring_test.cu b/cpp/tests/components/vertex_coloring_test.cu index fed64f272d7..cf55146c5ba 100644 --- a/cpp/tests/components/vertex_coloring_test.cu +++ b/cpp/tests/components/vertex_coloring_test.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, NVIDIA CORPORATION. + * Copyright (c) 2024-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -30,6 +30,8 @@ #include +#include + #include #include @@ -107,7 +109,7 @@ class Tests_SGGraphColoring cugraph::detail::edge_minor_property_view_t(d_colors.data(), vertex_t{0}), cugraph::edge_dummy_property_t{}.view(), - [] __device__(auto src, auto dst, auto src_color, auto dst_color, thrust::nullopt_t) { + [] __device__(auto src, auto dst, auto src_color, auto dst_color, cuda::std::nullopt_t) { if ((src != dst) && (src_color == dst_color)) { return uint8_t{1}; } else { @@ -142,7 +144,7 @@ class Tests_SGGraphColoring vertex_t{0}), cugraph::edge_dummy_property_t{}.view(), [renumber_map = (*sg_renumber_map).data()] __device__( - auto src, auto dst, auto src_color, auto dst_color, thrust::nullopt_t) { + auto src, auto dst, auto src_color, auto dst_color, cuda::std::nullopt_t) { if ((src != dst) && (src_color == dst_color)) { return vertex_t{1}; } else { diff --git a/cpp/tests/prims/mg_count_if_e.cu b/cpp/tests/prims/mg_count_if_e.cu index 63a785fb182..8796383f45d 100644 --- a/cpp/tests/prims/mg_count_if_e.cu +++ b/cpp/tests/prims/mg_count_if_e.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2024, NVIDIA CORPORATION. + * Copyright (c) 2021-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -37,11 +37,11 @@ #include #include +#include #include #include #include #include -#include #include #include @@ -127,15 +127,16 @@ class Tests_MGCountIfE hr_timer.start("MG count_if_e"); } - auto result = count_if_e( - *handle_, - mg_graph_view, - mg_src_prop.view(), - mg_dst_prop.view(), - cugraph::edge_dummy_property_t{}.view(), - [] __device__(auto row, auto col, auto src_property, auto dst_property, thrust::nullopt_t) { - return src_property < dst_property; - }); + auto result = + count_if_e(*handle_, + mg_graph_view, + mg_src_prop.view(), + mg_dst_prop.view(), + cugraph::edge_dummy_property_t{}.view(), + [] __device__( + auto row, auto col, auto src_property, auto dst_property, cuda::std::nullopt_t) { + return src_property < dst_property; + }); if (cugraph::test::g_perf) { RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement @@ -179,7 +180,7 @@ class Tests_MGCountIfE sg_dst_prop.view(), cugraph::edge_dummy_property_t{}.view(), [] __device__( - auto row, auto col, auto src_property, auto dst_property, thrust::nullopt_t) { + auto row, auto col, auto src_property, auto dst_property, cuda::std::nullopt_t) { return src_property < dst_property; }); ASSERT_TRUE(expected_result == result); diff --git a/cpp/tests/prims/mg_extract_transform_e.cu b/cpp/tests/prims/mg_extract_transform_e.cu index d3d6524cbdb..27e3f471c5b 100644 --- a/cpp/tests/prims/mg_extract_transform_e.cu +++ b/cpp/tests/prims/mg_extract_transform_e.cu @@ -1,6 +1,6 @@ /* - * Copyright (c) 2021-2024, NVIDIA CORPORATION. + * Copyright (c) 2021-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -40,11 +40,11 @@ #include #include +#include #include #include #include #include -#include #include #include #include @@ -65,12 +65,12 @@ struct e_op_t { std::is_same_v>); using return_type = - thrust::optional, - thrust::tuple, - thrust::tuple>>; + cuda::std::optional, + thrust::tuple, + thrust::tuple>>; __device__ return_type operator()( - vertex_t src, vertex_t dst, property_t src_val, property_t dst_val, thrust::nullopt_t) const + vertex_t src, vertex_t dst, property_t src_val, property_t dst_val, cuda::std::nullopt_t) const { auto output_payload = static_cast(1); if (src_val < dst_val) { @@ -82,7 +82,7 @@ struct e_op_t { src, dst, thrust::get<0>(output_payload), thrust::get<1>(output_payload)); } } else { - return thrust::nullopt; + return cuda::std::nullopt; } } }; diff --git a/cpp/tests/prims/mg_extract_transform_v_frontier_outgoing_e.cu b/cpp/tests/prims/mg_extract_transform_v_frontier_outgoing_e.cu index a8393d84e43..0c625da0a6d 100644 --- a/cpp/tests/prims/mg_extract_transform_v_frontier_outgoing_e.cu +++ b/cpp/tests/prims/mg_extract_transform_v_frontier_outgoing_e.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2024, NVIDIA CORPORATION. + * Copyright (c) 2021-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -39,11 +39,11 @@ #include #include +#include #include #include #include #include -#include #include #include #include @@ -65,7 +65,7 @@ struct e_op_t { static_assert(std::is_same_v || std::is_same_v>); - using return_type = thrust::optional, std::conditional_t, thrust::tuple, @@ -78,7 +78,7 @@ struct e_op_t { vertex_t dst, property_t src_val, property_t dst_val, - thrust::nullopt_t) const + cuda::std::nullopt_t) const { auto output_payload = static_cast(1); if (src_val < dst_val) { @@ -109,7 +109,7 @@ struct e_op_t { } } } else { - return thrust::nullopt; + return cuda::std::nullopt; } } }; diff --git a/cpp/tests/prims/mg_per_v_random_select_transform_outgoing_e.cu b/cpp/tests/prims/mg_per_v_random_select_transform_outgoing_e.cu index 386fce24a87..30a53cd15a4 100644 --- a/cpp/tests/prims/mg_per_v_random_select_transform_outgoing_e.cu +++ b/cpp/tests/prims/mg_per_v_random_select_transform_outgoing_e.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2024, NVIDIA CORPORATION. + * Copyright (c) 2021-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -41,9 +41,9 @@ #include +#include #include #include -#include #include #include @@ -53,7 +53,7 @@ template struct e_bias_op_t { __device__ bias_t - operator()(vertex_t, vertex_t, thrust::nullopt_t, thrust::nullopt_t, bias_t bias) const + operator()(vertex_t, vertex_t, cuda::std::nullopt_t, cuda::std::nullopt_t, bias_t bias) const { return bias; } @@ -65,8 +65,11 @@ struct e_op_t { cugraph::to_thrust_tuple(property_t{}), cugraph::to_thrust_tuple(property_t{}))); - __device__ result_t operator()( - vertex_t src, vertex_t dst, property_t src_prop, property_t dst_prop, thrust::nullopt_t) const + __device__ result_t operator()(vertex_t src, + vertex_t dst, + property_t src_prop, + property_t dst_prop, + cuda::std::nullopt_t) const { if constexpr (cugraph::is_thrust_tuple_of_arithmetic::value) { static_assert(thrust::tuple_size::value == size_t{2}); @@ -401,18 +404,19 @@ class Tests_MGPerVRandomSelectTransformOutgoingE thrust::make_counting_iterator(size_t{0}), thrust::make_counting_iterator(mg_aggregate_frontier_vertices.size()), [frontier_vertex_first = mg_aggregate_frontier_vertices.begin(), - sample_offsets = mg_aggregate_sample_offsets ? thrust::make_optional( + sample_offsets = mg_aggregate_sample_offsets ? cuda::std::make_optional( (*mg_aggregate_sample_offsets).data()) - : thrust::nullopt, + : cuda::std::nullopt, sample_e_op_result_first = cugraph::get_dataframe_buffer_begin(mg_aggregate_sample_e_op_results), sg_offsets = sg_offsets.begin(), sg_indices = sg_indices.begin(), - sg_biases = sg_biases ? thrust::make_optional((*sg_biases).begin()) : thrust::nullopt, - K = prims_usecase.K, + sg_biases = + sg_biases ? cuda::std::make_optional((*sg_biases).begin()) : cuda::std::nullopt, + K = prims_usecase.K, with_replacement = prims_usecase.with_replacement, - invalid_value = - invalid_value ? thrust::make_optional(*invalid_value) : thrust::nullopt, + invalid_value = invalid_value ? 
cuda::std::make_optional(*invalid_value) + : cuda::std::nullopt, property_transform = cugraph::test::detail::vertex_property_transform{ hash_bin_count}] __device__(size_t i) { @@ -461,8 +465,8 @@ class Tests_MGPerVRandomSelectTransformOutgoingE auto sg_nbr_first = sg_indices + *(sg_offsets + sg_src); auto sg_nbr_last = sg_indices + *(sg_offsets + (sg_src + vertex_t{1})); auto sg_nbr_bias_first = - sg_biases ? thrust::make_optional((*sg_biases) + *(sg_offsets + sg_src)) - : thrust::nullopt; + sg_biases ? cuda::std::make_optional((*sg_biases) + *(sg_offsets + sg_src)) + : cuda::std::nullopt; if (sg_src != v) { return true; } if (sg_nbr_bias_first) { diff --git a/cpp/tests/prims/mg_per_v_transform_reduce_dst_key_aggregated_outgoing_e.cu b/cpp/tests/prims/mg_per_v_transform_reduce_dst_key_aggregated_outgoing_e.cu index 3dd256544b4..040e0a6d716 100644 --- a/cpp/tests/prims/mg_per_v_transform_reduce_dst_key_aggregated_outgoing_e.cu +++ b/cpp/tests/prims/mg_per_v_transform_reduce_dst_key_aggregated_outgoing_e.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2024, NVIDIA CORPORATION. + * Copyright (c) 2021-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -40,12 +40,12 @@ #include #include +#include #include #include #include #include #include -#include #include #include @@ -218,7 +218,7 @@ class Tests_MGPerVTransformReduceDstKeyAggregatedOutgoingE cugraph::edge_dummy_property_t{}.view(), mg_dst_key.view(), mg_kv_store.view(), - key_aggregated_e_op_t{}, + key_aggregated_e_op_t{}, property_initial_value, cugraph::reduce_op::plus{}, cugraph::get_dataframe_buffer_begin(mg_results[i])); @@ -245,7 +245,7 @@ class Tests_MGPerVTransformReduceDstKeyAggregatedOutgoingE cugraph::edge_dummy_property_t{}.view(), mg_dst_key.view(), mg_kv_store.view(), - key_aggregated_e_op_t{}, + key_aggregated_e_op_t{}, property_initial_value, cugraph::reduce_op::elementwise_minimum{}, cugraph::get_dataframe_buffer_begin(mg_results[i])); @@ -272,7 +272,7 @@ class Tests_MGPerVTransformReduceDstKeyAggregatedOutgoingE cugraph::edge_dummy_property_t{}.view(), mg_dst_key.view(), mg_kv_store.view(), - key_aggregated_e_op_t{}, + key_aggregated_e_op_t{}, property_initial_value, cugraph::reduce_op::elementwise_maximum{}, cugraph::get_dataframe_buffer_begin(mg_results[i])); @@ -414,7 +414,7 @@ class Tests_MGPerVTransformReduceDstKeyAggregatedOutgoingE cugraph::edge_dummy_property_t{}.view(), sg_dst_key.view(), sg_kv_store.view(), - key_aggregated_e_op_t{}, + key_aggregated_e_op_t{}, property_initial_value, cugraph::reduce_op::plus{}, cugraph::get_dataframe_buffer_begin(global_result)); @@ -441,7 +441,7 @@ class Tests_MGPerVTransformReduceDstKeyAggregatedOutgoingE cugraph::edge_dummy_property_t{}.view(), sg_dst_key.view(), sg_kv_store.view(), - key_aggregated_e_op_t{}, + key_aggregated_e_op_t{}, property_initial_value, cugraph::reduce_op::elementwise_minimum{}, cugraph::get_dataframe_buffer_begin(global_result)); @@ -468,7 +468,7 @@ class Tests_MGPerVTransformReduceDstKeyAggregatedOutgoingE cugraph::edge_dummy_property_t{}.view(), sg_dst_key.view(), sg_kv_store.view(), - key_aggregated_e_op_t{}, + key_aggregated_e_op_t{}, property_initial_value, 
cugraph::reduce_op::elementwise_maximum{}, cugraph::get_dataframe_buffer_begin(global_result)); diff --git a/cpp/tests/prims/mg_per_v_transform_reduce_incoming_outgoing_e.cu b/cpp/tests/prims/mg_per_v_transform_reduce_incoming_outgoing_e.cu index 41830b3017c..57d77f6c4bd 100644 --- a/cpp/tests/prims/mg_per_v_transform_reduce_incoming_outgoing_e.cu +++ b/cpp/tests/prims/mg_per_v_transform_reduce_incoming_outgoing_e.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2024, NVIDIA CORPORATION. + * Copyright (c) 2021-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -40,12 +40,12 @@ #include #include +#include #include #include #include #include #include -#include #include #include @@ -62,7 +62,7 @@ struct e_op_t { vertex_t dst, result_t src_property, result_t dst_property, - thrust::nullopt_t) const + cuda::std::nullopt_t) const { if (src_property < dst_property) { return src_property; diff --git a/cpp/tests/prims/mg_transform_e.cu b/cpp/tests/prims/mg_transform_e.cu index 3984c7cd86b..c94637cc657 100644 --- a/cpp/tests/prims/mg_transform_e.cu +++ b/cpp/tests/prims/mg_transform_e.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. + * Copyright (c) 2023-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -37,10 +37,10 @@ #include +#include #include #include #include -#include #include #include @@ -177,7 +177,8 @@ class Tests_MGTransformE mg_src_prop.view(), mg_dst_prop.view(), cugraph::edge_dummy_property_t{}.view(), - [] __device__(auto src, auto dst, auto src_property, auto dst_property, thrust::nullopt_t) { + [] __device__( + auto src, auto dst, auto src_property, auto dst_property, cuda::std::nullopt_t) { if (src_property < dst_property) { return src_property; } else { @@ -192,7 +193,8 @@ class Tests_MGTransformE mg_src_prop.view(), mg_dst_prop.view(), cugraph::edge_dummy_property_t{}.view(), - [] __device__(auto src, auto dst, auto src_property, auto dst_property, thrust::nullopt_t) { + [] __device__( + auto src, auto dst, auto src_property, auto dst_property, cuda::std::nullopt_t) { if (src_property < dst_property) { return src_property; } else { diff --git a/cpp/tests/prims/mg_transform_reduce_e.cu b/cpp/tests/prims/mg_transform_reduce_e.cu index e290f05e9e4..b5dcfaa7aa7 100644 --- a/cpp/tests/prims/mg_transform_reduce_e.cu +++ b/cpp/tests/prims/mg_transform_reduce_e.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2024, NVIDIA CORPORATION. + * Copyright (c) 2021-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -38,11 +38,11 @@ #include #include +#include #include #include #include #include -#include #include #include @@ -138,7 +138,8 @@ class Tests_MGTransformReduceE mg_src_prop.view(), mg_dst_prop.view(), cugraph::edge_dummy_property_t{}.view(), - [] __device__(auto src, auto dst, auto src_property, auto dst_property, thrust::nullopt_t) { + [] __device__( + auto src, auto dst, auto src_property, auto dst_property, cuda::std::nullopt_t) { if (src_property < dst_property) { return src_property; } else { @@ -189,7 +190,7 @@ class Tests_MGTransformReduceE sg_dst_prop.view(), cugraph::edge_dummy_property_t{}.view(), [] __device__( - auto src, auto dst, auto src_property, auto dst_property, thrust::nullopt_t) { + auto src, auto dst, auto src_property, auto dst_property, cuda::std::nullopt_t) { if (src_property < dst_property) { return src_property; } else { diff --git a/cpp/tests/prims/mg_transform_reduce_e_by_src_dst_key.cu b/cpp/tests/prims/mg_transform_reduce_e_by_src_dst_key.cu index b050e314a15..830b48acade 100644 --- a/cpp/tests/prims/mg_transform_reduce_e_by_src_dst_key.cu +++ b/cpp/tests/prims/mg_transform_reduce_e_by_src_dst_key.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, NVIDIA CORPORATION. + * Copyright (c) 2024-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -39,11 +39,11 @@ #include #include +#include #include #include #include #include -#include #include #include @@ -149,7 +149,8 @@ class Tests_MGTransformReduceEBySrcDstKey mg_dst_prop.view(), cugraph::edge_dummy_property_t{}.view(), mg_src_key.view(), - [] __device__(auto src, auto dst, auto src_property, auto dst_property, thrust::nullopt_t) { + [] __device__( + auto src, auto dst, auto src_property, auto dst_property, cuda::std::nullopt_t) { if (src_property < dst_property) { return src_property; } else { @@ -179,7 +180,8 @@ class Tests_MGTransformReduceEBySrcDstKey mg_dst_prop.view(), cugraph::edge_dummy_property_t{}.view(), mg_dst_key.view(), - [] __device__(auto src, auto dst, auto src_property, auto dst_property, thrust::nullopt_t) { + [] __device__( + auto src, auto dst, auto src_property, auto dst_property, cuda::std::nullopt_t) { if (src_property < dst_property) { return src_property; } else { @@ -279,7 +281,7 @@ class Tests_MGTransformReduceEBySrcDstKey cugraph::edge_dummy_property_t{}.view(), sg_src_key.view(), [] __device__( - auto src, auto dst, auto src_property, auto dst_property, thrust::nullopt_t) { + auto src, auto dst, auto src_property, auto dst_property, cuda::std::nullopt_t) { if (src_property < dst_property) { return src_property; } else { @@ -301,7 +303,7 @@ class Tests_MGTransformReduceEBySrcDstKey cugraph::edge_dummy_property_t{}.view(), sg_dst_key.view(), [] __device__( - auto src, auto dst, auto src_property, auto dst_property, thrust::nullopt_t) { + auto src, auto dst, auto src_property, auto dst_property, cuda::std::nullopt_t) { if (src_property < dst_property) { return src_property; } else { diff --git a/cpp/tests/prims/mg_transform_reduce_v_frontier_outgoing_e_by_dst.cu b/cpp/tests/prims/mg_transform_reduce_v_frontier_outgoing_e_by_dst.cu index 085077017b3..acc89491e56 100644 --- a/cpp/tests/prims/mg_transform_reduce_v_frontier_outgoing_e_by_dst.cu +++ b/cpp/tests/prims/mg_transform_reduce_v_frontier_outgoing_e_by_dst.cu @@ -1,5 
+1,5 @@ /* - * Copyright (c) 2021-2024, NVIDIA CORPORATION. + * Copyright (c) 2021-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -38,11 +38,11 @@ #include #include +#include #include #include #include #include -#include #include #include #include @@ -62,25 +62,25 @@ struct e_op_t { vertex_t dst, property_t src_val, property_t dst_val, - thrust::nullopt_t) const + cuda::std::nullopt_t) const { if constexpr (std::is_same_v) { if constexpr (std::is_same_v) { - return src_val < dst_val ? thrust::optional{std::byte{0}} /* dummy */ - : thrust::nullopt; + return src_val < dst_val ? cuda::std::optional{std::byte{0}} /* dummy */ + : cuda::std::nullopt; } else { - return src_val < dst_val ? thrust::optional{static_cast(1)} - : thrust::nullopt; + return src_val < dst_val ? cuda::std::optional{static_cast(1)} + : cuda::std::nullopt; } } else { auto tag = thrust::get<1>(optionally_tagged_src); if constexpr (std::is_same_v) { - return src_val < dst_val ? thrust::optional{tag} : thrust::nullopt; + return src_val < dst_val ? cuda::std::optional{tag} : cuda::std::nullopt; } else { return src_val < dst_val - ? thrust::optional>{thrust::make_tuple( + ? cuda::std::optional>{thrust::make_tuple( tag, static_cast(1))} - : thrust::nullopt; + : cuda::std::nullopt; } } } diff --git a/cpp/tests/prims/result_compare.cuh b/cpp/tests/prims/result_compare.cuh index 5a1abb90e3c..7ee87d402cd 100644 --- a/cpp/tests/prims/result_compare.cuh +++ b/cpp/tests/prims/result_compare.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, NVIDIA CORPORATION. + * Copyright (c) 2024-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -19,8 +19,8 @@ #include +#include #include -#include #include #include @@ -36,7 +36,7 @@ namespace detail { template __host__ __device__ bool compare_arithmetic_scalar(T val0, T val1, - thrust::optional threshold_ratio) + cuda::std::optional threshold_ratio) { if (threshold_ratio) { return std::abs(val0 - val1) <= (std::max(std::abs(val0), std::abs(val1)) * *threshold_ratio); @@ -58,15 +58,16 @@ struct comparator { return detail::compare_arithmetic_scalar( t0, t1, - std::is_floating_point_v ? thrust::optional{threshold_ratio} : thrust::nullopt); + std::is_floating_point_v ? cuda::std::optional{threshold_ratio} : cuda::std::nullopt); } else { - auto val0 = thrust::get<0>(t0); - auto val1 = thrust::get<0>(t1); - auto passed = detail::compare_arithmetic_scalar( - val0, - val1, - std::is_floating_point_v ? thrust::optional{threshold_ratio} - : thrust::nullopt); + auto val0 = thrust::get<0>(t0); + auto val1 = thrust::get<0>(t1); + auto passed = + detail::compare_arithmetic_scalar(val0, + val1, + std::is_floating_point_v + ? cuda::std::optional{threshold_ratio} + : cuda::std::nullopt); if (!passed) return false; if constexpr (thrust::tuple_size::value >= 2) { @@ -76,8 +77,8 @@ struct comparator { detail::compare_arithmetic_scalar(val0, val1, std::is_floating_point_v - ? thrust::optional{threshold_ratio} - : thrust::nullopt); + ? cuda::std::optional{threshold_ratio} + : cuda::std::nullopt); if (!passed) return false; } if constexpr (thrust::tuple_size::value >= 3) { diff --git a/cpp/tests/sampling/detail/sampling_post_processing_validate.cu b/cpp/tests/sampling/detail/sampling_post_processing_validate.cu index a0babc3b921..ac0523bbce7 100644 --- a/cpp/tests/sampling/detail/sampling_post_processing_validate.cu +++ b/cpp/tests/sampling/detail/sampling_post_processing_validate.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2024, NVIDIA CORPORATION. + * Copyright (c) 2022-2025, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,6 +24,7 @@ #include #include +#include #include #include #include @@ -383,26 +384,27 @@ bool compare_heterogeneous_edgelist( this_label_org_sorted_indices.begin(), this_label_org_sorted_indices.end(), [edge_types = org_edgelist_edge_types - ? thrust::make_optional>( + ? cuda::std::make_optional>( (*org_edgelist_edge_types).data() + label_start_offset, label_end_offset - label_start_offset) - : thrust::nullopt, - hops = org_edgelist_hops ? thrust::make_optional>( + : cuda::std::nullopt, + hops = org_edgelist_hops ? cuda::std::make_optional>( (*org_edgelist_hops).data() + label_start_offset, label_end_offset - label_start_offset) - : thrust::nullopt, + : cuda::std::nullopt, srcs = raft::device_span(org_edgelist_srcs.data() + label_start_offset, label_end_offset - label_start_offset), dsts = raft::device_span(org_edgelist_dsts.data() + label_start_offset, label_end_offset - label_start_offset), - weights = org_edgelist_weights ? thrust::make_optional>( + weights = org_edgelist_weights ? cuda::std::make_optional>( (*org_edgelist_weights).data() + label_start_offset, label_end_offset - label_start_offset) - : thrust::nullopt, - edge_ids = org_edgelist_edge_ids ? thrust::make_optional>( - (*org_edgelist_edge_ids).data() + label_start_offset, - label_end_offset - label_start_offset) - : thrust::nullopt] __device__(size_t l_idx, size_t r_idx) { + : cuda::std::nullopt, + edge_ids = org_edgelist_edge_ids + ? 
cuda::std::make_optional>( + (*org_edgelist_edge_ids).data() + label_start_offset, + label_end_offset - label_start_offset) + : cuda::std::nullopt] __device__(size_t l_idx, size_t r_idx) { edge_type_t l_edge_type{0}; edge_type_t r_edge_type{0}; if (edge_types) { @@ -673,15 +675,15 @@ bool compare_heterogeneous_edgelist( raft::device_span(this_edge_type_unrenumbered_edgelist_dsts.data(), this_edge_type_unrenumbered_edgelist_dsts.size()), weights = renumbered_edgelist_weights - ? thrust::make_optional>( + ? cuda::std::make_optional>( (*renumbered_edgelist_weights).data() + edge_type_start_offset, edge_type_end_offset - edge_type_start_offset) - : thrust::nullopt, + : cuda::std::nullopt, edge_ids = renumbered_edgelist_edge_ids - ? thrust::make_optional>( + ? cuda::std::make_optional>( (*renumbered_edgelist_edge_ids).data() + edge_type_start_offset, edge_type_end_offset - edge_type_start_offset) - : thrust::nullopt] __device__(size_t l_idx, size_t r_idx) { + : cuda::std::nullopt] __device__(size_t l_idx, size_t r_idx) { vertex_t l_src = srcs[l_idx]; vertex_t r_src = srcs[r_idx]; @@ -721,15 +723,15 @@ bool compare_heterogeneous_edgelist( raft::device_span(org_edgelist_dsts.data() + label_start_offset, label_end_offset - label_start_offset), org_weights = org_edgelist_weights - ? thrust::make_optional>( + ? cuda::std::make_optional>( (*org_edgelist_weights).data() + label_start_offset, label_end_offset - label_start_offset) - : thrust::nullopt, + : cuda::std::nullopt, org_edge_ids = org_edgelist_edge_ids - ? thrust::make_optional>( + ? 
cuda::std::make_optional>( (*org_edgelist_edge_ids).data() + label_start_offset, label_end_offset - label_start_offset) - : thrust::nullopt, + : cuda::std::nullopt, unrenumbered_srcs = raft::device_span(this_edge_type_unrenumbered_edgelist_srcs.data(), this_edge_type_unrenumbered_edgelist_srcs.size()), @@ -738,16 +740,16 @@ bool compare_heterogeneous_edgelist( this_edge_type_unrenumbered_edgelist_dsts.size()), unrenumbered_weights = renumbered_edgelist_weights - ? thrust::make_optional>( + ? cuda::std::make_optional>( (*renumbered_edgelist_weights).data() + edge_type_start_offset, edge_type_end_offset - edge_type_start_offset) - : thrust::nullopt, + : cuda::std::nullopt, unrenumbered_edge_ids = unrenumbered_edgelist_edge_ids - ? thrust::make_optional>( + ? cuda::std::make_optional>( (*unrenumbered_edgelist_edge_ids).data(), (*unrenumbered_edgelist_edge_ids).size()) - : thrust:: + : cuda::std:: nullopt] __device__(size_t org_idx /* from label_start_offset */, size_t unrenumbered_idx /* from edge_type_start_offset */) { diff --git a/cpp/tests/sampling/mg_negative_sampling.cpp b/cpp/tests/sampling/mg_negative_sampling.cpp index 7c64bb7fbbb..eb9f4fbb394 100644 --- a/cpp/tests/sampling/mg_negative_sampling.cpp +++ b/cpp/tests/sampling/mg_negative_sampling.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, NVIDIA CORPORATION. + * Copyright (c) 2024-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +14,7 @@ * limitations under the License. 
*/ +#include "cugraph/utilities/host_scalar_comm.hpp" #include "utilities/base_fixture.hpp" #include "utilities/conversion_utilities.hpp" #include "utilities/property_generator_utilities.hpp" @@ -85,8 +86,9 @@ class Tests_MGNegative_Sampling : public ::testing::TestWithParamview()); } - size_t num_samples = - graph_view.compute_number_of_edges(*handle_) * negative_sampling_usecase.sample_multiplier; + size_t num_samples = graph_view.compute_number_of_edges(*handle_) * + negative_sampling_usecase.sample_multiplier / + handle_->get_comms().get_size(); rmm::device_uvector src_bias_v(0, handle_->get_stream()); rmm::device_uvector dst_bias_v(0, handle_->get_stream()); @@ -150,26 +152,8 @@ class Tests_MGNegative_Sampling : public ::testing::TestWithParam{src_out.data(), src_out.size()}, raft::device_span{dst_out.data(), dst_out.size()}); - // TODO: Move this to validation_utilities... - auto h_vertex_partition_range_lasts = graph_view.vertex_partition_range_lasts(); - rmm::device_uvector d_vertex_partition_range_lasts( - h_vertex_partition_range_lasts.size(), handle_->get_stream()); - raft::update_device(d_vertex_partition_range_lasts.data(), - h_vertex_partition_range_lasts.data(), - h_vertex_partition_range_lasts.size(), - handle_->get_stream()); - - size_t error_count = cugraph::test::count_edges_on_wrong_int_gpu( - *handle_, - raft::device_span{src_out.data(), src_out.size()}, - raft::device_span{dst_out.data(), dst_out.size()}, - raft::device_span{d_vertex_partition_range_lasts.data(), - d_vertex_partition_range_lasts.size()}); - - ASSERT_EQ(error_count, 0) << "generate edges out of range > 0"; - if ((negative_sampling_usecase.remove_duplicates) && (src_out.size() > 0)) { - error_count = cugraph::test::count_duplicate_vertex_pairs_sorted( + size_t error_count = cugraph::test::count_duplicate_vertex_pairs_sorted( *handle_, raft::device_span{src_out.data(), src_out.size()}, raft::device_span{dst_out.data(), dst_out.size()}); @@ -184,7 +168,7 @@ class 
Tests_MGNegative_Sampling : public ::testing::TestWithParam( *handle_, graph_view, std::nullopt, std::nullopt, std::nullopt, std::nullopt); - error_count = cugraph::test::count_intersection( + size_t error_count = cugraph::test::count_intersection( *handle_, raft::device_span{graph_src.data(), graph_src.size()}, raft::device_span{graph_dst.data(), graph_dst.size()}, @@ -202,7 +186,9 @@ class Tests_MGNegative_Sampling : public ::testing::TestWithParamget_comms(), src_out.size(), raft::comms::op_t::SUM, handle_->get_stream()); - ASSERT_EQ(sz, num_samples) << "Expected exact number of samples"; + size_t aggregate_sample_count = cugraph::host_scalar_allreduce( + handle_->get_comms(), num_samples, raft::comms::op_t::SUM, handle_->get_stream()); + ASSERT_EQ(sz, aggregate_sample_count) << "Expected exact number of samples"; } // TBD: How do we determine if we have properly reflected the biases? diff --git a/cpp/tests/utilities/check_utilities.hpp b/cpp/tests/utilities/check_utilities.hpp index a22d95c87de..6974d14be04 100644 --- a/cpp/tests/utilities/check_utilities.hpp +++ b/cpp/tests/utilities/check_utilities.hpp @@ -97,7 +97,7 @@ struct device_nearly_equal { bool __device__ operator()(type_t lhs, type_t rhs) const { return std::abs(lhs - rhs) < - cuda::std::max(thrust::max(lhs, rhs) * threshold_ratio, threshold_magnitude); + cuda::std::max(cuda::std::max(lhs, rhs) * threshold_ratio, threshold_magnitude); } }; diff --git a/cpp/tests/utilities/property_generator_kernels.cuh b/cpp/tests/utilities/property_generator_kernels.cuh index 78b22e0dac2..5c4bc00cdfa 100644 --- a/cpp/tests/utilities/property_generator_kernels.cuh +++ b/cpp/tests/utilities/property_generator_kernels.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2024, NVIDIA CORPORATION. + * Copyright (c) 2021-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -19,7 +19,7 @@ #include "prims/update_edge_src_dst_property.cuh" #include "utilities/property_generator_utilities.hpp" -#include +#include #include #include @@ -69,8 +69,11 @@ template struct edge_property_transform { int32_t mod{}; - constexpr __device__ property_t operator()( - vertex_t src, vertex_t dst, thrust::nullopt_t, thrust::nullopt_t, thrust::nullopt_t) const + constexpr __device__ property_t operator()(vertex_t src, + vertex_t dst, + cuda::std::nullopt_t, + cuda::std::nullopt_t, + cuda::std::nullopt_t) const { static_assert(cugraph::is_thrust_tuple_of_arithmetic::value || std::is_arithmetic_v); diff --git a/dependencies.yaml b/dependencies.yaml index 02fa03cff70..5419be9beda 100755 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -3,7 +3,7 @@ files: all: output: [conda] matrix: - cuda: ["11.8", "12.5"] + cuda: ["11.8", "12.8"] arch: [x86_64] includes: - checks @@ -279,6 +279,10 @@ dependencies: cuda: "12.5" packages: - cuda-version=12.5 + - matrix: + cuda: "12.8" + packages: + - cuda-version=12.8 cuda: specific: - output_types: [conda] @@ -300,7 +304,7 @@ dependencies: - cuda-nvtx common_build: common: - - output_types: [conda, pyproject] + - output_types: [conda, requirements, pyproject] packages: - &cmake_ver cmake>=3.26.4,!=3.30.0 - ninja diff --git a/python/cugraph/cugraph/gnn/data_loading/dist_sampler.py b/python/cugraph/cugraph/gnn/data_loading/dist_sampler.py index 2edafe95716..6ac4ca142ee 100644 --- a/python/cugraph/cugraph/gnn/data_loading/dist_sampler.py +++ b/python/cugraph/cugraph/gnn/data_loading/dist_sampler.py @@ -223,6 +223,7 @@ def __get_call_groups( input_id: TensorType, seeds_per_call: int, assume_equal_input_size: bool = False, + label: Optional[TensorType] = None, ): torch = import_optional("torch") @@ -231,6 +232,8 @@ def __get_call_groups( # many batches. 
seeds_call_groups = torch.split(seeds, seeds_per_call, dim=-1) index_call_groups = torch.split(input_id, seeds_per_call, dim=-1) + if label is not None: + label_call_groups = torch.split(label, seeds_per_call, dim=-1) # Need to add empties to the list of call groups to handle the case # where not all ranks have the same number of call groups. This @@ -251,8 +254,16 @@ def __get_call_groups( [torch.tensor([], dtype=torch.int64, device=input_id.device)] * (int(num_call_groups) - len(index_call_groups)) ) + if label is not None: + label_call_groups = list(label_call_groups) + ( + [torch.tensor([], dtype=label.dtype, device=label.device)] + * (int(num_call_groups) - len(label_call_groups)) + ) - return seeds_call_groups, index_call_groups + if label is not None: + return seeds_call_groups, index_call_groups, label_call_groups + else: + return seeds_call_groups, index_call_groups def sample_from_nodes( self, @@ -344,7 +355,7 @@ def sample_from_nodes( def __sample_from_edges_func( self, call_id: int, - current_seeds_and_ix: Tuple["torch.Tensor", "torch.Tensor"], + current_seeds_and_ix: Tuple["torch.Tensor", "torch.Tensor", "torch.Tensor"], batch_id_start: int, batch_size: int, batches_per_call: int, @@ -353,7 +364,7 @@ def __sample_from_edges_func( ) -> Union[None, Iterator[Tuple[Dict[str, "torch.Tensor"], int, int]]]: torch = import_optional("torch") - current_seeds, current_ix = current_seeds_and_ix + current_seeds, current_ix, current_label = current_seeds_and_ix num_seed_edges = current_ix.numel() # The index gets stored as-is regardless of what makes it into @@ -468,6 +479,7 @@ def __sample_from_edges_func( random_state=random_state, ) minibatch_dict["input_index"] = current_ix.cuda() + minibatch_dict["input_label"] = current_label.cuda() minibatch_dict["input_offsets"] = input_offsets minibatch_dict[ "edge_inverse" @@ -505,6 +517,7 @@ def sample_from_edges( random_state: int = 62, assume_equal_input_size: bool = False, input_id: Optional[TensorType] = None, + 
input_label: Optional[TensorType] = None, ) -> Iterator[Tuple[Dict[str, "torch.Tensor"], int, int]]: """ Performs sampling starting from seed edges. @@ -527,6 +540,10 @@ def sample_from_edges( Input ids corresponding to the original batch tensor, if it was permuted prior to calling this function. If present, will be saved with the samples. + input_label: Optional[TensorType] + Input labels corresponding to the input seeds. Typically used + for link prediction sampling. If present, will be saved with + the samples. Generally not compatible with negative sampling. """ torch = import_optional("torch") @@ -545,12 +562,20 @@ def sample_from_edges( local_num_batches, assume_equal_input_size=assume_equal_input_size ) - edges_call_groups, index_call_groups = self.__get_call_groups( + groups = self.__get_call_groups( edges, input_id, actual_seed_edges_per_call, assume_equal_input_size=input_size_is_equal, + label=input_label, ) + if len(groups) == 2: + edges_call_groups, index_call_groups = groups + label_call_groups = [torch.tensor([], dtype=torch.int32)] * len( + edges_call_groups + ) + else: + edges_call_groups, index_call_groups, label_call_groups = groups sample_args = [ batch_id_start, @@ -563,14 +588,14 @@ def sample_from_edges( if self.__writer is None: # Buffered sampling return BufferedSampleReader( - zip(edges_call_groups, index_call_groups), + zip(edges_call_groups, index_call_groups, label_call_groups), self.__sample_from_edges_func, *sample_args, ) else: # Unbuffered sampling for i, current_seeds_and_ix in enumerate( - zip(edges_call_groups, index_call_groups) + zip(edges_call_groups, index_call_groups, label_call_groups) ): sample_args[0] = self.__sample_from_edges_func( i, diff --git a/python/cugraph/pyproject.toml b/python/cugraph/pyproject.toml index dfe3b085fdf..060d4ee1e99 100644 --- a/python/cugraph/pyproject.toml +++ b/python/cugraph/pyproject.toml @@ -72,7 +72,7 @@ build-dir = "build/{wheel_tag}" cmake.build-type = "Release" cmake.version = 
"CMakeLists.txt" minimum-version = "build-system.requires" -ninja.make-fallback = true +ninja.make-fallback = false sdist.reproducible = true wheel.packages = ["cugraph"] diff --git a/python/libcugraph/pyproject.toml b/python/libcugraph/pyproject.toml index a6191e28000..9d85bfa5dac 100644 --- a/python/libcugraph/pyproject.toml +++ b/python/libcugraph/pyproject.toml @@ -53,14 +53,14 @@ select = [ ] # detect when package size grows significantly -max_allowed_size_compressed = '1.2G' +max_allowed_size_compressed = '1.4G' [tool.scikit-build] build-dir = "build/{wheel_tag}" cmake.build-type = "Release" cmake.version = "CMakeLists.txt" minimum-version = "build-system.requires" -ninja.make-fallback = true +ninja.make-fallback = false sdist.reproducible = true wheel.packages = ["libcugraph"] wheel.install-dir = "libcugraph" diff --git a/python/pylibcugraph/pyproject.toml b/python/pylibcugraph/pyproject.toml index ac124e1fd5f..3c50a79bfa3 100644 --- a/python/pylibcugraph/pyproject.toml +++ b/python/pylibcugraph/pyproject.toml @@ -57,7 +57,7 @@ build-dir = "build/{wheel_tag}" cmake.build-type = "Release" cmake.version = "CMakeLists.txt" minimum-version = "build-system.requires" -ninja.make-fallback = true +ninja.make-fallback = false sdist.reproducible = true wheel.packages = ["pylibcugraph"]