Skip to content

Commit

Permalink
Example play with CUDA.
Browse files Browse the repository at this point in the history
  • Loading branch information
larshg committed Sep 20, 2024
1 parent 9f6b2dc commit 1081b1c
Show file tree
Hide file tree
Showing 3 changed files with 307 additions and 0 deletions.
13 changes: 13 additions & 0 deletions examples/cuda/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@

# Locate Thrust through its CMake package configuration files.
# REQUIRED makes configuration stop with an error when the package is missing.
find_package(Thrust REQUIRED CONFIG)
# Create the target named "Thrust" that is listed in LINK_WITH below.
thrust_create_target(Thrust)

# Diagnostic output only: report that the package lookup succeeded.
if(Thrust_FOUND)
message("Thrust found")
endif()

# Diagnostic output only: report that the "Thrust" target now exists.
if(TARGET Thrust)
message("Thrust target exist")
endif()

# Register the CUDA example built from example_cuda_common.cu and list the
# libraries/targets it links with.
# NOTE(review): only example_cuda_common.cu is registered in this file; no
# example is added here for example_float3.cu - confirm that is intended.
PCL_ADD_EXAMPLE(pcl_example_cuda_common FILES example_cuda_common.cu LINK_WITH pcl_common pcl_io pcl_filters Thrust CUDA::cudart CUDA)
160 changes: 160 additions & 0 deletions examples/cuda/example_cuda_common.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
#include <pcl/console/parse.h>
#include <pcl/point_cloud.h>
#include <pcl/point_types.h>
#include <pcl/io/pcd_io.h>
#include <pcl/common/time.h>
#include <pcl/common/point_tests.h>

#include <pcl/filters/passthrough.h>
#include <pcl/filters/experimental/functor_filter.h>

#include <pcl/cuda/point_cloud.h>

#include <thrust/host_vector.h>
#include <thrust/device_vector.h>

#include <chrono>

/** \brief Predicate that accepts points with finite coordinates and z below a bound.
  *
  * Used in this file as the selection predicate handed to thrust::copy_if.
  */
struct passthrough {

  /** \brief Exclusive upper bound applied to the z coordinate. */
  const float maxZ_;

  /** \brief Store the bound used by the call operator.
    * \param[in] maxZ points are accepted only when their z is strictly below this value
    */
  passthrough(const float maxZ) : maxZ_{maxZ} {}

  /** \brief Test a single point.
    *
    * The operator is const because it never modifies the predicate; this lets it
    * be invoked through const copies of the functor as well.
    *
    * \param[in] pt point to test
    * \return true when x, y and z are all finite and z < maxZ_
    */
  __host__ __device__
  bool
  operator()(const pcl::PointXYZ pt) const
  {
    return isfinite(pt.x) && isfinite(pt.y) &&
           isfinite(pt.z) && pt.z < maxZ_;
  }
};

/** \brief Kernel with no observable effect; launched by cudaWarmUpGPU().
  *
  * It derives the flat thread index from the block and thread coordinates and
  * increments it. The value is never written anywhere.
  */
__global__ void
kernel_cudaWarmUpGPU()
{
  const int flatIndex = blockIdx.x * blockDim.x + threadIdx.x;
  int ind = flatIndex + 1;
  // The result is intentionally discarded.
  (void)ind;
}

/** \brief Launch the warm-up kernel on one block of one thread and wait for it.
  * \return the value reported by cudaGetLastError() after the device has been
  *         synchronized
  */
cudaError_t
cudaWarmUpGPU()
{
  kernel_cudaWarmUpGPU<<<1, 1>>>();

  // Block the host until the launched work has completed.
  cudaDeviceSynchronize();

  const cudaError_t lastError = cudaGetLastError();
  return lastError;
}

/** \brief Select device 0 and run the warm-up kernel once.
  *
  * This is a best-effort step: a failure does not abort the program, but it is
  * reported on std::cerr so that a missing or misconfigured device is not
  * silently ignored.
  */
void
warmUpGPU()
{
  const cudaError_t selectStatus = cudaSetDevice(0);
  if (selectStatus != ::cudaSuccess) {
    std::cerr << "warmUpGPU: cudaSetDevice(0) failed: "
              << cudaGetErrorString(selectStatus) << "\n";
    return;
  }

  const cudaError_t warmUpStatus = cudaWarmUpGPU();
  if (warmUpStatus != ::cudaSuccess) {
    std::cerr << "warmUpGPU: warm-up kernel failed: "
              << cudaGetErrorString(warmUpStatus) << "\n";
  }
}


/** \brief Filter a cloud with thrust::copy_if and the passthrough predicate.
  *
  * The three stages (upload, copy_if, download) are each wrapped in their own
  * pcl::ScopeTime object.
  *
  * \param[in] cloud input cloud; its points are copied into a thrust::device_vector
  * \param[in] maxZ bound forwarded to the passthrough predicate
  * \return host vector holding the points selected by the predicate
  */
thrust::host_vector<pcl::PointXYZ>
filterGPU(pcl::PointCloud<pcl::PointXYZ>::Ptr cloud, float maxZ)
{
  thrust::device_vector<pcl::PointXYZ> deviceInput;
  {
    pcl::ScopeTime uploadTimer("Copying to device");
    deviceInput = cloud->points;
  }

  // The output buffer is sized for the case where every point is kept.
  thrust::device_vector<pcl::PointXYZ> deviceOutput(deviceInput.size());

  size_t keptCount;
  {
    pcl::ScopeTime copyIfTimer("Copy if");

    const auto outputEnd = thrust::copy_if(deviceInput.begin(),
                                           deviceInput.end(),
                                           deviceOutput.begin(),
                                           passthrough(maxZ));
    // Distance from the start of the output to the returned end iterator.
    keptCount = outputEnd - deviceOutput.begin();
  }
  // Shrink the output to the number of points actually written.
  deviceOutput.resize(keptCount);

  thrust::host_vector<pcl::PointXYZ> hostOutput;
  {
    pcl::ScopeTime downloadTimer("Copy to host");
    hostOutput = deviceOutput;
  }

  return hostOutput;
}

/** \brief Filter a cloud on the CPU with pcl::PassThrough on the "z" field.
  *
  * NOTE(review): the filter limits are set to 0 and maxZ, so a lower limit of 0
  * is applied here, whereas the passthrough predicate used by filterGPU() only
  * tests z < maxZ - confirm the difference is intended.
  *
  * \param[in] cloud input cloud
  * \param[in] maxZ upper filter limit for z
  * \return newly allocated cloud that receives the filter output
  */
pcl::PointCloud<pcl::PointXYZ>::Ptr
filterCPUpassthrough(pcl::PointCloud<pcl::PointXYZ>::Ptr cloud, float maxZ)
{
  pcl::PassThrough<pcl::PointXYZ> zFilter;
  zFilter.setInputCloud(cloud);
  zFilter.setFilterFieldName("z");
  zFilter.setFilterLimits(0, maxZ);

  auto filtered = pcl::make_shared<pcl::PointCloud<pcl::PointXYZ>>();
  zFilter.filter(*filtered);
  return filtered;
}

/** \brief Filter a cloud on the CPU with pcl::experimental::FunctionFilter.
  *
  * The filter function accepts a point when pcl::isXYZFinite() holds for it and
  * its z is strictly below maxZ.
  *
  * \param[in] cloud input cloud
  * \param[in] maxZ exclusive upper bound on z
  * \return newly allocated cloud that receives the filter output
  */
pcl::PointCloud<pcl::PointXYZ>::Ptr
filterCPUfunctor(pcl::PointCloud<pcl::PointXYZ>::Ptr cloud, float maxZ)
{
  // Per-point decision; only maxZ is needed from the enclosing scope.
  pcl::experimental::FilterFunction<pcl::PointXYZ> keepPoint =
      [maxZ](const pcl::PointCloud<pcl::PointXYZ>& input, pcl::index_t idx) {
        const auto& pt = input[idx];
        return (pcl::isXYZFinite(pt) && pt.z < maxZ);
      };

  // Build the filter around the function and run it on the input cloud.
  pcl::experimental::FunctionFilter<pcl::PointXYZ> functionFilter(keepPoint);
  functionFilter.setInputCloud(cloud);

  auto filtered = pcl::make_shared<pcl::PointCloud<pcl::PointXYZ>>();
  functionFilter.filter(*filtered);

  return filtered;
}

/** \brief Load a PCD file and compare GPU and CPU filtering of it.
  *
  * Command line:
  *   -f <file.pcd>   point cloud to load
  *   -maxZ <value>   bound on z used by all three filters
  *
  * \return 0 on success, 1 when an argument is missing or the file cannot be loaded
  */
int
main(int argc, char** argv)
{
  std::string fileName;
  const bool haveFile =
      pcl::console::parse<std::string>(argc, argv, "-f", fileName) >= 0 &&
      !fileName.empty();

  // Initialized so that it is never read indeterminate, even if parsing fails.
  float maxZ = 0.0f;
  const bool haveMaxZ = pcl::console::parse(argc, argv, "-maxZ", maxZ) >= 0;

  if (!haveFile || !haveMaxZ) {
    std::cerr << "Usage: " << argv[0] << " -f <file.pcd> -maxZ <value>\n";
    return 1;
  }

  auto cloud = pcl::make_shared<pcl::PointCloud<pcl::PointXYZ>>();
  if (pcl::io::loadPCDFile(fileName, *cloud) < 0) {
    std::cerr << "Could not load PCD file: " << fileName << "\n";
    return 1;
  }

  // Run the warm-up before the timed GPU section below.
  warmUpGPU();
  thrust::host_vector<pcl::PointXYZ> filteredCloud;
  {
    pcl::ScopeTime filterGPUtime("Filter GPU");
    filteredCloud = filterGPU(cloud, maxZ);
  }
  std::cout << "Original cloud: " << cloud->size() << "\n";
  std::cout << "GPU Filter: " << filteredCloud.size() << "\n";

  {
    pcl::ScopeTime filterCPUTime("Filter CPU");
    auto cpufilteredCloud = filterCPUpassthrough(cloud, maxZ);
    std::cout << "Passthrough: " << cpufilteredCloud->size() << "\n";
  }

  {
    pcl::ScopeTime filterFunctor("Filter Functor CPU");
    auto cpuFilterFuncCloud = filterCPUfunctor(cloud, maxZ);
    std::cout << "Func CPU: " << cpuFilterFuncCloud->size() << "\n";
  }

  return 0;
}
134 changes: 134 additions & 0 deletions examples/cuda/example_float3.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
#include <pcl/common/time.h>
#include <pcl/console/parse.h>
#include <pcl/cuda/point_cloud.h>
#include <pcl/filters/passthrough.h>
#include <pcl/io/pcd_io.h>
#include <pcl/point_cloud.h>
#include <pcl/point_types.h>

#include <thrust/device_vector.h>
#include <thrust/host_vector.h>

#include <chrono>

/** \brief Predicate that accepts float3 points whose z is below a bound.
  *
  * Used in this file as the selection predicate handed to thrust::copy_if.
  */
struct passthrough {

  /** \brief Exclusive upper bound applied to the z component. */
  float maxZ_;

  /** \brief Store the bound used by the call operator.
    * \param[in] maxZ points are accepted only when their z is strictly below this value
    */
  passthrough(float maxZ) : maxZ_{maxZ} {}

  /** \brief Test a single point.
    *
    * The operator is const because it never modifies the predicate; this lets it
    * be invoked through const copies of the functor as well.
    *
    * \param[in] pt point to test
    * \return true when pt.z < maxZ_
    */
  __host__ __device__ bool
  operator()(const float3 pt) const
  {
    return pt.z < maxZ_;
  }
};

/** \brief Kernel with no observable effect; launched by cudaWarmUpGPU().
  *
  * Computes the flat thread index and adds one to it; nothing is stored.
  */
__global__ void
kernel_cudaWarmUpGPU()
{
  const int threadSlot = blockIdx.x * blockDim.x + threadIdx.x;
  int ind = threadSlot + 1;
  // The value is deliberately unused.
  (void)ind;
}

/** \brief Run the warm-up kernel with a 1x1 launch configuration and wait for it.
  * \return the value reported by cudaGetLastError() once the device is synchronized
  */
cudaError_t
cudaWarmUpGPU()
{
  kernel_cudaWarmUpGPU<<<1, 1>>>();

  // Wait for the launched kernel before querying the error state.
  cudaDeviceSynchronize();

  const cudaError_t status = cudaGetLastError();
  return status;
}

/** \brief Select device 0 and run the warm-up kernel once.
  *
  * This is a best-effort step: a failure does not abort the program, but it is
  * reported on std::cerr so that a missing or misconfigured device is not
  * silently ignored.
  */
void
warmUpGPU()
{
  const cudaError_t selectStatus = cudaSetDevice(0);
  if (selectStatus != ::cudaSuccess) {
    std::cerr << "warmUpGPU: cudaSetDevice(0) failed: "
              << cudaGetErrorString(selectStatus) << "\n";
    return;
  }

  const cudaError_t warmUpStatus = cudaWarmUpGPU();
  if (warmUpStatus != ::cudaSuccess) {
    std::cerr << "warmUpGPU: warm-up kernel failed: "
              << cudaGetErrorString(warmUpStatus) << "\n";
  }
}

/** \brief Filter a cloud on the GPU after converting its points to float3.
  *
  * The stages (host conversion, upload, copy_if, download) are each wrapped in
  * their own pcl::ScopeTime object.
  *
  * \param[in] cloud input cloud; x, y and z of every point are copied into a float3
  * \param[in] maxZ bound forwarded to the passthrough predicate
  * \return host vector holding the float3 points selected by the predicate
  */
thrust::host_vector<float3>
filterGPU(pcl::PointCloud<pcl::PointXYZ>::Ptr cloud, float maxZ)
{
  thrust::host_vector<float3> vec;
  vec.reserve(cloud->size());

  {
    pcl::ScopeTime hostvector("Fill Host vector");
    // Standard range-based for; the former "for each (... in ...)" form is a
    // compiler-specific extension and not portable C++.
    for (const auto& pt : *cloud) {
      vec.push_back({pt.x, pt.y, pt.z});
    }
  }

  thrust::device_vector<float3> d_vec;
  {
    pcl::ScopeTime copyToDevice("Copying to device");
    d_vec = vec;
  }

  // The output buffer is sized for the case where every point is kept.
  thrust::device_vector<float3> d_result(d_vec.size());

  size_t resultCount;
  {
    pcl::ScopeTime cudaCall("Copy if");

    resultCount = thrust::copy_if(
                      d_vec.begin(), d_vec.end(), d_result.begin(), passthrough(maxZ)) -
                  d_result.begin();
  }
  // Shrink the output to the number of points actually written.
  d_result.resize(resultCount);

  thrust::host_vector<float3> h_result;
  {
    pcl::ScopeTime copyFromDevice("Copy to host");
    h_result = d_result;
  }

  return h_result;
}

/** \brief Filter a cloud on the CPU with pcl::PassThrough on the "z" field.
  *
  * NOTE(review): the filter limits are set to 0 and maxZ, so a lower limit of 0
  * is applied here, whereas the passthrough predicate used by filterGPU() only
  * tests z < maxZ - confirm the difference is intended.
  *
  * \param[in] cloud input cloud
  * \param[in] maxZ upper filter limit for z
  * \return newly allocated cloud that receives the filter output
  */
pcl::PointCloud<pcl::PointXYZ>::Ptr
filterCPUpassthrough(pcl::PointCloud<pcl::PointXYZ>::Ptr cloud, float maxZ)
{
  pcl::PassThrough<pcl::PointXYZ> zFilter;
  zFilter.setInputCloud(cloud);
  zFilter.setFilterFieldName("z");
  zFilter.setFilterLimits(0, maxZ);

  auto filtered = pcl::make_shared<pcl::PointCloud<pcl::PointXYZ>>();
  zFilter.filter(*filtered);
  return filtered;
}

/** \brief Load a PCD file and filter it on the GPU using float3 points.
  *
  * Command line:
  *   -f <file.pcd>   point cloud to load
  *   -maxZ <value>   bound on z used by the filter
  *
  * \return 0 on success, 1 when an argument is missing or the file cannot be loaded
  */
int
main(int argc, char** argv)
{
  std::string fileName;
  const bool haveFile =
      pcl::console::parse<std::string>(argc, argv, "-f", fileName) >= 0 &&
      !fileName.empty();

  // Initialized so that it is never read indeterminate, even if parsing fails.
  float maxZ = 0.0f;
  const bool haveMaxZ = pcl::console::parse(argc, argv, "-maxZ", maxZ) >= 0;

  if (!haveFile || !haveMaxZ) {
    std::cerr << "Usage: " << argv[0] << " -f <file.pcd> -maxZ <value>\n";
    return 1;
  }

  auto cloud = pcl::make_shared<pcl::PointCloud<pcl::PointXYZ>>();
  if (pcl::io::loadPCDFile(fileName, *cloud) < 0) {
    std::cerr << "Could not load PCD file: " << fileName << "\n";
    return 1;
  }

  // Run the warm-up before the timed GPU section below.
  warmUpGPU();
  thrust::host_vector<float3> filteredCloud;
  {
    pcl::ScopeTime filterGPUtime("Filter GPU");
    filteredCloud = filterGPU(cloud, maxZ);
  }
  std::cout << filteredCloud.size() << "\n";

  //{
  //  pcl::ScopeTime filterCPUTime("Filter CPU");
  //  auto cpufilteredCloud = filterCPUpassthrough(cloud, maxZ);
  //  std::cout << cpufilteredCloud->size() << "\n";
  //}

  return 0;
}

0 comments on commit 1081b1c

Please sign in to comment.