From ad39515994487c32ea9a0535b7ca014d8373470b Mon Sep 17 00:00:00 2001 From: Mateusz Szczygielski <112629916+msz-rai@users.noreply.github.com> Date: Wed, 19 Apr 2023 11:40:30 +0200 Subject: [PATCH] Improve temporal merging performance (#141) * Double capacity if is running out * Format PCL point cloud on CPU * After review --- src/VArray.cpp | 7 +++++++ src/VArray.hpp | 1 + src/api/apiPcl.cpp | 14 ++++++++------ src/graph/TemporalMergePointsNode.cpp | 2 ++ 4 files changed, 18 insertions(+), 6 deletions(-) diff --git a/src/VArray.cpp b/src/VArray.cpp index aa9a6cc0..6108c748 100644 --- a/src/VArray.cpp +++ b/src/VArray.cpp @@ -100,6 +100,13 @@ void VArray::reserve(std::size_t newCapacity, bool preserveData) current().elemCapacity = newCapacity; } +void VArray::doubleCapacityIfRunningOut(float runningOutThreshold) +{ + if (getElemCapacity() * runningOutThreshold < getElemCount()) { + reserve(getElemCapacity() * 2, true); + } +} + VArray::~VArray() { for (auto&& [location, state] : instance) { diff --git a/src/VArray.hpp b/src/VArray.hpp index 9dde568e..b1cf23fe 100644 --- a/src/VArray.hpp +++ b/src/VArray.hpp @@ -82,6 +82,7 @@ struct VArray : std::enable_shared_from_this<VArray> void insertData(const void *src, std::size_t elements, std::size_t offset); void resize(std::size_t newCount, bool zeroInit=true, bool preserveData=true); void reserve(std::size_t newCapacity, bool preserveData=true); + void doubleCapacityIfRunningOut(float runningOutThreshold=0.9); std::size_t getElemSize() const { return sizeOfType; } std::size_t getElemCount() const { return current().elemCount; } std::size_t getElemCapacity() const { return current().elemCapacity; } diff --git a/src/api/apiPcl.cpp b/src/api/apiPcl.cpp index 35ee6c9b..8a87179f 100644 --- a/src/api/apiPcl.cpp +++ b/src/api/apiPcl.cpp @@ -42,16 +42,18 @@ rgl_graph_write_pcd_file(rgl_node_t node, const char* file_path) throw InvalidAPIObject(fmt::format("Saving PCD file {} failed - requested node does not have field XYZ.", file_path)); } - // Get formatted data - VArray::Ptr rglCloud = VArray::create<char>(); - // TODO(msz-rai): CudaStream for formatAsync: nullptr or pointCloudNode->getGraph()->getStream()? - FormatPointsNode::formatAsync(rglCloud, pointCloudNode, {XYZ_F32, PADDING_32}, nullptr); + // We are not using format node to avoid transferring huge point cloud to GPU (risk of cuda out of memory error) + // We are formatting manually on the CPU instead. + // TODO(msz-rai): CudaStream for getFieldDataTyped: nullptr or pointCloudNode->getGraph()->getStream()? + auto xyzTypedArray = pointCloudNode->getFieldDataTyped<XYZ_F32>(nullptr); + auto xyzData = xyzTypedArray->getReadPtr(MemLoc::Host); // Convert to PCL cloud pcl::PointCloud<pcl::PointXYZ> pclCloud; - const pcl::PointXYZ* data = reinterpret_cast<const pcl::PointXYZ*>(rglCloud->getReadPtr(MemLoc::Host)); pclCloud.resize(pointCloudNode->getWidth(), pointCloudNode->getHeight()); - pclCloud.assign(data, data + pclCloud.size(), pointCloudNode->getWidth()); + for (int i = 0; i < xyzTypedArray->getCount(); ++i) { + pclCloud[i] = pcl::PointXYZ(xyzData[i].x(), xyzData[i].y(), xyzData[i].z()); + } pclCloud.is_dense = pointCloudNode->isDense(); // Save to PCD file diff --git a/src/graph/TemporalMergePointsNode.cpp b/src/graph/TemporalMergePointsNode.cpp index 7638b621..f258ba31 100644 --- a/src/graph/TemporalMergePointsNode.cpp +++ b/src/graph/TemporalMergePointsNode.cpp @@ -61,6 +61,8 @@ void TemporalMergePointsNode::schedule(cudaStream_t stream) size_t pointCount = input->getPointCount(); const auto toMergeData = input->getFieldData(field, stream); data->insertData(toMergeData->getReadPtr(MemLoc::Device), pointCount, width); + // Double capacity of VArray if is close to run out. It prevents reallocating memory every insertion. + data->doubleCapacityIfRunningOut(); } width += input->getWidth(); }