diff --git a/src/VArray.cpp b/src/VArray.cpp
index aa9a6cc0..6108c748 100644
--- a/src/VArray.cpp
+++ b/src/VArray.cpp
@@ -100,6 +100,13 @@ void VArray::reserve(std::size_t newCapacity, bool preserveData)
 	current().elemCapacity = newCapacity;
 }
 
+void VArray::doubleCapacityIfRunningOut(float runningOutThreshold)
+{
+	if (getElemCapacity() * runningOutThreshold < getElemCount()) {
+		reserve(getElemCapacity() * 2, true);
+	}
+}
+
 VArray::~VArray()
 {
 	for (auto&& [location, state] : instance) {
diff --git a/src/VArray.hpp b/src/VArray.hpp
index 9dde568e..b1cf23fe 100644
--- a/src/VArray.hpp
+++ b/src/VArray.hpp
@@ -82,6 +82,7 @@ struct VArray : std::enable_shared_from_this<VArray>
 	void insertData(const void *src, std::size_t elements, std::size_t offset);
 	void resize(std::size_t newCount, bool zeroInit=true, bool preserveData=true);
 	void reserve(std::size_t newCapacity, bool preserveData=true);
+	void doubleCapacityIfRunningOut(float runningOutThreshold=0.9);
 	std::size_t getElemSize() const { return sizeOfType; }
 	std::size_t getElemCount() const { return current().elemCount; }
 	std::size_t getElemCapacity() const { return current().elemCapacity; }
diff --git a/src/api/apiPcl.cpp b/src/api/apiPcl.cpp
index 35ee6c9b..8a87179f 100644
--- a/src/api/apiPcl.cpp
+++ b/src/api/apiPcl.cpp
@@ -42,16 +42,18 @@ rgl_graph_write_pcd_file(rgl_node_t node, const char* file_path)
 		throw InvalidAPIObject(fmt::format("Saving PCD file {} failed - requested node does not have field XYZ.", file_path));
 	}
 
-	// Get formatted data
-	VArray::Ptr rglCloud = VArray::create();
-	// TODO(msz-rai): CudaStream for formatAsync: nullptr or pointCloudNode->getGraph()->getStream()?
-	FormatPointsNode::formatAsync(rglCloud, pointCloudNode, {XYZ_F32, PADDING_32}, nullptr);
+	// We are not using the format node here to avoid transferring a huge point cloud to the GPU (risk of a CUDA out-of-memory error).
+	// Instead, the data is formatted manually on the CPU.
+	// TODO(msz-rai): CudaStream for getFieldDataTyped: nullptr or pointCloudNode->getGraph()->getStream()?
+	auto xyzTypedArray = pointCloudNode->getFieldDataTyped<XYZ_F32>(nullptr);
+	auto xyzData = xyzTypedArray->getReadPtr(MemLoc::Host);
 
 	// Convert to PCL cloud
 	pcl::PointCloud<pcl::PointXYZ> pclCloud;
-	const pcl::PointXYZ* data = reinterpret_cast<const pcl::PointXYZ*>(rglCloud->getReadPtr(MemLoc::Host));
 	pclCloud.resize(pointCloudNode->getWidth(), pointCloudNode->getHeight());
-	pclCloud.assign(data, data + pclCloud.size(), pointCloudNode->getWidth());
+	for (int i = 0; i < xyzTypedArray->getCount(); ++i) {
+		pclCloud[i] = pcl::PointXYZ(xyzData[i].x(), xyzData[i].y(), xyzData[i].z());
+	}
 	pclCloud.is_dense = pointCloudNode->isDense();
 
 	// Save to PCD file
diff --git a/src/graph/TemporalMergePointsNode.cpp b/src/graph/TemporalMergePointsNode.cpp
index 7638b621..f258ba31 100644
--- a/src/graph/TemporalMergePointsNode.cpp
+++ b/src/graph/TemporalMergePointsNode.cpp
@@ -61,6 +61,8 @@ void TemporalMergePointsNode::schedule(cudaStream_t stream)
 		size_t pointCount = input->getPointCount();
 		const auto toMergeData = input->getFieldData(field, stream);
 		data->insertData(toMergeData->getReadPtr(MemLoc::Device), pointCount, width);
+		// Double the capacity of the VArray if it is close to running out. This prevents reallocating memory on every insertion.
+		data->doubleCapacityIfRunningOut();
 	}
 	width += input->getWidth();
 }
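
Side note on the growth policy added in VArray::doubleCapacityIfRunningOut: doubling the capacity once the element count crosses a fixed fraction of it keeps the number of reallocations logarithmic in the total number of inserted elements, which is why TemporalMergePointsNode can call it after every insertData without paying for a reallocation each time. Below is a minimal, self-contained sketch of that policy only, not RGL code; the names GrowableBuffer and appendOne are hypothetical, and the buffer is seeded with a small nonzero capacity so the threshold check alone keeps writes in bounds.

// Illustrative sketch of the capacity-doubling policy from this patch; not RGL code.
// GrowableBuffer and appendOne are hypothetical names used only for this example.
#include <cstddef>
#include <cstdio>
#include <vector>

struct GrowableBuffer
{
	std::vector<float> storage = std::vector<float>(4); // stands in for VArray's backing allocation
	std::size_t count = 0;                              // elements actually in use
	std::size_t reallocations = 0;                      // how many times the buffer grew

	// Same condition as VArray::doubleCapacityIfRunningOut above:
	// once usage exceeds the threshold fraction of capacity, double the capacity.
	void doubleCapacityIfRunningOut(float runningOutThreshold = 0.9f)
	{
		if (storage.size() * runningOutThreshold < count) {
			storage.resize(storage.size() * 2);
			reallocations += 1;
		}
	}

	void appendOne(float value)
	{
		doubleCapacityIfRunningOut(); // with threshold < 1 and capacity >= 1, count stays below capacity
		storage[count++] = value;
	}
};

int main()
{
	GrowableBuffer buf;
	for (int i = 0; i < 100000; ++i) {
		buf.appendOne(static_cast<float>(i));
	}
	// Doubling keeps the number of reallocations logarithmic in the element
	// count, instead of one reallocation per insertion.
	std::printf("elements: %zu, reallocations: %zu\n", buf.count, buf.reallocations);
	return 0;
}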