From e9e8140548c8f26936cbcdb18a65c964998923e9 Mon Sep 17 00:00:00 2001 From: Kenzo Lobos-Tsunekawa Date: Fri, 31 Jan 2025 18:00:20 +0900 Subject: [PATCH] feat: improved data handling in the ogm. When using the full concatenated pointcloud the processing time decreases from 8ms to 4ms Signed-off-by: Kenzo Lobos-Tsunekawa --- .../costmap_2d/occupancy_grid_map_base.hpp | 2 + .../utils/cuda_pointcloud.hpp | 32 ++++++---- .../costmap_2d/occupancy_grid_map_base.cpp | 18 ++++-- .../costmap_2d/occupancy_grid_map_fixed.cpp | 16 ++--- .../updater/binary_bayes_filter_updater.cpp | 10 +--- .../updater/log_odds_bayes_filter_updater.cpp | 6 -- ...intcloud_based_occupancy_grid_map_node.cpp | 60 ++++++++++++------- ...intcloud_based_occupancy_grid_map_node.hpp | 21 +++---- 8 files changed, 88 insertions(+), 77 deletions(-) diff --git a/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/costmap_2d/occupancy_grid_map_base.hpp b/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/costmap_2d/occupancy_grid_map_base.hpp index c77345a973c50..6e476e607b3e0 100644 --- a/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/costmap_2d/occupancy_grid_map_base.hpp +++ b/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/costmap_2d/occupancy_grid_map_base.hpp @@ -102,6 +102,8 @@ class OccupancyGridMapInterface : public nav2_costmap_2d::Costmap2D bool isCudaEnabled() const; + void setCudaStream(const cudaStream_t & stream); + const autoware::cuda_utils::CudaUniquePtr & getDeviceCostmap() const; void copyDeviceCostmapToHost() const; diff --git a/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/utils/cuda_pointcloud.hpp b/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/utils/cuda_pointcloud.hpp index 8c78e2ad915b5..7eb27605e053b 100644 --- a/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/utils/cuda_pointcloud.hpp +++ b/perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/utils/cuda_pointcloud.hpp @@ -24,31 +24,37 @@ class CudaPointCloud2 : public sensor_msgs::msg::PointCloud2 { public: - void fromROSMsgAsync(const sensor_msgs::msg::PointCloud2 & msg) + void fromROSMsgAsync(const sensor_msgs::msg::PointCloud2::ConstSharedPtr & msg_ptr) { // cSpell: ignore knzo25 // NOTE(knzo25): replace this with the cuda blackboard later - header = msg.header; - fields = msg.fields; - height = msg.height; - width = msg.width; - is_bigendian = msg.is_bigendian; - point_step = msg.point_step; - row_step = msg.row_step; - is_dense = msg.is_dense; - - if (!data || capacity_ < msg.data.size()) { - capacity_ = 1024 * (msg.data.size() / 1024 + 1); + cudaStreamSynchronize(stream); + internal_msg_ = msg_ptr; + + header = msg_ptr->header; + fields = msg_ptr->fields; + height = msg_ptr->height; + width = msg_ptr->width; + is_bigendian = msg_ptr->is_bigendian; + point_step = msg_ptr->point_step; + row_step = msg_ptr->row_step; + is_dense = msg_ptr->is_dense; + + if (!data || capacity_ < msg_ptr->data.size()) { + const int factor = 4096 * point_step; + capacity_ = factor * (msg_ptr->data.size() / factor + 1); data = autoware::cuda_utils::make_unique(capacity_); } - cudaMemcpyAsync(data.get(), msg.data.data(), msg.data.size(), cudaMemcpyHostToDevice, stream); + cudaMemcpyAsync( + data.get(), msg_ptr->data.data(), msg_ptr->data.size(), cudaMemcpyHostToDevice, stream); } autoware::cuda_utils::CudaUniquePtr data; cudaStream_t stream; private: + sensor_msgs::msg::PointCloud2::ConstSharedPtr internal_msg_; std::size_t capacity_{0}; }; diff --git a/perception/autoware_probabilistic_occupancy_grid_map/lib/costmap_2d/occupancy_grid_map_base.cpp b/perception/autoware_probabilistic_occupancy_grid_map/lib/costmap_2d/occupancy_grid_map_base.cpp index f4d290bd70d7c..0c9e2fde8d6a8 100644 --- a/perception/autoware_probabilistic_occupancy_grid_map/lib/costmap_2d/occupancy_grid_map_base.cpp +++ b/perception/autoware_probabilistic_occupancy_grid_map/lib/costmap_2d/occupancy_grid_map_base.cpp @@ -90,7 +90,6 @@ OccupancyGridMapInterface::OccupancyGridMapInterface( const auto num_cells_x = this->getSizeInCellsX(); const auto num_cells_y = this->getSizeInCellsY(); - cudaStreamCreate(&stream_); device_costmap_ = autoware::cuda_utils::make_unique(num_cells_x * num_cells_y); device_costmap_aux_ = autoware::cuda_utils::make_unique(num_cells_x * num_cells_y); @@ -136,8 +135,9 @@ void OccupancyGridMapInterface::updateOrigin(double new_origin_x, double new_ori device_costmap_.get(), lower_left_x, lower_left_y, size_x_, size_y_, device_costmap_aux_.get(), 0, 0, cell_size_x, cell_size_y, cell_size_x, cell_size_y, stream_); - cudaMemset( - device_costmap_.get(), cost_value::NO_INFORMATION, size_x_ * size_y_ * sizeof(std::uint8_t)); + cudaMemsetAsync( + device_costmap_.get(), cost_value::NO_INFORMATION, size_x_ * size_y_ * sizeof(std::uint8_t), + stream_); } else { local_map = new unsigned char[cell_size_x * cell_size_y]; @@ -207,6 +207,13 @@ bool OccupancyGridMapInterface::isCudaEnabled() const return use_cuda_; } +void OccupancyGridMapInterface::setCudaStream(const cudaStream_t & stream) +{ + if (isCudaEnabled()) { + stream_ = stream; + } +} + const autoware::cuda_utils::CudaUniquePtr & OccupancyGridMapInterface::getDeviceCostmap() const { @@ -215,9 +222,10 @@ OccupancyGridMapInterface::getDeviceCostmap() const void OccupancyGridMapInterface::copyDeviceCostmapToHost() const { - cudaMemcpy( + cudaMemcpyAsync( costmap_, device_costmap_.get(), getSizeInCellsX() * getSizeInCellsY() * sizeof(std::uint8_t), - cudaMemcpyDeviceToHost); + cudaMemcpyDeviceToHost, stream_); + cudaStreamSynchronize(stream_); } } // namespace costmap_2d diff --git a/perception/autoware_probabilistic_occupancy_grid_map/lib/costmap_2d/occupancy_grid_map_fixed.cpp b/perception/autoware_probabilistic_occupancy_grid_map/lib/costmap_2d/occupancy_grid_map_fixed.cpp index 97f36f08c48bd..b74d9e0c25d52 100644 --- a/perception/autoware_probabilistic_occupancy_grid_map/lib/costmap_2d/occupancy_grid_map_fixed.cpp +++ b/perception/autoware_probabilistic_occupancy_grid_map/lib/costmap_2d/occupancy_grid_map_fixed.cpp @@ -125,14 +125,12 @@ void OccupancyGridMapFixedBlindSpot::updateWithPointCloud( const std::size_t num_raw_points = raw_pointcloud.width * raw_pointcloud.height; float range_resolution_inv = 1.0 / map_res; - cudaStreamSynchronize(stream_); - map_fixed::prepareTensorLaunch( reinterpret_cast(raw_pointcloud.data.get()), num_raw_points, raw_pointcloud.point_step / sizeof(float), angle_bin_size, range_bin_size, min_height_, max_height_, min_angle_, angle_increment_inv_, range_resolution_inv, device_rotation_map_.get(), device_translation_map_.get(), device_rotation_scan_.get(), device_translation_scan_.get(), - raw_points_tensor_.get(), raw_pointcloud.stream); + raw_points_tensor_.get(), stream_); const std::size_t num_obstacle_points = obstacle_pointcloud.width * obstacle_pointcloud.height; @@ -141,27 +139,23 @@ void OccupancyGridMapFixedBlindSpot::updateWithPointCloud( obstacle_pointcloud.point_step / sizeof(float), angle_bin_size, range_bin_size, min_height_, max_height_, min_angle_, angle_increment_inv_, range_resolution_inv, device_rotation_map_.get(), device_translation_map_.get(), device_rotation_scan_.get(), device_translation_scan_.get(), - obstacle_points_tensor_.get(), obstacle_pointcloud.stream); + obstacle_points_tensor_.get(), stream_); map_fixed::fillEmptySpaceLaunch( raw_points_tensor_.get(), angle_bin_size, range_bin_size, range_resolution_inv, scan_origin.position.x, scan_origin.position.y, origin_x_, origin_y_, num_cells_x, num_cells_y, - cost_value::FREE_SPACE, device_costmap_.get(), raw_pointcloud.stream); - - cudaStreamSynchronize(obstacle_pointcloud.stream); + cost_value::FREE_SPACE, device_costmap_.get(), stream_); map_fixed::fillUnknownSpaceLaunch( raw_points_tensor_.get(), obstacle_points_tensor_.get(), distance_margin_, angle_bin_size, range_bin_size, range_resolution_inv, scan_origin.position.x, scan_origin.position.y, origin_x_, origin_y_, num_cells_x, num_cells_y, cost_value::FREE_SPACE, cost_value::NO_INFORMATION, - device_costmap_.get(), raw_pointcloud.stream); + device_costmap_.get(), stream_); map_fixed::fillObstaclesLaunch( obstacle_points_tensor_.get(), distance_margin_, angle_bin_size, range_bin_size, range_resolution_inv, origin_x_, origin_y_, num_cells_x, num_cells_y, - cost_value::LETHAL_OBSTACLE, device_costmap_.get(), raw_pointcloud.stream); - - cudaStreamSynchronize(raw_pointcloud.stream); + cost_value::LETHAL_OBSTACLE, device_costmap_.get(), stream_); } void OccupancyGridMapFixedBlindSpot::initRosParam(rclcpp::Node & node) diff --git a/perception/autoware_probabilistic_occupancy_grid_map/lib/updater/binary_bayes_filter_updater.cpp b/perception/autoware_probabilistic_occupancy_grid_map/lib/updater/binary_bayes_filter_updater.cpp index cdcb0677b7243..ab252a987e64e 100644 --- a/perception/autoware_probabilistic_occupancy_grid_map/lib/updater/binary_bayes_filter_updater.cpp +++ b/perception/autoware_probabilistic_occupancy_grid_map/lib/updater/binary_bayes_filter_updater.cpp @@ -56,9 +56,9 @@ void OccupancyGridMapBBFUpdater::initRosParam(rclcpp::Node & node) } } - cudaMemcpy( + cudaMemcpyAsync( device_probability_matrix_.get(), probability_matrix_vector.data(), - sizeof(float) * Index::NUM_STATES * Index::NUM_STATES, cudaMemcpyHostToDevice); + sizeof(float) * Index::NUM_STATES * Index::NUM_STATES, cudaMemcpyHostToDevice, stream_); } inline unsigned char OccupancyGridMapBBFUpdater::applyBBF( @@ -103,12 +103,6 @@ bool OccupancyGridMapBBFUpdater::update( Index::NUM_STATES, Index::FREE, Index::OCCUPIED, cost_value::FREE_SPACE, cost_value::LETHAL_OBSTACLE, cost_value::NO_INFORMATION, v_ratio_, getSizeInCellsX() * getSizeInCellsY(), device_costmap_.get(), stream_); - - cudaMemcpy( - costmap_, device_costmap_.get(), getSizeInCellsX() * getSizeInCellsY() * sizeof(std::uint8_t), - cudaMemcpyDeviceToHost); - - cudaStreamSynchronize(stream_); } else { for (unsigned int x = 0; x < getSizeInCellsX(); x++) { for (unsigned int y = 0; y < getSizeInCellsY(); y++) { diff --git a/perception/autoware_probabilistic_occupancy_grid_map/lib/updater/log_odds_bayes_filter_updater.cpp b/perception/autoware_probabilistic_occupancy_grid_map/lib/updater/log_odds_bayes_filter_updater.cpp index b1a8ff937c495..4cfa78e5afeb4 100644 --- a/perception/autoware_probabilistic_occupancy_grid_map/lib/updater/log_odds_bayes_filter_updater.cpp +++ b/perception/autoware_probabilistic_occupancy_grid_map/lib/updater/log_odds_bayes_filter_updater.cpp @@ -73,12 +73,6 @@ bool OccupancyGridMapLOBFUpdater::update( applyLOBFLaunch( single_frame_occupancy_grid_map.getDeviceCostmap().get(), cost_value::NO_INFORMATION, getSizeInCellsX() * getSizeInCellsY(), device_costmap_.get(), stream_); - - cudaMemcpy( - costmap_, device_costmap_.get(), getSizeInCellsX() * getSizeInCellsY() * sizeof(std::uint8_t), - cudaMemcpyDeviceToHost); - - cudaStreamSynchronize(stream_); } else { for (unsigned int x = 0; x < getSizeInCellsX(); x++) { for (unsigned int y = 0; y < getSizeInCellsY(); y++) { diff --git a/perception/autoware_probabilistic_occupancy_grid_map/src/pointcloud_based_occupancy_grid_map/pointcloud_based_occupancy_grid_map_node.cpp b/perception/autoware_probabilistic_occupancy_grid_map/src/pointcloud_based_occupancy_grid_map/pointcloud_based_occupancy_grid_map_node.cpp index ba24e29284825..1d45bfb642b37 100644 --- a/perception/autoware_probabilistic_occupancy_grid_map/src/pointcloud_based_occupancy_grid_map/pointcloud_based_occupancy_grid_map_node.cpp +++ b/perception/autoware_probabilistic_occupancy_grid_map/src/pointcloud_based_occupancy_grid_map/pointcloud_based_occupancy_grid_map_node.cpp @@ -71,15 +71,13 @@ PointcloudBasedOccupancyGridMapNode::PointcloudBasedOccupancyGridMapNode( const double map_resolution = this->declare_parameter("map_resolution"); /* Subscriber and publisher */ - obstacle_pointcloud_sub_.subscribe( - this, "~/input/obstacle_pointcloud", - rclcpp::SensorDataQoS{}.keep_last(1).get_rmw_qos_profile()); - raw_pointcloud_sub_.subscribe( - this, "~/input/raw_pointcloud", rclcpp::SensorDataQoS{}.keep_last(1).get_rmw_qos_profile()); - sync_ptr_ = std::make_shared(SyncPolicy(5), obstacle_pointcloud_sub_, raw_pointcloud_sub_); - - sync_ptr_->registerCallback( - std::bind(&PointcloudBasedOccupancyGridMapNode::onPointcloudWithObstacleAndRaw, this, _1, _2)); + obstacle_pointcloud_sub_ptr_ = this->create_subscription( + "~/input/obstacle_pointcloud", rclcpp::SensorDataQoS{}.keep_last(1), + std::bind(&PointcloudBasedOccupancyGridMapNode::obstaclePointcloudCallback, this, _1)); + raw_pointcloud_sub_ptr_ = this->create_subscription( + "~/input/raw_pointcloud", rclcpp::SensorDataQoS{}.keep_last(1), + std::bind(&PointcloudBasedOccupancyGridMapNode::rawPointcloudCallback, this, _1)); + occupancy_grid_map_pub_ = create_publisher("~/output/occupancy_grid_map", 1); const std::string updater_type = this->declare_parameter("updater_type"); @@ -94,7 +92,6 @@ PointcloudBasedOccupancyGridMapNode::PointcloudBasedOccupancyGridMapNode( occupancy_grid_map_updater_ptr_ = std::make_unique( true, map_length / map_resolution, map_length / map_resolution, map_resolution); } - occupancy_grid_map_updater_ptr_->initRosParam(*this); const std::string grid_map_type = this->declare_parameter("grid_map_type"); @@ -118,7 +115,15 @@ PointcloudBasedOccupancyGridMapNode::PointcloudBasedOccupancyGridMapNode( occupancy_grid_map_updater_ptr_->getSizeInCellsY(), occupancy_grid_map_updater_ptr_->getResolution()); } + + cudaStreamCreateWithFlags(&stream_, cudaStreamNonBlocking); + raw_pointcloud_.stream = stream_; + obstacle_pointcloud_.stream = stream_; + occupancy_grid_map_ptr_->setCudaStream(stream_); + occupancy_grid_map_updater_ptr_->setCudaStream(stream_); + occupancy_grid_map_ptr_->initRosParam(*this); + occupancy_grid_map_updater_ptr_->initRosParam(*this); // initialize debug tool { @@ -140,14 +145,29 @@ PointcloudBasedOccupancyGridMapNode::PointcloudBasedOccupancyGridMapNode( time_keeper_ = std::make_shared(time_keeper); } } +} + +void PointcloudBasedOccupancyGridMapNode::obstaclePointcloudCallback( + const PointCloud2::ConstSharedPtr & input_obstacle_msg) +{ + obstacle_pointcloud_.fromROSMsgAsync(input_obstacle_msg); - cudaStreamCreate(&raw_pointcloud_.stream); - cudaStreamCreate(&obstacle_pointcloud_.stream); + if (obstacle_pointcloud_.header.stamp == raw_pointcloud_.header.stamp) { + onPointcloudWithObstacleAndRaw(); + } } -void PointcloudBasedOccupancyGridMapNode::onPointcloudWithObstacleAndRaw( - const PointCloud2::ConstSharedPtr & input_obstacle_msg, +void PointcloudBasedOccupancyGridMapNode::rawPointcloudCallback( const PointCloud2::ConstSharedPtr & input_raw_msg) +{ + raw_pointcloud_.fromROSMsgAsync(input_raw_msg); + + if (obstacle_pointcloud_.header.stamp == raw_pointcloud_.header.stamp) { + onPointcloudWithObstacleAndRaw(); + } +} + +void PointcloudBasedOccupancyGridMapNode::onPointcloudWithObstacleAndRaw() { std::unique_ptr st_ptr; if (time_keeper_) st_ptr = std::make_unique(__func__, *time_keeper_); @@ -156,9 +176,6 @@ void PointcloudBasedOccupancyGridMapNode::onPointcloudWithObstacleAndRaw( stop_watch_ptr_->toc("processing_time", true); } - raw_pointcloud_.fromROSMsgAsync(*input_raw_msg); - obstacle_pointcloud_.fromROSMsgAsync(*input_obstacle_msg); - // if scan_origin_frame_ is "", replace it with raw_pointcloud_.header.frame_id if (scan_origin_frame_.empty()) { scan_origin_frame_ = raw_pointcloud_.header.frame_id; @@ -172,7 +189,7 @@ void PointcloudBasedOccupancyGridMapNode::onPointcloudWithObstacleAndRaw( return; } } - if (input_obstacle_msg->header.frame_id != base_link_frame_) { + if (obstacle_pointcloud_.header.frame_id != base_link_frame_) { if (!utils::transformPointcloudAsync(obstacle_pointcloud_, *tf2_, base_link_frame_)) { return; } @@ -221,7 +238,7 @@ void PointcloudBasedOccupancyGridMapNode::onPointcloudWithObstacleAndRaw( // publish occupancy_grid_map_pub_->publish(OccupancyGridMapToMsgPtr( - map_frame_, input_raw_msg->header.stamp, robot_pose.position.z, + map_frame_, raw_pointcloud_.header.stamp, robot_pose.position.z, *occupancy_grid_map_ptr_)); // (todo) robot_pose may be altered with gridmap_origin } else { std::unique_ptr inner_st_ptr; @@ -231,10 +248,11 @@ void PointcloudBasedOccupancyGridMapNode::onPointcloudWithObstacleAndRaw( // Update with bayes filter occupancy_grid_map_updater_ptr_->update(*occupancy_grid_map_ptr_); + occupancy_grid_map_updater_ptr_->copyDeviceCostmapToHost(); // publish occupancy_grid_map_pub_->publish(OccupancyGridMapToMsgPtr( - map_frame_, input_raw_msg->header.stamp, robot_pose.position.z, + map_frame_, raw_pointcloud_.header.stamp, robot_pose.position.z, *occupancy_grid_map_updater_ptr_)); } @@ -244,7 +262,7 @@ void PointcloudBasedOccupancyGridMapNode::onPointcloudWithObstacleAndRaw( const double pipeline_latency_ms = std::chrono::duration( std::chrono::nanoseconds( - (this->get_clock()->now() - input_raw_msg->header.stamp).nanoseconds())) + (this->get_clock()->now() - raw_pointcloud_.header.stamp).nanoseconds())) .count(); debug_publisher_ptr_->publish( "debug/cyclic_time_ms", cyclic_time_ms); diff --git a/perception/autoware_probabilistic_occupancy_grid_map/src/pointcloud_based_occupancy_grid_map/pointcloud_based_occupancy_grid_map_node.hpp b/perception/autoware_probabilistic_occupancy_grid_map/src/pointcloud_based_occupancy_grid_map/pointcloud_based_occupancy_grid_map_node.hpp index c985414ffdf6b..eb85e8a468ee3 100644 --- a/perception/autoware_probabilistic_occupancy_grid_map/src/pointcloud_based_occupancy_grid_map/pointcloud_based_occupancy_grid_map_node.hpp +++ b/perception/autoware_probabilistic_occupancy_grid_map/src/pointcloud_based_occupancy_grid_map/pointcloud_based_occupancy_grid_map_node.hpp @@ -30,10 +30,7 @@ #include #include -#include -#include -#include -#include +#include #include #include @@ -59,30 +56,28 @@ class PointcloudBasedOccupancyGridMapNode : public rclcpp::Node explicit PointcloudBasedOccupancyGridMapNode(const rclcpp::NodeOptions & node_options); private: - void onPointcloudWithObstacleAndRaw( - const PointCloud2::ConstSharedPtr & input_obstacle_msg, - const PointCloud2::ConstSharedPtr & input_raw_msg); + void obstaclePointcloudCallback(const PointCloud2::ConstSharedPtr & input_obstacle_msg); + void rawPointcloudCallback(const PointCloud2::ConstSharedPtr & input_raw_msg); + void onPointcloudWithObstacleAndRaw(); + OccupancyGrid::UniquePtr OccupancyGridMapToMsgPtr( const std::string & frame_id, const Time & stamp, const float & robot_pose_z, const Costmap2D & occupancy_grid_map); private: rclcpp::Publisher::SharedPtr occupancy_grid_map_pub_; - message_filters::Subscriber obstacle_pointcloud_sub_; - message_filters::Subscriber raw_pointcloud_sub_; + rclcpp::Subscription::SharedPtr obstacle_pointcloud_sub_ptr_; + rclcpp::Subscription::SharedPtr raw_pointcloud_sub_ptr_; std::unique_ptr> stop_watch_ptr_{}; std::unique_ptr debug_publisher_ptr_{}; std::shared_ptr tf2_{std::make_shared(get_clock())}; std::shared_ptr tf2_listener_{std::make_shared(*tf2_)}; - using SyncPolicy = message_filters::sync_policies::ExactTime; - using Sync = message_filters::Synchronizer; - std::shared_ptr sync_ptr_; - std::unique_ptr occupancy_grid_map_ptr_; std::unique_ptr occupancy_grid_map_updater_ptr_; + cudaStream_t stream_; CudaPointCloud2 raw_pointcloud_; CudaPointCloud2 obstacle_pointcloud_;