From b4f3901988a37f1a70d912382b1625985c3b0f23 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=2E=20Fatih=20C=C4=B1r=C4=B1t?= Date: Fri, 18 Nov 2022 20:42:08 +0300 Subject: [PATCH] fix(tensorrt): update tensorrt code of traffic_light_ssd_fine_detector MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: M. Fatih Cırıt --- .../lib/include/trt_ssd.hpp | 4 +++- .../lib/src/trt_ssd.cpp | 24 ++++++++++++------- .../src/nodelet.cpp | 3 +-- 3 files changed, 19 insertions(+), 12 deletions(-) diff --git a/perception/traffic_light_ssd_fine_detector/lib/include/trt_ssd.hpp b/perception/traffic_light_ssd_fine_detector/lib/include/trt_ssd.hpp index cfb6c8a6d5754..a478b14180125 100644 --- a/perception/traffic_light_ssd_fine_detector/lib/include/trt_ssd.hpp +++ b/perception/traffic_light_ssd_fine_detector/lib/include/trt_ssd.hpp @@ -72,7 +72,7 @@ class Net void save(const std::string & path); // Infer using pre-allocated GPU buffers {data, scores, boxes} - void infer(std::vector & buffers, const int batch_size); + void infer(const int batch_size); // Get (c, h, w) size of the fixed input std::vector getInputSize(); @@ -90,6 +90,8 @@ class Net unique_ptr plan_ = nullptr; unique_ptr engine_ = nullptr; unique_ptr context_ = nullptr; + std::string name_tensor_in_; + std::string name_tensor_out_; cudaStream_t stream_ = nullptr; void load(const std::string & path); diff --git a/perception/traffic_light_ssd_fine_detector/lib/src/trt_ssd.cpp b/perception/traffic_light_ssd_fine_detector/lib/src/trt_ssd.cpp index d810eb5275058..2ec820db02872 100644 --- a/perception/traffic_light_ssd_fine_detector/lib/src/trt_ssd.cpp +++ b/perception/traffic_light_ssd_fine_detector/lib/src/trt_ssd.cpp @@ -55,6 +55,8 @@ Net::Net(const std::string & path, bool verbose) runtime_ = unique_ptr(nvinfer1::createInferRuntime(logger)); load(path); prepare(); + name_tensor_in_ = engine_->getIOTensorName(0); + name_tensor_out_ = 
engine_->getIOTensorName(engine_->getNbIOTensors() - 1); } Net::~Net() @@ -155,6 +157,8 @@ Net::Net( std::cout << "Fail to create context" << std::endl; return; } + name_tensor_in_ = engine_->getIOTensorName(0); + name_tensor_out_ = engine_->getIOTensorName(engine_->getNbIOTensors() - 1); } void Net::save(const std::string & path) @@ -164,35 +168,37 @@ void Net::save(const std::string & path) file.write(reinterpret_cast(plan_->data()), plan_->size()); } -void Net::infer(std::vector & buffers, const int batch_size) +void Net::infer(const int batch_size) { if (!context_) { throw std::runtime_error("Fail to create context"); } - auto input_dims = engine_->getBindingDimensions(0); - context_->setBindingDimensions( - 0, nvinfer1::Dims4(batch_size, input_dims.d[1], input_dims.d[2], input_dims.d[3])); - context_->enqueueV2(buffers.data(), stream_, nullptr); + const auto input_dims = engine_->getTensorShape(name_tensor_in_.c_str()); + context_->setInputShape( + name_tensor_in_.c_str(), + nvinfer1::Dims4(batch_size, input_dims.d[1], input_dims.d[2], input_dims.d[3])); + context_->enqueueV3(stream_); cudaStreamSynchronize(stream_); } std::vector Net::getInputSize() { - auto dims = engine_->getBindingDimensions(0); + const auto dims = engine_->getTensorShape(name_tensor_in_.c_str()); return {dims.d[1], dims.d[2], dims.d[3]}; } std::vector Net::getOutputScoreSize() { - auto dims = engine_->getBindingDimensions(1); + const auto dims = engine_->getTensorShape(engine_->getIOTensorName(1)); return {dims.d[1], dims.d[2]}; } int Net::getMaxBatchSize() { - return engine_->getProfileDimensions(0, 0, nvinfer1::OptProfileSelector::kMAX).d[0]; + return engine_->getProfileShape(name_tensor_in_.c_str(), 0, nvinfer1::OptProfileSelector::kMAX) + .d[0]; } -int Net::getMaxDetections() { return engine_->getBindingDimensions(1).d[1]; } +int Net::getMaxDetections() { return engine_->getTensorShape(engine_->getIOTensorName(1)).d[1]; } } // namespace ssd diff --git 
a/perception/traffic_light_ssd_fine_detector/src/nodelet.cpp b/perception/traffic_light_ssd_fine_detector/src/nodelet.cpp index 5e5c2126fb5cd..554147a4d4a1a 100644 --- a/perception/traffic_light_ssd_fine_detector/src/nodelet.cpp +++ b/perception/traffic_light_ssd_fine_detector/src/nodelet.cpp @@ -144,7 +144,6 @@ void TrafficLightSSDFineDetectorNodelet::callback( auto data_d = cuda::make_unique(num_infer * channel_ * width_ * height_); auto scores_d = cuda::make_unique(num_infer * detection_per_class_ * class_num_); auto boxes_d = cuda::make_unique(num_infer * detection_per_class_ * 4); - std::vector buffers = {data_d.get(), scores_d.get(), boxes_d.get()}; std::vector lts, rbs; std::vector cropped_imgs; @@ -168,7 +167,7 @@ void TrafficLightSSDFineDetectorNodelet::callback( cudaMemcpy(data_d.get(), data.data(), data.size() * sizeof(float), cudaMemcpyHostToDevice); try { - net_ptr_->infer(buffers, num_infer); + net_ptr_->infer(num_infer); } catch (std::exception & e) { RCLCPP_ERROR(this->get_logger(), "%s", e.what()); return;