
Commit

dav2: Get rid of vis topic
marcojob committed Nov 12, 2024
1 parent 2d5a149 commit 063990b
Showing 4 changed files with 43 additions and 79 deletions.
103 changes: 35 additions & 68 deletions include/usb_cam/learning/depth_anything_v2.hpp
@@ -9,110 +9,77 @@

 class DepthAnythingV2 : public LearningInterface {
 public:
-    DepthAnythingV2(ros::NodeHandle* nh, std::string& model_path, std::string& metric_topic, std::string& vis_topic) {
-        _INPUT_SIZE = cv::Size(_HEIGHT, _WIDTH);
+    DepthAnythingV2(ros::NodeHandle* nh, std::string& model_path, std::string& metric_topic) {
         _model_path = model_path;
         _load_model();

-        if (nh != nullptr) {
-            if (!metric_topic.empty()) {
-                _depth_metric_pub = nh->advertise<sensor_msgs::Image>(metric_topic, 1);
-            }
-            if (!vis_topic.empty()) {
-                _depth_vis_pub = nh->advertise<sensor_msgs::Image>(vis_topic, 1);
-            }
+        if (nh != nullptr && !metric_topic.empty()) {
+            _depth_pub = nh->advertise<sensor_msgs::Image>(metric_topic, 1);
         }
     }

     void set_input(sensor_msgs::Image& msg) override {
+        // Keep track of input image size, we want to get the same output image dimensions
+        _output_image_w = msg.width;
+        _output_image_h = msg.height;
+
         cv_bridge::CvImagePtr cv_ptr = cv_bridge::toCvCopy(msg, sensor_msgs::image_encodings::RGB8);
         cv::Mat image = cv_ptr->image;

-        // Change size to 518x518 (still uint8)
         cv::Mat resized_image;
-        cv::resize(image, resized_image, _INPUT_SIZE);
+        cv::resize(image, resized_image, cv::Size(_input_w, _input_h));

         std::vector<float> input = _preprocess(resized_image);
         memcpy(_input_data, input.data(), _input_size_float);
     }

     void publish() override {
-        cv::Mat depth_prediction(_HEIGHT, _WIDTH, CV_32FC1, _output_data);
-        if (_depth_metric_pub.getTopic() != "") {
+        // Raw depth prediction (in meters, CV_32FC1)
+        if (_depth_pub.getTopic() != "") {
+            cv::Mat depth_prediction(_input_w, _input_h, CV_32FC1, _output_data);
+            cv::Mat depth_resized;
+            cv::resize(depth_prediction, depth_resized, cv::Size(_output_image_w, _output_image_h));
             cv_bridge::CvImage depth_image;
             depth_image.header.stamp = ros::Time::now();
             depth_image.header.frame_id = "depth_frame";
             depth_image.encoding = sensor_msgs::image_encodings::TYPE_32FC1;
-            depth_image.image = depth_prediction;
-            _depth_metric_pub.publish(depth_image.toImageMsg());
-        }
-
-        if (_depth_vis_pub.getTopic() != "") {
-            cv::normalize(depth_prediction, depth_prediction, 0, 255, cv::NORM_MINMAX, CV_8UC1);
-            cv::Mat colormap;
-            cv::applyColorMap(depth_prediction, colormap, cv::COLORMAP_INFERNO);
-            cv::resize(colormap, colormap, cv::Size(_WIDTH, _HEIGHT));
-
-            cv_bridge::CvImage visualized_image;
-            visualized_image.header.stamp = ros::Time::now();
-            visualized_image.header.frame_id = "depth_frame";
-            visualized_image.encoding = sensor_msgs::image_encodings::MONO8;
-            visualized_image.image = colormap;
-            _depth_vis_pub.publish(visualized_image.toImageMsg());
+            depth_image.image = depth_resized;
+            _depth_pub.publish(depth_image.toImageMsg());
         }
     }

 private:
-    const size_t _HEIGHT = 518;
-    const size_t _WIDTH = 518;
     // TODO: Not so nice magic numbers from the paper implementation
     const float mean[3] = { 123.675f, 116.28f, 103.53f };
     const float std[3] = { 58.395f, 57.12f, 57.375f };
-    cv::Size _INPUT_SIZE;
-    ros::Publisher _depth_metric_pub;
-    ros::Publisher _depth_vis_pub;
+    ros::Publisher _depth_pub;
+    uint32_t _output_image_w, _output_image_h;

     std::vector<float> _preprocess(cv::Mat& image) {
-        std::tuple<cv::Mat, int, int> resized = _resize_depth(image, _input_w, _input_h);
-        cv::Mat resized_image = std::get<0>(resized);
+        // Determine the resized dimensions
+        const int iw = image.cols;
+        const int ih = image.rows;
+        const float aspect_ratio = static_cast<float>(iw) / ih;
+        const int nw = (aspect_ratio >= 1) ? _input_w : static_cast<int>(_input_w * aspect_ratio);
+        const int nh = (aspect_ratio >= 1) ? static_cast<int>(_input_h / aspect_ratio) : _input_h;
+
+        // Resize image and pad if necessary
+        cv::Mat resized_image;
+        cv::resize(image, resized_image, cv::Size(nw, nh), 0, 0, cv::INTER_LINEAR);
+        cv::Mat padded_image = cv::Mat::ones(cv::Size(_input_w, _input_h), CV_8UC3) * 128;
+        resized_image.copyTo(padded_image(cv::Rect((_input_w - nw) / 2, (_input_h - nh) / 2, nw, nh)));
+
+        // Normalize and flatten the image to a 1D tensor
         std::vector<float> input_tensor;
         for (int k = 0; k < 3; k++) {
-            for (int i = 0; i < resized_image.rows; i++) {
-                for (int j = 0; j < resized_image.cols; j++) {
-                    input_tensor.emplace_back(((float)resized_image.at<cv::Vec3b>(i, j)[k] - mean[k]) / std[k]);
+            for (int i = 0; i < padded_image.rows; i++) {
+                for (int j = 0; j < padded_image.cols; j++) {
+                    input_tensor.emplace_back((padded_image.at<cv::Vec3b>(i, j)[k] - mean[k]) / std[k]);
                 }
             }
         }
-        return input_tensor;
-    }
-
-    std::tuple<cv::Mat, int, int> _resize_depth(cv::Mat& img, int w, int h) {
-        cv::Mat result;
-        int nw, nh;
-        int ih = img.rows;
-        int iw = img.cols;
-        float aspectRatio = (float)img.cols / (float)img.rows;
-
-        if (aspectRatio >= 1) {
-            nw = w;
-            nh = int(h / aspectRatio);
-        } else {
-            nw = int(w * aspectRatio);
-            nh = h;
-        }
-        cv::resize(img, img, cv::Size(nw, nh));
-        result = cv::Mat::ones(cv::Size(w, h), CV_8UC1) * 128;
-        cv::cvtColor(result, result, cv::COLOR_GRAY2RGB);
-        cv::cvtColor(img, img, cv::COLOR_BGR2RGB);
-
-        cv::Mat re(h, w, CV_8UC3);
-        cv::resize(img, re, re.size(), 0, 0, cv::INTER_LINEAR);
-        cv::Mat out(h, w, CV_8UC3, 0.0);
-        re.copyTo(out(cv::Rect(0, 0, re.cols, re.rows)));
-
-        std::tuple<cv::Mat, int, int> res_tuple = std::make_tuple(out, (w - nw) / 2, (h - nh) / 2);
-        return res_tuple;
+        return input_tensor;
     }
 };
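For reference, a minimal standalone sketch of the letterbox preprocessing that the reworked _preprocess() above performs: aspect-ratio preserving resize, centered padding to the 518x518 network input, then channel-wise mean/std normalization into a flat CHW tensor. Function and variable names below are illustrative, not part of the repository, and the padding here fills all three channels with 128 via cv::Scalar instead of the cv::Mat::ones(...) * 128 expression used in the diff.

// Standalone sketch of the letterbox + normalization step (illustrative only).
#include <opencv2/opencv.hpp>
#include <vector>
#include <cstdio>

std::vector<float> letterbox_and_normalize(const cv::Mat& image, int input_w, int input_h) {
    // Mean/std constants taken from the diff above (RGB order).
    const float mean[3]   = { 123.675f, 116.28f, 103.53f };
    const float stddev[3] = { 58.395f, 57.12f, 57.375f };

    // Aspect-ratio preserving target size, e.g. 640x480 -> 518x388 for a 518x518 input.
    const float aspect_ratio = static_cast<float>(image.cols) / image.rows;
    const int nw = (aspect_ratio >= 1) ? input_w : static_cast<int>(input_w * aspect_ratio);
    const int nh = (aspect_ratio >= 1) ? static_cast<int>(input_h / aspect_ratio) : input_h;

    // Resize, then paste centered into a gray canvas: for the 640x480 example
    // this leaves (518 - 388) / 2 = 65 px of padding above and below.
    cv::Mat resized;
    cv::resize(image, resized, cv::Size(nw, nh), 0, 0, cv::INTER_LINEAR);
    cv::Mat padded(input_h, input_w, CV_8UC3, cv::Scalar(128, 128, 128));
    resized.copyTo(padded(cv::Rect((input_w - nw) / 2, (input_h - nh) / 2, nw, nh)));

    // Normalize channel-wise and flatten to CHW order.
    std::vector<float> tensor;
    tensor.reserve(3 * input_w * input_h);
    for (int k = 0; k < 3; k++)
        for (int i = 0; i < padded.rows; i++)
            for (int j = 0; j < padded.cols; j++)
                tensor.push_back((padded.at<cv::Vec3b>(i, j)[k] - mean[k]) / stddev[k]);
    return tensor;
}

int main() {
    cv::Mat dummy(480, 640, CV_8UC3, cv::Scalar(90, 120, 150));
    std::vector<float> input = letterbox_and_normalize(dummy, 518, 518);
    std::printf("tensor size: %zu (expect 3*518*518 = %d)\n", input.size(), 3 * 518 * 518);
    return 0;
}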

3 changes: 1 addition & 2 deletions include/usb_cam/usb_cam.hpp
@@ -120,8 +120,7 @@ typedef struct
   std::string pixel_format_name;
   std::string av_device_format;
   std::string dav2_file;
-  std::string dav2_metric_topic;
-  std::string dav2_vis_topic;
+  std::string dav2_topic;
   int image_width;
   int image_height;
   int framerate;
9 changes: 4 additions & 5 deletions launch/two_mipi_rgb.launch
@@ -19,8 +19,7 @@
     <param name="wb_red_gain" value="1.18"/>

     <param name="dav2_file" value="/home/asl/Workspace/src/v4l2_camera/test/resources/depth_anything_v2_vits.onnx" />
-    <param name="dav2_metric_topic" value="/depth0/relative" />
-    <param name="dav2_vis_topic" value="/depth0/vis" />
+    <param name="dav2_topic" value="/cam0/depth/relative" />
   </node>

   <node name="cam1" pkg="usb_cam" type="usb_cam_node" output="screen" >
@@ -40,8 +39,8 @@
     <param name="wb_green_gain" value="0.88"/>
     <param name="wb_red_gain" value="1.18"/>

-    <param name="dav2_file" value="" />
-    <param name="dav2_metric_topic" value="/depth1/relative" />
-    <param name="dav2_vis_topic" value="/depth1/vis" />
+    <param name="dav2_file" value="/home/asl/Workspace/src/v4l2_camera/test/resources/depth_anything_v2_vits.onnx" />
+    <!-- Downfacing cam needs no depth prediction (yet) -->
+    <param name="dav2_topic" value="" />
   </node>
 </launch>
7 changes: 3 additions & 4 deletions src/usb_cam_node.cpp
@@ -125,11 +125,10 @@ class UsbCamNode {

         // Setup the network that outputs derivates of the image captured
         m_node.param("dav2_file", m_parameters.dav2_file, std::string(""));
-        m_node.param("dav2_metric_topic", m_parameters.dav2_metric_topic, std::string(""));
-        m_node.param("dav2_vis_topic", m_parameters.dav2_vis_topic, std::string(""));
+        m_node.param("dav2_topic", m_parameters.dav2_topic, std::string(""));

-        if (!m_parameters.dav2_file.empty() && !m_parameters.dav2_metric_topic.empty()) {
-            networks.push_back(std::make_unique<DepthAnythingV2>(&m_node, m_parameters.dav2_file, m_parameters.dav2_metric_topic, m_parameters.dav2_vis_topic));
+        if (!m_parameters.dav2_file.empty() && !m_parameters.dav2_topic.empty()) {
+            networks.push_back(std::make_unique<DepthAnythingV2>(&m_node, m_parameters.dav2_file, m_parameters.dav2_topic));
         }

         ROS_INFO("Starting '%s' (%s) at %dx%d via %s (%s) at %i FPS",
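After this commit each camera node advertises at most one depth topic, published as sensor_msgs/Image with encoding 32FC1 and resized back to the camera resolution, and only when both dav2_file and dav2_topic are non-empty. A hypothetical consumer of that topic might look like the sketch below; the node name, callback, and package wiring are illustrative, and the topic string matches the cam0 value set in the launch file above.

// Illustrative subscriber for the remaining relative-depth topic (not part of the repo).
#include <ros/ros.h>
#include <sensor_msgs/Image.h>
#include <sensor_msgs/image_encodings.h>
#include <cv_bridge/cv_bridge.h>
#include <opencv2/core.hpp>

void depthCallback(const sensor_msgs::ImageConstPtr& msg) {
    // toCvShare avoids a copy since the data is only read here.
    cv_bridge::CvImageConstPtr cv_ptr =
        cv_bridge::toCvShare(msg, sensor_msgs::image_encodings::TYPE_32FC1);
    double min_val, max_val;
    cv::minMaxLoc(cv_ptr->image, &min_val, &max_val);
    ROS_INFO("depth %ux%u, range [%.3f, %.3f]", msg->width, msg->height, min_val, max_val);
}

int main(int argc, char** argv) {
    ros::init(argc, argv, "depth_listener");
    ros::NodeHandle nh;
    ros::Subscriber sub = nh.subscribe("/cam0/depth/relative", 1, depthCallback);
    ros::spin();
    return 0;
}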
