FinalProject_Camera.cpp
/* INCLUDES FOR THIS PROJECT */
#include <iostream>
#include <fstream>
#include <sstream>
#include <iomanip>
#include <vector>
#include <cmath>
#include <limits>
#include <opencv2/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/features2d.hpp>
#include <opencv2/xfeatures2d.hpp>
#include <opencv2/xfeatures2d/nonfree.hpp>
#include "dataStructures.h"
#include "matching2D.hpp"
#include "objectDetection2D.hpp"
#include "lidarData.hpp"
#include "camFusion.hpp"
using namespace std;

/* MAIN PROGRAM */
int main(int argc, const char *argv[])
{
/* INIT VARIABLES AND DATA STRUCTURES */

// data location
string dataPath = "../";

// camera
string imgBasePath = dataPath + "images/";
string imgPrefix = "KITTI/2011_09_26/image_02/data/000000"; // left camera, color
string imgFileType = ".png";
int imgStartIndex = 0; // first file index to load (assumes Lidar and camera names have identical naming convention)
int imgEndIndex = 18; // last file index to load
int imgStepWidth = 1;
int imgFillWidth = 4; // no. of digits which make up the file index (e.g. img-0001.png)

// object detection
string yoloBasePath = dataPath + "dat/yolo/";
string yoloClassesFile = yoloBasePath + "coco.names";
string yoloModelConfiguration = yoloBasePath + "yolov3.cfg";
string yoloModelWeights = yoloBasePath + "yolov3.weights";

// Lidar
string lidarPrefix = "KITTI/2011_09_26/velodyne_points/data/000000";
string lidarFileType = ".bin";

// calibration data for camera and lidar
cv::Mat P_rect_00(3,4,cv::DataType<double>::type); // 3x4 projection matrix after rectification
cv::Mat R_rect_00(4,4,cv::DataType<double>::type); // 3x3 rectifying rotation to make image planes co-planar, stored here as a 4x4 homogeneous matrix
cv::Mat RT(4,4,cv::DataType<double>::type); // rotation matrix and translation vector
RT.at<double>(0,0) = 7.533745e-03; RT.at<double>(0,1) = -9.999714e-01; RT.at<double>(0,2) = -6.166020e-04; RT.at<double>(0,3) = -4.069766e-03;
RT.at<double>(1,0) = 1.480249e-02; RT.at<double>(1,1) = 7.280733e-04; RT.at<double>(1,2) = -9.998902e-01; RT.at<double>(1,3) = -7.631618e-02;
RT.at<double>(2,0) = 9.998621e-01; RT.at<double>(2,1) = 7.523790e-03; RT.at<double>(2,2) = 1.480755e-02; RT.at<double>(2,3) = -2.717806e-01;
RT.at<double>(3,0) = 0.0; RT.at<double>(3,1) = 0.0; RT.at<double>(3,2) = 0.0; RT.at<double>(3,3) = 1.0;
R_rect_00.at<double>(0,0) = 9.999239e-01; R_rect_00.at<double>(0,1) = 9.837760e-03; R_rect_00.at<double>(0,2) = -7.445048e-03; R_rect_00.at<double>(0,3) = 0.0;
R_rect_00.at<double>(1,0) = -9.869795e-03; R_rect_00.at<double>(1,1) = 9.999421e-01; R_rect_00.at<double>(1,2) = -4.278459e-03; R_rect_00.at<double>(1,3) = 0.0;
R_rect_00.at<double>(2,0) = 7.402527e-03; R_rect_00.at<double>(2,1) = 4.351614e-03; R_rect_00.at<double>(2,2) = 9.999631e-01; R_rect_00.at<double>(2,3) = 0.0;
R_rect_00.at<double>(3,0) = 0; R_rect_00.at<double>(3,1) = 0; R_rect_00.at<double>(3,2) = 0; R_rect_00.at<double>(3,3) = 1;
P_rect_00.at<double>(0,0) = 7.215377e+02; P_rect_00.at<double>(0,1) = 0.000000e+00; P_rect_00.at<double>(0,2) = 6.095593e+02; P_rect_00.at<double>(0,3) = 0.000000e+00;
P_rect_00.at<double>(1,0) = 0.000000e+00; P_rect_00.at<double>(1,1) = 7.215377e+02; P_rect_00.at<double>(1,2) = 1.728540e+02; P_rect_00.at<double>(1,3) = 0.000000e+00;
P_rect_00.at<double>(2,0) = 0.000000e+00; P_rect_00.at<double>(2,1) = 0.000000e+00; P_rect_00.at<double>(2,2) = 1.000000e+00; P_rect_00.at<double>(2,3) = 0.000000e+00;
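
// Taken together, these matrices implement the standard KITTI projection of a homogeneous
// Lidar point X = (x, y, z, 1)^T into the left color image:
//     Y = P_rect_00 * R_rect_00 * RT * X,   u = Y(0) / Y(2),   v = Y(1) / Y(2)
// The helpers below (clusterLidarWithROI, showLidarImgOverlay) receive these matrices as
// parameters for exactly this projection.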

// misc
double sensorFrameRate = 10.0 / imgStepWidth; // frames per second for Lidar and camera
int dataBufferSize = 2; // no. of images which are held in memory (ring buffer) at the same time
vector<DataFrame> dataBuffer; // list of data frames which are held in memory at the same time
bool bVis = false; // visualize results

/* MAIN LOOP OVER ALL IMAGES */
for (size_t imgIndex = 0; imgIndex <= imgEndIndex - imgStartIndex; imgIndex+=imgStepWidth)
{

/* LOAD IMAGE INTO BUFFER */
// assemble filenames for current index
ostringstream imgNumber;
imgNumber << setfill('0') << setw(imgFillWidth) << imgStartIndex + imgIndex;
string imgFullFilename = imgBasePath + imgPrefix + imgNumber.str() + imgFileType;

// load image from file
cv::Mat img = cv::imread(imgFullFilename);
if (img.empty())
{
cerr << "Failed to load image " << imgFullFilename << endl;
continue;
}

// push image into data frame buffer
DataFrame frame;
frame.cameraImg = img;
dataBuffer.push_back(frame);
if ((int)dataBuffer.size() > dataBufferSize)
{
dataBuffer.erase(dataBuffer.begin()); // keep it a ring buffer: drop the oldest frame
}
cout << "#1 : LOAD IMAGE INTO BUFFER done" << endl;

/* DETECT & CLASSIFY OBJECTS */
float confThreshold = 0.2;
float nmsThreshold = 0.4;
detectObjects((dataBuffer.end() - 1)->cameraImg, (dataBuffer.end() - 1)->boundingBoxes, confThreshold, nmsThreshold,
yoloBasePath, yoloClassesFile, yoloModelConfiguration, yoloModelWeights, bVis);
cout << "#2 : DETECT & CLASSIFY OBJECTS done" << endl;

/* CROP LIDAR POINTS */
// load 3D Lidar points from file
string lidarFullFilename = imgBasePath + lidarPrefix + imgNumber.str() + lidarFileType;
std::vector<LidarPoint> lidarPoints;
loadLidarFromFile(lidarPoints, lidarFullFilename);
// remove Lidar points based on distance properties
float minZ = -1.5, maxZ = -0.9, minX = 2.0, maxX = 20.0, maxY = 2.0, minR = 0.1; // focus on ego lane
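// the crop keeps points roughly 2-20 m ahead of the sensor (minX/maxX), within +/-2 m
// laterally (maxY), between road surface and roof level (minZ/maxZ), and above a minimum
// reflectivity minR, so that mostly the directly preceding vehicle remains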
cropLidarPoints(lidarPoints, minX, maxX, maxY, minZ, maxZ, minR);
(dataBuffer.end() - 1)->lidarPoints = lidarPoints;
cout << "#3 : CROP LIDAR POINTS done" << endl;

/* CLUSTER LIDAR POINT CLOUD */
// associate Lidar points with camera-based ROI
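// clusterLidarWithROI projects every Lidar point into the image (see the projection note
// above) and assigns it to the bounding box whose shrunken ROI encloses it; points that
// fall into several overlapping boxes are typically discarded to keep associations unambiguous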
float shrinkFactor = 0.10; // shrinks each bounding box by the given percentage to avoid 3D object merging at the edges of an ROI
clusterLidarWithROI((dataBuffer.end()-1)->boundingBoxes, (dataBuffer.end() - 1)->lidarPoints, shrinkFactor, P_rect_00, R_rect_00, RT);
// Visualize 3D objects
bVis = false;
if(bVis)
{
show3DObjects((dataBuffer.end()-1)->boundingBoxes, cv::Size(4.0, 20.0), cv::Size(2000, 2000), true);
}
bVis = false;
cout << "#4 : CLUSTER LIDAR POINT CLOUD done" << endl;

/* DETECT IMAGE KEYPOINTS */
// convert current image to grayscale
cv::Mat imgGray;
cv::cvtColor((dataBuffer.end()-1)->cameraImg, imgGray, cv::COLOR_BGR2GRAY);
// extract 2D keypoints from current image
vector<cv::KeyPoint> keypoints; // create empty feature list for current image
string detectorType = "SHITOMASI"; // SHITOMASI, HARRIS, FAST, BRISK, ORB, AKAZE, SIFT
if (detectorType.compare("SHITOMASI") == 0)
{
detKeypointsShiTomasi(keypoints, imgGray, false);
}
else if (detectorType.compare("HARRIS") == 0)
{
detKeypointsHarris(keypoints, imgGray, false, false);
}
else if ((detectorType.compare("FAST") == 0) || (detectorType.compare("BRISK") == 0) || (detectorType.compare("ORB") == 0) || (detectorType.compare("AKAZE") == 0) || (detectorType.compare("SIFT") == 0))
{
detKeypointsModern(keypoints, imgGray, detectorType, false);
}
else
{
cerr << "#5 : DETECT KEYPOINTS failed. Wrong detectorType - " << detectorType << ". Use one of the following detectors: SHITOMASI, HARRIS, FAST, BRISK, ORB, AKAZE, SIFT" << endl;
exit(-1);
}
// only keep keypoints on the preceding vehicle
bool bFocusOnVehicle = true;
cv::Rect vehicleRect(535, 180, 180, 150);
if (bFocusOnVehicle)
{
vector<cv::KeyPoint> keypointsROI;
for (auto it = keypoints.begin(); it != keypoints.end(); ++it)
{
if (vehicleRect.contains(it->pt))
{
keypointsROI.push_back(*it);
}
}
keypoints = keypointsROI;
cout << detectorType << " detector with n=" << keypoints.size() << " keypoints in the rectangle ROI" << endl;
}
// optional : limit number of keypoints (helpful for debugging and learning)
bool bLimitKpts = false;
if (bLimitKpts)
{
int maxKeypoints = 50;
if (detectorType.compare("SHITOMASI") == 0 && (int)keypoints.size() > maxKeypoints)
{ // there is no response info, so keep the first maxKeypoints as they are sorted in descending quality order
keypoints.erase(keypoints.begin() + maxKeypoints, keypoints.end());
}
cv::KeyPointsFilter::retainBest(keypoints, maxKeypoints);
cout << " NOTE: Keypoints have been limited!" << endl;
}
// push keypoints and descriptor for current frame to end of data buffer
(dataBuffer.end() - 1)->keypoints = keypoints;
cout << "#5 : DETECT KEYPOINTS done" << endl;

/* EXTRACT KEYPOINT DESCRIPTORS */
cv::Mat descriptors;
string descriptorType = "BRISK"; // BRISK, BRIEF, ORB, FREAK, AKAZE, SIFT
descKeypoints((dataBuffer.end() - 1)->keypoints, (dataBuffer.end() - 1)->cameraImg, descriptors, descriptorType);
// push descriptors for current frame to end of data buffer
(dataBuffer.end() - 1)->descriptors = descriptors;
cout << "#6 : EXTRACT DESCRIPTORS done" << endl;

if (dataBuffer.size() > 1) // wait until at least two images have been processed
{
/* MATCH KEYPOINT DESCRIPTORS */
vector<cv::DMatch> matches;
string matcherType = "MAT_BF"; // MAT_BF, MAT_FLANN
string descriptorTypeClass = "DES_BINARY"; // DES_BINARY, DES_HOG
string selectorType = "SEL_NN"; // SEL_NN, SEL_KNN
matchDescriptors((dataBuffer.end() - 2)->keypoints, (dataBuffer.end() - 1)->keypoints,
(dataBuffer.end() - 2)->descriptors, (dataBuffer.end() - 1)->descriptors,
matches, descriptorTypeClass, matcherType, selectorType);
// store matches in current data frame
(dataBuffer.end() - 1)->kptMatches = matches;
cout << "#7 : MATCH KEYPOINT DESCRIPTORS done" << endl;
// visualize matches between current and previous image
bVis = false;
if (bVis)
{
cv::Mat matchImg = ((dataBuffer.end() - 1)->cameraImg).clone();
cv::drawMatches((dataBuffer.end() - 2)->cameraImg, (dataBuffer.end() - 2)->keypoints,
(dataBuffer.end() - 1)->cameraImg, (dataBuffer.end() - 1)->keypoints,
matches, matchImg,
cv::Scalar::all(-1), cv::Scalar::all(-1),
vector<char>(), cv::DrawMatchesFlags::DRAW_RICH_KEYPOINTS);
string windowName = "Matching keypoints between two camera images";
cv::namedWindow(windowName, cv::WINDOW_NORMAL);
cv::imshow(windowName, matchImg);
}
bVis = false;

/* TRACK 3D OBJECT BOUNDING BOXES */
// TASK FP.1 -> match list of 3D objects (vector<BoundingBox>) between current and previous frame (implement ->matchBoundingBoxes)
// associate bounding boxes between current and previous frame using keypoint matches
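// A common implementation sketch for matchBoundingBoxes: for each keypoint match, find the
// bounding boxes in the previous and current frame that contain the matched keypoints and
// count how often each (prevBoxID, currBoxID) pair occurs; for every previous box, keep the
// current box with the highest count, stored as bbBestMatches[prevBoxID] = currBoxID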
map<int, int> bbBestMatches;
matchBoundingBoxes(matches, bbBestMatches, *(dataBuffer.end()-2), *(dataBuffer.end()-1));
// store matches in current data frame
(dataBuffer.end()-1)->bbMatches = bbBestMatches;
cout << "#8 : TRACK 3D OBJECT BOUNDING BOXES done" << endl;

/* COMPUTE TTC ON OBJECT IN FRONT */
// loop over all BB match pairs
for (auto it1 = (dataBuffer.end() - 1)->bbMatches.begin(); it1 != (dataBuffer.end() - 1)->bbMatches.end(); ++it1)
{
// find bounding boxes associated with the current match
BoundingBox *prevBB = nullptr, *currBB = nullptr;
for (auto it2 = (dataBuffer.end() - 1)->boundingBoxes.begin(); it2 != (dataBuffer.end() - 1)->boundingBoxes.end(); ++it2)
{
if (it1->second == it2->boxID) // check whether current match partner corresponds to this BB
{
currBB = &(*it2);
}
}
for (auto it2 = (dataBuffer.end() - 2)->boundingBoxes.begin(); it2 != (dataBuffer.end() - 2)->boundingBoxes.end(); ++it2)
{
if (it1->first == it2->boxID) // check whether current match partner corresponds to this BB
{
prevBB = &(*it2);
}
}
// compute TTC for current match
if( currBB != nullptr && prevBB != nullptr && currBB->lidarPoints.size()>0 && prevBB->lidarPoints.size()>0 ) // only compute TTC if both boxes were found and contain Lidar points
{
// TASK FP.2 -> compute time-to-collision based on Lidar data (implement -> computeTTCLidar)
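// Constant-velocity sketch for computeTTCLidar: with dT = 1 / sensorFrameRate and d0, d1
// the (outlier-robust, e.g. median-filtered) distances to the closest Lidar points on the
// preceding vehicle in the previous and current frame, TTC = d1 * dT / (d0 - d1).
// Using the raw minimum x-value is fragile, since a single spurious point can dominate it.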
double ttcLidar;
computeTTCLidar(prevBB->lidarPoints, currBB->lidarPoints, sensorFrameRate, ttcLidar);
// TASK FP.3 -> assign enclosed keypoint matches to bounding box (implement -> clusterKptMatchesWithROI)
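// clusterKptMatchesWithROI is expected to add all matches whose current keypoint lies inside
// currBB->roi to currBB->kptMatches; a common refinement is to first compute the mean match
// distance within the ROI and drop matches that deviate strongly from it.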
double ttcCamera;
clusterKptMatchesWithROI(*currBB, (dataBuffer.end() - 2)->keypoints, (dataBuffer.end() - 1)->keypoints, (dataBuffer.end() - 1)->kptMatches);
// TASK FP.4 -> compute time-to-collision based on camera (implement -> computeTTCCamera)
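// Sketch for computeTTCCamera: compute the ratios of distances between all pairs of matched
// keypoints in the current vs. the previous frame, take the median ratio medDistRatio as a
// robust estimate of the scale change, and with dT = 1 / sensorFrameRate,
//     TTC = -dT / (1 - medDistRatio)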
computeTTCCamera((dataBuffer.end() - 2)->keypoints, (dataBuffer.end() - 1)->keypoints, currBB->kptMatches, sensorFrameRate, ttcCamera);
bVis = true;
if (bVis)
{
cv::Mat visImg = (dataBuffer.end() - 1)->cameraImg.clone();
showLidarImgOverlay(visImg, currBB->lidarPoints, P_rect_00, R_rect_00, RT, &visImg);
cv::rectangle(visImg, cv::Point(currBB->roi.x, currBB->roi.y), cv::Point(currBB->roi.x + currBB->roi.width, currBB->roi.y + currBB->roi.height), cv::Scalar(0, 255, 0), 2);
char str[200];
snprintf(str, sizeof(str), "TTC Lidar : %f s, TTC Camera : %f s", ttcLidar, ttcCamera);
cv::putText(visImg, str, cv::Point2f(80, 50), cv::FONT_HERSHEY_PLAIN, 2, cv::Scalar(0,0,255));
string windowName = "Final Results : TTC";
cv::namedWindow(windowName, cv::WINDOW_AUTOSIZE);
cv::imshow(windowName, visImg);
bool bLog = false;
if (bLog) {
string resultImg = "../results/" + detectorType + "-" + descriptorType + "-img" + imgNumber.str() + imgFileType;
cv::imwrite(resultImg, visImg);
}
else
{
cout << "Press key to continue to next image" << endl;
cv::waitKey(0); // wait for key to be pressed
}
}
bVis = false;
} // eof TTC computation
} // eof loop over all BB matches
}
} // eof loop over all images
return 0;
}