Visual Servoing Platform version 3.6.1 under development (2025-02-20)
tutorial-mb-generic-tracker-rgbd.cpp
#include <iostream>

#include <visp3/core/vpConfig.h>
#include <visp3/core/vpDisplay.h>
#include <visp3/core/vpIoTools.h>
#include <visp3/gui/vpDisplayFactory.h>
#include <visp3/io/vpImageIo.h>
#include <visp3/mbt/vpMbGenericTracker.h>

#if defined(VISP_HAVE_PCL) && defined(VISP_HAVE_PCL_COMMON)
#include <pcl/common/common.h>
#include <pcl/point_cloud.h>
#include <pcl/point_types.h>

#ifdef ENABLE_VISP_NAMESPACE
using namespace VISP_NAMESPACE_NAME;
#endif

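// Depth camera intrinsics, mirroring the librealsense intrinsics layout: principal point (ppx, ppy),
// focal lengths (fx, fy) in pixels, and distortion coefficients. They are used below to deproject
// each depth pixel into a 3D point.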
namespace
{
struct vpRealsenseIntrinsics_t
{
  float ppx;
  float ppy;
  float fx;
  float fy;
  float coeffs[5];
};

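// Deproject a pixel (u, v) with its depth (in meters) into a 3D point expressed in the depth camera
// frame: the pixel is first normalized with the intrinsics, the distortion coefficients are applied
// (all zero in this tutorial, so this step is a no-op), then the result is scaled by the depth
// following the pinhole model. The routine follows the librealsense deprojection convention.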
void rs_deproject_pixel_to_point(float point[3], const vpRealsenseIntrinsics_t &intrin, const float pixel[2], float depth)
{
  float x = (pixel[0] - intrin.ppx) / intrin.fx;
  float y = (pixel[1] - intrin.ppy) / intrin.fy;

  float r2 = x * x + y * y;
  float f = 1 + intrin.coeffs[0] * r2 + intrin.coeffs[1] * r2 * r2 + intrin.coeffs[4] * r2 * r2 * r2;
  float ux = x * f + 2 * intrin.coeffs[2] * x * y + intrin.coeffs[3] * (r2 + 2 * x * x);
  float uy = y * f + 2 * intrin.coeffs[3] * x * y + intrin.coeffs[2] * (r2 + 2 * y * y);

  x = ux;
  y = uy;

  point[0] = depth * x;
  point[1] = depth * y;
  point[2] = depth;
}

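// Read the data stored for frame <cpt>: the color image (color_image_%04d.jpg) and the raw 16-bit depth
// map saved as a binary file (little-endian height and width, then row-major uint16_t depth values).
// The organized point cloud is rebuilt from the depth map using hard-coded SR300 depth intrinsics.
// Returns false when a file is missing, which is used to stop the tracking loop.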
bool read_data(unsigned int cpt, const std::string &input_directory, vpImage<vpRGBa> &I_color,
               vpImage<uint16_t> &I_depth_raw, pcl::PointCloud<pcl::PointXYZ>::Ptr &pointcloud)
{
  char buffer[FILENAME_MAX];
  // Read color
  std::stringstream ss;
  ss << input_directory << "/color_image_%04d.jpg";
  snprintf(buffer, FILENAME_MAX, ss.str().c_str(), cpt);
  std::string filename_color = buffer;

  if (!vpIoTools::checkFilename(filename_color)) {
    std::cerr << "Cannot read: " << filename_color << std::endl;
    return false;
  }
  vpImageIo::read(I_color, filename_color);

  // Read raw depth
  ss.str("");
  ss << input_directory << "/depth_image_%04d.bin";
  snprintf(buffer, FILENAME_MAX, ss.str().c_str(), cpt);
  std::string filename_depth = buffer;

  std::ifstream file_depth(filename_depth.c_str(), std::ios::in | std::ios::binary);
  if (!file_depth.is_open()) {
    return false;
  }

  unsigned int height = 0, width = 0;
  vpIoTools::readBinaryValueLE(file_depth, height);
  vpIoTools::readBinaryValueLE(file_depth, width);
  I_depth_raw.resize(height, width);

  uint16_t depth_value = 0;
  for (unsigned int i = 0; i < height; i++) {
    for (unsigned int j = 0; j < width; j++) {
      vpIoTools::readBinaryValueLE(file_depth, depth_value);
      I_depth_raw[i][j] = depth_value;
    }
  }

  // Transform pointcloud
  pointcloud->width = width;
  pointcloud->height = height;
  pointcloud->resize((size_t)width * height);

  // Only for Creative SR300
  const float depth_scale = 0.00100000005f;
  vpRealsenseIntrinsics_t depth_intrinsic;
  depth_intrinsic.ppx = 320.503509521484f;
  depth_intrinsic.ppy = 235.602951049805f;
  depth_intrinsic.fx = 383.970001220703f;
  depth_intrinsic.fy = 383.970001220703f;
  depth_intrinsic.coeffs[0] = 0.0f;
  depth_intrinsic.coeffs[1] = 0.0f;
  depth_intrinsic.coeffs[2] = 0.0f;
  depth_intrinsic.coeffs[3] = 0.0f;
  depth_intrinsic.coeffs[4] = 0.0f;

  for (unsigned int i = 0; i < height; i++) {
    for (unsigned int j = 0; j < width; j++) {
      float scaled_depth = I_depth_raw[i][j] * depth_scale;
      float point[3];
      float pixel[2] = { (float)j, (float)i };
      rs_deproject_pixel_to_point(point, depth_intrinsic, pixel, scaled_depth);
      pointcloud->points[(size_t)(i * width + j)].x = point[0];
      pointcloud->points[(size_t)(i * width + j)].y = point[1];
      pointcloud->points[(size_t)(i * width + j)].z = point[2];
    }
  }

  return true;
}
} // namespace

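// Command-line options allow overriding the default data directory, the xml configuration files,
// the cao models, the init file, and disabling the depth feature (--disable_depth).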
int main(int argc, char *argv[])
{
  std::string input_directory = "data"; // location of the data (images, depth_map, point_cloud)
  std::string config_color = "model/cube/cube.xml", config_depth = "model/cube/cube_depth.xml";
  std::string model_color = "model/cube/cube.cao", model_depth = "model/cube/cube.cao";
  std::string init_file = "model/cube/cube.init";
  unsigned int frame_cpt = 0;
  bool disable_depth = false;

  for (int i = 1; i < argc; i++) {
    if (std::string(argv[i]) == "--input_directory" && i + 1 < argc) {
      input_directory = std::string(argv[i + 1]);
    }
    else if (std::string(argv[i]) == "--config_color" && i + 1 < argc) {
      config_color = std::string(argv[i + 1]);
    }
    else if (std::string(argv[i]) == "--config_depth" && i + 1 < argc) {
      config_depth = std::string(argv[i + 1]);
    }
    else if (std::string(argv[i]) == "--model_color" && i + 1 < argc) {
      model_color = std::string(argv[i + 1]);
    }
    else if (std::string(argv[i]) == "--model_depth" && i + 1 < argc) {
      model_depth = std::string(argv[i + 1]);
    }
    else if (std::string(argv[i]) == "--init_file" && i + 1 < argc) {
      init_file = std::string(argv[i + 1]);
    }
    else if (std::string(argv[i]) == "--disable_depth") {
      disable_depth = true;
    }
    else if (std::string(argv[i]) == "--help" || std::string(argv[i]) == "-h") {
      std::cout << "Usage: \n"
                << argv[0]
                << " --input_directory <data directory> --config_color <object.xml> --config_depth <object.xml>"
                   " --model_color <object.cao> --model_depth <object.cao> --init_file <object.init> --disable_depth"
                << std::endl;
      std::cout << "\nExample:\n"
                << argv[0]
                << " --config_color model/cube/cube.xml --config_depth model/cube/cube.xml"
                   " --model_color model/cube/cube.cao --model_depth model/cube/cube.cao --init_file model/cube/cube.init\n"
                << std::endl;
      return EXIT_SUCCESS;
    }
  }

  std::cout << "Tracked features: " << std::endl;
#if defined(VISP_HAVE_OPENCV)
  std::cout << " Use edges : 1" << std::endl;
#if defined(HAVE_OPENCV_IMGPROC) && defined(HAVE_OPENCV_VIDEO)
  std::cout << " Use klt : 1" << std::endl;
#else
  std::cout << " Use klt : 0" << std::endl;
#endif
  std::cout << " Use depth : " << !disable_depth << std::endl;
#else
  std::cout << " Use edges : 1" << std::endl;
  std::cout << " Use klt : 0" << std::endl;
  std::cout << " Use depth : 0" << std::endl;
#endif
  std::cout << "Config files: " << std::endl;
  std::cout << " Input directory: " << "\"" << input_directory << "\"" << std::endl;
  std::cout << " Config color: " << "\"" << config_color << "\"" << std::endl;
  std::cout << " Config depth: " << "\"" << config_depth << "\"" << std::endl;
  std::cout << " Model color : " << "\"" << model_color << "\"" << std::endl;
  std::cout << " Model depth : " << "\"" << model_depth << "\"" << std::endl;
  std::cout << " Init file : " << "\"" << init_file << "\"" << std::endl;

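  // Containers updated at each iteration: the color image, its grayscale conversion used by the
  // edge/KLT tracker, the raw 16-bit depth map, its histogram-equalized visualization, and the
  // organized PCL point cloud consumed by the dense depth tracker.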
  vpImage<vpRGBa> I_color;
  vpImage<unsigned char> I_gray, I_depth;
  vpImage<uint16_t> I_depth_raw;
  pcl::PointCloud<pcl::PointXYZ>::Ptr pointcloud(new pcl::PointCloud<pcl::PointXYZ>());
  vpCameraParameters cam_color, cam_depth;

  read_data(frame_cpt, input_directory, I_color, I_depth_raw, pointcloud);
  vpImageConvert::convert(I_color, I_gray);
  vpImageConvert::createDepthHistogram(I_depth_raw, I_depth);

#if defined(VISP_HAVE_DISPLAY)
#if (VISP_CXX_STANDARD >= VISP_CXX_STANDARD_11)
  std::shared_ptr<vpDisplay> display1 = vpDisplayFactory::createDisplay();
  std::shared_ptr<vpDisplay> display2 = vpDisplayFactory::createDisplay();
#else
  vpDisplay *display1 = vpDisplayFactory::allocateDisplay();
  vpDisplay *display2 = vpDisplayFactory::allocateDisplay();
#endif
  unsigned int _posx = 100, _posy = 50, _posdx = 10;
  display1->init(I_gray, _posx, _posy, "Color stream");
  display2->init(I_depth, _posx + I_gray.getWidth() + _posdx, _posy, "Depth stream");
#endif

  vpDisplay::display(I_gray);
  vpDisplay::display(I_depth);
  vpDisplay::flush(I_gray);
  vpDisplay::flush(I_depth);

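  // The generic tracker handles two cameras: "Camera1" is the color camera, tracked with moving edges
  // (and KLT keypoints when OpenCV imgproc/video modules are available), and "Camera2" is the depth
  // camera, tracked with the dense depth feature.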
  std::vector<int> trackerTypes;
#if defined(VISP_HAVE_OPENCV) && defined(HAVE_OPENCV_IMGPROC) && defined(HAVE_OPENCV_VIDEO)
  trackerTypes.push_back(vpMbGenericTracker::EDGE_TRACKER | vpMbGenericTracker::KLT_TRACKER);
#else
  trackerTypes.push_back(vpMbGenericTracker::EDGE_TRACKER);
#endif
  trackerTypes.push_back(vpMbGenericTracker::DEPTH_DENSE_TRACKER);
  vpMbGenericTracker tracker(trackerTypes);
#if defined(VISP_HAVE_PUGIXML)
  tracker.loadConfigFile(config_color, config_depth);
#else
  {
    vpCameraParameters cam_color, cam_depth;
    cam_color.initPersProjWithoutDistortion(614.9, 614.9, 320.2, 241.5);
    cam_depth.initPersProjWithoutDistortion(384.0, 384.0, 320.5, 235.6);
    tracker.setCameraParameters(cam_color, cam_depth);
  }

  // Edge
  vpMe me;
  me.setMaskSize(5);
  me.setMaskNumber(180);
  me.setRange(7);
  me.setLikelihoodThresholdType(vpMe::NORMALIZED_THRESHOLD);
  me.setThreshold(10);
  me.setMu1(0.5);
  me.setMu2(0.5);
  me.setSampleStep(4);
  tracker.setMovingEdge(me);

  // Klt
#if defined(VISP_HAVE_MODULE_KLT) && defined(VISP_HAVE_OPENCV) && defined(HAVE_OPENCV_IMGPROC) && defined(HAVE_OPENCV_VIDEO)
  vpKltOpencv klt;
  tracker.setKltMaskBorder(5);
  klt.setMaxFeatures(300);
  klt.setWindowSize(5);
  klt.setQuality(0.01);
  klt.setMinDistance(5);
  klt.setHarrisFreeParameter(0.01);
  klt.setBlockSize(3);
  klt.setPyramidLevels(3);

  tracker.setKltOpencv(klt);
#endif

  // Depth
  tracker.setDepthNormalFeatureEstimationMethod(vpMbtFaceDepthNormal::ROBUST_FEATURE_ESTIMATION);
  tracker.setDepthNormalPclPlaneEstimationMethod(2);
  tracker.setDepthNormalPclPlaneEstimationRansacMaxIter(200);
  tracker.setDepthNormalPclPlaneEstimationRansacThreshold(0.001);
  tracker.setDepthNormalSamplingStep(2, 2);

  tracker.setDepthDenseSamplingStep(4, 4);

  tracker.setAngleAppear(vpMath::rad(80.0));
  tracker.setAngleDisappear(vpMath::rad(85.0));
  tracker.setNearClippingDistance(0.001);
  tracker.setFarClippingDistance(5.0);
  tracker.setClipping(tracker.getClipping() | vpMbtPolygon::FOV_CLIPPING);
#endif
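  // Load the 3D CAD model (.cao), once for the color tracker and once for the depth tracker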
  tracker.loadModel(model_color, model_depth);

  tracker.getCameraParameters(cam_color, cam_depth);

  std::cout << "Camera parameters for color camera (from XML file): " << cam_color << std::endl;
  std::cout << "Camera parameters for depth camera (from XML file): " << cam_depth << std::endl;

  tracker.setDisplayFeatures(true);

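  // depth_M_color is the constant transformation between the depth and the color camera frames. It is
  // read from the data directory and passed to the tracker so that the pose estimated in the color
  // frame can be transferred to the depth frame.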
  vpHomogeneousMatrix depth_M_color;
  {
    std::ifstream file((std::string(input_directory + "/depth_M_color.txt")).c_str());
    depth_M_color.load(file);
    file.close();
  }
  std::map<std::string, vpHomogeneousMatrix> mapOfCameraTransformations;
  mapOfCameraTransformations["Camera1"] = vpHomogeneousMatrix();
  mapOfCameraTransformations["Camera2"] = depth_M_color;
  tracker.setCameraTransformationMatrix(mapOfCameraTransformations);
  std::cout << "depth_M_color: \n" << depth_M_color << std::endl;

  std::map<std::string, const vpImage<unsigned char> *> mapOfImages;
  mapOfImages["Camera1"] = &I_gray;
  mapOfImages["Camera2"] = &I_depth;

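  // Initialize the pose by clicking in the color image on the points listed in the .init file.
  // Only "Camera1" needs an init file; the depth camera pose is deduced through depth_M_color.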
  std::map<std::string, std::string> mapOfInitFiles;
  mapOfInitFiles["Camera1"] = init_file;
  tracker.initClick(mapOfImages, mapOfInitFiles, true);

  mapOfImages.clear();
  pcl::PointCloud<pcl::PointXYZ>::Ptr empty_pointcloud(new pcl::PointCloud<pcl::PointXYZ>);
  std::vector<double> times_vec;

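  // Main loop: read the next frame, feed the tracker with the grayscale image ("Camera1") and the point
  // cloud ("Camera2", or an empty cloud when --disable_depth is used), then retrieve and display the pose.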
  try {
    bool quit = false;
    while (!quit) {
      double t = vpTime::measureTimeMs();
      quit = !read_data(frame_cpt, input_directory, I_color, I_depth_raw, pointcloud);
      vpImageConvert::convert(I_color, I_gray);
      vpImageConvert::createDepthHistogram(I_depth_raw, I_depth);

      vpDisplay::display(I_gray);
      vpDisplay::display(I_depth);

      mapOfImages["Camera1"] = &I_gray;
      std::map<std::string, pcl::PointCloud<pcl::PointXYZ>::ConstPtr> mapOfPointclouds;
      if (disable_depth)
        mapOfPointclouds["Camera2"] = empty_pointcloud;
      else
        mapOfPointclouds["Camera2"] = pointcloud;

      tracker.track(mapOfImages, mapOfPointclouds);

      vpHomogeneousMatrix cMo = tracker.getPose();

      std::cout << "iter: " << frame_cpt << " cMo:\n" << cMo << std::endl;

      tracker.display(I_gray, I_depth, cMo, depth_M_color * cMo, cam_color, cam_depth, vpColor::red, 3);
      vpDisplay::displayFrame(I_gray, cMo, cam_color, 0.05, vpColor::none, 3);
      vpDisplay::displayFrame(I_depth, depth_M_color * cMo, cam_depth, 0.05, vpColor::none, 3);

      t = vpTime::measureTimeMs() - t;
      times_vec.push_back(t);

      std::stringstream ss;
      ss << "Computation time: " << t << " ms";
      vpDisplay::displayText(I_gray, 20, 20, ss.str(), vpColor::red);
      {
        std::stringstream ss;
        ss << "Nb features: " << tracker.getError().size();
        vpDisplay::displayText(I_gray, I_gray.getHeight() - 50, 20, ss.str(), vpColor::red);
      }
      {
        std::stringstream ss;
        ss << "Features: edges " << tracker.getNbFeaturesEdge() << ", klt " << tracker.getNbFeaturesKlt() << ", depth "
           << tracker.getNbFeaturesDepthDense();
        vpDisplay::displayText(I_gray, I_gray.getHeight() - 30, 20, ss.str(), vpColor::red);
      }

      vpDisplay::flush(I_gray);
      vpDisplay::flush(I_depth);

      vpMouseButton::vpMouseButtonType button;
      if (vpDisplay::getClick(I_gray, button, false)) {
        quit = true;
      }

      frame_cpt++;
    }
  }
  catch (const vpException &e) {
    std::cout << "Catch exception: " << e.getStringMessage() << std::endl;
  }

#if (VISP_CXX_STANDARD < VISP_CXX_STANDARD_11) && defined(VISP_HAVE_DISPLAY)
  if (display1 != nullptr) {
    delete display1;
  }
  if (display2 != nullptr) {
    delete display2;
  }
#endif

  std::cout << "\nProcessing time, Mean: " << vpMath::getMean(times_vec)
            << " ms ; Median: " << vpMath::getMedian(times_vec) << " ms ; Std: " << vpMath::getStdev(times_vec)
            << " ms" << std::endl;

  vpDisplay::displayText(I_gray, 60, 20, "Click to quit", vpColor::red);
  vpDisplay::flush(I_gray);
  vpDisplay::getClick(I_gray);

  return EXIT_SUCCESS;
}
#else
int main()
{
  std::cout << "To run this tutorial, ViSP should be built with the PCL library."
               " Install libpcl, then configure and build ViSP again..."
            << std::endl;
  return EXIT_SUCCESS;
}
#endif