Visual Servoing Platform  version 3.6.1 under development (2024-07-27)
tutorial-pose-from-planar-object.cpp
1 
3 // Core
4 #include <visp3/core/vpColorDepthConversion.h>
5 #include <visp3/core/vpConfig.h>
6 #include <visp3/core/vpIoTools.h>
7 #include <visp3/core/vpMeterPixelConversion.h>
8 #include <visp3/core/vpXmlParserCamera.h>
9 
10 // Vision
11 #include <visp3/vision/vpPlaneEstimation.h>
12 #include <visp3/vision/vpPose.h>
13 
14 // IO
15 #include <visp3/io/vpImageIo.h>
16 
17 // GUI
18 #include <visp3/gui/vpDisplayD3D.h>
19 #include <visp3/gui/vpDisplayGDI.h>
20 #include <visp3/gui/vpDisplayGTK.h>
21 #include <visp3/gui/vpDisplayOpenCV.h>
22 #include <visp3/gui/vpDisplayX.h>
23 
24 // Check if std:c++17 or higher
25 #if ((__cplusplus >= 201703L) || (defined(_MSVC_LANG) && (_MSVC_LANG >= 201703L))) && defined(VISP_HAVE_DISPLAY) && defined(VISP_HAVE_PUGIXML)
26 
27 #ifdef ENABLE_VISP_NAMESPACE
28 using namespace VISP_NAMESPACE_NAME;
29 #endif
30 
31 // Local helper
32 namespace
33 {
34 // Display
35 #if defined(VISP_HAVE_X11)
36 using Display = vpDisplayX;
37 #elif defined(VISP_HAVE_GDI)
38 using Display = vpDisplayGDI;
39 #elif defined(VISP_HAVE_OPENCV)
40 using Display = vpDisplayOpenCV;
41 #elif defined(VISP_HAVE_GTK)
42 using Display = vpDisplayGTK;
43 #elif defined(VISP_HAVE_D3D9)
44 using Display = vpDisplayD3D;
45 #endif
46 
47 constexpr auto DispScaleType { vpDisplay::SCALE_AUTO };
48 
49 // Model
50 constexpr auto ModelCommentHeader { "#" };
51 constexpr auto ModelKeypointsHeader { "Keypoints" };
52 constexpr auto ModelBoundsHeader { "Bounds" };
53 constexpr auto ModelDataHeader { "data:" };
54 
55 // Depth
56 constexpr auto DepthScale { 0.001 };
57 } // namespace
58 
59 #ifndef DOXYGEN_SHOULD_SKIP_THIS
60 class Model
61 {
62 public:
63  Model() = delete;
64  ~Model() = default;
65  Model(const Model &) = default;
66  Model(Model &&) = default;
67  Model &operator=(const Model &) = default;
68  Model &operator=(Model &&) = default;
69 
70  explicit Model(const std::string &model_filename);
71 
72 public:
73  using Id = unsigned long int;
74  static inline std::string to_string(const Id &id) { return std::to_string(id); };
75 
76  std::map<Id, vpPoint> keypoints(const vpHomogeneousMatrix &cMo = {}) const;
77  std::map<Id, vpPoint> bounds(const vpHomogeneousMatrix &cMo = {}) const;
78 
79 private:
80  std::map<Id, vpPoint> m_keypoints {};
81  std::map<Id, vpPoint> m_bounds {};
82 };
83 
84 inline Model::Model(const std::string &model_filename)
85 {
86  std::fstream file;
87  file.open(model_filename.c_str(), std::fstream::in);
88 
89  std::string line {}, subs {};
90  bool in_model_bounds { false };
91  bool in_model_keypoints { false };
92  unsigned int data_curr_line { 0 };
93  unsigned int data_line_start_pos { 0 };
94 
95  auto reset = [&]() {
96  in_model_bounds = false;
97  in_model_keypoints = false;
98  data_curr_line = 0;
99  data_line_start_pos = 0;
100  };
101 
102  while (getline(file, line)) {
103  if (line.substr(0, std::string(ModelCommentHeader).size()) == ModelCommentHeader || line == ModelDataHeader) {
104  continue;
105  }
106  else if (line == ModelBoundsHeader) {
107  reset();
108  in_model_bounds = true;
109  continue;
110  }
111  else if (line == ModelKeypointsHeader) {
112  reset();
113  in_model_keypoints = true;
114  continue;
115  }
116 
117  if (data_curr_line == 0) {
118  // Get indentation level which is common to all lines
119  data_line_start_pos = (unsigned int)line.find("[") + 1;
120  }
121 
122  try {
123  std::stringstream ss(line.substr(data_line_start_pos, line.find("]") - data_line_start_pos));
124  unsigned int data_on_curr_line = 0;
125  vpColVector oXYZ({ 0, 0, 0, 1 });
126  while (getline(ss, subs, ',')) {
127  oXYZ[data_on_curr_line++] = std::atof(subs.c_str());
128  }
129  if (in_model_bounds) {
130  m_bounds.try_emplace(data_curr_line, oXYZ[0], oXYZ[1], oXYZ[2]);
131  }
132  else if (in_model_keypoints) {
133  m_keypoints.try_emplace(data_curr_line, oXYZ[0], oXYZ[1], oXYZ[2]);
134  }
135  data_curr_line++;
136  }
137  catch (...) {
138  // Line is empty or incomplete. We skeep it
139  }
140  }
141 
142  file.close();
143 }
144 
145 inline std::map<Model::Id, vpPoint> Model::bounds(const vpHomogeneousMatrix &cMo) const
146 {
147  auto bounds = m_bounds;
148  std::for_each(begin(bounds), end(bounds), [&cMo](auto &bound) { bound.second.project(cMo); });
149 
150  return bounds;
151 }
152 
153 inline std::map<Model::Id, vpPoint> Model::keypoints(const vpHomogeneousMatrix &cMo) const
154 {
155  auto keypoints = m_keypoints;
156  std::for_each(begin(keypoints), end(keypoints), [&cMo](auto &keypoint) { keypoint.second.project(cMo); });
157 
158  return keypoints;
159 }
160 
161 std::ostream &operator<<(std::ostream &os, const Model &model)
162 {
163  os << "-Bounds:" << std::endl;
164  for (const auto &[id, bound] : model.bounds()) {
165  // clang-format off
166  os << std::setw(4) << std::setfill(' ') << id << ": "
167  << std::setw(6) << std::setfill(' ') << bound.get_X() << ", "
168  << std::setw(6) << std::setfill(' ') << bound.get_Y() << ", "
169  << std::setw(6) << std::setfill(' ') << bound.get_Z() << std::endl;
170  // clang-format on
171  }
172 
173  os << "-Keypoints:" << std::endl;
174  for (const auto &[id, keypoint] : model.keypoints()) {
175  // clang-format off
176  os << std::setw(4) << std::setfill(' ') << id << ": "
177  << std::setw(6) << std::setfill(' ') << keypoint.get_X() << ", "
178  << std::setw(6) << std::setfill(' ') << keypoint.get_Y() << ", "
179  << std::setw(6) << std::setfill(' ') << keypoint.get_Z() << std::endl;
180  // clang-format on
181  }
182 
183  return os;
184 }
185 
187 readData(const std::string &input_directory, const unsigned int cpt = 0)
188 {
189  char buffer[FILENAME_MAX];
190  std::stringstream ss;
191  ss << input_directory << "/color_image_%04d.jpg";
192  snprintf(buffer, FILENAME_MAX, ss.str().c_str(), cpt);
193  const std::string filename_color = buffer;
194 
195  ss.str("");
196  ss << input_directory << "/depth_image_%04d.bin";
197  snprintf(buffer, FILENAME_MAX, ss.str().c_str(), cpt);
198  const std::string filename_depth = buffer;
199 
200  // Read color
201  vpImage<vpRGBa> I_color {};
202  vpImageIo::read(I_color, filename_color);
203 
204  // Read raw depth
205  vpImage<uint16_t> I_depth_raw {};
206  std::ifstream file_depth(filename_depth.c_str(), std::ios::in | std::ios::binary);
207  if (file_depth.is_open()) {
208  unsigned int height = 0, width = 0;
209  vpIoTools::readBinaryValueLE(file_depth, height);
210  vpIoTools::readBinaryValueLE(file_depth, width);
211  I_depth_raw.resize(height, width);
212 
213  for (auto i = 0u; i < height; i++) {
214  for (auto j = 0u; j < width; j++) {
215  vpIoTools::readBinaryValueLE(file_depth, I_depth_raw[i][j]);
216  }
217  }
218  }
219 
220  // Read camera parameters (intrinsics)
221  ss.str("");
222  ss << input_directory << "/camera.xml";
223 
224  vpXmlParserCamera parser {};
225  vpCameraParameters color_param {}, depth_param {};
226  parser.parse(color_param, ss.str(), "color_camera", vpCameraParameters::perspectiveProjWithDistortion);
227  parser.parse(depth_param, ss.str(), "depth_camera", vpCameraParameters::perspectiveProjWithDistortion);
228 
229  // Read camera parameters (extrinsics)
230  ss.str("");
231  ss << input_directory << "/depth_M_color.txt";
232  std::ifstream file_depth_M_color(ss.str().c_str(), std::ios::in | std::ios::binary);
233 
234  vpHomogeneousMatrix depth_M_color {};
235  depth_M_color.load(file_depth_M_color);
236 
237  return { I_color, I_depth_raw, color_param, depth_param, depth_M_color };
238 }
239 
240 std::vector<vpImagePoint> getRoiFromUser(vpImage<vpRGBa> color_img)
241 {
242  // Init displays
243  Display disp_color(color_img, 0, 0, "Roi bounds", DispScaleType);
244  disp_color.display(color_img);
245  disp_color.flush(color_img);
246 
247  std::vector<vpImagePoint> v_ip {};
248  do {
249  // Prepare display
250  disp_color.display(color_img);
251  auto disp_lane { 0 };
252 
253  vpDisplay::displayText(color_img, 15 * ++disp_lane, 15, "Select point along the d435 box boundary", vpColor::green);
254  vpDisplay::displayText(color_img, 15 * ++disp_lane, 15, "Left click to select a point", vpColor::green);
255  vpDisplay::displayText(color_img, 15 * ++disp_lane, 15, "Middle click to remove the last point", vpColor::green);
256  vpDisplay::displayText(color_img, 15 * ++disp_lane, 15, "Right click to finish/quit", vpColor::green);
257 
258  // Display already selected points
259  for (auto j = 0u; j < v_ip.size(); j++) {
260  vpDisplay::displayCross(color_img, v_ip.at(j), 15, vpColor::green);
261  vpDisplay::displayText(color_img, v_ip.at(j) + vpImagePoint(10, 10), std::to_string(j), vpColor::green);
262  }
263  disp_color.flush(color_img);
264 
265  // Wait for new point
266  vpImagePoint ip {};
268  vpDisplay::getClick(color_img, ip, button, true);
269 
270  switch (button) {
271  case vpMouseButton::button2: {
272  if (v_ip.size() > 0) {
273  v_ip.erase(std::prev(end(v_ip)));
274  }
275  break;
276  }
277 
278  case vpMouseButton::button3: {
279  return v_ip;
280  }
281 
282  default: {
283  v_ip.push_back(ip);
284  break;
285  }
286  }
287 
288  } while (1);
289 }
290 
291 std::map<Model::Id, vpImagePoint> getKeypointsFromUser(vpImage<vpRGBa> color_img, const Model &model,
292  const std::string &parent_data)
293 {
294  // Init displays
295  Display disp_color(color_img, 0, 0, "Keypoints", DispScaleType);
296  disp_color.display(color_img);
297  disp_color.flush(color_img);
298 
299  vpImage<vpRGBa> I_help {};
300  vpImageIo::read(I_help, parent_data + "/data/d435_box_keypoints_user_helper.png");
301  Display disp_help(I_help, disp_color.getWindowXPosition() + color_img.getWidth(), disp_color.getWindowYPosition(),
302  "Keypoints [help]", DispScaleType);
303  disp_help.display(I_help);
304  disp_help.flush(I_help);
305 
306  std::map<Model::Id, vpImagePoint> keypoints {};
307  // - The next line produces an internal compiler error with Visual Studio 2017:
308  // tutorial-pose-from-planar-object.cpp(304): fatal error C1001: An internal error has occurred in the compiler.
309  // [C:\projects\visp\build\tutorial\computer-vision\tutorial-pose-from-planar-object.vcxproj] (compiler file
310  // 'd:\agent\_work\8\s\src\vctools\compiler\cxxfe\sl\p1\cxx\grammar.y', line 12721)
311  // To work around this problem, try simplifying or changing the program near the locations listed above.
312  // Please choose the Technical Support command on the Visual C++
313  // Help menu, or open the Technical Support help file for more information
314  // - Note that the next line builds with Visual Studio 2022.
315  // for ([[maybe_unused]] const auto &[id, _] : model.keypoints()) {
316  for (const auto &[id, ip_unused] : model.keypoints()) {
317  (void)ip_unused;
318  // Prepare display
319  disp_color.display(color_img);
320  auto disp_lane { 0 };
321 
322  vpDisplay::displayText(color_img, 15 * ++disp_lane, 15, "Select the keypoints " + Model::to_string(id),
324  vpDisplay::displayText(color_img, 15 * ++disp_lane, 15, "Click to select a point", vpColor::green);
325 
326  // Display already selected points
327  for (const auto &[id, keypoint] : keypoints) {
328  vpDisplay::displayCross(color_img, keypoint, 15, vpColor::green);
329  vpDisplay::displayText(color_img, keypoint + vpImagePoint(10, 10), Model::to_string(id), vpColor::green);
330  }
331  disp_color.flush(color_img);
332 
333  // Wait for new point
334  vpImagePoint ip {};
335  vpDisplay::getClick(color_img, ip, true);
336  keypoints.try_emplace(id, ip);
337  }
338 
339  return keypoints;
340 }
341 #endif // DOXYGEN_SHOULD_SKIP_THIS
342 #endif
343 
344 int main(int, char *argv[])
345 {
346 #if ((__cplusplus >= 201703L) || (defined(_MSVC_LANG) && (_MSVC_LANG >= 201703L)))
347 
348 #if defined(VISP_HAVE_DISPLAY) && defined(VISP_HAVE_PUGIXML)
349 
350  // Get prior data
352  auto [color_img, depth_raw, color_param, depth_param, depth_M_color] =
353  readData(vpIoTools::getParent(argv[0]) + "/data/d435_not_align_depth", 0);
354  const auto model = Model(vpIoTools::getParent(argv[0]) + "/data/d435_box.model");
356 
357  std::cout << "color_param:" << std::endl << color_param << std::endl;
358  std::cout << "depth_param:" << std::endl << depth_param << std::endl;
359  std::cout << "depth_M_color:" << std::endl << depth_M_color << std::endl;
360  std::cout << std::endl << "Model:" << std::endl << model << std::endl;
361 
362  // Init display
363  Display display_color(color_img, 0, 0, "Color", DispScaleType);
364  display_color.display(color_img);
365  display_color.flush(color_img);
366 
367  vpImage<unsigned char> depth_img {};
368  vpImageConvert::createDepthHistogram(depth_raw, depth_img);
369  Display display_depth(depth_img, display_color.getWindowXPosition() + display_color.getWidth(), 0, "Depth",
370  DispScaleType);
371  display_depth.display(depth_img);
372  display_depth.flush(depth_img);
373 
374  // Ask roi for plane estimation
376  vpPolygon roi_color_img {};
377  roi_color_img.build(getRoiFromUser(color_img), true);
378 
379  std::vector<vpImagePoint> roi_corners_depth_img {};
380  std::transform(
381  cbegin(roi_color_img.getCorners()), cend(roi_color_img.getCorners()), std::back_inserter(roi_corners_depth_img),
382  std::bind((vpImagePoint(*)(const vpImage<uint16_t> &, double, double, double, const vpCameraParameters &,
384  const vpImagePoint &)) &
386  depth_raw, DepthScale, 0.1, 0.6, depth_param, color_param, depth_M_color.inverse(), depth_M_color,
387  std::placeholders::_1));
388  const vpPolygon roi_depth_img { roi_corners_depth_img };
390 
391  vpDisplay::displayPolygon(depth_img, roi_depth_img.getCorners(), vpColor::green);
392  display_depth.flush(depth_img);
393 
394  // Estimate the plane
395  vpImage<vpRGBa> heat_map {};
397  const auto obj_plane_in_depth =
398  vpPlaneEstimation::estimatePlane(depth_raw, DepthScale, depth_param, roi_depth_img, 1000, heat_map);
399  if (!obj_plane_in_depth) {
400  return EXIT_FAILURE;
401  }
402 
403  // Get the plane in color frame
404  auto obj_plane_in_color = *obj_plane_in_depth;
405  obj_plane_in_color.changeFrame(depth_M_color.inverse());
407 
408  Display display_heat_map(heat_map, display_depth.getWindowXPosition(),
409  display_depth.getWindowYPosition() + display_depth.getHeight(), "Plane Estimation Heat map",
410  DispScaleType);
411  display_heat_map.display(heat_map);
412  display_heat_map.flush(heat_map);
413 
414  // Ask user to click on keypoints
415  const auto keypoint_color_img = getKeypointsFromUser(color_img, model, vpIoTools::getParent(argv[0]));
416 
418  const auto cMo = vpPose::computePlanarObjectPoseWithAtLeast3Points(obj_plane_in_color, model.keypoints(),
419  keypoint_color_img, color_param);
420  if (!cMo) {
421  return EXIT_FAILURE;
422  }
424 
425  // Display the model
426  std::vector<vpImagePoint> d435_box_bound {};
427  // - The next line produces an internal compiler error with Visual Studio 2017:
428  // tutorial-pose-from-planar-object.cpp(428): fatal error C1001: An internal error has occurred in the compiler.
429  // [C:\projects\visp\build\tutorial\computer-vision\tutorial-pose-from-planar-object.vcxproj] (compiler file
430  // 'd:\agent\_work\8\s\src\vctools\compiler\cxxfe\sl\p1\cxx\grammar.y', line 12721)
431  // To work around this problem, try simplifying or changing the program near the locations listed above.
432  // Please choose the Technical Support command on the Visual C++
433  // Help menu, or open the Technical Support help file for more information
434  // - Note that the next line builds with Visual Studio 2022.
435  //
436  // for ([[maybe_unused]] const auto &[_, bound] : model.bounds(*cMo)) {
437  for (const auto &[id_unused, bound] : model.bounds(*cMo)) {
438  (void)id_unused;
439  vpImagePoint ip {};
440  vpMeterPixelConversion::convertPoint(color_param, bound.get_x(), bound.get_y(), ip);
441  d435_box_bound.push_back(ip);
442  }
443  vpDisplay::displayPolygon(color_img, d435_box_bound, vpColor::blue);
444 
445  for (const auto &[id, keypoint] : model.keypoints(*cMo)) {
446  vpImagePoint ip {};
447  vpMeterPixelConversion::convertPoint(color_param, keypoint.get_x(), keypoint.get_y(), ip);
448  vpDisplay::displayCross(color_img, ip, 15, vpColor::orange);
449  vpDisplay::displayText(color_img, ip + vpImagePoint(10, 10), Model::to_string(id), vpColor::orange);
450  }
451  vpDisplay::flush(color_img);
452 
453  // Display the frame
454  vpDisplay::displayFrame(color_img, *cMo, color_param, 0.05, vpColor::none, 3);
455 
456  // Wait before exiting
457  auto disp_lane { 0 };
458  vpDisplay::displayText(color_img, 15 * ++disp_lane, 15, "D435 box boundary [from model]", vpColor::blue);
459  vpDisplay::displayText(color_img, 15 * ++disp_lane, 15, "Keypoints [from model]", vpColor::orange);
460  vpDisplay::displayText(color_img, 15 * ++disp_lane, 15, "Click to quit", vpColor::green);
461  vpDisplay::flush(color_img);
462 
463  vpDisplay::getClick(color_img);
464 
465 #else
466  (void)argv;
467  std::cout << "There is no display and pugixml available to run this tutorial." << std::endl;
468 #endif // defined(VISP_HAVE_DISPLAY)
469 #else
470  (void)argv;
471  std::cout << "c++17 should be enabled to run this tutorial." << std::endl;
472 #endif
473 
474  return EXIT_SUCCESS;
475 }
Generic class defining intrinsic camera parameters.
@ perspectiveProjWithDistortion
Perspective projection with distortion model.
Implementation of column vector and the associated operations.
Definition: vpColVector.h:191
static vpImagePoint projectColorToDepth(const vpImage< uint16_t > &I_depth, double depth_scale, double depth_min, double depth_max, const vpCameraParameters &depth_intrinsics, const vpCameraParameters &color_intrinsics, const vpHomogeneousMatrix &color_M_depth, const vpHomogeneousMatrix &depth_M_color, const vpImagePoint &from_pixel)
static const vpColor none
Definition: vpColor.h:229
static const vpColor orange
Definition: vpColor.h:227
static const vpColor blue
Definition: vpColor.h:223
static const vpColor green
Definition: vpColor.h:220
Display for windows using Direct3D 3rd party. Thus to enable this class Direct3D should be installed....
Definition: vpDisplayD3D.h:106
Display for windows using GDI (available on any windows 32 platform).
Definition: vpDisplayGDI.h:130
The vpDisplayGTK allows to display image using the GTK 3rd party library. Thus to enable this class G...
Definition: vpDisplayGTK.h:133
The vpDisplayOpenCV allows to display image using the OpenCV library. Thus to enable this class OpenC...
Use the X11 console to display images on unix-like OS. Thus to enable this class X11 should be instal...
Definition: vpDisplayX.h:135
static bool getClick(const vpImage< unsigned char > &I, bool blocking=true)
static void displayFrame(const vpImage< unsigned char > &I, const vpHomogeneousMatrix &cMo, const vpCameraParameters &cam, double size, const vpColor &color=vpColor::none, unsigned int thickness=1, const vpImagePoint &offset=vpImagePoint(0, 0), const std::string &frameName="", const vpColor &textColor=vpColor::black, const vpImagePoint &textOffset=vpImagePoint(15, 15))
static void displayCross(const vpImage< unsigned char > &I, const vpImagePoint &ip, unsigned int size, const vpColor &color, unsigned int thickness=1)
static void flush(const vpImage< unsigned char > &I)
@ SCALE_AUTO
Definition: vpDisplay.h:184
static void displayText(const vpImage< unsigned char > &I, const vpImagePoint &ip, const std::string &s, const vpColor &color)
static void displayPolygon(const vpImage< unsigned char > &I, const std::vector< vpImagePoint > &vip, const vpColor &color, unsigned int thickness=1, bool closed=true)
Implementation of an homogeneous matrix and operations on such kind of matrices.
void load(std::ifstream &f)
static void createDepthHistogram(const vpImage< uint16_t > &src_depth, vpImage< vpRGBa > &dest_rgba)
static void read(vpImage< unsigned char > &I, const std::string &filename, int backend=IO_DEFAULT_BACKEND)
Definition: vpImageIo.cpp:147
Class that defines a 2D point in an image. This class is useful for image processing and stores only ...
Definition: vpImagePoint.h:82
Definition of the vpImage class member functions.
Definition: vpImage.h:131
unsigned int getWidth() const
Definition: vpImage.h:242
static void readBinaryValueLE(std::ifstream &file, int16_t &short_value)
static std::string getParent(const std::string &pathname)
Definition: vpIoTools.cpp:1314
static void convertPoint(const vpCameraParameters &cam, const double &x, const double &y, double &u, double &v)
static std::optional< vpPlane > estimatePlane(const vpImage< uint16_t > &I_depth_raw, double depth_scale, const vpCameraParameters &depth_intrinsics, const vpPolygon &roi, const unsigned int avg_nb_of_pts_to_estimate=500, std::optional< std::reference_wrapper< vpImage< vpRGBa > > > heat_map={})
Defines a generic 2D polygon.
Definition: vpPolygon.h:103
vpPolygon & build(const std::vector< vpImagePoint > &corners, const bool &create_convex_hull=false)
Definition: vpPolygon.cpp:244
static std::optional< vpHomogeneousMatrix > computePlanarObjectPoseWithAtLeast3Points(const vpPlane &plane_in_camera_frame, const std::map< DataId, vpPoint > &pts, const std::map< DataId, vpImagePoint > &ips, const vpCameraParameters &camera_intrinsics, std::optional< vpHomogeneousMatrix > cMo_init=std::nullopt, bool enable_vvs=true)
Definition: vpPose.h:694
XML parser to load and save intrinsic camera parameters.