Visual Servoing Platform  version 3.6.1 under development (2024-07-27)
tutorial-apriltag-detector-live-rgbd-realsense.cpp
1 #include <visp3/core/vpConfig.h>
3 #ifdef VISP_HAVE_MODULE_SENSOR
4 #include <visp3/sensor/vpRealSense2.h>
5 #endif
7 #include <visp3/detection/vpDetectorAprilTag.h>
9 #include <visp3/core/vpImageConvert.h>
10 #include <visp3/gui/vpDisplayGDI.h>
11 #include <visp3/gui/vpDisplayOpenCV.h>
12 #include <visp3/gui/vpDisplayX.h>
13 #include <visp3/vision/vpPose.h>
14 
15 int main(int argc, const char **argv)
16 {
18 #if defined(VISP_HAVE_APRILTAG) && defined(VISP_HAVE_REALSENSE2)
20 #ifdef ENABLE_VISP_NAMESPACE
21  using namespace VISP_NAMESPACE_NAME;
22 #endif
23 
26  double tagSize = 0.053;
27  float quad_decimate = 1.0;
28  int nThreads = 1;
29  bool display_tag = false;
30  int color_id = -1;
31  unsigned int thickness = 2;
32  bool align_frame = false;
33  bool opt_verbose = false;
34 
35 #if !(defined(VISP_HAVE_X11) || defined(VISP_HAVE_GDI) || defined(VISP_HAVE_OPENCV))
36  bool display_off = true;
37  std::cout << "Warning: There is no 3rd party (X11, GDI or openCV) to dislay images..." << std::endl;
38 #else
39  bool display_off = false;
40 #endif
41 
42  for (int i = 1; i < argc; i++) {
43  if (std::string(argv[i]) == "--pose_method" && i + 1 < argc) {
44  poseEstimationMethod = (vpDetectorAprilTag::vpPoseEstimationMethod)atoi(argv[i + 1]);
45  }
46  else if (std::string(argv[i]) == "--tag_size" && i + 1 < argc) {
47  tagSize = atof(argv[i + 1]);
48  }
49  else if (std::string(argv[i]) == "--quad_decimate" && i + 1 < argc) {
50  quad_decimate = (float)atof(argv[i + 1]);
51  }
52  else if (std::string(argv[i]) == "--nthreads" && i + 1 < argc) {
53  nThreads = atoi(argv[i + 1]);
54  }
55  else if (std::string(argv[i]) == "--display_tag") {
56  display_tag = true;
57  }
58  else if (std::string(argv[i]) == "--display_off") {
59  display_off = true;
60  }
61  else if (std::string(argv[i]) == "--color" && i + 1 < argc) {
62  color_id = atoi(argv[i + 1]);
63  }
64  else if (std::string(argv[i]) == "--thickness" && i + 1 < argc) {
65  thickness = (unsigned int)atoi(argv[i + 1]);
66  }
67  else if (std::string(argv[i]) == "--tag_family" && i + 1 < argc) {
68  tagFamily = (vpDetectorAprilTag::vpAprilTagFamily)atoi(argv[i + 1]);
69  }
70  else if (std::string(argv[i]) == "--z_aligned") {
71  align_frame = true;
72  }
73  else if (std::string(argv[i]) == "--verbose" || std::string(argv[i]) == "-v") {
74  opt_verbose = true;
75  }
76  else if (std::string(argv[i]) == "--help" || std::string(argv[i]) == "-h") {
77  std::cout << "Usage: " << argv[0]
78  << " [--tag_size <tag_size in m> (default: 0.053)]"
79  " [--quad_decimate <quad_decimate> (default: 1)]"
80  " [--nthreads <nb> (default: 1)]"
81  " [--pose_method <method> (0: HOMOGRAPHY, 1: HOMOGRAPHY_VIRTUAL_VS, "
82  " 2: DEMENTHON_VIRTUAL_VS, 3: LAGRANGE_VIRTUAL_VS, "
83  " 4: BEST_RESIDUAL_VIRTUAL_VS, 5: HOMOGRAPHY_ORTHOGONAL_ITERATION) (default: 0)]"
84  " [--tag_family <family> (0: TAG_36h11, 1: TAG_36h10 (DEPRECATED), 2: TAG_36ARTOOLKIT (DEPRECATED),"
85  " 3: TAG_25h9, 4: TAG_25h7 (DEPRECATED), 5: TAG_16h5, 6: TAG_CIRCLE21h7, 7: TAG_CIRCLE49h12,"
86  " 8: TAG_CUSTOM48h12, 9: TAG_STANDARD41h12, 10: TAG_STANDARD52h13) (default: 0)]"
87  " [--display_tag] [--z_aligned]";
88 #if (defined(VISP_HAVE_X11) || defined(VISP_HAVE_GDI) || defined(VISP_HAVE_OPENCV))
89  std::cout << " [--display_off] [--color <color id>] [--thickness <line thickness>]";
90 #endif
91  std::cout << " [--verbose,-v] [--help,-h]" << std::endl;
92  return EXIT_SUCCESS;
93  }
94  }
95 
96  try {
98  std::cout << "Use Realsense 2 grabber" << std::endl;
99  vpRealSense2 g;
100  rs2::config config;
101  unsigned int width = 640, height = 480;
102  config.enable_stream(RS2_STREAM_COLOR, static_cast<int>(width), static_cast<int>(height), RS2_FORMAT_RGBA8, 30);
103  config.enable_stream(RS2_STREAM_DEPTH, static_cast<int>(width), static_cast<int>(height), RS2_FORMAT_Z16, 30);
104  config.enable_stream(RS2_STREAM_INFRARED, static_cast<int>(width), static_cast<int>(height), RS2_FORMAT_Y8, 30);
105 
107  vpImage<vpRGBa> I_color(height, width);
108  vpImage<uint16_t> I_depth_raw(height, width);
109  vpImage<vpRGBa> I_depth;
110 
111  g.open(config);
112  const float depth_scale = g.getDepthScale();
113  std::cout << "I_color: " << I_color.getWidth() << " " << I_color.getHeight() << std::endl;
114  std::cout << "I_depth_raw: " << I_depth_raw.getWidth() << " " << I_depth_raw.getHeight() << std::endl;
115 
116  rs2::align align_to_color = RS2_STREAM_COLOR;
117  g.acquire(reinterpret_cast<unsigned char *>(I_color.bitmap), reinterpret_cast<unsigned char *>(I_depth_raw.bitmap),
118  nullptr, nullptr, &align_to_color);
119 
120  std::cout << "Read camera parameters from Realsense device" << std::endl;
121  vpCameraParameters cam;
124 
125  std::cout << cam << std::endl;
126  std::cout << "poseEstimationMethod: " << poseEstimationMethod << std::endl;
127  std::cout << "tagFamily: " << tagFamily << std::endl;
128  std::cout << "nThreads : " << nThreads << std::endl;
129  std::cout << "Z aligned: " << align_frame << std::endl;
130 
131  vpImage<vpRGBa> I_color2 = I_color;
132  vpImage<float> depthMap;
133  vpImageConvert::createDepthHistogram(I_depth_raw, I_depth);
134 
135  vpDisplay *d1 = nullptr;
136  vpDisplay *d2 = nullptr;
137  vpDisplay *d3 = nullptr;
138  if (!display_off) {
139 #ifdef VISP_HAVE_X11
140  d1 = new vpDisplayX(I_color, 100, 30, "Pose from Homography");
141  d2 = new vpDisplayX(I_color2, I_color.getWidth() + 120, 30, "Pose from RGBD fusion");
142  d3 = new vpDisplayX(I_depth, 100, I_color.getHeight() + 70, "Depth");
143 #elif defined(VISP_HAVE_GDI)
144  d1 = new vpDisplayGDI(I_color, 100, 30, "Pose from Homography");
145  d2 = new vpDisplayGDI(I_color2, I_color.getWidth() + 120, 30, "Pose from RGBD fusion");
146  d3 = new vpDisplayGDI(I_depth, 100, I_color.getHeight() + 70, "Depth");
147 #elif defined(HAVE_OPENCV_HIGHGUI)
148  d1 = new vpDisplayOpenCV(I_color, 100, 30, "Pose from Homography");
149  d2 = new vpDisplayOpenCV(I_color2, I_color.getWidth() + 120, 30, "Pose from RGBD fusion");
150  d3 = new vpDisplayOpenCV(I_depth, 100, I_color.getHeight() + 70, "Depth");
151 #endif
152  }
153 
155  vpDetectorAprilTag detector(tagFamily);
157 
159  detector.setAprilTagQuadDecimate(quad_decimate);
160  detector.setAprilTagPoseEstimationMethod(poseEstimationMethod);
161  detector.setAprilTagNbThreads(nThreads);
162  detector.setDisplayTag(display_tag, color_id < 0 ? vpColor::none : vpColor::getColor(color_id), thickness);
163  detector.setZAlignedWithCameraAxis(align_frame);
165  std::vector<double> time_vec;
166  for (;;) {
167  double t = vpTime::measureTimeMs();
168 
170  g.acquire(reinterpret_cast<unsigned char *>(I_color.bitmap),
171  reinterpret_cast<unsigned char *>(I_depth_raw.bitmap), nullptr, nullptr, &align_to_color);
173 
174  I_color2 = I_color;
175  vpImageConvert::convert(I_color, I);
176  vpImageConvert::createDepthHistogram(I_depth_raw, I_depth);
177 
178  depthMap.resize(I_depth_raw.getHeight(), I_depth_raw.getWidth());
179 #ifdef VISP_HAVE_OPENMP
180 #pragma omp parallel for
181 #endif
182  for (int i = 0; i < static_cast<int>(I_depth_raw.getHeight()); i++) {
183  for (int j = 0; j < static_cast<int>(I_depth_raw.getWidth()); j++) {
184  if (I_depth_raw[i][j]) {
185  float Z = I_depth_raw[i][j] * depth_scale;
186  depthMap[i][j] = Z;
187  }
188  else {
189  depthMap[i][j] = 0;
190  }
191  }
192  }
193 
194  vpDisplay::display(I_color);
195  vpDisplay::display(I_color2);
196  vpDisplay::display(I_depth);
197 
198  std::vector<vpHomogeneousMatrix> cMo_vec;
199  detector.detect(I, tagSize, cam, cMo_vec);
200 
201  // Display camera pose for each tag
202  for (size_t i = 0; i < cMo_vec.size(); i++) {
203  vpDisplay::displayFrame(I_color, cMo_vec[i], cam, tagSize / 2, vpColor::none, 3);
204  }
205 
207  std::vector<std::vector<vpImagePoint> > tags_corners = detector.getPolygon();
208  std::vector<int> tags_id = detector.getTagsId();
209  std::map<int, double> tags_size;
210  tags_size[-1] = tagSize; // Default tag size
211  std::vector<std::vector<vpPoint> > tags_points3d = detector.getTagsPoints3D(tags_id, tags_size);
212  for (size_t i = 0; i < tags_corners.size(); i++) {
214  double confidence_index;
215  if (vpPose::computePlanarObjectPoseFromRGBD(depthMap, tags_corners[i], cam, tags_points3d[i], cMo,
216  &confidence_index)) {
217  if (confidence_index > 0.5) {
218  vpDisplay::displayFrame(I_color2, cMo, cam, tagSize / 2, vpColor::none, 3);
219  }
220  else if (confidence_index > 0.25) {
221  vpDisplay::displayFrame(I_color2, cMo, cam, tagSize / 2, vpColor::orange, 3);
222  }
223  else {
224  vpDisplay::displayFrame(I_color2, cMo, cam, tagSize / 2, vpColor::red, 3);
225  }
226  std::stringstream ss;
227  ss << "Tag id " << tags_id[i] << " confidence: " << confidence_index;
228  vpDisplay::displayText(I_color2, 35 + i * 15, 20, ss.str(), vpColor::red);
229 
230  if (opt_verbose) {
231  std::cout << "cMo[" << i << "]: \n" << cMo_vec[i] << std::endl;
232  std::cout << "cMo[" << i << "] using depth: \n" << cMo << std::endl;
233  }
234  }
235  }
237 
238  vpDisplay::displayText(I_color, 20, 20, "Pose from homography + VVS", vpColor::red);
239  vpDisplay::displayText(I_color2, 20, 20, "Pose from RGBD fusion", vpColor::red);
240  vpDisplay::displayText(I_color, 35, 20, "Click to quit.", vpColor::red);
241  t = vpTime::measureTimeMs() - t;
242  time_vec.push_back(t);
243 
244  std::stringstream ss;
245  ss << "Detection time: " << t << " ms for " << detector.getNbObjects() << " tags";
246  vpDisplay::displayText(I_color, 50, 20, ss.str(), vpColor::red);
247 
248  if (vpDisplay::getClick(I_color, false))
249  break;
250 
251  vpDisplay::flush(I_color);
252  vpDisplay::flush(I_color2);
253  vpDisplay::flush(I_depth);
254  }
255 
256  std::cout << "Benchmark loop processing time" << std::endl;
257  std::cout << "Mean / Median / Std: " << vpMath::getMean(time_vec) << " ms"
258  << " ; " << vpMath::getMedian(time_vec) << " ms"
259  << " ; " << vpMath::getStdev(time_vec) << " ms" << std::endl;
260 
261  if (!display_off) {
262  delete d1;
263  delete d2;
264  delete d3;
265  }
266 
267  }
268  catch (const vpException &e) {
269  std::cerr << "Catch an exception: " << e.getMessage() << std::endl;
270 }
271 
272  return EXIT_SUCCESS;
273 #else
274  (void)argc;
275  (void)argv;
276 #ifndef VISP_HAVE_APRILTAG
277  std::cout << "Enable Apriltag support, configure and build ViSP to run this tutorial" << std::endl;
278 #else
279  std::cout << "Install librealsense 3rd party, configure and build ViSP again to use this example" << std::endl;
280 #endif
281 #endif
282  return EXIT_SUCCESS;
283 }
Generic class defining intrinsic camera parameters.
@ perspectiveProjWithoutDistortion
Perspective projection without distortion model.
static vpColor getColor(const unsigned int &i)
Definition: vpColor.h:311
static const vpColor red
Definition: vpColor.h:217
static const vpColor none
Definition: vpColor.h:229
static const vpColor orange
Definition: vpColor.h:227
@ TAG_36h11
AprilTag 36h11 pattern (recommended)
Display for windows using GDI (available on any windows 32 platform).
Definition: vpDisplayGDI.h:130
The vpDisplayOpenCV allows to display image using the OpenCV library. Thus to enable this class OpenC...
Use the X11 console to display images on unix-like OS. Thus to enable this class X11 should be instal...
Definition: vpDisplayX.h:135
Class that defines generic functionalities for display.
Definition: vpDisplay.h:178
static bool getClick(const vpImage< unsigned char > &I, bool blocking=true)
static void display(const vpImage< unsigned char > &I)
static void displayFrame(const vpImage< unsigned char > &I, const vpHomogeneousMatrix &cMo, const vpCameraParameters &cam, double size, const vpColor &color=vpColor::none, unsigned int thickness=1, const vpImagePoint &offset=vpImagePoint(0, 0), const std::string &frameName="", const vpColor &textColor=vpColor::black, const vpImagePoint &textOffset=vpImagePoint(15, 15))
static void flush(const vpImage< unsigned char > &I)
static void displayText(const vpImage< unsigned char > &I, const vpImagePoint &ip, const std::string &s, const vpColor &color)
error that can be emitted by ViSP classes.
Definition: vpException.h:60
const char * getMessage() const
Definition: vpException.cpp:65
Implementation of an homogeneous matrix and operations on such kind of matrices.
static void createDepthHistogram(const vpImage< uint16_t > &src_depth, vpImage< vpRGBa > &dest_rgba)
static void convert(const vpImage< unsigned char > &src, vpImage< vpRGBa > &dest)
void resize(unsigned int h, unsigned int w)
resize the image : Image initialization
Definition: vpImage.h:538
static double getMedian(const std::vector< double > &v)
Definition: vpMath.cpp:322
static double getStdev(const std::vector< double > &v, bool useBesselCorrection=false)
Definition: vpMath.cpp:353
static double getMean(const std::vector< double > &v)
Definition: vpMath.cpp:302
static bool computePlanarObjectPoseFromRGBD(const vpImage< float > &depthMap, const std::vector< vpImagePoint > &corners, const vpCameraParameters &colorIntrinsics, const std::vector< vpPoint > &point3d, vpHomogeneousMatrix &cMo, double *confidence_index=nullptr)
Definition: vpPoseRGBD.cpp:158
vpCameraParameters getCameraParameters(const rs2_stream &stream, vpCameraParameters::vpCameraParametersProjType type=vpCameraParameters::perspectiveProjWithDistortion, int index=-1) const
void acquire(vpImage< unsigned char > &grey, double *ts=nullptr)
bool open(const rs2::config &cfg=rs2::config())
float getDepthScale()
VISP_EXPORT double measureTimeMs()