Visual Servoing Platform  version 3.6.1 under development (2025-02-18)
tutorial-mb-generic-tracker-apriltag-webcam.cpp
1 #include <iostream>
3 
4 #include <visp3/core/vpConfig.h>
5 
7 // #undef VISP_HAVE_V4L2
8 // #undef HAVE_OPENCV_HIGHGUI
9 // #undef HAVE_OPENCV_VIDEOIO
11 
13 #if defined(VISP_HAVE_APRILTAG) && defined(VISP_HAVE_MODULE_MBT) && \
14  (defined(VISP_HAVE_V4L2) || \
15  ((VISP_HAVE_OPENCV_VERSION < 0x030000) && defined(HAVE_OPENCV_HIGHGUI)) || \
16  ((VISP_HAVE_OPENCV_VERSION >= 0x030000) && defined(HAVE_OPENCV_VIDEOIO)))
18 
19 #include <fstream>
20 #include <ios>
21 
22 #ifdef VISP_HAVE_MODULE_SENSOR
23 #include <visp3/sensor/vpV4l2Grabber.h>
24 #endif
25 #include <visp3/core/vpXmlParserCamera.h>
26 #include <visp3/detection/vpDetectorAprilTag.h>
27 #include <visp3/gui/vpDisplayFactory.h>
28 #include <visp3/mbt/vpMbGenericTracker.h>
29 
30 #if (VISP_HAVE_OPENCV_VERSION < 0x030000) && defined(HAVE_OPENCV_HIGHGUI)
31 #include <opencv2/highgui/highgui.hpp> // for cv::VideoCapture
32 #elif (VISP_HAVE_OPENCV_VERSION >= 0x030000) && defined(HAVE_OPENCV_VIDEOIO)
33 #include <opencv2/videoio/videoio.hpp> // for cv::VideoCapture
34 #endif
35 
36 #ifdef ENABLE_VISP_NAMESPACE
37 using namespace VISP_NAMESPACE_NAME;
38 #endif
39 
40 typedef enum { state_detection, state_tracking, state_quit } state_t;
41 
// Writes a ViSP CAO model file describing a cube whose top face lies in the
// Z=0 plane (the AprilTag plane) and which extends toward negative Z.
// cubeEdgeSize : size of the cube edges in meters
// filename     : output path; defaults to "cube.cao" for backward compatibility
void createCaoFile(double cubeEdgeSize, const std::string &filename = "cube.cao")
{
  const double half = cubeEdgeSize / 2; // half edge, reused for every vertex
  std::ofstream fileStream(filename.c_str(), std::ofstream::out | std::ofstream::trunc);
  fileStream << "V1\n";
  fileStream << "# 3D Points\n";
  fileStream << "8 # Number of points\n";
  // Top face (Z = 0), counter-clockwise
  fileStream << half << " " << half << " " << 0 << " # Point 0: (X, Y, Z)\n";
  fileStream << half << " " << -half << " " << 0 << " # Point 1\n";
  fileStream << -half << " " << -half << " " << 0 << " # Point 2\n";
  fileStream << -half << " " << half << " " << 0 << " # Point 3\n";
  // Bottom face (Z = -cubeEdgeSize)
  fileStream << -half << " " << half << " " << -cubeEdgeSize << " # Point 4\n";
  fileStream << -half << " " << -half << " " << -cubeEdgeSize << " # Point 5\n";
  fileStream << half << " " << -half << " " << -cubeEdgeSize << " # Point 6\n";
  fileStream << half << " " << half << " " << -cubeEdgeSize << " # Point 7\n";
  fileStream << "# 3D Lines\n";
  fileStream << "0 # Number of lines\n";
  fileStream << "# Faces from 3D lines\n";
  fileStream << "0 # Number of faces\n";
  fileStream << "# Faces from 3D points\n";
  fileStream << "6 # Number of faces\n";
  fileStream << "4 0 3 2 1 # Face 0: [number of points] [index of the 3D points]...\n";
  fileStream << "4 1 2 5 6\n";
  fileStream << "4 4 7 6 5\n";
  fileStream << "4 0 7 4 3\n";
  fileStream << "4 5 2 3 4\n";
  fileStream << "4 0 1 6 7 # Face 5\n";
  fileStream << "# 3D cylinders\n";
  fileStream << "0 # Number of cylinders\n";
  fileStream << "# 3D circles\n";
  fileStream << "0 # Number of circles\n";
  // RAII: the ofstream destructor flushes and closes the file.
}
77 
78 state_t detectAprilTag(const vpImage<unsigned char> &I, vpDetectorAprilTag &detector, double tagSize,
79  const vpCameraParameters &cam, vpHomogeneousMatrix &cMo)
80 {
81  std::vector<vpHomogeneousMatrix> cMo_vec;
82 
83  // Detection
84  bool ret = detector.detect(I, tagSize, cam, cMo_vec);
85 
86  // Display camera pose
87  for (size_t i = 0; i < cMo_vec.size(); i++) {
88  vpDisplay::displayFrame(I, cMo_vec[i], cam, tagSize / 2, vpColor::none, 3);
89  }
90 
91  vpDisplay::displayText(I, 40, 20, "State: waiting tag detection", vpColor::red);
92 
93  if (ret && detector.getNbObjects() > 0) { // if tag detected, we pick the first one
94  cMo = cMo_vec[0];
95  return state_tracking;
96  }
97 
98  return state_detection;
99 }
100 
101 state_t track(const vpImage<unsigned char> &I, vpMbGenericTracker &tracker, double projection_error_threshold,
102  vpHomogeneousMatrix &cMo)
103 {
104  vpCameraParameters cam;
105  tracker.getCameraParameters(cam);
106 
107  // Track the object
108  try {
109  tracker.track(I);
110  }
111  catch (...) {
112  return state_detection;
113  }
114 
115  tracker.getPose(cMo);
116 
117  // Detect tracking error
118  double projection_error = tracker.computeCurrentProjectionError(I, cMo, cam);
119  if (projection_error > projection_error_threshold) {
120  return state_detection;
121  }
122 
123  // Display
124  tracker.display(I, cMo, cam, vpColor::red, 2);
125  vpDisplay::displayFrame(I, cMo, cam, 0.025, vpColor::none, 3);
126  vpDisplay::displayText(I, 40, 20, "State: tracking in progress", vpColor::red);
127  {
128  std::stringstream ss;
129  ss << "Features: edges " << tracker.getNbFeaturesEdge() << ", klt " << tracker.getNbFeaturesKlt();
130  vpDisplay::displayText(I, 60, 20, ss.str(), vpColor::red);
131  }
132 
133  return state_tracking;
134 }
135 
136 int main(int argc, const char **argv)
137 {
138  int opt_device = 0;
140  double opt_tag_size = 0.08;
141  float opt_quad_decimate = 1.0;
142  int opt_nthreads = 1;
143  std::string opt_intrinsic_file = "";
144  std::string opt_camera_name = "";
145  double opt_cube_size = 0.125; // 12.5cm by default
146 #ifdef VISP_HAVE_OPENCV
147  bool opt_use_texture = false;
148 #endif
149  double opt_projection_error_threshold = 40.;
150 
151 #if !(defined(VISP_HAVE_DISPLAY))
152  bool display_off = true;
153 #else
154  bool display_off = false;
155 #endif
156 
157  for (int i = 1; i < argc; i++) {
158  if (std::string(argv[i]) == "--tag-size" && i + 1 < argc) {
159  opt_tag_size = atof(argv[i + 1]);
160  }
161  else if (std::string(argv[i]) == "--input" && i + 1 < argc) {
162  opt_device = atoi(argv[i + 1]);
163  }
164  else if (std::string(argv[i]) == "--quad-decimate" && i + 1 < argc) {
165  opt_quad_decimate = (float)atof(argv[i + 1]);
166  }
167  else if (std::string(argv[i]) == "--nthreads" && i + 1 < argc) {
168  opt_nthreads = atoi(argv[i + 1]);
169  }
170  else if (std::string(argv[i]) == "--intrinsic" && i + 1 < argc) {
171  opt_intrinsic_file = std::string(argv[i + 1]);
172  }
173  else if (std::string(argv[i]) == "--camera-name" && i + 1 < argc) {
174  opt_camera_name = std::string(argv[i + 1]);
175  }
176  else if (std::string(argv[i]) == "--display-off") {
177  display_off = true;
178  }
179  else if (std::string(argv[i]) == "--tag-family" && i + 1 < argc) {
180  opt_tag_family = (vpDetectorAprilTag::vpAprilTagFamily)atoi(argv[i + 1]);
181  }
182  else if (std::string(argv[i]) == "--cube-size" && i + 1 < argc) {
183  opt_cube_size = atof(argv[i + 1]);
184 #ifdef VISP_HAVE_OPENCV
185  }
186  else if (std::string(argv[i]) == "--texture") {
187  opt_use_texture = true;
188 #endif
189  }
190  else if (std::string(argv[i]) == "--projection-error" && i + 1 < argc) {
191  opt_projection_error_threshold = atof(argv[i + 1]);
192  }
193  else if (std::string(argv[i]) == "--help" || std::string(argv[i]) == "-h") {
194  std::cout << "Usage: " << argv[0]
195  << " [--input <camera id>]"
196  << " [--cube-size <size in m>]"
197  << " [--tag-size <size in m>]"
198  << " [--quad-decimate <decimation>]"
199  << " [--nthreads <nb>]"
200  << " [--intrinsic <xml intrinsic file>]"
201  << " [--camera-name <camera name in xml file>]"
202  << " [--tag-family <0: TAG_36h11, 1: TAG_36h10, 2: TAG_36ARTOOLKIT, 3: TAG_25h9, 4: TAG_25h7, 5: TAG_16h5>]";
203 #if (defined(VISP_HAVE_DISPLAY))
204  std::cout << " [--display-off]";
205 #endif
206  std::cout << " [--texture]"
207  << " [--projection-error <30 - 100>]"
208  << " [--help,-h]" << std::endl;
209  return EXIT_SUCCESS;
210  }
211  }
212 
213  createCaoFile(opt_cube_size);
214 
215  vpCameraParameters cam;
216  bool camIsInit = false;
217 #if defined(VISP_HAVE_PUGIXML)
218  vpXmlParserCamera parser;
219  if (!opt_intrinsic_file.empty() && !opt_camera_name.empty()) {
220  parser.parse(cam, opt_intrinsic_file, opt_camera_name, vpCameraParameters::perspectiveProjWithoutDistortion);
221  camIsInit = true;
222  }
223 #endif
224 
225 #if (VISP_CXX_STANDARD >= VISP_CXX_STANDARD_11)
226  std::shared_ptr<vpDisplay> display;
227 #else
228  vpDisplay *display = nullptr;
229 #endif
230 
231  try {
233 
235 #if defined(VISP_HAVE_V4L2)
236  vpV4l2Grabber g;
237  std::ostringstream device;
238  device << "/dev/video" << opt_device;
239  std::cout << "Use Video 4 Linux grabber on device " << device.str() << std::endl;
240  g.setDevice(device.str());
241  g.setScale(1);
242  g.open(I);
243 #elif ((VISP_HAVE_OPENCV_VERSION < 0x030000) && defined(HAVE_OPENCV_HIGHGUI))|| ((VISP_HAVE_OPENCV_VERSION >= 0x030000) && defined(HAVE_OPENCV_VIDEOIO))
244  std::cout << "Use OpenCV grabber on device " << opt_device << std::endl;
245  cv::VideoCapture g(opt_device); // Open the default camera
246  if (!g.isOpened()) { // Check if we succeeded
247  std::cout << "Failed to open the camera" << std::endl;
248  return EXIT_FAILURE;
249  }
250  cv::Mat frame;
251  g >> frame; // get a new frame from camera
252  vpImageConvert::convert(frame, I);
253 #endif
255  if (!camIsInit) {
256  cam.initPersProjWithoutDistortion(600, 600, I.getWidth() / 2., I.getHeight() / 2.);
257  }
258 
259  std::cout << "Cube size: " << opt_cube_size << std::endl;
260  std::cout << "AprilTag size: " << opt_tag_size << std::endl;
261  std::cout << "AprilTag family: " << opt_tag_family << std::endl;
262  std::cout << "Camera parameters:\n" << cam << std::endl;
263  std::cout << "Detection: " << std::endl;
264  std::cout << " Quad decimate: " << opt_quad_decimate << std::endl;
265  std::cout << " Threads number: " << opt_nthreads << std::endl;
266  std::cout << "Tracker: " << std::endl;
267  std::cout << " Use edges : 1" << std::endl;
268  std::cout << " Use texture: "
269 #ifdef VISP_HAVE_OPENCV
270  << opt_use_texture << std::endl;
271 #else
272  << " na" << std::endl;
273 #endif
274  std::cout << " Projection error: " << opt_projection_error_threshold << std::endl;
275 
276  // Construct display
277  if (!display_off) {
278 #if (VISP_CXX_STANDARD >= VISP_CXX_STANDARD_11)
279  display = vpDisplayFactory::createDisplay(I);
280 #else
282 #endif
283  }
284 
285  // Initialize AprilTag detector
286  vpDetectorAprilTag detector(opt_tag_family);
287  detector.setAprilTagQuadDecimate(opt_quad_decimate);
288  detector.setAprilTagNbThreads(opt_nthreads);
289 
290  // Prepare MBT
291  vpMbGenericTracker tracker;
292 #if defined(VISP_HAVE_OPENCV) && defined(HAVE_OPENCV_IMGPROC) && defined(HAVE_OPENCV_VIDEO)
293  if (opt_use_texture)
294  tracker.setTrackerType(vpMbGenericTracker::EDGE_TRACKER | vpMbGenericTracker::KLT_TRACKER);
295  else
296 #endif
298  // edges
299  vpMe me;
300  me.setMaskSize(5);
301  me.setMaskNumber(180);
302  me.setRange(12);
304  me.setThreshold(20);
305  me.setMu1(0.5);
306  me.setMu2(0.5);
307  me.setSampleStep(4);
308  tracker.setMovingEdge(me);
309 
310 #if defined(VISP_HAVE_OPENCV) && defined(HAVE_OPENCV_IMGPROC) && defined(HAVE_OPENCV_VIDEO)
311  if (opt_use_texture) {
312  vpKltOpencv klt_settings;
313  klt_settings.setMaxFeatures(300);
314  klt_settings.setWindowSize(5);
315  klt_settings.setQuality(0.015);
316  klt_settings.setMinDistance(8);
317  klt_settings.setHarrisFreeParameter(0.01);
318  klt_settings.setBlockSize(3);
319  klt_settings.setPyramidLevels(3);
320  tracker.setKltOpencv(klt_settings);
321  tracker.setKltMaskBorder(5);
322  }
323 #endif
324 
325  // camera calibration params
326  tracker.setCameraParameters(cam);
327  // model definition
328  tracker.loadModel("cube.cao");
329  tracker.setDisplayFeatures(true);
330  tracker.setAngleAppear(vpMath::rad(70));
331  tracker.setAngleDisappear(vpMath::rad(80));
332 
334  state_t state = state_detection;
335 
336  // wait for a tag detection
337  while (state != state_quit) {
338 
339 #if defined(VISP_HAVE_V4L2)
340  g.acquire(I);
341 #elif ((VISP_HAVE_OPENCV_VERSION < 0x030000) && defined(HAVE_OPENCV_HIGHGUI))|| ((VISP_HAVE_OPENCV_VERSION >= 0x030000) && defined(HAVE_OPENCV_VIDEOIO))
342  g >> frame;
343  vpImageConvert::convert(frame, I);
344 #endif
345 
347 
348  if (state == state_detection) {
349  state = detectAprilTag(I, detector, opt_tag_size, cam, cMo);
350 
351  // Initialize the tracker with the result of the detection
352  if (state == state_tracking) {
354  tracker.initFromPose(I, cMo);
356  }
357  }
358 
359  if (state == state_tracking) {
360  state = track(I, tracker, opt_projection_error_threshold, cMo);
361  }
362 
363  vpDisplay::displayText(I, 20, 20, "Click to quit...", vpColor::red);
364  if (vpDisplay::getClick(I, false)) { // exit
365  state = state_quit;
366  }
367 
368  vpDisplay::flush(I);
369  }
370  }
371  catch (const vpException &e) {
372  std::cerr << "Catch an exception: " << e.getMessage() << std::endl;
373  }
374 
375 #if (VISP_CXX_STANDARD < VISP_CXX_STANDARD_11)
376  if (!display_off)
377  delete display;
378 #endif
379  return EXIT_SUCCESS;
380 }
381 
382 #else
383 
// Fallback entry point when the required 3rd parties are missing: explain
// which dependency prevented building the real tutorial.
int main()
{
#if !defined(VISP_HAVE_APRILTAG)
  // fix: grammar in user-facing message ("is not build" -> "is not built")
  std::cout << "ViSP is not built with Apriltag support" << std::endl;
#endif
#if !(defined(VISP_HAVE_V4L2) || defined(VISP_HAVE_OPENCV))
  std::cout << "ViSP is not built with v4l2 or OpenCV support" << std::endl;
#else
  std::cout << "Install missing 3rd parties, configure and build ViSP to run this tutorial" << std::endl;
#endif

  return EXIT_SUCCESS;
}
397 
398 #endif
Generic class defining intrinsic camera parameters.
void initPersProjWithoutDistortion(double px, double py, double u0, double v0)
@ perspectiveProjWithoutDistortion
Perspective projection without distortion model.
static const vpColor red
Definition: vpColor.h:198
static const vpColor none
Definition: vpColor.h:210
void setAprilTagQuadDecimate(float quadDecimate)
bool detect(const vpImage< unsigned char > &I) VP_OVERRIDE
@ TAG_36h11
AprilTag 36h11 pattern (recommended)
void setAprilTagNbThreads(int nThreads)
size_t getNbObjects() const
Class that defines generic functionalities for display.
Definition: vpDisplay.h:178
static bool getClick(const vpImage< unsigned char > &I, bool blocking=true)
static void display(const vpImage< unsigned char > &I)
static void displayFrame(const vpImage< unsigned char > &I, const vpHomogeneousMatrix &cMo, const vpCameraParameters &cam, double size, const vpColor &color=vpColor::none, unsigned int thickness=1, const vpImagePoint &offset=vpImagePoint(0, 0), const std::string &frameName="", const vpColor &textColor=vpColor::black, const vpImagePoint &textOffset=vpImagePoint(15, 15))
static void flush(const vpImage< unsigned char > &I)
static void displayText(const vpImage< unsigned char > &I, const vpImagePoint &ip, const std::string &s, const vpColor &color)
error that can be emitted by ViSP classes.
Definition: vpException.h:60
const char * getMessage() const
Definition: vpException.cpp:65
Implementation of an homogeneous matrix and operations on such kind of matrices.
static void convert(const vpImage< unsigned char > &src, vpImage< vpRGBa > &dest)
unsigned int getWidth() const
Definition: vpImage.h:242
unsigned int getHeight() const
Definition: vpImage.h:181
Wrapper for the KLT (Kanade-Lucas-Tomasi) feature tracker implemented in OpenCV. Thus to enable this ...
Definition: vpKltOpencv.h:79
void setBlockSize(int blockSize)
Definition: vpKltOpencv.h:272
void setQuality(double qualityLevel)
Definition: vpKltOpencv.h:361
void setHarrisFreeParameter(double harris_k)
Definition: vpKltOpencv.h:280
void setMaxFeatures(int maxCount)
Definition: vpKltOpencv.h:320
void setMinDistance(double minDistance)
Definition: vpKltOpencv.h:329
void setWindowSize(int winSize)
Definition: vpKltOpencv.h:382
void setPyramidLevels(int pyrMaxLevel)
Definition: vpKltOpencv.h:348
static double rad(double deg)
Definition: vpMath.h:129
Real-time 6D object pose tracking using its CAD model.
virtual void setCameraParameters(const vpCameraParameters &camera) VP_OVERRIDE
virtual void setDisplayFeatures(bool displayF) VP_OVERRIDE
virtual unsigned int getNbFeaturesEdge() const
virtual double computeCurrentProjectionError(const vpImage< unsigned char > &I, const vpHomogeneousMatrix &_cMo, const vpCameraParameters &_cam) VP_OVERRIDE
virtual void getCameraParameters(vpCameraParameters &camera) const VP_OVERRIDE
virtual void initFromPose(const vpImage< unsigned char > &I, const vpHomogeneousMatrix &cMo) VP_OVERRIDE
virtual void getPose(vpHomogeneousMatrix &cMo) const VP_OVERRIDE
virtual unsigned int getNbFeaturesKlt() const
virtual void setMovingEdge(const vpMe &me)
virtual void setAngleDisappear(const double &a) VP_OVERRIDE
virtual void track(const vpImage< unsigned char > &I) VP_OVERRIDE
virtual void loadModel(const std::string &modelFile, bool verbose=false, const vpHomogeneousMatrix &T=vpHomogeneousMatrix()) VP_OVERRIDE
virtual void setTrackerType(int type)
virtual void display(const vpImage< unsigned char > &I, const vpHomogeneousMatrix &cMo, const vpCameraParameters &cam, const vpColor &col, unsigned int thickness=1, bool displayFullModel=false) VP_OVERRIDE
virtual void setAngleAppear(const double &a) VP_OVERRIDE
Definition: vpMe.h:134
void setMu1(const double &mu_1)
Definition: vpMe.h:385
void setRange(const unsigned int &range)
Definition: vpMe.h:415
void setLikelihoodThresholdType(const vpLikelihoodThresholdType likelihood_threshold_type)
Definition: vpMe.h:505
void setMaskNumber(const unsigned int &mask_number)
Definition: vpMe.cpp:552
void setThreshold(const double &threshold)
Definition: vpMe.h:466
void setSampleStep(const double &sample_step)
Definition: vpMe.h:422
void setMaskSize(const unsigned int &mask_size)
Definition: vpMe.cpp:560
void setMu2(const double &mu_2)
Definition: vpMe.h:392
@ NORMALIZED_THRESHOLD
Definition: vpMe.h:145
Class that is a wrapper over the Video4Linux2 (V4L2) driver.
void open(vpImage< unsigned char > &I)
void setScale(unsigned scale=vpV4l2Grabber::DEFAULT_SCALE)
void setDevice(const std::string &devname)
void acquire(vpImage< unsigned char > &I)
XML parser to load and save intrinsic camera parameters.
int parse(vpCameraParameters &cam, const std::string &filename, const std::string &camera_name, const vpCameraParameters::vpCameraParametersProjType &projModel, unsigned int image_width=0, unsigned int image_height=0, bool verbose=true)
std::shared_ptr< vpDisplay > createDisplay()
Return a smart pointer vpDisplay specialization if a GUI library is available or nullptr otherwise.
vpDisplay * allocateDisplay()
Return a newly allocated vpDisplay specialization if a GUI library is available or nullptr otherwise.