Visual Servoing Platform  version 3.6.1 under development (2024-11-15)
Tutorial: Keypoint tracking

Introduction

With ViSP it is possible to track keypoints using the OpenCV KLT tracker, an implementation of the Kanade-Lucas-Tomasi feature tracker.

Note that all the material (source code and videos) described in this tutorial is part of the ViSP source code (in the tutorial/tracking/keypoint folder) and can be found at https://github.com/lagadic/visp/tree/master/tutorial/tracking/keypoint.

KLT tracker

The following example, available in tutorial-klt-tracker.cpp, shows how to use the ViSP vpKltOpencv class to track KLT keypoints. This class is a wrapper over the OpenCV KLT tracker implementation.

#include <visp3/core/vpConfig.h>
#include <visp3/core/vpImageConvert.h>
#include <visp3/gui/vpDisplayOpenCV.h>
#include <visp3/io/vpVideoReader.h>
#include <visp3/klt/vpKltOpencv.h>

int main(int argc, const char *argv[])
{
#if (defined(HAVE_OPENCV_HIGHGUI) && defined(HAVE_OPENCV_VIDEOIO) || defined(VISP_HAVE_V4L2)) && defined(HAVE_OPENCV_IMGPROC) && defined(HAVE_OPENCV_VIDEO)
#ifdef ENABLE_VISP_NAMESPACE
  using namespace VISP_NAMESPACE_NAME;
#endif
  try {
    std::string opt_videoname = "video-postcard.mp4";
    bool opt_init_by_click = false;
    unsigned int opt_subsample = 1;
    for (int i = 0; i < argc; i++) {
      if (std::string(argv[i]) == "--videoname" && i + 1 < argc)
        opt_videoname = std::string(argv[i + 1]);
      else if (std::string(argv[i]) == "--init-by-click")
        opt_init_by_click = true;
      else if (std::string(argv[i]) == "--subsample" && i + 1 < argc)
        opt_subsample = static_cast<unsigned int>(std::atoi(argv[i + 1]));
      else if (std::string(argv[i]) == "--help" || std::string(argv[i]) == "-h") {
        std::cout << "Usage: " << argv[0]
                  << " [--videoname <video name>] [--subsample <scale factor>] [--init-by-click]"
                  << " [--help] [-h]" << std::endl;
        return EXIT_SUCCESS;
      }
    }

    vpImage<unsigned char> I, Iacq;

    vpVideoReader reader;
    reader.setFileName(opt_videoname);
    reader.acquire(Iacq);
    Iacq.subsample(opt_subsample, opt_subsample, I);

    // Convert the ViSP image into the OpenCV structure used by the tracker
    cv::Mat cvI;
    vpImageConvert::convert(I, cvI);

    vpDisplayOpenCV d(I, 0, 0, "Klt tracking");
    vpDisplay::display(I);
    vpDisplay::flush(I);

    vpKltOpencv tracker;
    // Set the parameters of the Harris keypoint detector
    tracker.setMaxFeatures(200);
    tracker.setWindowSize(10);
    tracker.setQuality(0.01);
    tracker.setMinDistance(15);
    tracker.setHarrisFreeParameter(0.04);
    tracker.setBlockSize(9);
    tracker.setUseHarris(1);
    tracker.setPyramidLevels(3);

    // Initialise the tracking, either from points selected by the user...
    if (opt_init_by_click) {
      vpMouseButton::vpMouseButtonType button = vpMouseButton::button1;
      vpImagePoint ip;
      std::vector<cv::Point2f> feature;
      do {
        vpDisplay::display(I);
        vpDisplay::displayText(I, 10, 10, "Left click to select a point, right to start tracking", vpColor::red);
        if (vpDisplay::getClick(I, ip, button, false)) {
          if (button == vpMouseButton::button1) {
            feature.push_back(cv::Point2f((float)ip.get_u(), (float)ip.get_v()));
            vpDisplay::displayCross(I, ip, 12, vpColor::green);
          }
        }
        vpDisplay::flush(I);
        vpTime::wait(20);
      } while (button != vpMouseButton::button3);
      tracker.initTracking(cvI, feature);
    }
    else { // ...or from an automatic detection
      tracker.initTracking(cvI);
    }
    std::cout << "Tracker initialized with " << tracker.getNbFeatures() << " features" << std::endl;

    while (!reader.end()) {
      double t = vpTime::measureTimeMs();
      reader.acquire(Iacq);
      Iacq.subsample(opt_subsample, opt_subsample, I);
      vpDisplay::display(I);
      vpImageConvert::convert(I, cvI);
      // Optionally restart the initialisation by click 20 frames after the first one
      if (opt_init_by_click && reader.getFrameIndex() == reader.getFirstFrameIndex() + 20) {
        vpMouseButton::vpMouseButtonType button = vpMouseButton::button1;
        vpImagePoint ip;
        std::vector<cv::Point2f> feature;
        do {
          vpDisplay::display(I);
          vpDisplay::displayText(I, 10, 10, "Left click to select a point, right to start tracking", vpColor::red);
          if (vpDisplay::getClick(I, ip, button, false)) {
            if (button == vpMouseButton::button1) {
              feature.push_back(cv::Point2f((float)ip.get_u(), (float)ip.get_v()));
              vpDisplay::displayCross(I, ip, 12, vpColor::green);
            }
          }
          vpDisplay::flush(I);
          vpTime::wait(20);
        } while (button != vpMouseButton::button3);
        tracker.initTracking(cvI, feature);
      }
      tracker.track(cvI);
      tracker.display(I, vpColor::red);
      vpDisplay::displayText(I, 10, 10, "Click to quit", vpColor::red);
      if (vpDisplay::getClick(I, false))
        break;
      vpDisplay::flush(I);
      // When reading a sequence of still images, slow down the loop to ~25 Hz
      if (!reader.isVideoFormat()) {
        vpTime::wait(t, 40);
      }
    }
    // Wait for a final click before ending the program
    vpDisplay::getClick(I);
  }
  catch (const vpException &e) {
    std::cout << "Catch an exception: " << e << std::endl;
    return EXIT_FAILURE;
  }
#else
  (void)argc;
  (void)argv;
#endif
  return EXIT_SUCCESS;
}

The video shows the result of the tracking:

The previous example can be run without command line options. In that case, keypoints are automatically detected before tracking.

$ ./tutorial-klt-tracker

It can also be run with the --init-by-click option. In that case, the user can select a set of keypoints to track with a left mouse click; a right mouse click stops the selection and starts the tracking.

$ ./tutorial-klt-tracker --init-by-click
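
The images can also be subsampled with the --subsample option to reduce the resolution and thus the processing time; for example, assuming a scale factor of 2:

$ ./tutorial-klt-tracker --subsample 2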

Here is the line-by-line explanation of the source:

#include <visp3/core/vpConfig.h>
#include <visp3/core/vpImageConvert.h>
#include <visp3/gui/vpDisplayOpenCV.h>
#include <visp3/io/vpVideoReader.h>
#include <visp3/klt/vpKltOpencv.h>

We include here the headers that define the corresponding classes. The vpImageConvert class will be used to convert ViSP images, implemented in the vpImage class, into the OpenCV cv::Mat structures used as input by the KLT tracker. Then we include the header of the vpKltOpencv class, the wrapper over the OpenCV KLT tracker implementation.
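
As a side note, the conversion itself is a one-liner. A minimal, self-contained sketch, assuming an arbitrary 640x480 gray-level image:

#include <visp3/core/vpImage.h>
#include <visp3/core/vpImageConvert.h>

#include <opencv2/core/core.hpp>

int main()
{
  vpImage<unsigned char> I(480, 640, 128); // gray-level image, arbitrary size and value
  cv::Mat cvI;
  vpImageConvert::convert(I, cvI); // cvI now contains the image data in the format expected by the tracker
  return 0;
}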

We also need to include a display device to render the images. We retain vpDisplayOpenCV, which works on Unix and Windows since OpenCV is already required by the tracker. Finally, we include the vpVideoReader header that will be used to read the input video stream.

At the beginning of the main() function, we use the following macro to ensure that the OpenCV modules required by the tracker are available. Note that OpenCV is also used to render the images and read the input video stream.

#if (defined(HAVE_OPENCV_HIGHGUI) && defined(HAVE_OPENCV_VIDEOIO) || defined(VISP_HAVE_V4L2)) && defined(HAVE_OPENCV_IMGPROC) && defined(HAVE_OPENCV_VIDEO)

The program starts with the creation of a vpVideoReader instance able to extract all the images of the video file video-postcard.mp4. Here, the video should be in the same folder as the binary.

vpVideoReader reader;
reader.setFileName(opt_videoname);

Then we extract the first image of the video into the gray-level ViSP image container Iacq, and subsample it into I according to the optional scale factor.

reader.acquire(Iacq);
Iacq.subsample(opt_subsample, opt_subsample, I);

This image I is then converted into cvI, the OpenCV image format that will be used by the tracker.

cv::Mat cvI;
vpImageConvert::convert(I, cvI);

We also create a window associated with I, at position (0,0) on the screen, with "Klt tracking" as title, and display image I.

vpDisplayOpenCV d(I, 0, 0, "Klt tracking");
vpDisplay::display(I);
vpDisplay::flush(I);

We then create an instance of the tracker and set the parameters of the Harris keypoint detector.

vpKltOpencv tracker;
tracker.setMaxFeatures(200);          // maximum number of features to detect
tracker.setWindowSize(10);            // size of the search window at each pyramid level
tracker.setQuality(0.01);             // minimal accepted quality of the corners
tracker.setMinDistance(15);           // minimal Euclidean distance between two features
tracker.setHarrisFreeParameter(0.04); // free parameter k of the Harris detector
tracker.setBlockSize(9);              // neighborhood size used to compute the derivatives
tracker.setUseHarris(1);              // use the Harris detector (0 for the minimal eigenvalue criterion)
tracker.setPyramidLevels(3);          // maximal pyramid level

The tracker is then initialized on the cvI image.

tracker.initTracking(cvI);

The next line indicates how many keypoints were detected automatically, or selected by the user, during the initialization.

std::cout << "Tracker initialized with " << tracker.getNbFeatures() << " features" << std::endl;
Note
If no keypoints were found, the next call to vpKltOpencv::track() will throw an exception; a defensive check is sketched below.

To detect more keypoints, you may decrease the quality parameter set with the following line:

tracker.setQuality(0.01);
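
If you prefer to fail gracefully rather than catch the exception, you can also test the number of features before entering the tracking loop. A minimal sketch, to be placed right after the initialization:

// Sketch: if the initialization found no keypoint, the next track() call would throw,
// so report the problem and leave early.
if (tracker.getNbFeatures() == 0) {
  std::cout << "No feature detected, try decreasing the quality parameter" << std::endl;
  return EXIT_FAILURE;
}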

Until the end of the video, we acquire the next image in ViSP format, display it and convert it into the OpenCV format. Then we track the Harris keypoints using the KLT tracker before displaying the tracked keypoints with a red cross.

while (!reader.end()) {
  double t = vpTime::measureTimeMs();
  reader.acquire(Iacq);
  Iacq.subsample(opt_subsample, opt_subsample, I);
  vpDisplay::display(I);
  vpImageConvert::convert(I, cvI);
  if (opt_init_by_click && reader.getFrameIndex() == reader.getFirstFrameIndex() + 20) {
    vpMouseButton::vpMouseButtonType button = vpMouseButton::button1;
    vpImagePoint ip;
    std::vector<cv::Point2f> feature;
    do {
      vpDisplay::display(I);
      vpDisplay::displayText(I, 10, 10, "Left click to select a point, right to start tracking", vpColor::red);
      if (vpDisplay::getClick(I, ip, button, false)) {
        if (button == vpMouseButton::button1) {
          feature.push_back(cv::Point2f((float)ip.get_u(), (float)ip.get_v()));
          vpDisplay::displayCross(I, ip, 12, vpColor::green);
        }
      }
      vpDisplay::flush(I);
      vpTime::wait(20);
    } while (button != vpMouseButton::button3);
    tracker.initTracking(cvI, feature);
  }
  tracker.track(cvI);
  tracker.display(I, vpColor::red);
  vpDisplay::displayText(I, 10, 10, "Click to quit", vpColor::red);
  if (vpDisplay::getClick(I, false))
    break;
  vpDisplay::flush(I);
  if (!reader.isVideoFormat()) {
    vpTime::wait(t, 40);
  }
}

Finally, we wait for a last mouse click event on image I to end the program.

vpDisplay::getClick(I);

KLT tracker with re-initialisation

Once initialized, the number of tracked features decreases over time. Depending on a criterion, it may make sense to detect and track new features online. A possible criterion is, for example, to compare the number of currently tracked features to the initial number of detected features: if less than a given percentage of the features are still tracked, you can start a new detection (see the sketch after the next snippet).

To get the number of detected or tracked features just call:

tracker.getNbFeatures();
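
For example, one could trigger a new detection when fewer than half of the initial features survive. A sketch, where init_nb_features and the 50% threshold are illustrative assumptions:

// Sketch: init_nb_features is assumed to hold the value of tracker.getNbFeatures()
// taken right after the first initTracking() call; the 0.5 threshold is arbitrary.
if (tracker.getNbFeatures() < 0.5 * init_nb_features) {
  std::cout << "Re-initialize the tracker" << std::endl;
  // save the current features, detect new ones and merge, as shown below
}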

The idea is then to add the features tracked until now to the list of newly detected features.

The example tutorial-klt-tracker-with-reinit.cpp shows how to do that. In that example, we start a new detection on frame 25. Compared to the previous code available in tutorial-klt-tracker.cpp, we add the following lines:

if (reader.getFrameIndex() == 25) {
  std::cout << "Re initialize the tracker" << std::endl;
  // Save the features tracked until now
  std::vector<cv::Point2f> prev_features = tracker.getFeatures();
  // Start a new feature detection
  tracker.initTracking(cvI);
  std::vector<cv::Point2f> new_features = tracker.getFeatures();
  // Add the previous features if they are not too close to the newly detected ones
  double distance, minDistance_ = tracker.getMinDistance();
  bool is_redundant;
  for (size_t i = 0; i < prev_features.size(); i++) {
    // Test if a previous feature is redundant with one of the newly detected ones
    is_redundant = false;
    for (size_t j = 0; j < new_features.size(); j++) {
      distance = sqrt(vpMath::sqr(new_features[j].x - prev_features[i].x)
                      + vpMath::sqr(new_features[j].y - prev_features[i].y));
      if (distance < minDistance_) {
        is_redundant = true;
        break;
      }
    }
    if (is_redundant) {
      continue;
    }
    // std::cout << "Add previous feature with index " << i << std::endl;
    tracker.addFeature(prev_features[i]);
  }
}
// Track the features
tracker.track(cvI);

In this code we do the following:

  • save the features that are tracked until now
  • initialize the tracker to detect new features
  • parse all the saved features and compare them to the newly detected ones. If a previous feature is too close to a newly detected one in terms of Euclidean distance (in our case, closer than the 15-pixel minimal distance returned by getMinDistance()), it is rejected. Otherwise, it is added to the list of detected features.

Next tutorial

You are now ready to see the next Tutorial: Moving-edges tracking.