Introduction

This tutorial shows how to detect one or more faces with ViSP. Face detection is performed using the OpenCV Haar cascade capabilities that are used in the vpDetectorFace class. OpenCV 2.2.0 or a more recent version is required.

In the next sections you will find examples that show how to detect faces in a video, or in images acquired by a camera connected to your computer.

Note that all the material (source code and image) described in this tutorial is part of the ViSP source code (in the tutorial/detection/face folder) and can be found at https://github.com/lagadic/visp/tree/master/tutorial/detection/face.

Face detection in a video

The following example, also available in tutorial-face-detector.cpp, detects faces in a video file (video.mp4 by default) located next to the source code. The Haar cascade classifier file required by OpenCV is also provided in the same folder as the source code.

#include <visp3/core/vpConfig.h>
#include <visp3/gui/vpDisplayFactory.h>
#include <visp3/detection/vpDetectorFace.h>
#include <visp3/io/vpVideoReader.h>
 
/*!
 * Detect faces in each image of a video and display the detections in a window.
 *
 * Command line options handled below:
 * - `--haar <file>`: Haar cascade classifier xml file (default "./haarcascade_frontalface_alt.xml").
 * - `--video <file>`: input video (default "video.mp4").
 * - `--help`, `-h`: print the usage and return EXIT_SUCCESS.
 *
 * When the required OpenCV modules are missing, the program only prints which
 * module is missing.
 */
int main(int argc, const char *argv[])
{
// The detection code is compiled only when OpenCV highgui, imgproc and objdetect
// modules are available (xobjdetect instead of objdetect when
// VISP_HAVE_OPENCV_VERSION >= 0x050000).
#if defined(HAVE_OPENCV_HIGHGUI) && defined(HAVE_OPENCV_IMGPROC) && \
  (((VISP_HAVE_OPENCV_VERSION < 0x050000) && defined(HAVE_OPENCV_OBJDETECT)) || ((VISP_HAVE_OPENCV_VERSION >= 0x050000) && defined(HAVE_OPENCV_XOBJDETECT)))
#ifdef ENABLE_VISP_NAMESPACE
  using namespace VISP_NAMESPACE_NAME;
#endif
  // The display is declared outside the try block so that, in the pre-C++11
  // case, the raw pointer can be deleted after the catch at the end of main().
#if (VISP_CXX_STANDARD >= VISP_CXX_STANDARD_11)
  std::shared_ptr<vpDisplay> display;
#else
  vpDisplay *display = nullptr;
#endif
  try {
    // Default inputs, overridden by the command line options parsed below
    std::string opt_face_cascade_name = "./haarcascade_frontalface_alt.xml";
    std::string opt_video = "video.mp4";
 
    // An option expecting a value is taken into account only when a value
    // follows it (i + 1 < argc); any other argument is ignored.
    for (int i = 1; i < argc; i++) {
      if (std::string(argv[i]) == "--haar" && i + 1 < argc) {
        opt_face_cascade_name = std::string(argv[++i]);
      }
      else if (std::string(argv[i]) == "--video" && i + 1 < argc) {
        opt_video = std::string(argv[++i]);
      }
      else if ((std::string(argv[i]) == "--help") || (std::string(argv[i]) == "-h")) {
        std::cout << "Usage: " << argv[0]
          << " [--haar <haarcascade xml filename>]"
          << " [--video <input video file>]"
          << " [--help] [-h]" << std::endl;
        return EXIT_SUCCESS;
      }
    }
 
    vpImage<unsigned char> I; // gray level image in which faces are searched
 
    // Open the video; I is the image later filled by g.acquire()
    vpVideoReader g;
    g.setFileName(opt_video);
    g.open(I);
 
    // Create the display attached to I (smart pointer or raw pointer depending
    // on the C++ standard in use)
#if (VISP_CXX_STANDARD >= VISP_CXX_STANDARD_11)
    display = vpDisplayFactory::createDisplay(I);
#else
    display = vpDisplayFactory::allocateDisplay(I);
#endif
    vpDisplay::setTitle(I, "ViSP viewer");
 
    // Create the face detector and give it the Haar cascade classifier file
    vpDetectorFace face_detector;
    face_detector.setCascadeClassifierFile(opt_face_cascade_name);
 
    // Process the images until the end of the video or until a user click
    bool exit_requested = false;
    while (!g.end() && !exit_requested) {
      g.acquire(I);
 
      vpDisplay::display(I);
      bool face_found = face_detector.detect(I);
 
      if (face_found) {
        // Overlay the number of detected faces
        std::ostringstream text;
        text << "Found " << face_detector.getNbObjects() << " face(s)";
        vpDisplay::displayText(I, 10, 10, text.str(), vpColor::red);
        // For each face draw its bounding box and, 10 pixels above the top of
        // the box, the message returned by the detector
        for (size_t i = 0; i < face_detector.getNbObjects(); i++) {
          vpRect bbox = face_detector.getBBox(i);
          vpDisplay::displayRectangle(I, bbox, vpColor::green, false, 4);
          vpDisplay::displayText(I, (int)bbox.getTop() - 10, (int)bbox.getLeft(),
                                 "Message: \"" + face_detector.getMessage(i) + "\"", vpColor::red);
        }
      }
      vpDisplay::displayText(I, (int)I.getHeight() - 25, 10, "Click to quit...", vpColor::red);
      vpDisplay::flush(I);
      // NOTE(review): the second argument (false) presumably makes getClick()
      // non-blocking so that the loop keeps running without a click - confirm
      // in vpDisplay::getClick() documentation
      if (vpDisplay::getClick(I, false)) // a click to exit
        exit_requested = true;
    }
    // End of the video reached without a click: presumably waits for a click
    // so that the last image remains visible - confirm in vpDisplay::getClick()
    if (!exit_requested)
      vpDisplay::getClick(I);
  }
  catch (const vpException &e) {
    // Only the exception message is printed; execution continues after the catch
    std::cout << e.getMessage() << std::endl;
  }
  // Before C++11 the display is a raw pointer that has to be released here
#if (VISP_CXX_STANDARD < VISP_CXX_STANDARD_11)
  if (display != nullptr) {
    delete display;
  }
#endif
#else
 
  // Required OpenCV modules are not all available: tell which one is missing
#if !defined(HAVE_OPENCV_HIGHGUI)
  std::cout << "This tutorial needs OpenCV highgui module that is missing." << std::endl;
#endif
#if !defined(HAVE_OPENCV_IMGPROC)
  std::cout << "This tutorial needs OpenCV imgproc module that is missing." << std::endl;
#endif
#if (VISP_HAVE_OPENCV_VERSION < 0x050000) && !defined(HAVE_OPENCV_OBJDETECT)
  std::cout << "This tutorial needs OpenCV objdetect module that is missing." << std::endl;
#endif
#if ((VISP_HAVE_OPENCV_VERSION >= 0x050000) && !defined(HAVE_OPENCV_XOBJDETECT))
  std::cout << "This tutorial needs OpenCV xobjdetect module that is missing." << std::endl;
#endif
 
  // The command line arguments are not used in this configuration
  (void)argc;
  (void)argv;
#endif
}

To detect the faces just run:

$ ./tutorial-face-detector

You will get the following result:

Now we explain the main lines of the source.

First we have to include the header of the class that allows to detect a face.

#include <visp3/detection/vpDetectorFace.h>

Then in the main() function, before going further, we need to check that the required OpenCV modules are available: highgui, imgproc, and objdetect (or xobjdetect with OpenCV 5 and later).

#if defined(HAVE_OPENCV_HIGHGUI) && defined(HAVE_OPENCV_IMGPROC) && \
  (((VISP_HAVE_OPENCV_VERSION < 0x050000) && defined(HAVE_OPENCV_OBJDETECT)) || ((VISP_HAVE_OPENCV_VERSION >= 0x050000) && defined(HAVE_OPENCV_XOBJDETECT)))
#ifdef ENABLE_VISP_NAMESPACE
  using namespace VISP_NAMESPACE_NAME;
#endif

We set then the default input data:

the name of the Haar cascade classifier file "haarcascade_frontalface_alt.xml"
the name of the input video "video.mp4"

std::string opt_face_cascade_name = "./haarcascade_frontalface_alt.xml";

std::string opt_video = "video.mp4";

With command line options it is possible to use other inputs. To know how just run:

$ ./tutorial-face-detector --help

Usage: ./tutorial-face-detector [--haar <haarcascade xml filename>] [--video <input video file>] [--help] [-h]

Then we open the video stream and create a window named "ViSP viewer" where the images and the resulting face detection will be displayed.

The creation of the face detector is performed using

vpDetectorFace face_detector;

We also need to set the location and name of the xml file that contains the Haar cascade classifier data used to recognize a face.

face_detector.setCascadeClassifierFile(opt_face_cascade_name);

Then we enter the while loop where, for each new image, we try to detect one or more faces:

bool face_found = face_detector.detect(I);

If a face is detected, vpDetectorFace::detect() returns true. It is then possible to retrieve the number of faces that are detected:

text << "Found " << face_detector.getNbObjects() << " face(s)";

For each face, we have access to its location using vpDetectorFace::getPolygon(), its bounding box using vpDetectorFace::getBBox() and its identifier message using vpDetectorFace::getMessage().

        for (size_t i = 0; i < face_detector.getNbObjects(); i++) {
          vpRect bbox = face_detector.getBBox(i);
          vpDisplay::displayRectangle(I, bbox, vpColor::green, false, 4);
          vpDisplay::displayText(I, (int)bbox.getTop() - 10, (int)bbox.getLeft(),
                                 "Message: \"" + face_detector.getMessage(i) + "\"", vpColor::red);
        }

Note: When more than one face is detected, faces are ordered from the largest to the smallest. That means that vpDetectorFace::getPolygon(0), vpDetectorFace::getBBox(0) and vpDetectorFace::getMessage(0) always return the characteristics of the largest face.

Face detection from a camera

This other example also available in tutorial-face-detector-live.cpp shows how to detect one or more faces in images acquired by a camera connected to your computer.

#include <iostream>
 
#include <visp3/core/vpConfig.h>
 
// Comment / uncomment following lines to use the specific 3rd party compatible with your camera
// #undef VISP_HAVE_V4L2
// #undef HAVE_OPENCV_HIGHGUI
// #undef HAVE_OPENCV_VIDEOIO
 
#if defined(HAVE_OPENCV_IMGPROC) \
  && (((VISP_HAVE_OPENCV_VERSION < 0x050000) && defined(HAVE_OPENCV_OBJDETECT)) || ((VISP_HAVE_OPENCV_VERSION >= 0x050000) && defined(HAVE_OPENCV_XOBJDETECT))) \
  && (defined(VISP_HAVE_V4L2) || (((VISP_HAVE_OPENCV_VERSION < 0x030000) && defined(HAVE_OPENCV_HIGHGUI)) || ((VISP_HAVE_OPENCV_VERSION >= 0x030000) && defined(HAVE_OPENCV_VIDEOIO))))
 
#include <visp3/detection/vpDetectorFace.h>
#include <visp3/gui/vpDisplayFactory.h>
#ifdef VISP_HAVE_MODULE_SENSOR
#include <visp3/sensor/vpV4l2Grabber.h>
#endif
 
#if (VISP_HAVE_OPENCV_VERSION < 0x030000) && defined(HAVE_OPENCV_HIGHGUI)
#include <opencv2/highgui/highgui.hpp> // for cv::VideoCapture
#elif (VISP_HAVE_OPENCV_VERSION >= 0x030000) && defined(HAVE_OPENCV_VIDEOIO)
#include <opencv2/videoio/videoio.hpp>
#endif
 
/*!
 * Detect faces in the images acquired by a camera and display the detections
 * in a window until the user clicks in it.
 *
 * Images are grabbed with vpV4l2Grabber when VISP_HAVE_V4L2 is defined,
 * otherwise with cv::VideoCapture.
 *
 * Command line options handled below:
 * - `--haar <file>`: Haar cascade classifier xml file (default "./haarcascade_frontalface_alt.xml").
 * - `--device <id>`: camera device number (default 0).
 * - `--scale <factor>`: subsampling factor, an integer >= 1 (default 2).
 * - `--help`, `-h`: print the usage and return EXIT_SUCCESS.
 *
 * \return EXIT_SUCCESS after printing the usage, EXIT_FAILURE when the
 * `--scale` value is invalid or when cv::VideoCapture fails to open the camera.
 */
int main(int argc, const char *argv[])
{
#ifdef ENABLE_VISP_NAMESPACE
  using namespace VISP_NAMESPACE_NAME;
#endif
  // The display is declared outside the try block so that, in the pre-C++11
  // case, the raw pointer can be deleted after the catch at the end of main().
#if (VISP_CXX_STANDARD >= VISP_CXX_STANDARD_11)
  std::shared_ptr<vpDisplay> display;
#else
  vpDisplay *display = nullptr;
#endif
  try {
    std::string opt_face_cascade_name = "./haarcascade_frontalface_alt.xml";
    unsigned int opt_device = 0;
    unsigned int opt_scale = 2; // Default value is 2 in the constructor. Turn
    // it to 1 to avoid subsampling

    // An option expecting a value is taken into account only when a value
    // follows it (i + 1 < argc); any other argument is ignored.
    for (int i = 1; i < argc; i++) {
      if (std::string(argv[i]) == "--haar" && i + 1 < argc) {
        opt_face_cascade_name = std::string(argv[++i]);
      }
      else if (std::string(argv[i]) == "--device" && i + 1 < argc) {
        opt_device = (unsigned int)atoi(argv[++i]);
      }
      else if (std::string(argv[i]) == "--scale" && i + 1 < argc) {
        // atoi() returns 0 for a non numeric value and may return a negative
        // number. 0 would lead to a division by zero when the capture size is
        // divided by opt_scale below, and a negative number would wrap to a
        // huge unsigned factor, so anything below 1 is rejected here.
        const int scale = atoi(argv[++i]);
        if (scale < 1) {
          std::cout << "Invalid --scale value \"" << argv[i] << "\": it should be an integer >= 1" << std::endl;
          return EXIT_FAILURE;
        }
        opt_scale = (unsigned int)scale;
      }
      else if ((std::string(argv[i]) == "--help") || (std::string(argv[i]) == "-h")) {
        std::cout << "Usage: " << argv[0]
          << " [--haar <haarcascade xml filename>]"
          << " [--device <camera device>]"
          << " [--scale <subsampling factor>]"
          << " [--help] [-h]"
          << std::endl;
        return EXIT_SUCCESS;
      }
    }

    vpImage<unsigned char> I; // for gray images

    // Configure the grabber and acquire a first image in I, which is then used
    // to create the display
#if defined(VISP_HAVE_V4L2)
    vpV4l2Grabber g;
    std::ostringstream device;
    device << "/dev/video" << opt_device;
    g.setDevice(device.str());
    g.setScale(opt_scale); // Default value is 2 in the constructor. Turn it
    // to 1 to avoid subsampling
    g.acquire(I);
#elif ((VISP_HAVE_OPENCV_VERSION < 0x030000) && defined(HAVE_OPENCV_HIGHGUI))|| ((VISP_HAVE_OPENCV_VERSION >= 0x030000) && defined(HAVE_OPENCV_VIDEOIO))
    cv::VideoCapture cap(opt_device); // open the default camera
#if (VISP_HAVE_OPENCV_VERSION >= 0x030000)
    int width = (int)cap.get(cv::CAP_PROP_FRAME_WIDTH);
    int height = (int)cap.get(cv::CAP_PROP_FRAME_HEIGHT);
    cap.set(cv::CAP_PROP_FRAME_WIDTH, width / opt_scale);
    cap.set(cv::CAP_PROP_FRAME_HEIGHT, height / opt_scale);
#else
    int width = cap.get(CV_CAP_PROP_FRAME_WIDTH);
    int height = cap.get(CV_CAP_PROP_FRAME_HEIGHT);
    cap.set(CV_CAP_PROP_FRAME_WIDTH, width / opt_scale);
    cap.set(CV_CAP_PROP_FRAME_HEIGHT, height / opt_scale);
#endif
    if (!cap.isOpened()) { // check if we succeeded
      std::cout << "Failed to open the camera" << std::endl;
      return EXIT_FAILURE;
    }
    cv::Mat frame;
    cap >> frame; // get a new frame from camera
    vpImageConvert::convert(frame, I);
#endif

#if (VISP_CXX_STANDARD >= VISP_CXX_STANDARD_11)
    display = vpDisplayFactory::createDisplay(I);
#else
    display = vpDisplayFactory::allocateDisplay(I);
#endif
    vpDisplay::setTitle(I, "ViSP viewer");

    // Create the face detector and give it the Haar cascade classifier file
    vpDetectorFace face_detector;
    face_detector.setCascadeClassifierFile(opt_face_cascade_name);

    // Acquisition and detection loop, left when the user clicks in the window
    while (1) {
      double t = vpTime::measureTimeMs(); // start time used to print the loop time
#if defined(VISP_HAVE_V4L2)
      g.acquire(I);
      bool face_found = face_detector.detect(I);
#elif ((VISP_HAVE_OPENCV_VERSION < 0x030000) && defined(HAVE_OPENCV_HIGHGUI))|| ((VISP_HAVE_OPENCV_VERSION >= 0x030000) && defined(HAVE_OPENCV_VIDEOIO))
      cap >> frame; // get a new frame from camera
      vpImageConvert::convert(frame, I);
      bool face_found = face_detector.detect(frame); // We pass frame to avoid an internal image conversion
#endif

      vpDisplay::display(I);

      if (face_found) {
        // Overlay the number of detected faces
        std::ostringstream text;
        text << "Found " << face_detector.getNbObjects() << " face(s)";
        vpDisplay::displayText(I, 10, 10, text.str(), vpColor::red);
        // For each face draw its bounding box and, 10 pixels above the top of
        // the box, the message returned by the detector
        for (size_t i = 0; i < face_detector.getNbObjects(); i++) {
          vpRect bbox = face_detector.getBBox(i);
          vpDisplay::displayRectangle(I, bbox, vpColor::green, false, 4);
          vpDisplay::displayText(I, (int)bbox.getTop() - 10, (int)bbox.getLeft(),
                                 "Message: \"" + face_detector.getMessage(i) + "\"", vpColor::red);
        }
      }
      vpDisplay::displayText(I, (int)I.getHeight() - 25, 10, "Click to quit...", vpColor::red);
      vpDisplay::flush(I);
      if (vpDisplay::getClick(I, false)) // a click to exit
        break;

      std::cout << "Loop time: " << vpTime::measureTimeMs() - t << " ms" << std::endl;
    }
  }
  catch (const vpException &e) {
    // Only the exception message is printed; execution continues after the catch
    std::cout << e.getMessage() << std::endl;
  }
  // Before C++11 the display is a raw pointer that has to be released here
#if (VISP_CXX_STANDARD < VISP_CXX_STANDARD_11)
  if (display != nullptr) {
    delete display;
  }
#endif
}
 
#else
 
/*!
 * Fallback entry point compiled when the live face detection example cannot be
 * built. It prints which third-party is missing: an OpenCV module needed by the
 * detector, or a frame grabber (V4L2 or OpenCV video capture).
 */
int main()
{
#if !defined(HAVE_OPENCV_IMGPROC)
  std::cout << "This tutorial needs OpenCV imgproc module that is missing." << std::endl;
#endif
#if (VISP_HAVE_OPENCV_VERSION < 0x050000) && !defined(HAVE_OPENCV_OBJDETECT)
  std::cout << "This tutorial needs OpenCV objdetect module that is missing." << std::endl;
#endif
#if ((VISP_HAVE_OPENCV_VERSION >= 0x050000) && !defined(HAVE_OPENCV_XOBJDETECT))
  std::cout << "This tutorial needs OpenCV xobjdetect module that is missing." << std::endl;
#endif
  // The example is also disabled when no frame grabber is available; without
  // this message the program would exit silently in that configuration.
#if !defined(VISP_HAVE_V4L2) && !(((VISP_HAVE_OPENCV_VERSION < 0x030000) && defined(HAVE_OPENCV_HIGHGUI)) || ((VISP_HAVE_OPENCV_VERSION >= 0x030000) && defined(HAVE_OPENCV_VIDEOIO)))
  std::cout << "This tutorial needs V4L2 or OpenCV videoio module (highgui module before OpenCV 3) "
            << "to grab images, but none is available." << std::endl;
#endif
}
 
#endif

The usage of this example is similar to the previous one. Just run

$ ./tutorial-face-detector-live

Additional command line options are available to specify the location of the Haar cascade file, the camera identifier if more than one camera is connected to your computer, and the image subsampling factor:

$ ./tutorial-face-detector-live --help

Usage: ./tutorial-face-detector-live [--haar <haarcascade xml filename>] [--device <camera device>] [--scale <subsampling factor>] [--help] [-h]

The source code of this example is very similar to the previous one except that here we use camera framegrabber devices (see Tutorial: Image frame grabbing). Two different grabbers may be used:

If ViSP was built with Video For Linux (V4L2) support, available for example on Fedora or Ubuntu distributions, the VISP_HAVE_V4L2 macro is defined. In that case, images coming from a USB camera are acquired using the vpV4l2Grabber class.
If ViSP wasn't built with V4L2 support, but with OpenCV, we use the cv::VideoCapture class to grab the images. Notice that when images are acquired with OpenCV there is an additional conversion from cv::Mat to vpImage.

#if defined(VISP_HAVE_V4L2)
    vpV4l2Grabber g;
    std::ostringstream device;
    device << "/dev/video" << opt_device;
    g.setDevice(device.str());
    g.setScale(opt_scale); // Default value is 2 in the constructor. Turn it
    // to 1 to avoid subsampling
    g.acquire(I);
#elif ((VISP_HAVE_OPENCV_VERSION < 0x030000) && defined(HAVE_OPENCV_HIGHGUI))|| ((VISP_HAVE_OPENCV_VERSION >= 0x030000) && defined(HAVE_OPENCV_VIDEOIO))
    cv::VideoCapture cap(opt_device); // open the default camera
#if (VISP_HAVE_OPENCV_VERSION >= 0x030000)
    int width = (int)cap.get(cv::CAP_PROP_FRAME_WIDTH);
    int height = (int)cap.get(cv::CAP_PROP_FRAME_HEIGHT);
    cap.set(cv::CAP_PROP_FRAME_WIDTH, width / opt_scale);
    cap.set(cv::CAP_PROP_FRAME_HEIGHT, height / opt_scale);
#else
    int width = cap.get(CV_CAP_PROP_FRAME_WIDTH);
    int height = cap.get(CV_CAP_PROP_FRAME_HEIGHT);
    cap.set(CV_CAP_PROP_FRAME_WIDTH, width / opt_scale);
    cap.set(CV_CAP_PROP_FRAME_HEIGHT, height / opt_scale);
#endif
    if (!cap.isOpened()) { // check if we succeeded
      std::cout << "Failed to open the camera" << std::endl;
      return EXIT_FAILURE;
    }
    cv::Mat frame;
    cap >> frame; // get a new frame from camera
    vpImageConvert::convert(frame, I);
#endif

Then in the while loop, at each iteration we acquire a new image

#if defined(VISP_HAVE_V4L2)
      g.acquire(I);
      bool face_found = face_detector.detect(I);
#elif ((VISP_HAVE_OPENCV_VERSION < 0x030000) && defined(HAVE_OPENCV_HIGHGUI))|| ((VISP_HAVE_OPENCV_VERSION >= 0x030000) && defined(HAVE_OPENCV_VIDEOIO))
      cap >> frame; // get a new frame from camera
      vpImageConvert::convert(frame, I);
      bool face_found = face_detector.detect(frame); // We pass frame to avoid an internal image conversion
#endif

This new image is then given as input to the face detector.

Next tutorial

You are now ready to see the Tutorial: Object detection and localization, that illustrates the case of object detection.

Table of Contents

Introduction

Face detection in a video

Face detection from a camera

Next tutorial