doxygen/visp-daily/tutorial-dnn-object-detection-live_8cpp_source.html

 #include <visp3/core/vpConfig.h>

 #include <visp3/core/vpIoTools.h>

 #include <visp3/detection/vpDetectorDNNOpenCV.h>

 #include <visp3/gui/vpDisplayGDI.h>

 #include <visp3/gui/vpDisplayOpenCV.h>

 #include <visp3/gui/vpDisplayX.h>


 #if defined(HAVE_OPENCV_VIDEOIO)

 #include <opencv2/videoio.hpp>

 #endif


 #ifdef VISP_HAVE_NLOHMANN_JSON

 #include <nlohmann/json.hpp>

 using json = nlohmann::json;

 #endif


 typedef enum

 {

   DETECTION_CONTAINER_MAP = 0,

   DETECTION_CONTAINER_VECTOR = 1,

   DETECTION_CONTAINER_BOTH = 2,

   DETECTION_CONTAINER_COUNT = 3

 } ChosenDetectionContainer;


 std::string chosenDetectionContainerToString(const ChosenDetectionContainer &choice)

 {

   switch (choice) {

   case DETECTION_CONTAINER_MAP:

     return "map";

   case DETECTION_CONTAINER_VECTOR:

     return "vector";

   case DETECTION_CONTAINER_BOTH:

     return "both";

   default:

     break;

   }

   return "unknown";

 }


 ChosenDetectionContainer chosenDetectionContainerFromString(const std::string &choiceStr)

 {

   ChosenDetectionContainer choice(DETECTION_CONTAINER_COUNT);

   bool hasFoundMatch = false;

   for (unsigned int i = 0; i < DETECTION_CONTAINER_COUNT && !hasFoundMatch; i++) {

     ChosenDetectionContainer candidate = (ChosenDetectionContainer)i;

     hasFoundMatch = (chosenDetectionContainerToString(candidate) == vpIoTools::toLowerCase(choiceStr));

     if (hasFoundMatch) {

       choice = candidate;

     }

   }

   return choice;

 }


 std::string getAvailableDetectionContainer()

 {

   std::string availableContainers("< ");

   for (unsigned int i = 0; i < DETECTION_CONTAINER_COUNT - 1; i++) {

     std::string name = chosenDetectionContainerToString((ChosenDetectionContainer)i);

     availableContainers += name + " , ";

   }

   availableContainers +=

     chosenDetectionContainerToString((ChosenDetectionContainer)(DETECTION_CONTAINER_COUNT - 1)) + " >";

   return availableContainers;

 }


 int main(int argc, const char *argv[])

 {

   // Check if std:c++17 or higher

 #if defined(HAVE_OPENCV_DNN) && defined(HAVE_OPENCV_VIDEOIO) && \

     ((__cplusplus >= 201703L) || (defined(_MSVC_LANG) && (_MSVC_LANG >= 201703L)))

   try {

     std::string opt_device("0");

     std::string opt_dnn_model = "opencv_face_detector_uint8.pb";

     std::string opt_dnn_config = "opencv_face_detector.pbtxt";

     std::string opt_dnn_framework = "none";

     std::string opt_dnn_label_file = "";

     vpDetectorDNNOpenCV::DNNResultsParsingType opt_dnn_type = vpDetectorDNNOpenCV::RESNET_10;

     int opt_dnn_width = 300, opt_dnn_height = 300;

     double opt_dnn_meanR = 104.0, opt_dnn_meanG = 177.0, opt_dnn_meanB = 123.0;

     double opt_dnn_scale_factor = 1.0;

     bool opt_dnn_swapRB = false;

     bool opt_step_by_step = false;

     float opt_dnn_confThresh = 0.5f;

     float opt_dnn_nmsThresh = 0.4f;

     double opt_dnn_filterThresh = 0.25;

     ChosenDetectionContainer opt_dnn_containerType = DETECTION_CONTAINER_MAP;

     bool opt_verbose = false;

     std::string opt_input_json = "";

     std::string opt_output_json = "";


     for (int i = 1; i < argc; i++) {

       if (std::string(argv[i]) == "--device" && i + 1 < argc) {

         opt_device = std::string(argv[++i]);

       }

       else if (std::string(argv[i]) == "--step-by-step") {

         opt_step_by_step = true;

       }

       else if (std::string(argv[i]) == "--model" && i + 1 < argc) {

         opt_dnn_model = std::string(argv[++i]);

       }

       else if (std::string(argv[i]) == "--type" && i + 1 < argc) {

         opt_dnn_type = vpDetectorDNNOpenCV::dnnResultsParsingTypeFromString(std::string(argv[++i]));

       }

       else if (std::string(argv[i]) == "--config" && i + 1 < argc) {

         opt_dnn_config = std::string(argv[++i]);

         if (opt_dnn_config.find("none") != std::string::npos) {

           opt_dnn_config = std::string();

         }

       }

       else if (std::string(argv[i]) == "--framework" && i + 1 < argc) {

         opt_dnn_framework = std::string(argv[++i]);

         if (opt_dnn_framework.find("none") != std::string::npos) {

           opt_dnn_framework = std::string();

         }

       }

       else if (std::string(argv[i]) == "--width" && i + 1 < argc) {

         opt_dnn_width = atoi(argv[++i]);

       }

       else if (std::string(argv[i]) == "--height" && i + 1 < argc) {

         opt_dnn_height = atoi(argv[++i]);

       }

       else if (std::string(argv[i]) == "--mean" && i + 3 < argc) {

         opt_dnn_meanR = atof(argv[++i]);

         opt_dnn_meanG = atof(argv[++i]);

         opt_dnn_meanB = atof(argv[++i]);

       }

       else if (std::string(argv[i]) == "--scale" && i + 1 < argc) {

         opt_dnn_scale_factor = atof(argv[++i]);

       }

       else if (std::string(argv[i]) == "--swapRB") {

         opt_dnn_swapRB = true;

       }

       else if (std::string(argv[i]) == "--confThresh" && i + 1 < argc) {

         opt_dnn_confThresh = (float)atof(argv[++i]);

       }

       else if (std::string(argv[i]) == "--nmsThresh" && i + 1 < argc) {

         opt_dnn_nmsThresh = (float)atof(argv[++i]);

       }

       else if (std::string(argv[i]) == "--filterThresh" && i + 1 < argc) {

         opt_dnn_filterThresh = atof(argv[++i]);

       }

       else if (std::string(argv[i]) == "--labels" && i + 1 < argc) {

         opt_dnn_label_file = std::string(argv[++i]);

       }

       else if (std::string(argv[i]) == "--container" && i + 1 < argc) {

         opt_dnn_containerType = chosenDetectionContainerFromString(std::string(argv[++i]));

       }

       else if (std::string(argv[i]) == "--input-json" && i + 1 < argc) {

         opt_input_json = std::string(std::string(argv[++i]));

       }

       else if (std::string(argv[i]) == "--output-json" && i + 1 < argc) {

         opt_output_json = std::string(std::string(argv[++i]));

       }

       else if (std::string(argv[i]) == "--verbose" || std::string(argv[i]) == "-v") {

         opt_verbose = true;

       }

       else if (std::string(argv[i]) == "--help" || std::string(argv[i]) == "-h") {

         std::cout << "\nSYNOPSIS " << std::endl

           << argv[0] << " [--device <video>]"

           << " [--model <dnn weights file>]"

           << " [--type <dnn type>]"

           << " [--config <dnn config file]"

           << " [--framework <name>]"

           << " [--width <blob width>] [--height <blob height>]"

           << " [--mean <meanR meanG meanB>]"

           << " [--scale <scale factor>]"

           << " [--swapRB]"

           << " [--confThresh <threshold>]"

           << " [--nmsThresh <threshold>]"

           << " [--filterThresh <threshold>]"

           << " [--labels <file>]"

           << " [--container <type>]"

           << " [--input-json <path_to_input_json>]"

           << " [--output-json <path_to_output_json>]"

           << " [--step-by-step]"

           << " [--verbose, -v]"

           << " [--help, -h]" << std::endl;

         std::cout << "\nOPTIONS " << std::endl

           << "  --device <video>" << std::endl

           << "      Camera device number or video name used to stream images." << std::endl

           << "      To use the first camera found on the bus set 0. On Ubuntu setting 0" << std::endl

           << "      will use /dev/video0 device. To use a video simply put the name of" << std::endl

           << "      the video, like \"path/my-video.mp4\" or \"path/image-%04d.png\"" << std::endl

           << "      if your video is a sequence of images." << std::endl

           << "      Default: " << opt_device << std::endl

           << std::endl

           << "  --model <dnn weights file>" << std::endl

           << "      Path to dnn network trained weights." << std::endl

           << "      Default: " << opt_dnn_model << std::endl

           << std::endl

           << "  --type <dnn type>" << std::endl

           << "      Type of dnn network. Admissible values are in " << std::endl

           << "      " << vpDetectorDNNOpenCV::getAvailableDnnResultsParsingTypes() << std::endl

           << "      Default: " << opt_dnn_type << std::endl

           << std::endl

           << "  --config <dnn config file>" << std::endl

           << "      Path to dnn network config file or \"none\" not to use one. " << std::endl

           << "      Default: " << opt_dnn_config << std::endl

           << std::endl

           << "  --framework <name>" << std::endl

           << "      Framework name or \"none\" not to specify one. " << std::endl

           << "      Default: " << opt_dnn_framework << std::endl

           << std::endl

           << "  --width <blob width>" << std::endl

           << "      Input images will be resized to this width. " << std::endl

           << "      Default: " << opt_dnn_width << std::endl

           << std::endl

           << "  --height <blob height>" << std::endl

           << "      Input images will be resized to this height. " << std::endl

           << "      Default: " << opt_dnn_height << std::endl

           << std::endl

           << "  --mean <meanR meanG meanB>" << std::endl

           << "      Mean RGB subtraction values. " << std::endl

           << "      Default: " << opt_dnn_meanR << " " << opt_dnn_meanG << " " << opt_dnn_meanB << std::endl

           << std::endl

           << "  --scale <scale factor>" << std::endl

           << "      Scale factor used to normalize the range of pixel values. " << std::endl

           << "      Default: " << opt_dnn_scale_factor << std::endl

           << std::endl

           << "  --swapRB" << std::endl

           << "      When used this option allows to swap Red and Blue channels. " << std::endl

           << std::endl

           << "  --confThresh <threshold>" << std::endl

           << "      Confidence threshold. " << std::endl

           << "      Default: " << opt_dnn_confThresh << std::endl

           << std::endl

           << "  --nmsThresh <threshold>" << std::endl

           << "      Non maximum suppression threshold. " << std::endl

           << "      Default: " << opt_dnn_nmsThresh << std::endl

           << std::endl

           << "  --filterThresh <threshold >" << std::endl

           << "      Filter threshold. Set 0. to disable." << std::endl

           << "      Default: " << opt_dnn_filterThresh << std::endl

           << std::endl

           << "  --labels <file>" << std::endl

           << "      Path to label file either in txt or yaml format. Keep empty if unknown." << std::endl

           << "      Default: \"" << opt_dnn_label_file << "\"" << std::endl

           << std::endl

           << "  --container <type>" << std::endl

           << "      Container type in " << getAvailableDetectionContainer() << std::endl

           << "      Default: " << chosenDetectionContainerToString(opt_dnn_containerType) << std::endl

           << std::endl

           << "  --input-json <path_to_input_json>" << std::endl

           << "      Input JSON file used to configure the DNN. If set, the other arguments will be used to override the values set in the json file." << std::endl

           << "      Default: empty" << std::endl

           << std::endl

           << "  --output-json <type>" << std::endl

           << "      Output JSON file where will be saved the DNN configuration. If empty, does not save the configuration." << std::endl

           << "      Default: empty" << std::endl

           << std::endl

           << "  --step-by-step" << std::endl

           << "      Enable step by step mode, waiting for a user click to process next image." << std::endl

           << std::endl

           << "  --verbose, -v" << std::endl

           << "      Enable verbose mode." << std::endl

           << std::endl

           << "  --help, -h" << std::endl

           << "      Display this helper message." << std::endl

           << std::endl;

         return EXIT_SUCCESS;

       }

     }


     std::cout << "Video device         : " << opt_device << std::endl;

     std::cout << "Label file (optional): " << (opt_dnn_label_file.empty() ? "None" : opt_dnn_label_file) << std::endl;


     cv::VideoCapture capture;

     bool hasCaptureOpeningSucceeded;

     if (vpMath::isNumber(opt_device)) {

       hasCaptureOpeningSucceeded = capture.open(std::atoi(opt_device.c_str()));

     }

     else {

       hasCaptureOpeningSucceeded = capture.open(opt_device);

     }

     if (!hasCaptureOpeningSucceeded) {

       std::cout << "Capture from camera: " << opt_device << " didn't work" << std::endl;

       return EXIT_FAILURE;

     }


     vpImage<vpRGBa> I;

 #if defined(VISP_HAVE_X11)

     vpDisplayX d;

 #elif defined(VISP_HAVE_GDI)

     vpDisplayGDI d;

 #elif defined(HAVE_OPENCV_HIGHGUI)

     vpDisplayOpenCV d;

 #endif

     d.setDownScalingFactor(vpDisplay::SCALE_AUTO);


     if (!opt_dnn_label_file.empty() && !vpIoTools::checkFilename(opt_dnn_label_file)) {

       throw(vpException(vpException::fatalError,

                         "The file containing the classes labels \"" + opt_dnn_label_file + "\" does not exist !"));

     }


     vpDetectorDNNOpenCV dnn;

 #ifdef VISP_HAVE_NLOHMANN_JSON

     if (!opt_input_json.empty()) {

       dnn.initFromJSON(opt_input_json);

     }

 #else

     if (!opt_input_json.empty()) {

       std::cerr << "Error: NLOHMANN JSON library is not installed, please install it following ViSP documentation to configure the vpDetectorDNNOpenCV from a JSON file." << std::endl;

       return EXIT_FAILURE;

     }

 #endif

     else {

       vpDetectorDNNOpenCV::NetConfig netConfig(opt_dnn_confThresh, opt_dnn_nmsThresh, opt_dnn_label_file

         , cv::Size(opt_dnn_width, opt_dnn_height), opt_dnn_filterThresh, cv::Scalar(opt_dnn_meanR, opt_dnn_meanG, opt_dnn_meanB)

         , opt_dnn_scale_factor, opt_dnn_swapRB, opt_dnn_type

         , opt_dnn_model, opt_dnn_config, opt_dnn_framework

       );

       dnn.setNetConfig(netConfig);

     }


     std::cout << dnn.getNetConfig() << std::endl;


 #ifdef VISP_HAVE_NLOHMANN_JSON

     if (!opt_output_json.empty()) {

       dnn.saveConfigurationInJSON(opt_output_json);

     }

 #else

     if (!opt_output_json.empty()) {

       std::cerr << "Error: NLOHMANN JSON library is not installed, please install it following ViSP documentation to save the configuration in a JSON file." << std::endl;

     }

 #endif


     cv::Mat frame;

     while (true) {

       capture >> frame;

       if (frame.empty())

         break;


       if (I.getSize() == 0) {

         vpImageConvert::convert(frame, I);

         d.init(I);

         vpDisplay::setTitle(I, "DNN object detection");

         if (opt_verbose) {

           std::cout << "Process image: " << I.getWidth() << " x " << I.getHeight() << std::endl;

         }

       }

       else {

         vpImageConvert::convert(frame, I);

       }

       if (opt_verbose) {

         std::cout << "Process new image" << std::endl;

       }


       vpDisplay::display(I);


       if (opt_dnn_containerType == DETECTION_CONTAINER_MAP || opt_dnn_containerType == DETECTION_CONTAINER_BOTH) {

         double t = vpTime::measureTimeMs();

         std::map<std::string, std::vector<vpDetectorDNNOpenCV::DetectedFeatures2D> > detections;

         dnn.detect(frame, detections);

         t = vpTime::measureTimeMs() - t;


         for (auto key_val : detections) {

           if (opt_verbose) {

             std::cout << "  Class name      : " << key_val.first << std::endl;

           }

           for (vpDetectorDNNOpenCV::DetectedFeatures2D detection : key_val.second) {

             if (opt_verbose) {

               std::cout << "  Bounding box    : " << detection.getBoundingBox() << std::endl;

               std::cout << "  Class Id        : " << detection.getClassId() << std::endl;

               if (detection.getClassName())

                 std::cout << "  Class name      : " << detection.getClassName().value() << std::endl;

               std::cout << "  Confidence score: " << detection.getConfidenceScore() << std::endl;

             }

             detection.display(I);

           }

         }


         std::ostringstream oss_map;

         oss_map << "Detection time (map): " << t << " ms";

         if (opt_verbose) {

           // Displaying timing result in console

           std::cout << "  " << oss_map.str() << std::endl;

         }

         // Displaying timing result on the image

         vpDisplay::displayText(I, 60, 20, oss_map.str(), vpColor::red);

       }


       if (opt_dnn_containerType == DETECTION_CONTAINER_VECTOR || opt_dnn_containerType == DETECTION_CONTAINER_BOTH) {

         double t_vector = vpTime::measureTimeMs();

         std::vector<vpDetectorDNNOpenCV::DetectedFeatures2D> detections_vec;

         dnn.detect(frame, detections_vec);

         t_vector = vpTime::measureTimeMs() - t_vector;


         for (auto detection : detections_vec) {

           if (opt_verbose) {

             std::cout << "  Bounding box    : " << detection.getBoundingBox() << std::endl;

             std::cout << "  Class Id        : " << detection.getClassId() << std::endl;

             std::optional<std::string> classname_opt = detection.getClassName();

             std::cout << "  Class name      : " << (classname_opt ? *classname_opt : "Not known") << std::endl;

             std::cout << "  Confidence score: " << detection.getConfidenceScore() << std::endl;

           }

           detection.display(I);

         }


         std::ostringstream oss_vec;

         oss_vec << "Detection time (vector): " << t_vector << " ms";

         if (opt_verbose) {

           // Displaying timing result in console

           std::cout << "  " << oss_vec.str() << std::endl;

         }

         // Displaying timing result on the image

         vpDisplay::displayText(I, 80, 20, oss_vec.str(), vpColor::red);

       }


       // // UI display

       if (opt_step_by_step) {

         vpDisplay::displayText(I, 20, 20, "Left click to display next image", vpColor::red);

       }

       vpDisplay::displayText(I, 40, 20, "Right click to quit", vpColor::red);


       vpDisplay::flush(I);

       vpMouseButton::vpMouseButtonType button;


       if (vpDisplay::getClick(I, button, opt_step_by_step)) {

         if (button == vpMouseButton::button1) {

           // Left click => next image

           continue;

         }

         else if (button == vpMouseButton::button3) {

           // Right click => stop the program

           break;

         }

       }

     }


   }

   catch (const vpException &e) {

     std::cout << e.what() << std::endl;

   }

 #else

   (void)argc;

   (void)argv;

 #endif

   return EXIT_SUCCESS;

 }

vpColor::red
static const vpColor red
Definition: vpColor.h:211

vpDisplayGDI
Display for windows using GDI (available on any windows 32 platform).
Definition: vpDisplayGDI.h:128

vpDisplayOpenCV
The vpDisplayOpenCV allows to display image using the OpenCV library. Thus to enable this class OpenC...
Definition: vpDisplayOpenCV.h:136

vpDisplayX
Use the X11 console to display images on unix-like OS. Thus to enable this class X11 should be instal...
Definition: vpDisplayX.h:128

vpDisplayX::init
void init(vpImage< unsigned char > &I, int win_x=-1, int win_y=-1, const std::string &win_title="") vp_override
Definition: vpDisplayX.cpp:1798

vpDisplay::getClick
static bool getClick(const vpImage< unsigned char > &I, bool blocking=true)
Definition: vpDisplay_uchar.cpp:850

vpDisplay::setDownScalingFactor
virtual void setDownScalingFactor(unsigned int scale)
Definition: vpDisplay.cpp:227

vpDisplay::display
static void display(const vpImage< unsigned char > &I)
Definition: vpDisplay_uchar.cpp:825

vpDisplay::setTitle
static void setTitle(const vpImage< unsigned char > &I, const std::string &windowtitle)
Definition: vpDisplay_uchar.cpp:1308

vpDisplay::flush
static void flush(const vpImage< unsigned char > &I)
Definition: vpDisplay_uchar.cpp:801

vpDisplay::SCALE_AUTO
@ SCALE_AUTO
Definition: vpDisplay.h:179

vpDisplay::displayText
static void displayText(const vpImage< unsigned char > &I, const vpImagePoint &ip, const std::string &s, const vpColor &color)
Definition: vpDisplay_uchar.cpp:749

vpException
error that can be emitted by ViSP classes.
Definition: vpException.h:59

vpException::fatalError
@ fatalError
Fatal error.
Definition: vpException.h:84

vpException::what
const char * what() const
Definition: vpException.cpp:70

vpImageConvert::convert
static void convert(const vpImage< unsigned char > &src, vpImage< vpRGBa > &dest)
Definition: vpImageConvert.cpp:72

vpImage< vpRGBa >

vpImage::getWidth
unsigned int getWidth() const
Definition: vpImage.h:245

vpImage::getSize
unsigned int getSize() const
Definition: vpImage.h:224

vpImage::getHeight
unsigned int getHeight() const
Definition: vpImage.h:184

vpIoTools::toLowerCase
static std::string toLowerCase(const std::string &input)
Return a lower-case version of the string input . Numbers and special characters stay the same.
Definition: vpIoTools.cpp:2111

vpIoTools::checkFilename
static bool checkFilename(const std::string &filename)
Definition: vpIoTools.cpp:1213

vpMath::isNumber
static bool isNumber(const std::string &str)
Definition: vpMath.cpp:215

vpMouseButton::vpMouseButtonType
vpMouseButtonType
Definition: vpMouseButton.h:47

vpMouseButton::button1
@ button1
Definition: vpMouseButton.h:48

vpMouseButton::button3
@ button3
Definition: vpMouseButton.h:50

vpTime::measureTimeMs
VISP_EXPORT double measureTimeMs()