Visual Servoing Platform  version 3.3.1 under development (2020-08-12)
tutorial-dnn-object-detection-live.cpp
#include <cstdlib>
#include <exception>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

#include <visp3/core/vpConfig.h>
#include <visp3/detection/vpDetectorDNN.h>
#include <visp3/gui/vpDisplayGDI.h>
#include <visp3/gui/vpDisplayOpenCV.h>
#include <visp3/gui/vpDisplayX.h>
7 
/*!
 * Live DNN object detection tutorial.
 *
 * Parses the command line, opens a camera or an image/video stream with
 * cv::VideoCapture, configures a vpDetectorDNN and, for every grabbed frame,
 * runs the detection and overlays the bounding boxes, the class label (or
 * class id) and the confidence. The loop ends when the stream is exhausted or
 * when the user clicks in the display window.
 *
 * Supported options: --device, --input, --model, --config, --width, --height,
 * --mean (three values), --scale, --swapRB, --confThresh, --nmsThresh,
 * --labels, --help / -h.
 *
 * \param argc Number of command line arguments.
 * \param argv Command line arguments.
 * \return EXIT_SUCCESS on normal termination (including --help and builds
 * without OpenCV DNN support), EXIT_FAILURE if the input cannot be opened or
 * an exception is caught.
 */
int main(int argc, const char *argv[])
{
#if (VISP_HAVE_OPENCV_VERSION >= 0x030403) && defined(VISP_HAVE_OPENCV_DNN)
  try {
    int opt_device = 0;
    std::string input = "";
    std::string model = "opencv_face_detector_uint8.pb";
    std::string config = "opencv_face_detector.pbtxt";
    int inputWidth = 300, inputHeight = 300;
    double meanR = 104.0, meanG = 177.0, meanB = 123.0;
    double scaleFactor = 1.0;
    bool swapRB = false;
    float confThresh = 0.5f;
    float nmsThresh = 0.4f;
    std::string labelFile = "";

    // Each option consumes its value(s) with ++i so that a value is never
    // re-interpreted as an option name on the next iteration.
    for (int i = 1; i < argc; i++) {
      const std::string arg(argv[i]);
      if (arg == "--device" && i + 1 < argc) {
        opt_device = atoi(argv[++i]);
      } else if (arg == "--input" && i + 1 < argc) {
        input = std::string(argv[++i]);
      } else if (arg == "--model" && i + 1 < argc) {
        model = std::string(argv[++i]);
      } else if (arg == "--config" && i + 1 < argc) {
        config = std::string(argv[++i]);
      } else if (arg == "--width" && i + 1 < argc) {
        inputWidth = atoi(argv[++i]);
      } else if (arg == "--height" && i + 1 < argc) {
        inputHeight = atoi(argv[++i]);
      } else if (arg == "--mean" && i + 3 < argc) {
        meanR = atof(argv[++i]);
        meanG = atof(argv[++i]);
        meanB = atof(argv[++i]);
      } else if (arg == "--scale" && i + 1 < argc) {
        scaleFactor = atof(argv[++i]);
      } else if (arg == "--swapRB") {
        swapRB = true;
      } else if (arg == "--confThresh" && i + 1 < argc) {
        confThresh = static_cast<float>(atof(argv[++i]));
      } else if (arg == "--nmsThresh" && i + 1 < argc) {
        nmsThresh = static_cast<float>(atof(argv[++i]));
      } else if (arg == "--labels" && i + 1 < argc) {
        labelFile = std::string(argv[++i]);
      } else if (arg == "--help" || arg == "-h") {
        std::cout << argv[0] << " --device <camera device number> --input <path to image or video>"
                                " (camera is used if input is empty) --model <path to net trained weights>"
                                " --config <path to net config file>"
                                " --width <blob width> --height <blob height>"
                                " --mean <meanR meanG meanB> --scale <scale factor>"
                                " --swapRB --confThresh <confidence threshold>"
                                " --nmsThresh <NMS threshold> --labels <path to label file>" << std::endl;
        return EXIT_SUCCESS;
      }
    }

    std::cout << "Model: " << model << std::endl;
    std::cout << "Config: " << config << std::endl;
    std::cout << "Width: " << inputWidth << std::endl;
    std::cout << "Height: " << inputHeight << std::endl;
    std::cout << "Mean: " << meanR << ", " << meanG << ", " << meanB << std::endl;
    std::cout << "Scale: " << scaleFactor << std::endl;
    std::cout << "Swap RB? " << swapRB << std::endl;
    std::cout << "Confidence threshold: " << confThresh << std::endl;
    std::cout << "NMS threshold: " << nmsThresh << std::endl;

    // The camera is used when no input path is given.
    cv::VideoCapture capture;
    if (input.empty()) {
      capture.open(opt_device);
    } else {
      capture.open(input);
    }
    // Report an unusable source explicitly instead of silently leaving the
    // acquisition loop on the first empty frame.
    if (!capture.isOpened()) {
      if (input.empty()) {
        std::cout << "Cannot open camera device " << opt_device << std::endl;
      } else {
        std::cout << "Cannot open input: " << input << std::endl;
      }
      return EXIT_FAILURE;
    }

    // NOTE(review): the declaration of I is absent from the listing; the
    // grayscale type is chosen to match the vpImage<unsigned char> signature
    // of vpDetectorDNN::detect() -- confirm against the upstream tutorial.
    vpImage<unsigned char> I;
#if defined(VISP_HAVE_X11)
    vpDisplayX d;
#elif defined(VISP_HAVE_GDI)
    vpDisplayGDI d;
#elif defined(VISP_HAVE_OPENCV)
    vpDisplayOpenCV d;
#endif

    vpDetectorDNN dnn;
    dnn.readNet(model, config);
    dnn.setInputSize(inputWidth, inputHeight);
    dnn.setMean(meanR, meanG, meanB);
    dnn.setScaleFactor(scaleFactor);
    dnn.setSwapRB(swapRB);
    dnn.setConfidenceThreshold(confThresh);
    dnn.setNMSThreshold(nmsThresh);

    // Optional class names, one per line; the line index is used as class id.
    std::vector<std::string> labels;
    if (!labelFile.empty()) {
      std::ifstream f_label(labelFile);
      if (!f_label.is_open()) {
        std::cout << "Cannot open label file: " << labelFile << " (class ids are displayed instead)" << std::endl;
      }
      std::string line;
      while (std::getline(f_label, line)) {
        labels.push_back(line);
      }
    }

    cv::Mat frame;
    while (true) {
      capture >> frame;
      if (frame.empty())
        break;

      // The display is initialized once, from the first converted frame.
      const bool first_frame = (I.getSize() == 0);
      vpImageConvert::convert(frame, I);
      if (first_frame) {
        d.init(I);
        vpDisplay::setTitle(I, "DNN object detection");
      }

      double t = vpTime::measureTimeMs();
      std::vector<vpRect> boundingBoxes;
      dnn.detect(I, boundingBoxes);
      t = vpTime::measureTimeMs() - t;

      // Draw the current frame before adding the overlays.
      vpDisplay::display(I);

      // Presumably parallel to boundingBoxes (one entry per detection) --
      // verify against vpDetectorDNN.
      std::vector<int> classIds = dnn.getDetectionClassIds();
      std::vector<float> confidences = dnn.getDetectionConfidence();
      for (size_t i = 0; i < boundingBoxes.size(); i++) {
        vpDisplay::displayRectangle(I, boundingBoxes[i], vpColor::red, false, 2);

        std::ostringstream oss;
        const int classId = classIds[i];
        // Fall back to the numeric id when no label exists for this class,
        // which avoids indexing labels out of bounds.
        if (classId >= 0 && static_cast<size_t>(classId) < labels.size())
          oss << labels[static_cast<size_t>(classId)];
        else
          oss << "class: " << classId;
        oss << " - conf: " << confidences[i];

        vpDisplay::displayText(I, static_cast<int>(boundingBoxes[i].getTop()) - 10,
                               static_cast<int>(boundingBoxes[i].getLeft()) + 10, oss.str(), vpColor::red);
      }
      std::ostringstream oss;
      oss << "Detection time: " << t << " ms";
      vpDisplay::displayText(I, 20, 20, oss.str(), vpColor::red);

      vpDisplay::flush(I);
      // Non-blocking click test: any click ends the loop.
      if (vpDisplay::getClick(I, false))
        break;
    }

  } catch (const vpException &e) {
    std::cout << e.what() << std::endl;
    return EXIT_FAILURE;
  } catch (const std::exception &e) {
    // Errors raised outside ViSP (e.g. by OpenCV while loading the network).
    std::cout << e.what() << std::endl;
    return EXIT_FAILURE;
  }
#else
  (void)argc;
  (void)argv;
#endif
  return EXIT_SUCCESS;
}
void setMean(double meanR, double meanG, double meanB)
static bool getClick(const vpImage< unsigned char > &I, bool blocking=true)
static void convert(const vpImage< unsigned char > &src, vpImage< vpRGBa > &dest)
Display for Windows using GDI (available on any Windows 32 platform).
Definition: vpDisplayGDI.h:128
static void displayText(const vpImage< unsigned char > &I, const vpImagePoint &ip, const std::string &s, const vpColor &color)
Use the X11 console to display images on unix-like OS. Thus to enable this class X11 should be instal...
Definition: vpDisplayX.h:150
Error that can be emitted by ViSP classes.
Definition: vpException.h:71
void init(vpImage< unsigned char > &I, int winx=-1, int winy=-1, const std::string &title="")
std::vector< int > getDetectionClassIds(bool afterNMS=true) const
static void flush(const vpImage< unsigned char > &I)
VISP_EXPORT double measureTimeMs()
Definition: vpTime.cpp:126
static const vpColor red
Definition: vpColor.h:217
std::vector< float > getDetectionConfidence(bool afterNMS=true) const
void readNet(const std::string &model, const std::string &config="", const std::string &framework="")
void setSwapRB(bool swapRB)
static void display(const vpImage< unsigned char > &I)
The vpDisplayOpenCV allows to display image using the OpenCV library. Thus to enable this class OpenC...
static void displayRectangle(const vpImage< unsigned char > &I, const vpImagePoint &topLeft, unsigned int width, unsigned int height, const vpColor &color, bool fill=false, unsigned int thickness=1)
void setScaleFactor(double scaleFactor)
const char * what() const
void setNMSThreshold(float nmsThreshold)
void setInputSize(int width, int height)
void setConfidenceThreshold(float confThreshold)
unsigned int getSize() const
Definition: vpImage.h:227
static void setTitle(const vpImage< unsigned char > &I, const std::string &windowtitle)
virtual bool detect(const vpImage< unsigned char > &I)