#include <iostream>
#include <sstream>

#include <visp3/core/vpConfig.h>
// This tutorial requires the OpenCV dnn and videoio modules and a C++17 (or higher) compiler
#if defined(HAVE_OPENCV_DNN) && defined(HAVE_OPENCV_VIDEOIO) && \
  ((__cplusplus >= 201703L) || (defined(_MSVC_LANG) && (_MSVC_LANG >= 201703L)))
#include <visp3/core/vpImageConvert.h>
#include <visp3/core/vpIoTools.h>
#include <visp3/core/vpTime.h>
#include <visp3/detection/vpDetectorDNNOpenCV.h>
#include <visp3/gui/vpDisplayGDI.h>
#include <visp3/gui/vpDisplayOpenCV.h>
#include <visp3/gui/vpDisplayX.h>

#include <opencv2/imgproc.hpp>
#include <opencv2/videoio.hpp>
#ifdef VISP_HAVE_NLOHMANN_JSON
#include VISP_NLOHMANN_JSON(json.hpp)
using json = nlohmann::json;
#endif

#ifdef ENABLE_VISP_NAMESPACE
using namespace VISP_NAMESPACE_NAME;
#endif
typedef enum
{
  DETECTION_CONTAINER_MAP = 0,
  DETECTION_CONTAINER_VECTOR = 1,
  DETECTION_CONTAINER_BOTH = 2,
  DETECTION_CONTAINER_COUNT = 3
} ChosenDetectionContainer;
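
// Helpers to convert between the detection container enum and the string
// representation expected by the --container command line option.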
std::string chosenDetectionContainerToString(const ChosenDetectionContainer &choice)
{
  switch (choice) {
  case DETECTION_CONTAINER_MAP:
    return "map";
  case DETECTION_CONTAINER_VECTOR:
    return "vector";
  case DETECTION_CONTAINER_BOTH:
    return "both";
  default:
    break;
  }
  return "unknown";
}
ChosenDetectionContainer chosenDetectionContainerFromString(const std::string &choiceStr)
{
  ChosenDetectionContainer choice(DETECTION_CONTAINER_COUNT);
  bool hasFoundMatch = false;
  for (unsigned int i = 0; i < DETECTION_CONTAINER_COUNT && !hasFoundMatch; i++) {
    ChosenDetectionContainer candidate = (ChosenDetectionContainer)i;
    if (chosenDetectionContainerToString(candidate) == vpIoTools::toLowerCase(choiceStr)) {
      choice = candidate;
      hasFoundMatch = true;
    }
  }
  return choice;
}
std::string getAvailableDetectionContainer()
{
  std::string availableContainers("< ");
  for (unsigned int i = 0; i < DETECTION_CONTAINER_COUNT - 1; i++) {
    std::string name = chosenDetectionContainerToString((ChosenDetectionContainer)i);
    availableContainers += name + " , ";
  }
  availableContainers +=
    chosenDetectionContainerToString((ChosenDetectionContainer)(DETECTION_CONTAINER_COUNT - 1)) + " >";
  return availableContainers;
}
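
// Parse the command line, configure the vpDetectorDNNOpenCV detector and run
// the detection loop on the images grabbed from the chosen device.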
int main(int argc, const char *argv[])
{
  try {
    std::string opt_device("0");
    std::string opt_dnn_model = "opencv_face_detector_uint8.pb";
    std::string opt_dnn_config = "opencv_face_detector.pbtxt";
    std::string opt_dnn_framework = "none";
    std::string opt_dnn_label_file = "";
    vpDetectorDNNOpenCV::DNNResultsParsingType opt_dnn_type = vpDetectorDNNOpenCV::RESNET_10;
    int opt_dnn_width = 300, opt_dnn_height = 300;
    double opt_dnn_meanR = 104.0, opt_dnn_meanG = 177.0, opt_dnn_meanB = 123.0;
    double opt_dnn_scale_factor = 1.0;
    bool opt_dnn_swapRB = false;
    bool opt_step_by_step = false;
    float opt_dnn_confThresh = 0.5f;
    float opt_dnn_nmsThresh = 0.4f;
    double opt_dnn_filterThresh = 0.25;
    ChosenDetectionContainer opt_dnn_containerType = DETECTION_CONTAINER_MAP;
    bool opt_verbose = false;
    std::string opt_input_json = "";
    std::string opt_output_json = "";
    for (int i = 1; i < argc; i++) {
      if (std::string(argv[i]) == "--device" && i + 1 < argc) {
        opt_device = std::string(argv[++i]);
      }
      else if (std::string(argv[i]) == "--step-by-step") {
        opt_step_by_step = true;
      }
      else if (std::string(argv[i]) == "--model" && i + 1 < argc) {
        opt_dnn_model = std::string(argv[++i]);
      }
      else if (std::string(argv[i]) == "--type" && i + 1 < argc) {
        opt_dnn_type = vpDetectorDNNOpenCV::dnnResultsParsingTypeFromString(std::string(argv[++i]));
      }
      else if (std::string(argv[i]) == "--config" && i + 1 < argc) {
        opt_dnn_config = std::string(argv[++i]);
        if (opt_dnn_config.find("none") != std::string::npos) {
          opt_dnn_config = std::string();
        }
      }
      else if (std::string(argv[i]) == "--framework" && i + 1 < argc) {
        opt_dnn_framework = std::string(argv[++i]);
        if (opt_dnn_framework.find("none") != std::string::npos) {
          opt_dnn_framework = std::string();
        }
      }
      else if (std::string(argv[i]) == "--width" && i + 1 < argc) {
        opt_dnn_width = atoi(argv[++i]);
      }
      else if (std::string(argv[i]) == "--height" && i + 1 < argc) {
        opt_dnn_height = atoi(argv[++i]);
      }
      else if (std::string(argv[i]) == "--mean" && i + 3 < argc) {
        opt_dnn_meanR = atof(argv[++i]);
        opt_dnn_meanG = atof(argv[++i]);
        opt_dnn_meanB = atof(argv[++i]);
      }
      else if (std::string(argv[i]) == "--scale" && i + 1 < argc) {
        opt_dnn_scale_factor = atof(argv[++i]);
      }
      else if (std::string(argv[i]) == "--swapRB") {
        opt_dnn_swapRB = true;
      }
      else if (std::string(argv[i]) == "--confThresh" && i + 1 < argc) {
        opt_dnn_confThresh = (float)atof(argv[++i]);
      }
      else if (std::string(argv[i]) == "--nmsThresh" && i + 1 < argc) {
        opt_dnn_nmsThresh = (float)atof(argv[++i]);
      }
      else if (std::string(argv[i]) == "--filterThresh" && i + 1 < argc) {
        opt_dnn_filterThresh = atof(argv[++i]);
      }
      else if (std::string(argv[i]) == "--labels" && i + 1 < argc) {
        opt_dnn_label_file = std::string(argv[++i]);
      }
      else if (std::string(argv[i]) == "--container" && i + 1 < argc) {
        opt_dnn_containerType = chosenDetectionContainerFromString(std::string(argv[++i]));
      }
      else if (std::string(argv[i]) == "--input-json" && i + 1 < argc) {
        opt_input_json = std::string(argv[++i]);
      }
      else if (std::string(argv[i]) == "--output-json" && i + 1 < argc) {
        opt_output_json = std::string(argv[++i]);
      }
      else if (std::string(argv[i]) == "--verbose" || std::string(argv[i]) == "-v") {
        opt_verbose = true;
      }
      else if (std::string(argv[i]) == "--help" || std::string(argv[i]) == "-h") {
        std::cout << "\nSYNOPSIS " << std::endl
          << argv[0] << " [--device <video>]"
          << " [--model <dnn weights file>]"
          << " [--type <dnn type>]"
          << " [--config <dnn config file>]"
          << " [--framework <name>]"
          << " [--width <blob width>] [--height <blob height>]"
          << " [--mean <meanR meanG meanB>]"
          << " [--scale <scale factor>]"
          << " [--swapRB]"
          << " [--confThresh <threshold>]"
          << " [--nmsThresh <threshold>]"
          << " [--filterThresh <threshold>]"
          << " [--labels <file>]"
          << " [--container <type>]"
          << " [--input-json <path_to_input_json>]"
          << " [--output-json <path_to_output_json>]"
          << " [--step-by-step]"
          << " [--verbose, -v]"
          << " [--help, -h]" << std::endl;
        std::cout << "\nOPTIONS " << std::endl
          << "  --device <video>" << std::endl
          << "    Camera device number or video name used to stream images." << std::endl
          << "    To use the first camera found on the bus set 0. On Ubuntu setting 0" << std::endl
          << "    will use /dev/video0 device. To use a video simply put the name of" << std::endl
          << "    the video, like \"path/my-video.mp4\" or \"path/image-%04d.png\"" << std::endl
          << "    if your video is a sequence of images." << std::endl
          << "    Default: " << opt_device << std::endl
          << std::endl
          << "  --model <dnn weights file>" << std::endl
          << "    Path to dnn network trained weights." << std::endl
          << "    Default: " << opt_dnn_model << std::endl
          << std::endl
          << "  --type <dnn type>" << std::endl
          << "    Type of dnn network. Admissible values are in " << std::endl
          << "    " << vpDetectorDNNOpenCV::getAvailableDnnResultsParsingTypes() << std::endl
          << "    Default: " << opt_dnn_type << std::endl
          << std::endl
          << "  --config <dnn config file>" << std::endl
          << "    Path to dnn network config file or \"none\" not to use one." << std::endl
          << "    Default: " << opt_dnn_config << std::endl
          << std::endl
          << "  --framework <name>" << std::endl
          << "    Framework name or \"none\" not to specify one." << std::endl
          << "    Default: " << opt_dnn_framework << std::endl
          << std::endl
          << "  --width <blob width>" << std::endl
          << "    Input images will be resized to this width." << std::endl
          << "    Default: " << opt_dnn_width << std::endl
          << std::endl
          << "  --height <blob height>" << std::endl
          << "    Input images will be resized to this height." << std::endl
          << "    Default: " << opt_dnn_height << std::endl
          << std::endl
          << "  --mean <meanR meanG meanB>" << std::endl
          << "    Mean RGB subtraction values." << std::endl
          << "    Default: " << opt_dnn_meanR << " " << opt_dnn_meanG << " " << opt_dnn_meanB << std::endl
          << std::endl
          << "  --scale <scale factor>" << std::endl
          << "    Scale factor used to normalize the range of pixel values." << std::endl
          << "    Default: " << opt_dnn_scale_factor << std::endl
          << std::endl
          << "  --swapRB" << std::endl
          << "    When used, swap the Red and Blue channels." << std::endl
          << std::endl
          << "  --confThresh <threshold>" << std::endl
          << "    Confidence threshold." << std::endl
          << "    Default: " << opt_dnn_confThresh << std::endl
          << std::endl
          << "  --nmsThresh <threshold>" << std::endl
          << "    Non maximum suppression threshold." << std::endl
          << "    Default: " << opt_dnn_nmsThresh << std::endl
          << std::endl
          << "  --filterThresh <threshold>" << std::endl
          << "    Filter threshold. Set to 0 to disable." << std::endl
          << "    Default: " << opt_dnn_filterThresh << std::endl
          << std::endl
          << "  --labels <file>" << std::endl
          << "    Path to label file either in txt or yaml format. Keep empty if unknown." << std::endl
          << "    Default: \"" << opt_dnn_label_file << "\"" << std::endl
          << std::endl
          << "  --container <type>" << std::endl
          << "    Container type in " << getAvailableDetectionContainer() << std::endl
          << "    Default: " << chosenDetectionContainerToString(opt_dnn_containerType) << std::endl
          << std::endl
          << "  --input-json <path_to_input_json>" << std::endl
          << "    Input JSON file used to configure the DNN. If set, the other arguments override the values read from the JSON file." << std::endl
          << "    Default: empty" << std::endl
          << std::endl
          << "  --output-json <path_to_output_json>" << std::endl
          << "    Output JSON file where the DNN configuration will be saved. If empty, the configuration is not saved." << std::endl
          << "    Default: empty" << std::endl
          << std::endl
          << "  --step-by-step" << std::endl
          << "    Enable step by step mode, waiting for a user click to process the next image." << std::endl
          << std::endl
          << "  --verbose, -v" << std::endl
          << "    Enable verbose mode." << std::endl
          << std::endl
          << "  --help, -h" << std::endl
          << "    Display this helper message." << std::endl
          << std::endl;
        return EXIT_SUCCESS;
      }
    }
    std::cout << "Video device         : " << opt_device << std::endl;
    std::cout << "Label file (optional): " << (opt_dnn_label_file.empty() ? "None" : opt_dnn_label_file) << std::endl;
    cv::VideoCapture capture;
    bool hasCaptureOpeningSucceeded;
    bool isDeviceIndex = !opt_device.empty() && (opt_device.find_first_not_of("0123456789") == std::string::npos);
    if (isDeviceIndex) {
      hasCaptureOpeningSucceeded = capture.open(std::atoi(opt_device.c_str()));
    }
    else {
      hasCaptureOpeningSucceeded = capture.open(opt_device);
    }
    if (!hasCaptureOpeningSucceeded) {
      std::cout << "Capture from camera: " << opt_device << " didn't work" << std::endl;
      return EXIT_FAILURE;
    }
    vpImage<vpRGBa> I;
#if defined(VISP_HAVE_X11)
    vpDisplayX d;
#elif defined(VISP_HAVE_GDI)
    vpDisplayGDI d;
#elif defined(HAVE_OPENCV_HIGHGUI)
    vpDisplayOpenCV d;
#endif

    if (!opt_dnn_label_file.empty() && !vpIoTools::checkFilename(opt_dnn_label_file)) {
      throw(vpException(vpException::fatalError,
                        "The file containing the classes labels \"" + opt_dnn_label_file + "\" does not exist !"));
    }
    vpDetectorDNNOpenCV dnn;
#ifdef VISP_HAVE_NLOHMANN_JSON
    if (!opt_input_json.empty()) {
      dnn.initFromJSON(opt_input_json);
    }
#else
    if (!opt_input_json.empty()) {
      std::cerr << "Error: NLOHMANN JSON library is not installed, please install it following ViSP documentation to configure the vpDetectorDNNOpenCV from a JSON file." << std::endl;
      return EXIT_FAILURE;
    }
#endif
    if (opt_input_json.empty()) {
      vpDetectorDNNOpenCV::NetConfig netConfig(opt_dnn_confThresh, opt_dnn_nmsThresh, opt_dnn_label_file,
                                               cv::Size(opt_dnn_width, opt_dnn_height), opt_dnn_filterThresh,
                                               cv::Scalar(opt_dnn_meanR, opt_dnn_meanG, opt_dnn_meanB),
                                               opt_dnn_scale_factor, opt_dnn_swapRB, opt_dnn_type,
                                               opt_dnn_model, opt_dnn_config, opt_dnn_framework);
      dnn.setNetConfig(netConfig);
    }

    if (opt_verbose) {
      std::cout << dnn.getNetConfig() << std::endl;
    }

#ifdef VISP_HAVE_NLOHMANN_JSON
    if (!opt_output_json.empty()) {
      dnn.saveConfigurationInJSON(opt_output_json);
    }
#else
    if (!opt_output_json.empty()) {
      std::cerr << "Error: NLOHMANN JSON library is not installed, please install it following ViSP documentation to save the configuration in a JSON file." << std::endl;
      return EXIT_FAILURE;
    }
#endif
    cv::Mat frame;
    while (true) {
      capture >> frame; // get a new frame from the camera or the video
      if (frame.empty())
        break;

      if (frame.type() == CV_8UC4) {
        // RGBa format is not supported by the detector, convert the frame to BGR
        cv::Mat cpy = frame.clone();
        cv::cvtColor(cpy, frame, cv::COLOR_RGBA2BGR);
      }

      if (I.getSize() == 0) {
        // First frame: initialize the ViSP image and the display window
        vpImageConvert::convert(frame, I);
        d.init(I);
        vpDisplay::setTitle(I, "DNN object detection");
        if (opt_verbose) {
          std::cout << "Process image: " << I.getWidth() << " x " << I.getHeight() << std::endl;
        }
      }
      else {
        vpImageConvert::convert(frame, I);
        if (opt_verbose) {
          std::cout << "Process new image" << std::endl;
        }
      }

      vpDisplay::display(I);
      if (opt_dnn_containerType == DETECTION_CONTAINER_MAP || opt_dnn_containerType == DETECTION_CONTAINER_BOTH) {
        double t = vpTime::measureTimeMs();
        // Detections are returned sorted by class name in a map
        std::map<std::string, std::vector<vpDetectorDNNOpenCV::DetectedFeatures2D> > detections;
        dnn.detect(frame, detections);
        t = vpTime::measureTimeMs() - t;

        for (auto key_val : detections) {
          if (opt_verbose) {
            std::cout << "  Class name      : " << key_val.first << std::endl;
          }
          for (vpDetectorDNNOpenCV::DetectedFeatures2D detection : key_val.second) {
            if (opt_verbose) {
              std::cout << "  Bounding box    : " << detection.getBoundingBox() << std::endl;
              std::cout << "  Class Id        : " << detection.getClassId() << std::endl;
              if (detection.getClassName())
                std::cout << "  Class name      : " << detection.getClassName().value() << std::endl;
              std::cout << "  Confidence score: " << detection.getConfidenceScore() << std::endl;
            }
            detection.display(I);
          }
        }

        std::ostringstream oss_map;
        oss_map << "Detection time (map): " << t << " ms";
        if (opt_verbose) {
          std::cout << "  " << oss_map.str() << std::endl;
        }
        vpDisplay::displayText(I, 20, 20, oss_map.str(), vpColor::red);
      }
      if (opt_dnn_containerType == DETECTION_CONTAINER_VECTOR || opt_dnn_containerType == DETECTION_CONTAINER_BOTH) {
        double t_vector = vpTime::measureTimeMs();
        std::vector<vpDetectorDNNOpenCV::DetectedFeatures2D> detections_vec;
        dnn.detect(frame, detections_vec);
        t_vector = vpTime::measureTimeMs() - t_vector;

        for (auto detection : detections_vec) {
          if (opt_verbose) {
            std::cout << "  Bounding box    : " << detection.getBoundingBox() << std::endl;
            std::cout << "  Class Id        : " << detection.getClassId() << std::endl;
            std::optional<std::string> classname_opt = detection.getClassName();
            std::cout << "  Class name      : " << (classname_opt ? *classname_opt : "Not known") << std::endl;
            std::cout << "  Confidence score: " << detection.getConfidenceScore() << std::endl;
          }
          detection.display(I);
        }

        std::ostringstream oss_vec;
        oss_vec << "Detection time (vector): " << t_vector << " ms";
        if (opt_verbose) {
          std::cout << "  " << oss_vec.str() << std::endl;
        }
        vpDisplay::displayText(I, 40, 20, oss_vec.str(), vpColor::red);
      }
      if (opt_step_by_step) {
        vpDisplay::displayText(I, 60, 20, "Left click to process the next image, right click to quit", vpColor::red);
        vpDisplay::flush(I);
        vpMouseButton::vpMouseButtonType button;
        vpDisplay::getClick(I, button, true); // blocking wait for a click
        if (button == vpMouseButton::button3) {
          break;
        }
      }
      else {
        vpDisplay::displayText(I, 60, 20, "Click to quit", vpColor::red);
        vpDisplay::flush(I);
        if (vpDisplay::getClick(I, false)) { // a non-blocking click stops the example
          break;
        }
      }
    }
  }
  catch (const vpException &e) {
    std::cout << e.what() << std::endl;
  }

  return EXIT_SUCCESS;
}
#else
int main()
{
#if !defined(HAVE_OPENCV_DNN)
  std::cout << "This tutorial needs OpenCV dnn module that is missing." << std::endl;
#endif
#if !defined(HAVE_OPENCV_VIDEOIO)
  std::cout << "This tutorial needs OpenCV videoio module that is missing." << std::endl;
#endif
#if !((__cplusplus >= 201703L) || (defined(_MSVC_LANG) && (_MSVC_LANG >= 201703L)))
  std::cout << "This tutorial needs std::c++17 standard enabled." << std::endl;
#endif
  return EXIT_SUCCESS;
}
#endif