34 #ifndef VP_DETECTOR_DNN_OPENCV_H
35 #define VP_DETECTOR_DNN_OPENCV_H
37 #include <visp3/core/vpConfig.h>
41 #if (VISP_HAVE_OPENCV_VERSION >= 0x030403) && defined(HAVE_OPENCV_DNN) && \
42 ((__cplusplus >= 201703L) || (defined(_MSVC_LANG) && (_MSVC_LANG >= 201703L)))
48 #include <opencv2/dnn.hpp>
50 #include <visp3/core/vpColor.h>
51 #include <visp3/core/vpDisplay.h>
52 #include <visp3/core/vpImage.h>
53 #include <visp3/core/vpRect.h>
57 #ifdef VISP_HAVE_NLOHMANN_JSON
58 #include VISP_NLOHMANN_JSON(json.hpp)
85 class VISP_EXPORT vpDetectorDNNOpenCV
93 typedef enum DNNResultsParsingType
106 } DNNResultsParsingType;
/*!
 * \brief Groups the raw detection candidates: a vector of confidences, a vector of
 * OpenCV rectangles and a vector of class identifiers.
 * NOTE(review): presumably entry i of each vector describes the same candidate -
 * confirm against the code that fills this structure.
 */
108 typedef struct DetectionCandidates
110 std::vector< float > m_confidences; /*!< Confidence values of the candidates. */
111 std::vector< cv::Rect > m_boxes; /*!< Boxes of the candidates, stored as cv::Rect. */
112 std::vector< int > m_classIds; /*!< Integer class identifiers of the candidates. */
113 } DetectionCandidates;
120 typedef class DetectedFeatures2D
126 std::optional<std::string> m_classname;
/*!
 * \brief Construct a detection from box limits, a class identifier, a score and an
 * optional class name.
 * NOTE(review): the member initializer list and the condition selecting between the
 * two assignments below are not visible in this excerpt.
 * \param u_min, u_max : Box limits named after the u axis (presumably horizontal
 * pixel coordinates - TODO confirm).
 * \param v_min, v_max : Box limits named after the v axis (presumably vertical
 * pixel coordinates - TODO confirm).
 * \param cls : Class identifier.
 * \param score : Score of the detection.
 * \param classname : Optional class name.
 */
139 inline explicit DetectedFeatures2D(
double u_min,
double u_max
140 ,
double v_min,
double v_max
141 ,
unsigned int cls,
double score
142 ,
const std::optional<std::string> &classname
// One path stores the class name that was passed in...
149 m_classname = classname;
// ...the other path leaves the detection without a class name.
152 m_classname = std::nullopt;
159 inline vpRect getBoundingBox()
const {
return m_bbox; }
163 inline double getConfidenceScore()
const {
return m_score; }
167 inline unsigned int getClassId()
const {
return m_cls; }
171 inline std::optional<std::string> getClassName()
const {
return m_classname; }
173 template <
typename Type >
176 friend vpDetectorDNNOpenCV;
177 } DetectedFeatures2D;
/*!
 * \brief Configuration of the network: thresholds, class names, input size,
 * pre-processing parameters, parsing method and model files.
 * NOTE(review): some members used elsewhere in this class (m_mean, m_swapRB) are
 * declared on lines that are not visible in this excerpt.
 */
183 typedef class NetConfig
186 float m_confThreshold; /*!< Confidence threshold (JSON key "confidenceThreshold"). */
187 float m_nmsThreshold; /*!< NMS threshold (JSON key "nmsThreshold"). */
188 std::vector<std::string> m_classNames; /*!< Class names (JSON key "classNames"). */
189 cv::Size m_inputSize; /*!< Width and height, serialized as the JSON "resolution" pair. */
190 double m_filterSizeRatio; /*!< Filter threshold; toString() reports "disabled" when it is not above epsilon. */
193 double m_scaleFactor; /*!< Scale (JSON key "scale"). */
195 DNNResultsParsingType m_parsingMethodType; /*!< Parsing type (JSON key "parsingType"). */
196 std::string m_modelFilename; /*!< Model file (JSON key "modelFile"). */
197 std::string m_modelConfigFilename; /*!< Optional configuration file (JSON key "configurationFile"). */
198 std::string m_framework; /*!< Optional framework name (JSON key "framework"). */
200 #ifdef VISP_HAVE_NLOHMANN_JSON
/*!
 * \brief Fill a NetConfig from a JSON object.
 * Every field is read with j.value(key, current value), so a key that is absent
 * from \b j leaves the corresponding member of \b config unchanged.
 * NOTE(review): the bodies of the three validation branches below are not visible
 * in this excerpt; presumably they reject the value - confirm.
 * \param j : JSON object to read.
 * \param config : Configuration to update in place.
 */
208 friend inline void from_json(
const nlohmann::json &j, NetConfig &config)
210 config.m_confThreshold = j.value(
"confidenceThreshold", config.m_confThreshold);
// A confidence threshold that is zero or negative is treated as a special case.
211 if (config.m_confThreshold <= 0) {
215 config.m_nmsThreshold = j.value(
"nmsThreshold", config.m_nmsThreshold);
// Same check for the NMS threshold.
216 if (config.m_nmsThreshold <= 0) {
220 config.m_filterSizeRatio = j.value(
"filterSizeRatio", config.m_filterSizeRatio);
222 config.m_classNames = j.value(
"classNames", config.m_classNames);
// The input size is exchanged as a (width, height) pair of unsigned integers.
224 std::pair<unsigned int, unsigned int> resolution = j.value(
"resolution", std::pair<unsigned int, unsigned int>(config.m_inputSize.width, config.m_inputSize.height));
225 config.m_inputSize.width = resolution.first;
226 config.m_inputSize.height = resolution.second;
// The mean is exchanged as a vector; only the first three components of m_mean are used.
228 std::vector<double> v_mean = j.value(
"mean", std::vector<double>({ config.m_mean[0], config.m_mean[1], config.m_mean[2] }));
// A mean that does not have exactly three components is treated as a special case.
229 if (v_mean.size() != 3) {
232 config.m_mean = cv::Scalar(v_mean[0], v_mean[1], v_mean[2]);
234 config.m_scaleFactor = j.value(
"scale", config.m_scaleFactor);
235 config.m_swapRB = j.value(
"swapRB", config.m_swapRB);
// The parsing type is stored as a string and converted back to the enum.
236 config.m_parsingMethodType = dnnResultsParsingTypeFromString(j.value(
"parsingType", dnnResultsParsingTypeToString(config.m_parsingMethodType)));
237 config.m_modelFilename = j.value(
"modelFile", config.m_modelFilename);
238 config.m_modelConfigFilename = j.value(
"configurationFile", config.m_modelConfigFilename);
239 config.m_framework = j.value(
"framework", config.m_framework);
/*!
 * \brief Serialize a NetConfig into a JSON object, using the same keys that
 * from_json() reads.
 * NOTE(review): the statement that opens the JSON object and the entry that uses
 * v_mean are not visible in this excerpt.
 * \param j : JSON object to fill.
 * \param config : Configuration to serialize.
 */
248 friend inline void to_json(nlohmann::json &j,
const NetConfig &config)
// The input size is written as a (width, height) pair.
250 std::pair<unsigned int, unsigned int> resolution = { config.m_inputSize.width, config.m_inputSize.height };
// Only the first three components of the mean are exported.
251 std::vector<double> v_mean = { config.m_mean[0], config.m_mean[1], config.m_mean[2] };
253 {
"confidenceThreshold", config.m_confThreshold } ,
254 {
"nmsThreshold" , config.m_nmsThreshold } ,
255 {
"filterSizeRatio" , config.m_filterSizeRatio} ,
256 {
"classNames" , config.m_classNames } ,
257 {
"resolution" , resolution } ,
259 {
"scale" , config.m_scaleFactor } ,
260 {
"swapRB" , config.m_swapRB } ,
// The parsing type is written as its string representation.
261 {
"parsingType" , dnnResultsParsingTypeToString(config.m_parsingMethodType) },
262 {
"modelFile" , config.m_modelFilename } ,
263 {
"configurationFile" , config.m_modelConfigFilename } ,
264 {
"framework" , config.m_framework }
/*!
 * \brief Read the class names stored in a text file.
 *
 * Two line formats are accepted, and may be mixed in the same file:
 * - a line without any '[' is taken verbatim as one class name (blank lines
 *   therefore yield an empty name, which keeps line numbers and class indices aligned);
 * - a line containing '[' is treated as a list such as ["person", "car"]: every
 *   non-empty text found between a pair of double quotes, after the first '[' and
 *   before the last ']', is added as a class name.
 *
 * A trailing carriage return (files with CRLF line endings) is removed before a
 * line is interpreted. In a list, an unterminated quote ends the parsing of that
 * line and the incomplete name is ignored.
 *
 * \param filename : Path of the file to read.
 * \return The class names, in file order. The vector is empty when the file
 * cannot be opened or contains no name.
 */
inline static std::vector<std::string> parseClassNamesFile(const std::string &filename)
{
  std::vector<std::string> classNames;
  std::ifstream ifs(filename);
  std::string line;
  while (std::getline(ifs, line)) {
    // getline() only strips '\n': drop the '\r' left by CRLF files.
    if (!line.empty() && line.back() == '\r') {
      line.pop_back();
    }

    const std::string::size_type openPos = line.find('[');
    if (openPos == std::string::npos) {
      // One class name per line.
      classNames.push_back(line);
      continue;
    }

    // Keep what lies between the first '[' and the last ']'. When the closing
    // bracket is missing, everything after '[' is scanned.
    const std::string::size_type closePos = line.rfind(']');
    const bool hasClosingBracket = (closePos != std::string::npos) && (closePos > openPos);
    const std::string::size_type listEnd = hasClosingBracket ? closePos : line.size();
    const std::string list = line.substr(openPos + 1, listEnd - (openPos + 1));

    // Always move the cursor past the closing quote, so that empty names or
    // stray characters between names can never stall the loop.
    std::string::size_type cursor = 0;
    while (cursor < list.size()) {
      const std::string::size_type start_pos = list.find('"', cursor);
      if (start_pos == std::string::npos) {
        break; // No more quoted name on this line
      }
      const std::string::size_type end_pos = list.find('"', start_pos + 1);
      if (end_pos == std::string::npos) {
        break; // Unterminated quote: ignore the incomplete name
      }
      if (end_pos > start_pos + 1) {
        classNames.push_back(list.substr(start_pos + 1, end_pos - (start_pos + 1)));
      }
      cursor = end_pos + 1;
    }
  }
  return classNames;
}
// Member initializers providing default values.
// NOTE(review): the constructor signature and the initializers of some members
// are not visible in this excerpt; presumably this is the default constructor.
326 : m_confThreshold(0.5f)
327 , m_nmsThreshold(0.4f)
329 , m_inputSize(300, 300)
// A ratio of 0 is reported as "disabled" by toString().
330 , m_filterSizeRatio(0.)
331 , m_mean(127.5, 127.5, 127.5)
332 , m_scaleFactor(2.0 / 255.0)
334 , m_parsingMethodType(vpDetectorDNNOpenCV::USER_SPECIFIED)
336 , m_modelConfigFilename()
/*!
 * \brief Copy constructor: initializes every member from \b config.
 * \param config : Configuration to copy.
 */
342 inline NetConfig(
const NetConfig &config)
343 : m_confThreshold(config.m_confThreshold)
344 , m_nmsThreshold(config.m_nmsThreshold)
345 , m_classNames(config.m_classNames)
346 , m_inputSize(config.m_inputSize.width, config.m_inputSize.height)
347 , m_filterSizeRatio(config.m_filterSizeRatio)
// Only the first three components of the mean are carried over.
348 , m_mean(cv::Scalar(config.m_mean[0], config.m_mean[1], config.m_mean[2]))
349 , m_scaleFactor(config.m_scaleFactor)
350 , m_swapRB(config.m_swapRB)
351 , m_parsingMethodType(config.m_parsingMethodType)
352 , m_modelFilename(config.m_modelFilename)
353 , m_modelConfigFilename(config.m_modelConfigFilename)
354 , m_framework(config.m_framework)
/*!
 * \brief Construct a configuration from explicit values, the class names being
 * given as a vector.
 * NOTE(review): the initializers that consume \b mean and \b swapRB are not
 * visible in this excerpt.
 * \param confThresh : Stored in m_confThreshold.
 * \param nmsThresh : Stored in m_nmsThreshold.
 * \param classNames : Stored in m_classNames.
 * \param dnnInputSize : Stored in m_inputSize.
 * \param filterSizeRatio : Stored in m_filterSizeRatio (default 0.).
 * \param mean : Mean values (default 127.5 for each of the three components).
 * \param scaleFactor : Stored in m_scaleFactor (default 2/255).
 * \param swapRB : Red/blue swap flag (default true).
 * \param parsingType : Stored in m_parsingMethodType (default USER_SPECIFIED).
 * \param modelFilename : Stored in m_modelFilename (default empty).
 * \param configFilename : Stored in m_modelConfigFilename (default empty).
 * \param framework : Stored in m_framework (default empty).
 */
376 inline NetConfig(
float confThresh,
const float &nmsThresh,
const std::vector<std::string> &classNames,
const cv::Size &dnnInputSize,
const double &filterSizeRatio = 0.
377 ,
const cv::Scalar &mean = cv::Scalar(127.5, 127.5, 127.5),
const double &scaleFactor = 2. / 255.,
const bool &swapRB =
true
378 ,
const DNNResultsParsingType &parsingType = vpDetectorDNNOpenCV::USER_SPECIFIED,
const std::string &modelFilename =
"",
const std::string &configFilename =
"",
const std::string &framework =
"")
379 : m_confThreshold(confThresh)
380 , m_nmsThreshold(nmsThresh)
381 , m_classNames(classNames)
382 , m_inputSize(dnnInputSize)
383 , m_filterSizeRatio(filterSizeRatio)
385 , m_scaleFactor(scaleFactor)
387 , m_parsingMethodType(parsingType)
388 , m_modelFilename(modelFilename)
389 , m_modelConfigFilename(configFilename)
390 , m_framework(framework)
/*!
 * \brief Construct a configuration from explicit values, the class names being
 * read from a file.
 * NOTE(review): the initializers that consume \b mean and \b swapRB are not
 * visible in this excerpt.
 * \param confThresh : Stored in m_confThreshold.
 * \param nmsThresh : Stored in m_nmsThreshold.
 * \param classNamesFile : Path handed to parseClassNamesFile() to fill m_classNames.
 * \param dnnInputSize : Stored in m_inputSize.
 * \param filterSizeRatio : Stored in m_filterSizeRatio (default 0.).
 * \param mean : Mean values (default 127.5 for each of the three components).
 * \param scaleFactor : Stored in m_scaleFactor (default 2/255).
 * \param swapRB : Red/blue swap flag (default true).
 * \param parsingType : Stored in m_parsingMethodType (default USER_SPECIFIED).
 * \param modelFilename : Stored in m_modelFilename (default empty).
 * \param configFilename : Stored in m_modelConfigFilename (default empty).
 * \param framework : Stored in m_framework (default empty).
 */
410 inline NetConfig(
const float &confThresh,
const float &nmsThresh,
const std::string &classNamesFile,
const cv::Size &dnnInputSize,
const double &filterSizeRatio = 0.
411 ,
const cv::Scalar &mean = cv::Scalar(127.5, 127.5, 127.5),
const double &scaleFactor = 2. / 255.,
const bool &swapRB =
true
412 ,
const DNNResultsParsingType &parsingType = vpDetectorDNNOpenCV::USER_SPECIFIED,
const std::string &modelFilename =
"",
const std::string &configFilename =
"",
const std::string &framework =
"")
413 : m_confThreshold(confThresh)
414 , m_nmsThreshold(nmsThresh)
415 , m_inputSize(dnnInputSize)
416 , m_filterSizeRatio(filterSizeRatio)
418 , m_scaleFactor(scaleFactor)
420 , m_parsingMethodType(parsingType)
421 , m_modelFilename(modelFilename)
422 , m_modelConfigFilename(configFilename)
423 , m_framework(framework)
// The class names are not part of the initializer list: they are loaded from the file.
425 m_classNames = parseClassNamesFile(classNamesFile);
/*!
 * \brief Build a multi-line, human-readable description of the configuration,
 * one "label : value" line per setting.
 * NOTE(review): the declaration of \b text and the return statement are not
 * visible in this excerpt; presumably \b text is returned.
 */
428 inline std::string toString()
const
431 text +=
"Model : " + m_modelFilename +
"\n";
432 text +=
"Type : " + vpDetectorDNNOpenCV::dnnResultsParsingTypeToString(m_parsingMethodType) +
"\n";
// Empty optional file names are printed as "None" (with the quotes).
433 text +=
"Config (optional): " + (m_modelConfigFilename.empty() ?
"\"None\"" : m_modelConfigFilename) +
"\n";
434 text +=
"Framework (optional): " + (m_framework.empty() ?
"\"None\"" : m_framework) +
"\n";
435 text +=
"Width x Height : " + std::to_string(m_inputSize.width) +
" x " + std::to_string(m_inputSize.height) +
"\n";
// Only the first three components of the mean are printed.
436 text +=
"Mean RGB : " + std::to_string(m_mean[0]) +
" " + std::to_string(m_mean[1]) +
" " + std::to_string(m_mean[2]) +
"\n";
437 text +=
"Scale : " + std::to_string(m_scaleFactor) +
"\n";
438 text +=
"Swap RB? : " + (m_swapRB ? std::string(
"true") : std::string(
"false")) +
"\n";
439 text +=
"Confidence threshold : " + std::to_string(m_confThreshold) +
"\n";
440 text +=
"NMS threshold : " + std::to_string(m_nmsThreshold) +
"\n";
// A ratio that is not above epsilon is reported as "disabled" instead of a number.
441 text +=
"Filter threshold : " +
442 (m_filterSizeRatio > std::numeric_limits<double>::epsilon() ? std::to_string(m_filterSizeRatio)
443 :
"disabled") +
"\n";
/*!
 * \brief Write the text produced by NetConfig::toString() to a stream.
 * \param os : Output stream.
 * \param config : Configuration to print.
 */
447 friend inline std::ostream &operator<<(std::ostream &os,
const NetConfig &config)
449 os << config.toString();
/*!
 * \brief Copy assignment: copies every member of \b config into this object.
 * NOTE(review): the return statement is not visible in this excerpt.
 * \param config : Configuration to copy.
 */
453 NetConfig &operator=(
const NetConfig &config)
455 m_confThreshold = config.m_confThreshold;
456 m_nmsThreshold = config.m_nmsThreshold;
457 m_classNames = config.m_classNames;
458 m_inputSize = cv::Size(config.m_inputSize.width, config.m_inputSize.height);
459 m_filterSizeRatio = config.m_filterSizeRatio;
// Only the first three components of the mean are carried over.
460 m_mean = cv::Scalar(config.m_mean[0], config.m_mean[1], config.m_mean[2]);
461 m_scaleFactor = config.m_scaleFactor;
462 m_swapRB = config.m_swapRB;
463 m_parsingMethodType = config.m_parsingMethodType;
464 m_modelFilename = config.m_modelFilename;
465 m_modelConfigFilename = config.m_modelConfigFilename;
466 m_framework = config.m_framework;
470 friend vpDetectorDNNOpenCV;
// Static helpers; only the declarations are visible here.
// Returns a string; presumably the list of supported parsing type names - TODO confirm.
473 static std::string getAvailableDnnResultsParsingTypes();
// Conversions between DNNResultsParsingType and its string form, used by the JSON
// (de)serialization of NetConfig.
474 static std::string dnnResultsParsingTypeToString(
const DNNResultsParsingType &type);
475 static DNNResultsParsingType dnnResultsParsingTypeFromString(
const std::string &name);
// Takes a file name and returns a vector of strings; presumably the class names
// read from that file - TODO confirm against the implementation.
476 static std::vector<std::string> parseClassNamesFile(
const std::string &filename);
// Construction, JSON configuration and destruction; only the declarations are visible here.
// Default constructor.
477 vpDetectorDNNOpenCV();
// Constructor taking a configuration, a parsing type and a parsing function pointer
// that defaults to postProcess_unimplemented.
478 vpDetectorDNNOpenCV(
const NetConfig &config,
const DNNResultsParsingType &typeParsingMethod,
void (*parsingMethod)(DetectionCandidates &, std::vector<cv::Mat> &,
const NetConfig &) = postProcess_unimplemented);
// The JSON-based API is only declared when nlohmann JSON is available.
479 #ifdef VISP_HAVE_NLOHMANN_JSON
// Constructor taking the path of a JSON file and the same optional parsing function pointer.
480 vpDetectorDNNOpenCV(
const std::string &jsonPath,
void (*parsingMethod)(DetectionCandidates &, std::vector<cv::Mat> &,
const NetConfig &) = postProcess_unimplemented);
// Presumably (re)configures the detector from the JSON file at jsonPath - TODO confirm.
481 void initFromJSON(
const std::string &jsonPath);
// Const method; presumably writes the current configuration to jsonPath - TODO confirm.
482 void saveConfigurationInJSON(
const std::string &jsonPath)
const;
// Virtual destructor.
484 virtual ~vpDetectorDNNOpenCV();
// Overloads of detect(): each takes an input image (vpImage<unsigned char>,
// vpImage<vpRGBa> or cv::Mat) and an output container of DetectedFeatures2D
// (a flat vector, a map keyed by a string, or a vector of (string, vector) pairs),
// and returns a bool.
// NOTE(review): presumably the string key is the class name and the return value
// tells whether something was detected - confirm against the implementation.
487 virtual bool detect(
const vpImage<unsigned char> &I, std::map< std::string, std::vector<DetectedFeatures2D>> &output);
488 virtual bool detect(
const vpImage<unsigned char> &I, std::vector< std::pair<std::string, std::vector<DetectedFeatures2D>>> &output);
489 virtual bool detect(
const vpImage<vpRGBa> &I, std::vector<DetectedFeatures2D> &output);
490 virtual bool detect(
const vpImage<vpRGBa> &I, std::map< std::string, std::vector<DetectedFeatures2D>> &output);
491 virtual bool detect(
const vpImage<vpRGBa> &I, std::vector< std::pair<std::string, std::vector<DetectedFeatures2D>>> &output);
492 virtual bool detect(
const cv::Mat &I, std::vector<DetectedFeatures2D> &output);
493 virtual bool detect(
const cv::Mat &I, std::map< std::string, std::vector<DetectedFeatures2D>> &output);
494 virtual bool detect(
const cv::Mat &I, std::vector< std::pair<std::string, std::vector<DetectedFeatures2D>>> &output);
// Network loading and configuration setters; only the declarations are visible here.
// Takes a model file name plus an optional configuration file name and framework
// name (both default to an empty string).
496 void readNet(
const std::string &model,
const std::string &config =
"",
const std::string &framework =
"");
// NOTE(review): each setter below presumably updates the matching setting of the
// detector's configuration - confirm against the implementation.
498 void setNetConfig(
const NetConfig &config);
499 void setConfidenceThreshold(
const float &confThreshold);
500 void setNMSThreshold(
const float &nmsThreshold);
501 void setDetectionFilterSizeRatio(
const double &sizeRatio);
502 void setInputSize(
const int &width,
const int &height);
503 void setMean(
const double &meanR,
const double &meanG,
const double &meanB);
504 void setPreferableBackend(
const int &backendId);
505 void setPreferableTarget(
const int &targetId);
506 void setScaleFactor(
const double &scaleFactor);
507 void setSwapRB(
const bool &swapRB);
// Takes a parsing type and a parsing function pointer that defaults to
// postProcess_unimplemented.
508 void setParsingMethod(
const DNNResultsParsingType &typeParsingMethod,
void (*parsingMethod)(DetectionCandidates &, std::vector<cv::Mat> &,
const NetConfig &) = postProcess_unimplemented);
// Const accessor returning a const reference to a NetConfig; its body is not
// visible in this excerpt.
509 inline const NetConfig &getNetConfig()
const
514 #ifdef VISP_HAVE_NLOHMANN_JSON
/*!
 * \brief Read the detector configuration from the "networkSettings" entry of \b j.
 * When the entry is absent, the current m_netConfig is kept as the default value.
 * NOTE(review): any statement following this assignment is not visible in this excerpt.
 */
522 friend inline void from_json(
const nlohmann::json &j, vpDetectorDNNOpenCV &network)
524 network.m_netConfig = j.value(
"networkSettings", network.m_netConfig);
/*!
 * \brief Serialize the detector: its configuration is stored under the
 * "networkSettings" key.
 */
533 friend inline void to_json(nlohmann::json &j,
const vpDetectorDNNOpenCV &network)
536 {
"networkSettings", network.m_netConfig}
/*!
 * \brief Print the detector by streaming its configuration.
 */
541 friend inline std::ostream &operator<<(std::ostream &os,
const vpDetectorDNNOpenCV &network)
543 os << network.m_netConfig;
548 #if (VISP_HAVE_OPENCV_VERSION == 0x030403)
// Internal helpers and data members; only the declarations are visible here.
// Declared right after a check for OpenCV version 0x030403.
549 std::vector<cv::String> getOutputsNames();
// Filtering helpers: each takes a set of detections and a minimum area ratio and
// returns a container of the same type.
// NOTE(review): presumably detections whose area ratio is below minRatioOfAreaOk
// are dropped - confirm against the implementation.
551 std::vector<DetectedFeatures2D>
552 filterDetectionSingleClassInput(
const std::vector<DetectedFeatures2D> &detected_features,
const double minRatioOfAreaOk);
554 std::vector<DetectedFeatures2D>
555 filterDetectionMultiClassInput(
const std::vector<DetectedFeatures2D> &detected_features,
const double minRatioOfAreaOk);
557 std::map<std::string, std::vector<vpDetectorDNNOpenCV::DetectedFeatures2D>>
558 filterDetectionMultiClassInput(
const std::map< std::string, std::vector<vpDetectorDNNOpenCV::DetectedFeatures2D>> &detected_features,
const double minRatioOfAreaOk);
// Post-processing entry point, followed by one routine per network family. The
// per-family routines share the signature of the user-supplied parsing function
// pointer: (candidates to fill, network output matrices, configuration).
560 void postProcess(DetectionCandidates &proposals);
562 void postProcess_YoloV3_V4(DetectionCandidates &proposals, std::vector<cv::Mat> &dnnRes,
const NetConfig &netConfig);
564 void postProcess_YoloV5_V7(DetectionCandidates &proposals, std::vector<cv::Mat> &dnnRes,
const NetConfig &netConfig);
566 void postProcess_YoloV8_V11(DetectionCandidates &proposals, std::vector<cv::Mat> &dnnRes,
const NetConfig &netConfig);
568 void postProcess_FasterRCNN(DetectionCandidates &proposals, std::vector<cv::Mat> &dnnRes,
const NetConfig &netConfig);
// Declared under the deprecated-functions build switch.
570 #if defined(VISP_BUILD_DEPRECATED_FUNCTIONS)
571 void postProcess_SSD_MobileNet(DetectionCandidates &proposals, std::vector<cv::Mat> &dnnRes,
const NetConfig &netConfig);
574 void postProcess_ResNet_10(DetectionCandidates &proposals, std::vector<cv::Mat> &dnnRes,
const NetConfig &netConfig);
// Static routine used as the default value of the parsing function pointer in the
// constructors and in setParsingMethod().
576 static void postProcess_unimplemented(DetectionCandidates &proposals, std::vector<cv::Mat> &dnnRes,
const NetConfig &netConfig);
// NOTE(review): presumably selects whether the size filter runs after NMS - confirm.
579 bool m_applySizeFilterAfterNMS;
// Integer indices; NOTE(review): presumably the indices kept by NMS - confirm.
587 std::vector<int> m_indices;
// Configuration read and written by the JSON helpers and printed by operator<<.
591 NetConfig m_netConfig;
// Names stored as cv::String; NOTE(review): presumably the network output layer names - confirm.
593 std::vector<cv::String> m_outNames;
// Matrices; NOTE(review): presumably the raw network outputs handed to the post-processing routines - confirm.
595 std::vector<cv::Mat> m_dnnRes;
// Function pointer with the same signature as the user-supplied parsing method.
597 void (*m_parsingMethod)(DetectionCandidates &, std::vector<cv::Mat> &,
const NetConfig &);
607 template <
typename Type >
609 vpDetectorDNNOpenCV::DetectedFeatures2D::display(
const vpImage< Type > &img,
const vpColor &color,
unsigned int thickness)
const
613 std::stringstream ss;
620 ss <<
"(" << std::setprecision(4) << m_score * 100. <<
"%)";
Class to define RGB colors available for display functionalities.
static const vpColor blue
static void displayRectangle(const vpImage< unsigned char > &I, const vpImagePoint &topLeft, unsigned int width, unsigned int height, const vpColor &color, bool fill=false, unsigned int thickness=1)
static void displayText(const vpImage< unsigned char > &I, const vpImagePoint &ip, const std::string &s, const vpColor &color)
error that can be emitted by ViSP classes.
@ badValue
Used to indicate that a value is not in the allowed range.
@ dimensionError
Bad dimension.
Class that defines a 2D point in an image. This class is useful for image processing and stores only ...
Definition of the vpImage class member functions.
Defines a rectangle in the plane.
vpImagePoint getTopRight() const