33 #ifndef _vpDetectorDNN_h_
34 #define _vpDetectorDNN_h_
36 #include <visp3/core/vpConfig.h>
40 #if (VISP_HAVE_OPENCV_VERSION >= 0x030403) && defined(HAVE_OPENCV_DNN) && \
41 ((__cplusplus >= 201703L) || (defined(_MSVC_LANG) && (_MSVC_LANG >= 201703L)))
47 #include <opencv2/dnn.hpp>
49 #include <visp3/core/vpColor.h>
50 #include <visp3/core/vpDisplay.h>
51 #include <visp3/core/vpImage.h>
52 #include <visp3/core/vpRect.h>
55 #ifdef VISP_HAVE_NLOHMANN_JSON
56 #include <nlohmann/json.hpp>
81 class VISP_EXPORT vpDetectorDNNOpenCV
89 typedef enum DNNResultsParsingType
101 } DNNResultsParsingType;
/*!
 * Plain aggregate grouping three vectors that describe detection candidates:
 * confidence values, OpenCV rectangles and integer class identifiers.
 * NOTE(review): the three vectors are presumably index-aligned (entry i of each
 * one describes the same candidate) - confirm against the post-processing code.
 */
103 typedef struct DetectionCandidates
// Confidence value of each candidate.
105 std::vector< float > m_confidences;
// Box of each candidate, expressed as an OpenCV rectangle.
106 std::vector< cv::Rect > m_boxes;
// Integer class identifier of each candidate.
107 std::vector< int > m_classIds;
108 } DetectionCandidates;
115 typedef class DetectedFeatures2D
121 std::optional<std::string> m_classname;
/*!
 * Build a detection from the extent of its box, a class identifier, a score and
 * an optional class name.
 *
 * \param u_min, u_max Lower and upper bounds of the box along u.
 * \param v_min, v_max Lower and upper bounds of the box along v.
 * \param cls Class identifier.
 * \param score Score of the detection.
 * \param classname Optional class name.
 *
 * NOTE(review): the member initializer list and the condition selecting between
 * the two assignments of m_classname are not visible in this listing.
 */
134 inline explicit DetectedFeatures2D(
double u_min,
double u_max
135 ,
double v_min,
double v_max
136 ,
unsigned int cls,
double score
137 ,
const std::optional<std::string> &classname
// One branch keeps the class name that was given...
144 m_classname = classname;
// ...the other one leaves the detection without any class name.
147 m_classname = std::nullopt;
/*!
 * \return A copy of the bounding box stored in m_bbox.
 */
154 inline vpRect getBoundingBox()
const {
return m_bbox; }
/*!
 * \return The score stored in m_score.
 */
158 inline double getConfidenceScore()
const {
return m_score; }
/*!
 * \return The class identifier stored in m_cls.
 */
162 inline unsigned int getClassId()
const {
return m_cls; }
/*!
 * \return A copy of m_classname. The optional may hold no value: the
 * constructor can assign std::nullopt to it, so callers must check it
 * before dereferencing.
 */
166 inline std::optional<std::string> getClassName()
const {
return m_classname; }
168 template <
typename Type >
171 friend vpDetectorDNNOpenCV;
172 } DetectedFeatures2D;
/*!
 * Settings of the network and of the processing of its detections.
 * The descriptions below reuse the labels printed by toString() and the keys
 * read by from_json(); the m_mean and m_swapRB members are used elsewhere in the
 * class but their declarations are not visible in this listing.
 */
178 typedef class NetConfig
// Confidence threshold (JSON key "confidenceThreshold").
181 float m_confThreshold;
// NMS threshold (JSON key "nmsThreshold").
182 float m_nmsThreshold;
// Names of the classes (JSON key "classNames").
183 std::vector<std::string> m_classNames;
// Width x height (JSON key "resolution").
184 cv::Size m_inputSize;
// Filter threshold; toString() reports it as "disabled" when it is not greater than the double epsilon.
185 double m_filterSizeRatio;
// Scale (JSON key "scale").
188 double m_scaleFactor;
// Selects how the raw results of the network are parsed (JSON key "parsingType").
190 DNNResultsParsingType m_parsingMethodType;
// Model file (JSON key "modelFile").
191 std::string m_modelFilename;
// Optional configuration file of the model (JSON key "configurationFile"); may be empty.
192 std::string m_modelConfigFilename;
// Optional framework name (JSON key "framework"); may be empty.
193 std::string m_framework;
195 #ifdef VISP_HAVE_NLOHMANN_JSON
/*!
 * Read a NetConfig from a JSON object.
 *
 * Every field is fetched with j.value(key, current value), so a key that is
 * absent from \b j leaves the corresponding field of \b config unchanged.
 *
 * \param j JSON object to read.
 * \param config Configuration updated in place.
 *
 * NOTE(review): the bodies of the three validity checks below are not visible
 * in this listing; they presumably report an error - confirm.
 */
203 friend inline void from_json(
const nlohmann::json &j, NetConfig &config)
205 config.m_confThreshold = j.value(
"confidenceThreshold", config.m_confThreshold);
// A confidence threshold that is zero or negative is treated specially.
206 if (config.m_confThreshold <= 0) {
210 config.m_nmsThreshold = j.value(
"nmsThreshold", config.m_nmsThreshold);
// Same check for the NMS threshold.
211 if (config.m_nmsThreshold <= 0) {
215 config.m_filterSizeRatio = j.value(
"filterSizeRatio", config.m_filterSizeRatio);
217 config.m_classNames = j.value(
"classNames", config.m_classNames);
// The resolution is exchanged as a (width, height) pair.
219 std::pair<unsigned int, unsigned int> resolution = j.value(
"resolution", std::pair<unsigned int, unsigned int>(config.m_inputSize.width, config.m_inputSize.height));
220 config.m_inputSize.width = resolution.first;
221 config.m_inputSize.height = resolution.second;
// The mean is exchanged as a list of values; the default is built from the first three channels.
223 std::vector<double> v_mean = j.value(
"mean", std::vector<double>({ config.m_mean[0], config.m_mean[1], config.m_mean[2] }));
// Exactly three values are expected since indices 0 to 2 are read right after.
224 if (v_mean.size() != 3) {
227 config.m_mean = cv::Scalar(v_mean[0], v_mean[1], v_mean[2]);
229 config.m_scaleFactor = j.value(
"scale", config.m_scaleFactor);
230 config.m_swapRB = j.value(
"swapRB", config.m_swapRB);
// The parsing type is exchanged as a string and converted back to the enumeration.
231 config.m_parsingMethodType = dnnResultsParsingTypeFromString(j.value(
"parsingType", dnnResultsParsingTypeToString(config.m_parsingMethodType)));
232 config.m_modelFilename = j.value(
"modelFile", config.m_modelFilename);
233 config.m_modelConfigFilename = j.value(
"configurationFile", config.m_modelConfigFilename);
234 config.m_framework = j.value(
"framework", config.m_framework);
/*!
 * Write a NetConfig into a JSON object, using the same keys as from_json().
 *
 * \param j JSON object receiving the settings.
 * \param config Configuration to serialize.
 *
 * NOTE(review): the statement that opens the JSON object, and the entry that
 * consumes v_mean, are not visible in this listing.
 */
243 friend inline void to_json(nlohmann::json &j,
const NetConfig &config)
// The resolution is serialized as a (width, height) pair.
245 std::pair<unsigned int, unsigned int> resolution = { config.m_inputSize.width, config.m_inputSize.height };
// Only the first three channels of the mean are collected.
246 std::vector<double> v_mean = { config.m_mean[0], config.m_mean[1], config.m_mean[2] };
248 {
"confidenceThreshold", config.m_confThreshold } ,
249 {
"nmsThreshold" , config.m_nmsThreshold } ,
250 {
"filterSizeRatio" , config.m_filterSizeRatio} ,
251 {
"classNames" , config.m_classNames } ,
252 {
"resolution" , resolution } ,
254 {
"scale" , config.m_scaleFactor } ,
255 {
"swapRB" , config.m_swapRB } ,
// The parsing type is stored under its string form.
256 {
"parsingType" , dnnResultsParsingTypeToString(config.m_parsingMethodType) },
257 {
"modelFile" , config.m_modelFilename } ,
258 {
"configurationFile" , config.m_modelConfigFilename } ,
259 {
"framework" , config.m_framework }
/*!
 * Parse a text file listing class names.
 *
 * Each line of the file is handled independently:
 * - a line that contains no '[' is taken verbatim as one class name
 *   (empty lines included);
 * - a line that contains '[' is read as a list of double-quoted names, such as
 *   ["person", "car"]. Only the text located between the first '[' and the
 *   last ']' of the line is scanned (up to the end of the line when ']' is
 *   missing), so leading or trailing characters around the brackets are
 *   ignored. Empty names ("") are skipped. A name whose closing quote is
 *   missing extends up to the end of the list.
 *
 * The scan always moves forward, hence it terminates whatever the content of
 * the line is.
 *
 * \param filename Path of the file to read.
 * \return The class names in their order of appearance. The vector is empty
 * when nothing can be read from the file.
 */
inline static std::vector<std::string> parseClassNamesFile(const std::string &filename)
{
  std::vector<std::string> classNames;
  std::ifstream ifs(filename);
  std::string line;
  while (std::getline(ifs, line)) {
    const std::string::size_type openPos = line.find('[');
    if (openPos == std::string::npos) {
      // Plain format: one class name per line.
      classNames.push_back(line);
      continue;
    }

    // List format: restrict the scan to what stands between the brackets.
    const std::string::size_type closePos = line.rfind(']');
    const std::string::size_type listEnd =
      ((closePos != std::string::npos) && (closePos > openPos)) ? closePos : line.size();

    std::string::size_type cursor = openPos + 1;
    while (cursor < listEnd) {
      const std::string::size_type startQuote = line.find('"', cursor);
      if ((startQuote == std::string::npos) || (startQuote >= listEnd)) {
        // No more quoted name in the list.
        break;
      }

      std::string::size_type endQuote = line.find('"', startQuote + 1);
      if ((endQuote == std::string::npos) || (endQuote > listEnd)) {
        // Unterminated name: accept everything up to the end of the list.
        endQuote = listEnd;
      }

      if (endQuote > startQuote + 1) {
        classNames.push_back(line.substr(startQuote + 1, endQuote - (startQuote + 1)));
      }

      // Always step past the closing quote, even when the name was empty.
      cursor = endQuote + 1;
    }
  }
  return classNames;
}
// Member initializer list of the default constructor (its signature and some
// initializers are not visible in this listing). Visible defaults: confidence
// threshold 0.5, NMS threshold 0.4, 300 x 300 input, size filter ratio 0,
// mean 127.5 on three channels, scale factor 2/255, USER_SPECIFIED parsing
// method and an empty model configuration file name.
321 : m_confThreshold(0.5f)
322 , m_nmsThreshold(0.4f)
324 , m_inputSize(300, 300)
325 , m_filterSizeRatio(0.)
326 , m_mean(127.5, 127.5, 127.5)
327 , m_scaleFactor(2.0 / 255.0)
329 , m_parsingMethodType(vpDetectorDNNOpenCV::USER_SPECIFIED)
331 , m_modelConfigFilename()
/*!
 * Copy constructor: every field is initialized from the matching field of
 * \b config.
 *
 * \param config Configuration to copy.
 */
337 inline NetConfig(
const NetConfig &config)
338 : m_confThreshold(config.m_confThreshold)
339 , m_nmsThreshold(config.m_nmsThreshold)
340 , m_classNames(config.m_classNames)
// The size is rebuilt from its width and height.
341 , m_inputSize(config.m_inputSize.width, config.m_inputSize.height)
342 , m_filterSizeRatio(config.m_filterSizeRatio)
// Only the first three channels of the mean are forwarded to the new scalar.
343 , m_mean(cv::Scalar(config.m_mean[0], config.m_mean[1], config.m_mean[2]))
344 , m_scaleFactor(config.m_scaleFactor)
345 , m_swapRB(config.m_swapRB)
346 , m_parsingMethodType(config.m_parsingMethodType)
347 , m_modelFilename(config.m_modelFilename)
348 , m_modelConfigFilename(config.m_modelConfigFilename)
349 , m_framework(config.m_framework)
/*!
 * Build a configuration from explicit values, the class names being given
 * directly as a vector.
 *
 * \param confThresh Confidence threshold.
 * \param nmsThresh NMS threshold.
 * \param classNames Names of the classes.
 * \param dnnInputSize Input size, stored in m_inputSize.
 * \param filterSizeRatio Size filter ratio, 0 by default.
 * \param mean Mean, (127.5, 127.5, 127.5) by default.
 * \param scaleFactor Scale factor, 2/255 by default.
 * \param swapRB Swap flag, true by default.
 * \param parsingType Parsing method of the raw results, USER_SPECIFIED by default.
 * \param modelFilename Model file, empty by default.
 * \param configFilename Configuration file of the model, empty by default.
 * \param framework Framework name, empty by default.
 *
 * NOTE(review): the initializers consuming \b mean and \b swapRB are not
 * visible in this listing.
 */
371 inline NetConfig(
float confThresh,
const float &nmsThresh,
const std::vector<std::string> &classNames,
const cv::Size &dnnInputSize,
const double &filterSizeRatio = 0.
372 ,
const cv::Scalar &mean = cv::Scalar(127.5, 127.5, 127.5),
const double &scaleFactor = 2. / 255.,
const bool &swapRB =
true
373 ,
const DNNResultsParsingType &parsingType = vpDetectorDNNOpenCV::USER_SPECIFIED,
const std::string &modelFilename =
"",
const std::string &configFilename =
"",
const std::string &framework =
"")
374 : m_confThreshold(confThresh)
375 , m_nmsThreshold(nmsThresh)
376 , m_classNames(classNames)
377 , m_inputSize(dnnInputSize)
378 , m_filterSizeRatio(filterSizeRatio)
380 , m_scaleFactor(scaleFactor)
382 , m_parsingMethodType(parsingType)
383 , m_modelFilename(modelFilename)
384 , m_modelConfigFilename(configFilename)
385 , m_framework(framework)
/*!
 * Build a configuration from explicit values, the class names being read from
 * a file.
 *
 * \param confThresh Confidence threshold.
 * \param nmsThresh NMS threshold.
 * \param classNamesFile Path of the file handed to parseClassNamesFile().
 * \param dnnInputSize Input size, stored in m_inputSize.
 * \param filterSizeRatio Size filter ratio, 0 by default.
 * \param mean Mean, (127.5, 127.5, 127.5) by default.
 * \param scaleFactor Scale factor, 2/255 by default.
 * \param swapRB Swap flag, true by default.
 * \param parsingType Parsing method of the raw results, USER_SPECIFIED by default.
 * \param modelFilename Model file, empty by default.
 * \param configFilename Configuration file of the model, empty by default.
 * \param framework Framework name, empty by default.
 *
 * NOTE(review): the initializers consuming \b mean and \b swapRB are not
 * visible in this listing.
 */
405 inline NetConfig(
const float &confThresh,
const float &nmsThresh,
const std::string &classNamesFile,
const cv::Size &dnnInputSize,
const double &filterSizeRatio = 0.
406 ,
const cv::Scalar &mean = cv::Scalar(127.5, 127.5, 127.5),
const double &scaleFactor = 2. / 255.,
const bool &swapRB =
true
407 ,
const DNNResultsParsingType &parsingType = vpDetectorDNNOpenCV::USER_SPECIFIED,
const std::string &modelFilename =
"",
const std::string &configFilename =
"",
const std::string &framework =
"")
408 : m_confThreshold(confThresh)
409 , m_nmsThreshold(nmsThresh)
410 , m_inputSize(dnnInputSize)
411 , m_filterSizeRatio(filterSizeRatio)
413 , m_scaleFactor(scaleFactor)
415 , m_parsingMethodType(parsingType)
416 , m_modelFilename(modelFilename)
417 , m_modelConfigFilename(configFilename)
418 , m_framework(framework)
// The class names are filled in the body since they come from a file.
420 m_classNames = parseClassNamesFile(classNamesFile);
/*!
 * Build a multi-line description of the configuration, one "label : value"
 * line per setting.
 *
 * NOTE(review): the declaration of \b text and the return statement are not
 * visible in this listing; the accumulated text is presumably what is returned.
 */
423 inline std::string toString()
const
426 text +=
"Model : " + m_modelFilename +
"\n";
427 text +=
"Type : " + vpDetectorDNNOpenCV::dnnResultsParsingTypeToString(m_parsingMethodType) +
"\n";
// Optional settings are printed as "None" (quotes included) when they are empty.
428 text +=
"Config (optional): " + (m_modelConfigFilename.empty() ?
"\"None\"" : m_modelConfigFilename) +
"\n";
429 text +=
"Framework (optional): " + (m_framework.empty() ?
"\"None\"" : m_framework) +
"\n";
430 text +=
"Width x Height : " + std::to_string(m_inputSize.width) +
" x " + std::to_string(m_inputSize.height) +
"\n";
// Only the first three channels of the mean are printed.
431 text +=
"Mean RGB : " + std::to_string(m_mean[0]) +
" " + std::to_string(m_mean[1]) +
" " + std::to_string(m_mean[2]) +
"\n";
432 text +=
"Scale : " + std::to_string(m_scaleFactor) +
"\n";
433 text +=
"Swap RB? : " + (m_swapRB ? std::string(
"true") : std::string(
"false")) +
"\n";
434 text +=
"Confidence threshold : " + std::to_string(m_confThreshold) +
"\n";
435 text +=
"NMS threshold : " + std::to_string(m_nmsThreshold) +
"\n";
// A ratio that is not greater than the double epsilon is reported as "disabled".
436 text +=
"Filter threshold : " +
437 (m_filterSizeRatio > std::numeric_limits<double>::epsilon() ? std::to_string(m_filterSizeRatio)
438 :
"disabled") +
"\n";
/*!
 * Stream insertion: writes the text produced by config.toString() into \b os.
 *
 * \param os Output stream.
 * \param config Configuration to print.
 */
442 friend inline std::ostream &operator<<(std::ostream &os,
const NetConfig &config)
444 os << config.toString();
/*!
 * Copy assignment: every field is overwritten with the matching field of
 * \b config.
 *
 * \param config Configuration to copy.
 *
 * NOTE(review): the return statement is not visible in this listing.
 */
448 NetConfig &operator=(
const NetConfig &config)
450 m_confThreshold = config.m_confThreshold;
451 m_nmsThreshold = config.m_nmsThreshold;
452 m_classNames = config.m_classNames;
// The size is rebuilt from its width and height.
453 m_inputSize = cv::Size(config.m_inputSize.width, config.m_inputSize.height);
454 m_filterSizeRatio = config.m_filterSizeRatio;
// Only the first three channels of the mean are forwarded to the new scalar.
455 m_mean = cv::Scalar(config.m_mean[0], config.m_mean[1], config.m_mean[2]);
456 m_scaleFactor = config.m_scaleFactor;
457 m_swapRB = config.m_swapRB;
458 m_parsingMethodType = config.m_parsingMethodType;
459 m_modelFilename = config.m_modelFilename;
460 m_modelConfigFilename = config.m_modelConfigFilename;
461 m_framework = config.m_framework;
465 friend vpDetectorDNNOpenCV;
// Static helpers; only their declarations are visible in this listing.
// NOTE(review): judging by its name, this one presumably lists the parsing types
// that can be selected - confirm against the implementation.
468 static std::string getAvailableDnnResultsParsingTypes();
// Conversions between a DNNResultsParsingType value and its string form; the
// JSON (de)serialization of NetConfig goes through them for the "parsingType" key.
469 static std::string dnnResultsParsingTypeToString(
const DNNResultsParsingType &type);
470 static DNNResultsParsingType dnnResultsParsingTypeFromString(
const std::string &name);
// Takes a file name and returns a vector of strings.
// NOTE(review): presumably the detector-level counterpart of
// NetConfig::parseClassNamesFile() - confirm.
471 static std::vector<std::string> parseClassNamesFile(
const std::string &filename);
// Construction and destruction; only the declarations are visible in this listing.
// Default constructor.
472 vpDetectorDNNOpenCV();
// Construction from a configuration and a parsing type. The last argument is a
// pointer to a parsing function receiving the candidates to fill, the raw
// results and the configuration; it defaults to postProcess_unimplemented.
473 vpDetectorDNNOpenCV(
const NetConfig &config,
const DNNResultsParsingType &typeParsingMethod,
void (*parsingMethod)(DetectionCandidates &, std::vector<cv::Mat> &,
const NetConfig &) = postProcess_unimplemented);
474 #ifdef VISP_HAVE_NLOHMANN_JSON
// Construction from the path of a JSON file, with the same optional parsing function.
475 vpDetectorDNNOpenCV(
const std::string &jsonPath,
void (*parsingMethod)(DetectionCandidates &, std::vector<cv::Mat> &,
const NetConfig &) = postProcess_unimplemented);
// NOTE(review): presumably (re)configures the detector from the JSON file - confirm.
476 void initFromJSON(
const std::string &jsonPath);
// Const method: it cannot modify the detector.
// NOTE(review): presumably writes the current settings to the JSON file - confirm.
477 void saveConfigurationInJSON(
const std::string &jsonPath)
const;
// Virtual destructor.
479 virtual ~vpDetectorDNNOpenCV();
// Overloads of detect(); only the declarations are visible in this listing.
// The input is a vpImage<unsigned char>, a vpImage<vpRGBa> or a cv::Mat. The
// output argument is either a flat vector of DetectedFeatures2D, a map from a
// string to such a vector, or a vector of (string, vector) pairs.
// NOTE(review): the string is presumably the class name and the returned
// boolean presumably tells whether something was detected - confirm against
// the implementation.
482 virtual bool detect(
const vpImage<unsigned char> &I, std::map< std::string, std::vector<DetectedFeatures2D>> &output);
483 virtual bool detect(
const vpImage<unsigned char> &I, std::vector< std::pair<std::string, std::vector<DetectedFeatures2D>>> &output);
484 virtual bool detect(
const vpImage<vpRGBa> &I, std::vector<DetectedFeatures2D> &output);
485 virtual bool detect(
const vpImage<vpRGBa> &I, std::map< std::string, std::vector<DetectedFeatures2D>> &output);
486 virtual bool detect(
const vpImage<vpRGBa> &I, std::vector< std::pair<std::string, std::vector<DetectedFeatures2D>>> &output);
487 virtual bool detect(
const cv::Mat &I, std::vector<DetectedFeatures2D> &output);
488 virtual bool detect(
const cv::Mat &I, std::map< std::string, std::vector<DetectedFeatures2D>> &output);
489 virtual bool detect(
const cv::Mat &I, std::vector< std::pair<std::string, std::vector<DetectedFeatures2D>>> &output);
// Network loading and settings; only the declarations are visible in this listing.
// The configuration file and the framework are optional (empty by default).
491 void readNet(
const std::string &model,
const std::string &config =
"",
const std::string &framework =
"");
// NOTE(review): the setters below presumably update the matching field of
// m_netConfig and/or the underlying network - confirm against the implementation.
493 void setNetConfig(
const NetConfig &config);
494 void setConfidenceThreshold(
const float &confThreshold);
495 void setNMSThreshold(
const float &nmsThreshold);
496 void setDetectionFilterSizeRatio(
const double &sizeRatio);
497 void setInputSize(
const int &width,
const int &height);
// The mean is given channel by channel.
498 void setMean(
const double &meanR,
const double &meanG,
const double &meanB);
499 void setPreferableBackend(
const int &backendId);
500 void setPreferableTarget(
const int &targetId);
501 void setScaleFactor(
const double &scaleFactor);
502 void setSwapRB(
const bool &swapRB);
// Same optional parsing function pointer as in the constructors.
503 void setParsingMethod(
const DNNResultsParsingType &typeParsingMethod,
void (*parsingMethod)(DetectionCandidates &, std::vector<cv::Mat> &,
const NetConfig &) = postProcess_unimplemented);
// Read-only access to a NetConfig by const reference (body not visible in this listing).
504 inline const NetConfig &getNetConfig()
const
509 #ifdef VISP_HAVE_NLOHMANN_JSON
/*!
 * Read the settings of a detector from a JSON object.
 *
 * The "networkSettings" entry is converted into a NetConfig; when the key is
 * absent, the current m_netConfig is kept as is.
 *
 * \param j JSON object to read.
 * \param network Detector updated in place.
 */
517 friend inline void from_json(
const nlohmann::json &j, vpDetectorDNNOpenCV &network)
519 network.m_netConfig = j.value(
"networkSettings", network.m_netConfig);
/*!
 * Write the settings of a detector into a JSON object: the configuration is
 * stored under the "networkSettings" key.
 *
 * \param j JSON object receiving the settings.
 * \param network Detector to serialize.
 */
528 friend inline void to_json(nlohmann::json &j,
const vpDetectorDNNOpenCV &network)
531 {
"networkSettings", network.m_netConfig}
/*!
 * Stream insertion: prints the configuration of the detector (m_netConfig)
 * into \b os.
 *
 * \param os Output stream.
 * \param network Detector whose configuration is printed.
 */
536 friend inline std::ostream &operator<<(std::ostream &os,
const vpDetectorDNNOpenCV &network)
538 os << network.m_netConfig;
543 #if (VISP_HAVE_OPENCV_VERSION == 0x030403)
// Internal helpers; only the declarations are visible in this listing.
// Declared only when the OpenCV version is exactly 0x030403.
544 std::vector<cv::String> getOutputsNames();
// Filtering of detections: each helper takes detections and a minimum area
// ratio, and returns a container of the same kind as its input.
// NOTE(review): presumably drops the detections whose area is too far from the
// others - confirm against the implementation.
546 std::vector<DetectedFeatures2D>
547 filterDetectionSingleClassInput(
const std::vector<DetectedFeatures2D> &detected_features,
const double minRatioOfAreaOk);
549 std::vector<DetectedFeatures2D>
550 filterDetectionMultiClassInput(
const std::vector<DetectedFeatures2D> &detected_features,
const double minRatioOfAreaOk);
552 std::map<std::string, std::vector<vpDetectorDNNOpenCV::DetectedFeatures2D>>
553 filterDetectionMultiClassInput(
const std::map< std::string, std::vector<vpDetectorDNNOpenCV::DetectedFeatures2D>> &detected_features,
const double minRatioOfAreaOk);
// Entry point of the post-processing; it only receives the candidates to fill.
555 void postProcess(DetectionCandidates &proposals);
// Post-processing routines: each one receives the candidates to fill, the raw
// results of the network and the configuration.
// NOTE(review): each routine presumably handles the output layout of the
// network family named in its identifier - confirm.
557 void postProcess_YoloV3_V4(DetectionCandidates &proposals, std::vector<cv::Mat> &dnnRes,
const NetConfig &netConfig);
559 void postProcess_YoloV5_V7(DetectionCandidates &proposals, std::vector<cv::Mat> &dnnRes,
const NetConfig &netConfig);
561 void postProcess_YoloV8(DetectionCandidates &proposals, std::vector<cv::Mat> &dnnRes,
const NetConfig &netConfig);
563 void postProcess_FasterRCNN(DetectionCandidates &proposals, std::vector<cv::Mat> &dnnRes,
const NetConfig &netConfig);
565 #if defined(VISP_BUILD_DEPRECATED_FUNCTIONS)
566 void postProcess_SSD_MobileNet(DetectionCandidates &proposals, std::vector<cv::Mat> &dnnRes,
const NetConfig &netConfig);
569 void postProcess_ResNet_10(DetectionCandidates &proposals, std::vector<cv::Mat> &dnnRes,
const NetConfig &netConfig);
// Static, hence usable as the default value of the parsing function pointers.
571 static void postProcess_unimplemented(DetectionCandidates &proposals, std::vector<cv::Mat> &dnnRes,
const NetConfig &netConfig);
// Data members (some declarations are not visible in this listing).
// NOTE(review): presumably selects whether the size filter runs after the NMS - confirm.
574 bool m_applySizeFilterAfterNMS;
// NOTE(review): presumably the indices of the candidates kept by the NMS - confirm.
582 std::vector<int> m_indices;
// Configuration of the network; serialized under the "networkSettings" JSON key.
586 NetConfig m_netConfig;
// NOTE(review): presumably the names of the output layers of the network - confirm.
588 std::vector<cv::String> m_outNames;
// NOTE(review): presumably the raw results of the last inference - confirm.
590 std::vector<cv::Mat> m_dnnRes;
// Pointer to a parsing function; same signature as the parsingMethod arguments
// of the constructors and of setParsingMethod().
592 void (*m_parsingMethod)(DetectionCandidates &, std::vector<cv::Mat> &,
const NetConfig &);
/*!
 * Out-of-class definition of the template member DetectedFeatures2D::display().
 *
 * \param img Image, whose pixel type is the template parameter.
 * \param color Color argument.
 * \param thickness Thickness argument.
 *
 * NOTE(review): most of the body is not visible in this listing. The visible
 * part builds a text in a string stream; the detection presumably gets drawn
 * on \b img with that text next to it - confirm.
 */
602 template <
typename Type >
604 vpDetectorDNNOpenCV::DetectedFeatures2D::display(
const vpImage< Type > &img,
const vpColor &color,
unsigned int thickness)
const
608 std::stringstream ss;
// The score is multiplied by 100 and printed as a percentage between parentheses.
615 ss <<
"(" << std::setprecision(4) << m_score * 100. <<
"%)";
Class to define RGB colors available for display functionalities.
static const vpColor blue
static void displayRectangle(const vpImage< unsigned char > &I, const vpImagePoint &topLeft, unsigned int width, unsigned int height, const vpColor &color, bool fill=false, unsigned int thickness=1)
static void displayText(const vpImage< unsigned char > &I, const vpImagePoint &ip, const std::string &s, const vpColor &color)
Error that can be emitted by ViSP classes.
@ badValue
Used to indicate that a value is not in the allowed range.
@ dimensionError
Bad dimension.
Class that defines a 2D point in an image. This class is useful for image processing and stores only ...
Definition of the vpImage class member functions.
Defines a rectangle in the plane.
vpImagePoint getTopRight() const
void display(vpImage< unsigned char > &I, const std::string &title)
Display a gray-scale image.