34 #ifndef VP_DETECTOR_DNN_OPENCV_H
35 #define VP_DETECTOR_DNN_OPENCV_H
37 #include <visp3/core/vpConfig.h>
41 #if (VISP_HAVE_OPENCV_VERSION >= 0x030403) && defined(HAVE_OPENCV_DNN) && \
42 ((__cplusplus >= 201703L) || (defined(_MSVC_LANG) && (_MSVC_LANG >= 201703L)))
48 #include <opencv2/dnn.hpp>
50 #include <visp3/core/vpColor.h>
51 #include <visp3/core/vpDisplay.h>
52 #include <visp3/core/vpImage.h>
53 #include <visp3/core/vpRect.h>
57 #ifdef VISP_HAVE_NLOHMANN_JSON
58 #include VISP_NLOHMANN_JSON(json.hpp)
106 } DNNResultsParsingType;
113 } DetectionCandidates;
140 ,
double v_min,
double v_max
141 ,
unsigned int cls,
double score
142 ,
const std::optional<std::string> &classname
149 m_classname = classname;
152 m_classname = std::nullopt;
// Accessor for the class name attached to this detection.
// Returns a copy of m_classname, which is a std::optional<std::string>:
// the constructor fragments above assign it either the supplied classname
// or std::nullopt, so callers must check for a value before using it.
171 inline std::optional<std::string>
getClassName()
const {
return m_classname; }
173 template <
typename Type >
177 } DetectedFeatures2D;
186 float m_confThreshold;
187 float m_nmsThreshold;
188 std::vector<std::string> m_classNames;
189 cv::Size m_inputSize;
190 double m_filterSizeRatio;
193 double m_scaleFactor;
196 std::string m_modelFilename;
197 std::string m_modelConfigFilename;
198 std::string m_framework;
200 #ifdef VISP_HAVE_NLOHMANN_JSON
210 config.m_confThreshold = j.value(
"confidenceThreshold", config.m_confThreshold);
211 if (config.m_confThreshold <= 0) {
215 config.m_nmsThreshold = j.value(
"nmsThreshold", config.m_nmsThreshold);
216 if (config.m_nmsThreshold <= 0) {
220 config.m_filterSizeRatio = j.value(
"filterSizeRatio", config.m_filterSizeRatio);
222 config.m_classNames = j.value(
"classNames", config.m_classNames);
224 std::pair<unsigned int, unsigned int> resolution = j.value(
"resolution", std::pair<unsigned int, unsigned int>(config.m_inputSize.width, config.m_inputSize.height));
225 config.m_inputSize.width = resolution.first;
226 config.m_inputSize.height = resolution.second;
228 std::vector<double> v_mean = j.value(
"mean", std::vector<double>({ config.m_mean[0], config.m_mean[1], config.m_mean[2] }));
229 if (v_mean.size() != 3) {
232 config.m_mean = cv::Scalar(v_mean[0], v_mean[1], v_mean[2]);
234 config.m_scaleFactor = j.value(
"scale", config.m_scaleFactor);
235 config.m_swapRB = j.value(
"swapRB", config.m_swapRB);
236 config.m_parsingMethodType = dnnResultsParsingTypeFromString(j.value(
"parsingType", dnnResultsParsingTypeToString(config.m_parsingMethodType)));
237 config.m_modelFilename = j.value(
"modelFile", config.m_modelFilename);
238 config.m_modelConfigFilename = j.value(
"configurationFile", config.m_modelConfigFilename);
239 config.m_framework = j.value(
"framework", config.m_framework);
250 std::pair<unsigned int, unsigned int> resolution = { config.m_inputSize.width, config.m_inputSize.height };
251 std::vector<double> v_mean = { config.m_mean[0], config.m_mean[1], config.m_mean[2] };
253 {
"confidenceThreshold", config.m_confThreshold } ,
254 {
"nmsThreshold" , config.m_nmsThreshold } ,
255 {
"filterSizeRatio" , config.m_filterSizeRatio} ,
256 {
"classNames" , config.m_classNames } ,
257 {
"resolution" , resolution } ,
259 {
"scale" , config.m_scaleFactor } ,
260 {
"swapRB" , config.m_swapRB } ,
261 {
"parsingType" , dnnResultsParsingTypeToString(config.m_parsingMethodType) },
262 {
"modelFile" , config.m_modelFilename } ,
263 {
"configurationFile" , config.m_modelConfigFilename } ,
264 {
"framework" , config.m_framework }
294 std::vector<std::string> classNames;
295 std::ifstream ifs(filename);
297 while (getline(ifs, line)) {
298 if (line.find(
"[") == std::string::npos) {
299 classNames.push_back(line);
302 std::string lineWithoutBracket;
303 if (line.find(
"[") != std::string::npos) {
304 lineWithoutBracket = line.substr(line.find(
"[") + 1, line.size() - 2);
307 while (!lineWithoutBracket.empty()) {
308 std::string className;
309 auto start_pos = lineWithoutBracket.find(
"\"");
310 auto end_pos = lineWithoutBracket.find(
"\"", start_pos + 1);
311 className = lineWithoutBracket.substr(start_pos + 1, end_pos - (start_pos + 1));
312 if (!className.empty()) {
313 classNames.push_back(className);
314 lineWithoutBracket = lineWithoutBracket.substr(end_pos + 1);
326 : m_confThreshold(0.5f)
327 , m_nmsThreshold(0.4f)
329 , m_inputSize(300, 300)
330 , m_filterSizeRatio(0.)
331 , m_mean(127.5, 127.5, 127.5)
332 , m_scaleFactor(2.0 / 255.0)
336 , m_modelConfigFilename()
343 : m_confThreshold(config.m_confThreshold)
344 , m_nmsThreshold(config.m_nmsThreshold)
345 , m_classNames(config.m_classNames)
346 , m_inputSize(config.m_inputSize.width, config.m_inputSize.height)
347 , m_filterSizeRatio(config.m_filterSizeRatio)
348 , m_mean(cv::Scalar(config.m_mean[0], config.m_mean[1], config.m_mean[2]))
349 , m_scaleFactor(config.m_scaleFactor)
350 , m_swapRB(config.m_swapRB)
351 , m_parsingMethodType(config.m_parsingMethodType)
352 , m_modelFilename(config.m_modelFilename)
353 , m_modelConfigFilename(config.m_modelConfigFilename)
354 , m_framework(config.m_framework)
376 inline NetConfig(
float confThresh,
const float &nmsThresh,
const std::vector<std::string> &classNames,
const cv::Size &dnnInputSize,
const double &filterSizeRatio = 0.
377 ,
const cv::Scalar &mean = cv::Scalar(127.5, 127.5, 127.5),
const double &scaleFactor = 2. / 255.,
const bool &swapRB =
true
379 : m_confThreshold(confThresh)
380 , m_nmsThreshold(nmsThresh)
381 , m_classNames(classNames)
382 , m_inputSize(dnnInputSize)
383 , m_filterSizeRatio(filterSizeRatio)
385 , m_scaleFactor(scaleFactor)
387 , m_parsingMethodType(parsingType)
388 , m_modelFilename(modelFilename)
389 , m_modelConfigFilename(configFilename)
390 , m_framework(framework)
410 inline NetConfig(
const float &confThresh,
const float &nmsThresh,
const std::string &classNamesFile,
const cv::Size &dnnInputSize,
const double &filterSizeRatio = 0.
411 ,
const cv::Scalar &mean = cv::Scalar(127.5, 127.5, 127.5),
const double &scaleFactor = 2. / 255.,
const bool &swapRB =
true
413 : m_confThreshold(confThresh)
414 , m_nmsThreshold(nmsThresh)
415 , m_inputSize(dnnInputSize)
416 , m_filterSizeRatio(filterSizeRatio)
418 , m_scaleFactor(scaleFactor)
420 , m_parsingMethodType(parsingType)
421 , m_modelFilename(modelFilename)
422 , m_modelConfigFilename(configFilename)
423 , m_framework(framework)
425 m_classNames = parseClassNamesFile(classNamesFile);
431 text +=
"Model : " + m_modelFilename +
"\n";
433 text +=
"Config (optional): " + (m_modelConfigFilename.empty() ?
"\"None\"" : m_modelConfigFilename) +
"\n";
434 text +=
"Framework (optional): " + (m_framework.empty() ?
"\"None\"" : m_framework) +
"\n";
435 text +=
"Width x Height : " + std::to_string(m_inputSize.width) +
" x " + std::to_string(m_inputSize.height) +
"\n";
436 text +=
"Mean RGB : " + std::to_string(m_mean[0]) +
" " + std::to_string(m_mean[1]) +
" " + std::to_string(m_mean[2]) +
"\n";
437 text +=
"Scale : " + std::to_string(m_scaleFactor) +
"\n";
438 text +=
"Swap RB? : " + (m_swapRB ? std::string(
"true") : std::string(
"false")) +
"\n";
439 text +=
"Confidence threshold : " + std::to_string(m_confThreshold) +
"\n";
440 text +=
"NMS threshold : " + std::to_string(m_nmsThreshold) +
"\n";
441 text +=
"Filter threshold : " +
442 (m_filterSizeRatio > std::numeric_limits<double>::epsilon() ? std::to_string(m_filterSizeRatio)
443 :
"disabled") +
"\n";
455 m_confThreshold = config.m_confThreshold;
456 m_nmsThreshold = config.m_nmsThreshold;
457 m_classNames = config.m_classNames;
458 m_inputSize = cv::Size(config.m_inputSize.width, config.m_inputSize.height);
459 m_filterSizeRatio = config.m_filterSizeRatio;
460 m_mean = cv::Scalar(config.m_mean[0], config.m_mean[1], config.m_mean[2]);
461 m_scaleFactor = config.m_scaleFactor;
462 m_swapRB = config.m_swapRB;
463 m_parsingMethodType = config.m_parsingMethodType;
464 m_modelFilename = config.m_modelFilename;
465 m_modelConfigFilename = config.m_modelConfigFilename;
466 m_framework = config.m_framework;
// Returns a std::string about the supported DNNResultsParsingType values
// (presumably a listing of them; exact format is not visible here - TODO confirm).
473 static std::string getAvailableDnnResultsParsingTypes();
// Converts a DNNResultsParsingType value to its string form.
// Used by the NetConfig JSON code to write the "parsingType" entry.
474 static std::string dnnResultsParsingTypeToString(
const DNNResultsParsingType &type);
// Converts a name to the corresponding DNNResultsParsingType.
// Used by the NetConfig JSON code to read the "parsingType" entry.
475 static DNNResultsParsingType dnnResultsParsingTypeFromString(
const std::string &name);
// Parses the file containing the list of classes the DNN can detect and
// returns the class names. Called by the NetConfig constructor that takes a
// class-names file instead of a vector of names.
476 static std::vector<std::string> parseClassNamesFile(
const std::string &filename);
// Constructor from a network configuration.
//   config            - NetConfig describing the network.
//   typeParsingMethod - which DNNResultsParsingType to use for the DNN results.
//   parsingMethod     - optional callback receiving (DetectionCandidates &,
//                       std::vector<cv::Mat> &, const NetConfig &); defaults to
//                       postProcess_unimplemented. Presumably only relevant for
//                       a user-specified parsing type - TODO confirm.
478 vpDetectorDNNOpenCV(
const NetConfig &config,
const DNNResultsParsingType &typeParsingMethod,
void (*parsingMethod)(DetectionCandidates &, std::vector<cv::Mat> &,
const NetConfig &) = postProcess_unimplemented);
479 #ifdef VISP_HAVE_NLOHMANN_JSON
// JSON-related API; these declarations follow an
// #ifdef VISP_HAVE_NLOHMANN_JSON guard.
// Constructor taking the path of a JSON file and an optional parsing callback
// (defaults to postProcess_unimplemented).
480 vpDetectorDNNOpenCV(
const std::string &jsonPath,
void (*parsingMethod)(DetectionCandidates &, std::vector<cv::Mat> &,
const NetConfig &) = postProcess_unimplemented);
// Takes the path of a JSON file; presumably (re)configures the detector from
// it - TODO confirm against the implementation.
481 void initFromJSON(
const std::string &jsonPath);
// Const method taking the path of a JSON file; presumably writes the current
// configuration there - TODO confirm against the implementation.
482 void saveConfigurationInJSON(
const std::string &jsonPath)
const;
// detect() overloads. All are virtual, take the input image by const reference
// and fill `output`; the meaning of the bool result is not visible here.
// Three input types are covered (vpImage<unsigned char>, vpImage<vpRGBa>,
// cv::Mat), each with up to three output shapes:
//   - std::vector<DetectedFeatures2D>                          (flat list)
//   - std::map<std::string, std::vector<DetectedFeatures2D>>   (keyed by string,
//     presumably the class name - TODO confirm)
//   - std::vector<std::pair<std::string, std::vector<...>>>    (same pairs, as a vector)
// NOTE(review): the flat-vector overload for vpImage<unsigned char> is not
// present in this listing.
487 virtual bool detect(
const vpImage<unsigned char> &I, std::map< std::string, std::vector<DetectedFeatures2D>> &output);
488 virtual bool detect(
const vpImage<unsigned char> &I, std::vector< std::pair<std::string, std::vector<DetectedFeatures2D>>> &output);
// Color (vpRGBa) image input.
489 virtual bool detect(
const vpImage<vpRGBa> &I, std::vector<DetectedFeatures2D> &output);
490 virtual bool detect(
const vpImage<vpRGBa> &I, std::map< std::string, std::vector<DetectedFeatures2D>> &output);
491 virtual bool detect(
const vpImage<vpRGBa> &I, std::vector< std::pair<std::string, std::vector<DetectedFeatures2D>>> &output);
// OpenCV cv::Mat input.
492 virtual bool detect(
const cv::Mat &I, std::vector<DetectedFeatures2D> &output);
493 virtual bool detect(
const cv::Mat &I, std::map< std::string, std::vector<DetectedFeatures2D>> &output);
494 virtual bool detect(
const cv::Mat &I, std::vector< std::pair<std::string, std::vector<DetectedFeatures2D>>> &output);
// Takes a model file name plus optional configuration file and framework
// names (both default to the empty string).
496 void readNet(
const std::string &model,
const std::string &config =
"",
const std::string &framework =
"");
// Configuration setters. Each takes the new value by const reference; the
// parameter names mirror the NetConfig fields (confidence threshold, NMS
// threshold, filter size ratio, input size, mean, scale factor, swapRB).
// NOTE(review): whether a setter has side effects beyond storing the value
// is not visible here.
498 void setNetConfig(
const NetConfig &config);
499 void setConfidenceThreshold(
const float &confThreshold);
500 void setNMSThreshold(
const float &nmsThreshold);
501 void setDetectionFilterSizeRatio(
const double &sizeRatio);
// Input size given as separate width and height.
502 void setInputSize(
const int &width,
const int &height);
// Mean given per channel, in R, G, B order.
503 void setMean(
const double &meanR,
const double &meanG,
const double &meanB);
// Backend / target identifiers are plain ints (presumably OpenCV DNN
// backend and target ids - TODO confirm).
504 void setPreferableBackend(
const int &backendId);
505 void setPreferableTarget(
const int &targetId);
506 void setScaleFactor(
const double &scaleFactor);
507 void setSwapRB(
const bool &swapRB);
// Selects the results-parsing type and, optionally, a parsing callback with
// the same signature as the constructor's (defaults to postProcess_unimplemented).
508 void setParsingMethod(
const DNNResultsParsingType &typeParsingMethod,
void (*parsingMethod)(DetectionCandidates &, std::vector<cv::Mat> &,
const NetConfig &) = postProcess_unimplemented);
514 #ifdef VISP_HAVE_NLOHMANN_JSON
548 #if (VISP_HAVE_OPENCV_VERSION == 0x030403)
549 std::vector<cv::String> getOutputsNames();
// Detection filters. Each takes the detections by const reference together
// with minRatioOfAreaOk and returns a new container of the same type as its
// input. The exact area criterion is not visible in this listing.
// Variant for input holding a single class (per its name).
551 std::vector<DetectedFeatures2D>
552 filterDetectionSingleClassInput(
const std::vector<DetectedFeatures2D> &detected_features,
const double minRatioOfAreaOk);
// Variant for a flat vector that may hold several classes (per its name).
554 std::vector<DetectedFeatures2D>
555 filterDetectionMultiClassInput(
const std::vector<DetectedFeatures2D> &detected_features,
const double minRatioOfAreaOk);
// Variant for detections already grouped in a map keyed by std::string.
557 std::map<std::string, std::vector<vpDetectorDNNOpenCV::DetectedFeatures2D>>
558 filterDetectionMultiClassInput(
const std::map< std::string, std::vector<vpDetectorDNNOpenCV::DetectedFeatures2D>> &detected_features,
const double minRatioOfAreaOk);
// Post-processing entry point operating on the detection candidates.
560 void postProcess(DetectionCandidates &proposals);
// Network-specific post-processing methods. All share one signature:
//   proposals - DetectionCandidates to fill/update,
//   dnnRes    - raw output blobs of the network,
//   netConfig - network configuration.
// The function-pointer parameters of the constructors and setParsingMethod
// use this same parameter list.
562 void postProcess_YoloV3_V4(DetectionCandidates &proposals, std::vector<cv::Mat> &dnnRes,
const NetConfig &netConfig);
564 void postProcess_YoloV5_V7(DetectionCandidates &proposals, std::vector<cv::Mat> &dnnRes,
const NetConfig &netConfig);
566 void postProcess_YoloV8_V11(DetectionCandidates &proposals, std::vector<cv::Mat> &dnnRes,
const NetConfig &netConfig);
568 void postProcess_FasterRCNN(DetectionCandidates &proposals, std::vector<cv::Mat> &dnnRes,
const NetConfig &netConfig);
// NOTE(review): the #endif matching the guard below is not visible in this
// listing, so which declarations it covers cannot be determined from here.
570 #if defined(VISP_BUILD_DEPRECATED_FUNCTIONS)
571 void postProcess_SSD_MobileNet(DetectionCandidates &proposals, std::vector<cv::Mat> &dnnRes,
const NetConfig &netConfig);
574 void postProcess_ResNet_10(DetectionCandidates &proposals, std::vector<cv::Mat> &dnnRes,
const NetConfig &netConfig);
// Static, so it can serve as the default value of the parsingMethod
// function-pointer parameters declared above.
576 static void postProcess_unimplemented(DetectionCandidates &proposals, std::vector<cv::Mat> &dnnRes,
const NetConfig &netConfig);
607 template <
typename Type >
613 std::stringstream ss;
620 ss <<
"(" << std::setprecision(4) <<
m_score * 100. <<
"%)";
Class to define RGB colors available for display functionalities.
static const vpColor blue
Structure containing the bounding box, expressed in pixels, confidence and class information about an...
void display(const vpImage< Type > &img, const vpColor &color=vpColor::blue, unsigned int thickness=1) const
friend vpDetectorDNNOpenCV
vpRect getBoundingBox() const
DetectedFeatures2D(double u_min, double u_max, double v_min, double v_max, unsigned int cls, double score, const std::optional< std::string > &classname)
Construct a new DetectedFeatures2D object.
std::optional< std::string > getClassName() const
double getConfidenceScore() const
std::optional< std::string > m_classname
unsigned int getClassId() const
Structure containing some information required for the configuration of a vpDetectorDNNOpenCV object.
NetConfig(const NetConfig &config)
friend void to_json(nlohmann::json &j, const NetConfig &config)
Parse a vpDetectorDNNOpenCV::NetConfig into JSON format.
NetConfig()
Default constructor of the structure vpDetectorDNNOpenCV::NetConfig , required for JSON serialization...
friend vpDetectorDNNOpenCV
friend std::ostream & operator<<(std::ostream &os, const NetConfig &config)
std::string toString() const
friend void from_json(const nlohmann::json &j, NetConfig &config)
Read the network configuration from JSON. All values are optional and if an argument is not present,...
NetConfig(float confThresh, const float &nmsThresh, const std::vector< std::string > &classNames, const cv::Size &dnnInputSize, const double &filterSizeRatio=0., const cv::Scalar &mean=cv::Scalar(127.5, 127.5, 127.5), const double &scaleFactor=2./255., const bool &swapRB=true, const DNNResultsParsingType &parsingType=vpDetectorDNNOpenCV::USER_SPECIFIED, const std::string &modelFilename="", const std::string &configFilename="", const std::string &framework="")
Construct a new NetConfig object.
NetConfig & operator=(const NetConfig &config)
static std::vector< std::string > parseClassNamesFile(const std::string &filename)
Parse the file containing the list of classes the DNN can detect. These classes can be written either...
NetConfig(const float &confThresh, const float &nmsThresh, const std::string &classNamesFile, const cv::Size &dnnInputSize, const double &filterSizeRatio=0., const cv::Scalar &mean=cv::Scalar(127.5, 127.5, 127.5), const double &scaleFactor=2./255., const bool &swapRB=true, const DNNResultsParsingType &parsingType=vpDetectorDNNOpenCV::USER_SPECIFIED, const std::string &modelFilename="", const std::string &configFilename="", const std::string &framework="")
Construct a new NetConfig object.
friend void from_json(const nlohmann::json &j, vpDetectorDNNOpenCV &network)
Read the network configuration from JSON. All values are optional and if an argument is not present,...
cv::Mat m_blob
Buffer for the blob in input net.
friend std::ostream & operator<<(std::ostream &os, const vpDetectorDNNOpenCV &network)
DNNResultsParsingType
Enumeration listing the types of DNN for which the vpDetectorDNNOpenCV furnishes the methods permitti...
std::vector< cv::String > m_outNames
Names of layers with unconnected outputs.
friend void to_json(nlohmann::json &j, const vpDetectorDNNOpenCV &network)
Parse the network configuration into JSON format.
const NetConfig & getNetConfig() const
cv::Mat m_img
Buffer for the input image.
std::vector< int > m_indices
Indices for NMS.
NetConfig m_netConfig
Configuration of the DNN.
std::vector< cv::Mat > m_dnnRes
Contains all output blobs for each layer specified in m_outNames.
cv::dnn::Net m_net
DNN network.
bool m_applySizeFilterAfterNMS
If true, filter the detections removing the ones for which the bbox does not respect area(bbox) ∈ [me...
static std::string dnnResultsParsingTypeToString(const DNNResultsParsingType &type)
vpImage< vpRGBa > m_I_color
Buffer for gray to RGBa image conversion.
static void displayRectangle(const vpImage< unsigned char > &I, const vpImagePoint &topLeft, unsigned int width, unsigned int height, const vpColor &color, bool fill=false, unsigned int thickness=1)
static void displayText(const vpImage< unsigned char > &I, const vpImagePoint &ip, const std::string &s, const vpColor &color)
Error that can be emitted by ViSP classes.
@ badValue
Used to indicate that a value is not in the allowed range.
@ dimensionError
Bad dimension.
Class that defines a 2D point in an image. This class is useful for image processing and stores only ...
Definition of the vpImage class member functions.
Defines a rectangle in the plane.
vpImagePoint getTopRight() const
std::vector< int > m_classIds
std::vector< float > m_confidences
std::vector< cv::Rect > m_boxes