35 #ifndef _vpDetectorDNN_h_
36 #define _vpDetectorDNN_h_
38 #include <visp3/core/vpConfig.h>
40 #if (VISP_HAVE_OPENCV_VERSION >= 0x030403) && defined(HAVE_OPENCV_DNN) && (VISP_CXX_STANDARD >= VISP_CXX_STANDARD_17)
45 #include <opencv2/dnn.hpp>
47 #include <visp3/core/vpColor.h>
48 #include <visp3/core/vpDisplay.h>
49 #include <visp3/core/vpImage.h>
50 #include <visp3/core/vpRect.h>
53 #ifdef VISP_HAVE_NLOHMANN_JSON
54 #include <nlohmann/json.hpp>
55 using json = nlohmann::json;
100 } DNNResultsParsingType;
107 } DetectionCandidates;
134 ,
double v_min,
double v_max
135 ,
unsigned int cls,
double score
136 ,
const std::optional<std::string> &classname
143 m_classname = classname;
146 m_classname = std::nullopt;
// Returns a copy of the stored class name (m_classname).
// The returned optional may be empty: the assignments to m_classname visible in
// this listing store either the caller-supplied `classname` or std::nullopt.
165 inline std::optional<std::string>
getClassName()
const {
return m_classname; }
167 template <
typename Type >
171 } DetectedFeatures2D;
180 float m_confThreshold;
181 float m_nmsThreshold;
182 std::vector<std::string> m_classNames;
183 cv::Size m_inputSize;
184 double m_filterSizeRatio;
187 double m_scaleFactor;
190 std::string m_modelFilename;
191 std::string m_modelConfigFilename;
192 std::string m_framework;
194 #ifdef VISP_HAVE_NLOHMANN_JSON
204 config.m_confThreshold = j.value(
"confidenceThreshold", config.m_confThreshold);
205 if (config.m_confThreshold <= 0) {
209 config.m_nmsThreshold = j.value(
"nmsThreshold", config.m_nmsThreshold);
210 if (config.m_nmsThreshold <= 0) {
214 config.m_filterSizeRatio = j.value(
"filterSizeRatio", config.m_filterSizeRatio);
216 config.m_classNames = j.value(
"classNames", config.m_classNames);
218 std::pair<unsigned int, unsigned int> resolution = j.value(
"resolution", std::pair<unsigned int, unsigned int>(config.m_inputSize.width, config.m_inputSize.height));
219 config.m_inputSize.width = resolution.first;
220 config.m_inputSize.height = resolution.second;
222 std::vector<double> v_mean = j.value(
"mean", std::vector<double>({ config.m_mean[0], config.m_mean[1], config.m_mean[2] }));
223 if (v_mean.size() != 3) {
226 config.m_mean = cv::Scalar(v_mean[0], v_mean[1], v_mean[2]);
228 config.m_scaleFactor = j.value(
"scale", config.m_scaleFactor);
229 config.m_swapRB = j.value(
"swapRB", config.m_swapRB);
230 config.m_parsingMethodType = dnnResultsParsingTypeFromString(j.value(
"parsingType", dnnResultsParsingTypeToString(config.m_parsingMethodType)));
231 config.m_modelFilename = j.value(
"modelFile", config.m_modelFilename);
232 config.m_modelConfigFilename = j.value(
"configurationFile", config.m_modelConfigFilename);
233 config.m_framework = j.value(
"framework", config.m_framework);
244 std::pair<unsigned int, unsigned int> resolution = { config.m_inputSize.width, config.m_inputSize.height };
245 std::vector<double> v_mean = { config.m_mean[0], config.m_mean[1], config.m_mean[2] };
247 {
"confidenceThreshold", config.m_confThreshold } ,
248 {
"nmsThreshold" , config.m_nmsThreshold } ,
249 {
"filterSizeRatio" , config.m_filterSizeRatio} ,
250 {
"classNames" , config.m_classNames } ,
251 {
"resolution" , resolution } ,
253 {
"scale" , config.m_scaleFactor } ,
254 {
"swapRB" , config.m_swapRB } ,
255 {
"parsingType" , dnnResultsParsingTypeToString(config.m_parsingMethodType) },
256 {
"modelFile" , config.m_modelFilename } ,
257 {
"configurationFile" , config.m_modelConfigFilename } ,
258 {
"framework" , config.m_framework }
288 std::vector<std::string> classNames;
289 std::ifstream ifs(filename);
291 while (getline(ifs, line)) {
292 if (line.find(
"[") == std::string::npos) {
293 classNames.push_back(line);
296 std::string lineWithoutBracket;
297 if (line.find(
"[") != std::string::npos) {
298 lineWithoutBracket = line.substr(line.find(
"[") + 1, line.size() - 2);
301 while (!lineWithoutBracket.empty()) {
302 std::string className;
303 auto start_pos = lineWithoutBracket.find(
"\"");
304 auto end_pos = lineWithoutBracket.find(
"\"", start_pos + 1);
305 className = lineWithoutBracket.substr(start_pos + 1, end_pos - (start_pos + 1));
306 if (!className.empty()) {
307 classNames.push_back(className);
308 lineWithoutBracket = lineWithoutBracket.substr(end_pos + 1);
320 : m_confThreshold(0.5f)
321 , m_nmsThreshold(0.4f)
323 , m_inputSize(300, 300)
324 , m_filterSizeRatio(0.)
325 , m_mean(127.5, 127.5, 127.5)
326 , m_scaleFactor(2.0 / 255.0)
330 , m_modelConfigFilename()
337 : m_confThreshold(config.m_confThreshold)
338 , m_nmsThreshold(config.m_nmsThreshold)
339 , m_classNames(config.m_classNames)
340 , m_inputSize(config.m_inputSize.width, config.m_inputSize.height)
341 , m_filterSizeRatio(config.m_filterSizeRatio)
342 , m_mean(cv::Scalar(config.m_mean[0], config.m_mean[1], config.m_mean[2]))
343 , m_scaleFactor(config.m_scaleFactor)
344 , m_swapRB(config.m_swapRB)
345 , m_parsingMethodType(config.m_parsingMethodType)
346 , m_modelFilename(config.m_modelFilename)
347 , m_modelConfigFilename(config.m_modelConfigFilename)
348 , m_framework(config.m_framework)
370 inline NetConfig(
float confThresh,
const float &nmsThresh,
const std::vector<std::string> &classNames,
const cv::Size &dnnInputSize,
const double &filterSizeRatio = 0.
371 ,
const cv::Scalar &mean = cv::Scalar(127.5, 127.5, 127.5),
const double &scaleFactor = 2. / 255.,
const bool &swapRB =
true
373 : m_confThreshold(confThresh)
374 , m_nmsThreshold(nmsThresh)
375 , m_classNames(classNames)
376 , m_inputSize(dnnInputSize)
377 , m_filterSizeRatio(filterSizeRatio)
379 , m_scaleFactor(scaleFactor)
381 , m_parsingMethodType(parsingType)
382 , m_modelFilename(modelFilename)
383 , m_modelConfigFilename(configFilename)
384 , m_framework(framework)
404 inline NetConfig(
const float &confThresh,
const float &nmsThresh,
const std::string &classNamesFile,
const cv::Size &dnnInputSize,
const double &filterSizeRatio = 0.
405 ,
const cv::Scalar &mean = cv::Scalar(127.5, 127.5, 127.5),
const double &scaleFactor = 2. / 255.,
const bool &swapRB =
true
407 : m_confThreshold(confThresh)
408 , m_nmsThreshold(nmsThresh)
409 , m_inputSize(dnnInputSize)
410 , m_filterSizeRatio(filterSizeRatio)
412 , m_scaleFactor(scaleFactor)
414 , m_parsingMethodType(parsingType)
415 , m_modelFilename(modelFilename)
416 , m_modelConfigFilename(configFilename)
417 , m_framework(framework)
419 m_classNames = parseClassNamesFile(classNamesFile);
425 text +=
"Model : " + m_modelFilename +
"\n";
427 text +=
"Config (optional): " + (m_modelConfigFilename.empty() ?
"\"None\"" : m_modelConfigFilename) +
"\n";
428 text +=
"Framework (optional): " + (m_framework.empty() ?
"\"None\"" : m_framework) +
"\n";
429 text +=
"Width x Height : " + std::to_string(m_inputSize.width) +
" x " + std::to_string(m_inputSize.height) +
"\n";
430 text +=
"Mean RGB : " + std::to_string(m_mean[0]) +
" " + std::to_string(m_mean[1]) +
" " + std::to_string(m_mean[2]) +
"\n";
431 text +=
"Scale : " + std::to_string(m_scaleFactor) +
"\n";
432 text +=
"Swap RB? : " + (m_swapRB ? std::string(
"true") : std::string(
"false")) +
"\n";
433 text +=
"Confidence threshold : " + std::to_string(m_confThreshold) +
"\n";
434 text +=
"NMS threshold : " + std::to_string(m_nmsThreshold) +
"\n";
435 text +=
"Filter threshold : " +
436 (m_filterSizeRatio > std::numeric_limits<double>::epsilon() ? std::to_string(m_filterSizeRatio)
437 :
"disabled") +
"\n";
449 m_confThreshold = config.m_confThreshold;
450 m_nmsThreshold = config.m_nmsThreshold;
451 m_classNames = config.m_classNames;
452 m_inputSize = cv::Size(config.m_inputSize.width, config.m_inputSize.height);
453 m_filterSizeRatio = config.m_filterSizeRatio;
454 m_mean = cv::Scalar(config.m_mean[0], config.m_mean[1], config.m_mean[2]);
455 m_scaleFactor = config.m_scaleFactor;
456 m_swapRB = config.m_swapRB;
457 m_parsingMethodType = config.m_parsingMethodType;
458 m_modelFilename = config.m_modelFilename;
459 m_modelConfigFilename = config.m_modelConfigFilename;
460 m_framework = config.m_framework;
// Static helpers (declarations only; the definitions are not in this listing).

// Returns a std::string.
// NOTE(review): presumably a listing of the supported DNNResultsParsingType
// names - confirm against the implementation.
467 static std::string getAvailableDnnResultsParsingTypes();
// Converts a DNNResultsParsingType into its string form. The JSON serialization
// code visible in this file uses the result as the value of the "parsingType" entry.
468 static std::string dnnResultsParsingTypeToString(
const DNNResultsParsingType &type);
// Inverse conversion: maps a string to a DNNResultsParsingType. The JSON
// deserialization code visible in this file feeds it the "parsingType" entry.
// NOTE(review): behavior for an unrecognized name is not visible here - confirm.
469 static DNNResultsParsingType dnnResultsParsingTypeFromString(
const std::string &name);
// Takes a file name and returns a list of strings (class names, going by the
// function name).
// NOTE(review): a function with the same name and signature has its body
// partially visible earlier in this listing (it reads `filename` line by line
// through std::ifstream); confirm whether this declaration shares that behavior.
470 static std::vector<std::string> parseClassNamesFile(
const std::string &filename);
// Constructor taking a network configuration, the type of results parsing, and an
// optional parsing callback. The callback is a plain function pointer whose
// parameters are (DetectionCandidates &, std::vector<cv::Mat> &, const NetConfig &);
// when omitted it defaults to postProcess_unimplemented.
472 vpDetectorDNNOpenCV(
const NetConfig &config,
const DNNResultsParsingType &typeParsingMethod,
void (*parsingMethod)(DetectionCandidates &, std::vector<cv::Mat> &,
const NetConfig &) = postProcess_unimplemented);
// The three declarations below exist only when VISP_HAVE_NLOHMANN_JSON is defined
// (the matching #endif is not visible in this listing).
473 #ifdef VISP_HAVE_NLOHMANN_JSON
// Constructor taking a path to a JSON file plus the same optional parsing
// callback as the constructor above.
// NOTE(review): presumably reads the whole configuration from that file - confirm.
474 vpDetectorDNNOpenCV(
const std::string &jsonPath,
void (*parsingMethod)(DetectionCandidates &, std::vector<cv::Mat> &,
const NetConfig &) = postProcess_unimplemented);
// NOTE(review): presumably (re)initializes this detector from the JSON file at
// `jsonPath` - confirm against the implementation.
475 void initFromJSON(
const std::string &jsonPath);
// Const member: does not modify the detector.
// NOTE(review): presumably writes the current configuration to `jsonPath` - confirm.
476 void saveConfigurationInJSON(
const std::string &jsonPath)
const;
// Virtual detect() overloads. The input image `I` is one of three types
// (vpImage<unsigned char>, vpImage<vpRGBa>, cv::Mat) and `output` is taken by
// non-const reference in one of three container shapes:
//   - std::vector<DetectedFeatures2D>
//   - std::map<std::string, std::vector<DetectedFeatures2D>>
//   - std::vector<std::pair<std::string, std::vector<DetectedFeatures2D>>>
// NOTE(review): the std::string key is presumably the class name, and the bool
// return value presumably reports whether something was detected - confirm
// against the implementation, which is not part of this listing.

// vpImage<unsigned char> input, map output.
481 virtual bool detect(
const vpImage<unsigned char> &I, std::map< std::string, std::vector<DetectedFeatures2D>> &output);
// vpImage<unsigned char> input, vector-of-pairs output.
482 virtual bool detect(
const vpImage<unsigned char> &I, std::vector< std::pair<std::string, std::vector<DetectedFeatures2D>>> &output);
// vpImage<vpRGBa> input, flat vector output.
483 virtual bool detect(
const vpImage<vpRGBa> &I, std::vector<DetectedFeatures2D> &output);
// vpImage<vpRGBa> input, map output.
484 virtual bool detect(
const vpImage<vpRGBa> &I, std::map< std::string, std::vector<DetectedFeatures2D>> &output);
// vpImage<vpRGBa> input, vector-of-pairs output.
485 virtual bool detect(
const vpImage<vpRGBa> &I, std::vector< std::pair<std::string, std::vector<DetectedFeatures2D>>> &output);
// cv::Mat input, flat vector output.
486 virtual bool detect(
const cv::Mat &I, std::vector<DetectedFeatures2D> &output);
// cv::Mat input, map output.
487 virtual bool detect(
const cv::Mat &I, std::map< std::string, std::vector<DetectedFeatures2D>> &output);
// cv::Mat input, vector-of-pairs output.
488 virtual bool detect(
const cv::Mat &I, std::vector< std::pair<std::string, std::vector<DetectedFeatures2D>>> &output);
// Takes the model file name plus two optional strings (`config`, `framework`),
// both defaulting to the empty string. NetConfig::toString() in this file labels
// the corresponding stored values "(optional)" and prints "None" when they are empty.
// NOTE(review): presumably loads the network into m_net via OpenCV's dnn module -
// confirm against the implementation.
490 void readNet(
const std::string &model,
const std::string &config =
"",
const std::string &framework =
"");
// Setters (declarations only). Most parameter names mirror NetConfig members that
// are visible in this file: m_confThreshold, m_nmsThreshold, m_filterSizeRatio,
// m_inputSize, m_mean, m_scaleFactor, m_swapRB and m_parsingMethodType.
// NOTE(review): presumably each setter updates the matching field of the
// detector's NetConfig - confirm against the implementation.

// Takes a complete NetConfig.
492 void setNetConfig(
const NetConfig &config);
493 void setConfidenceThreshold(
const float &confThreshold);
494 void setNMSThreshold(
const float &nmsThreshold);
495 void setDetectionFilterSizeRatio(
const double &sizeRatio);
496 void setInputSize(
const int &width,
const int &height);
// The three means are named in R, G, B order.
497 void setMean(
const double &meanR,
const double &meanG,
const double &meanB);
// NOTE(review): backendId / targetId are presumably forwarded to the underlying
// cv::dnn::Net - confirm which OpenCV enum values are expected.
498 void setPreferableBackend(
const int &backendId);
499 void setPreferableTarget(
const int &targetId);
500 void setScaleFactor(
const double &scaleFactor);
501 void setSwapRB(
const bool &swapRB);
// Same (type, callback) pair as accepted by the constructor; the callback
// defaults to postProcess_unimplemented when omitted.
502 void setParsingMethod(
const DNNResultsParsingType &typeParsingMethod,
void (*parsingMethod)(DetectionCandidates &, std::vector<cv::Mat> &,
const NetConfig &) = postProcess_unimplemented);
508 #ifdef VISP_HAVE_NLOHMANN_JSON
// Declared only when VISP_HAVE_OPENCV_VERSION is exactly 0x030403 (the matching
// #endif is not visible in this listing). Returns a list of cv::String.
// NOTE(review): presumably the names of the network's output layers - confirm.
542 #if (VISP_HAVE_OPENCV_VERSION == 0x030403)
543 std::vector<cv::String> getOutputsNames();
// Detection-filtering helpers (declarations only). Each takes the detections by
// const reference plus a ratio `minRatioOfAreaOk`, and returns a new container of
// the same shape as its input.
// NOTE(review): the exact area criterion is not visible in this listing - confirm
// against the implementation before relying on it.

// Flat-vector input; "SingleClass" in the name suggests all detections belong to
// one class - TODO confirm.
545 std::vector<DetectedFeatures2D>
546 filterDetectionSingleClassInput(
const std::vector<DetectedFeatures2D> &detected_features,
const double minRatioOfAreaOk);
// Flat-vector input that may mix several classes (going by the name) - TODO confirm.
548 std::vector<DetectedFeatures2D>
549 filterDetectionMultiClassInput(
const std::vector<DetectedFeatures2D> &detected_features,
const double minRatioOfAreaOk);
// Overload working on a map from std::string to a vector of detections.
551 std::map<std::string, std::vector<vpDetectorDNNOpenCV::DetectedFeatures2D>>
552 filterDetectionMultiClassInput(
const std::map< std::string, std::vector<vpDetectorDNNOpenCV::DetectedFeatures2D>> &detected_features,
const double minRatioOfAreaOk);
// Post-processing entry point taking only the detection candidates.
// NOTE(review): presumably dispatches to one of the helpers below depending on
// the configured parsing type - confirm against the implementation.
554 void postProcess(DetectionCandidates &proposals);
// The helpers below all share the parameter list
// (DetectionCandidates &, std::vector<cv::Mat> &, const NetConfig &), which is the
// same list as the `parsingMethod` function pointer accepted by the constructor
// and by setParsingMethod(). The suffix of each name identifies the network
// family it targets.
556 void postProcess_YoloV3_V4(DetectionCandidates &proposals, std::vector<cv::Mat> &dnnRes,
const NetConfig &netConfig);
558 void postProcess_YoloV5_V7(DetectionCandidates &proposals, std::vector<cv::Mat> &dnnRes,
const NetConfig &netConfig);
560 void postProcess_YoloV8(DetectionCandidates &proposals, std::vector<cv::Mat> &dnnRes,
const NetConfig &netConfig);
562 void postProcess_FasterRCNN(DetectionCandidates &proposals, std::vector<cv::Mat> &dnnRes,
const NetConfig &netConfig);
// Declared only when VISP_BUILD_DEPRECATED_FUNCTIONS is defined.
// NOTE(review): the matching #endif is not visible in this listing, so it is
// unclear from here whether postProcess_ResNet_10 is also inside the guard.
564 #if defined(VISP_BUILD_DEPRECATED_FUNCTIONS)
565 void postProcess_SSD_MobileNet(DetectionCandidates &proposals, std::vector<cv::Mat> &dnnRes,
const NetConfig &netConfig);
568 void postProcess_ResNet_10(DetectionCandidates &proposals, std::vector<cv::Mat> &dnnRes,
const NetConfig &netConfig);
// The only static helper of the family; it is the default value of the
// `parsingMethod` function-pointer parameter of the constructors and of
// setParsingMethod().
570 static void postProcess_unimplemented(DetectionCandidates &proposals, std::vector<cv::Mat> &dnnRes,
const NetConfig &netConfig);
601 template <
typename Type >
607 std::stringstream ss;
614 ss <<
"(" << std::setprecision(4) <<
m_score * 100. <<
"%)";
Class to define RGB colors available for display functionalities.
static const vpColor blue
Structure containing the bounding box, expressed in pixels, confidence and class information about an...
void display(const vpImage< Type > &img, const vpColor &color=vpColor::blue, unsigned int thickness=1) const
friend vpDetectorDNNOpenCV
vpRect getBoundingBox() const
DetectedFeatures2D(double u_min, double u_max, double v_min, double v_max, unsigned int cls, double score, const std::optional< std::string > &classname)
Construct a new DetectedFeatures2D object.
std::optional< std::string > getClassName() const
double getConfidenceScore() const
std::optional< std::string > m_classname
unsigned int getClassId() const
Structure containing some information required for the configuration of a vpDetectorDNNOpenCV object.
NetConfig(const NetConfig &config)
friend void from_json(const json &j, NetConfig &config)
Read the network configuration from JSON. All values are optional and if an argument is not present,...
NetConfig()
Default constructor of the structure vpDetectorDNNOpenCV::NetConfig , required for JSON serialization...
friend vpDetectorDNNOpenCV
friend std::ostream & operator<<(std::ostream &os, const NetConfig &config)
std::string toString() const
NetConfig(float confThresh, const float &nmsThresh, const std::vector< std::string > &classNames, const cv::Size &dnnInputSize, const double &filterSizeRatio=0., const cv::Scalar &mean=cv::Scalar(127.5, 127.5, 127.5), const double &scaleFactor=2./255., const bool &swapRB=true, const DNNResultsParsingType &parsingType=vpDetectorDNNOpenCV::USER_SPECIFIED, const std::string &modelFilename="", const std::string &configFilename="", const std::string &framework="")
Construct a new NetConfig object.
NetConfig & operator=(const NetConfig &config)
static std::vector< std::string > parseClassNamesFile(const std::string &filename)
Parse the file containing the list of classes the DNN can detect. These classes can be written either...
friend void to_json(json &j, const NetConfig &config)
Parse a vpDetectorDNNOpenCV::NetConfig into JSON format.
NetConfig(const float &confThresh, const float &nmsThresh, const std::string &classNamesFile, const cv::Size &dnnInputSize, const double &filterSizeRatio=0., const cv::Scalar &mean=cv::Scalar(127.5, 127.5, 127.5), const double &scaleFactor=2./255., const bool &swapRB=true, const DNNResultsParsingType &parsingType=vpDetectorDNNOpenCV::USER_SPECIFIED, const std::string &modelFilename="", const std::string &configFilename="", const std::string &framework="")
Construct a new NetConfig object.
cv::Mat m_blob
Buffer for the blob in input net.
friend std::ostream & operator<<(std::ostream &os, const vpDetectorDNNOpenCV &network)
DNNResultsParsingType
Enumeration listing the types of DNN for which the vpDetectorDNNOpenCV furnishes the methods permitti...
friend void from_json(const json &j, vpDetectorDNNOpenCV &network)
Read the network configuration from JSON. All values are optional and if an argument is not present,...
std::vector< cv::String > m_outNames
Names of layers with unconnected outputs.
const NetConfig & getNetConfig() const
cv::Mat m_img
Buffer for the input image.
std::vector< int > m_indices
Indices for NMS.
NetConfig m_netConfig
Configuration of the DNN.
std::vector< cv::Mat > m_dnnRes
Contains all output blobs for each layer specified in m_outNames.
cv::dnn::Net m_net
DNN network.
bool m_applySizeFilterAfterNMS
If true, filter the detections removing the ones for which the bbox does not respect area(bbox) ∈ [me...
friend void to_json(json &j, const vpDetectorDNNOpenCV &network)
Parse the network configuration into JSON format.
static std::string dnnResultsParsingTypeToString(const DNNResultsParsingType &type)
vpImage< vpRGBa > m_I_color
Buffer for gray to RGBa image conversion.
static void displayRectangle(const vpImage< unsigned char > &I, const vpImagePoint &topLeft, unsigned int width, unsigned int height, const vpColor &color, bool fill=false, unsigned int thickness=1)
static void displayText(const vpImage< unsigned char > &I, const vpImagePoint &ip, const std::string &s, const vpColor &color)
Error that can be emitted by ViSP classes.
@ badValue
Used to indicate that a value is not in the allowed range.
@ dimensionError
Bad dimension.
Class that defines a 2D point in an image. This class is useful for image processing and stores only ...
Definition of the vpImage class member functions.
Defines a rectangle in the plane.
vpImagePoint getTopRight() const
std::vector< int > m_classIds
std::vector< float > m_confidences
std::vector< cv::Rect > m_boxes