35 #include <visp3/core/vpConfig.h>
38 #if (VISP_HAVE_OPENCV_VERSION >= 0x030403) && defined(HAVE_OPENCV_DNN) && \
39 ((__cplusplus >= 201703L) || (defined(_MSVC_LANG) && (_MSVC_LANG >= 201703L)))
41 #include <visp3/core/vpImageConvert.h>
42 #include <visp3/detection/vpDetectorDNNOpenCV.h>
43 #include <visp3/core/vpIoTools.h>
53 std::string vpDetectorDNNOpenCV::getAvailableDnnResultsParsingTypes()
55 std::string list =
"[";
56 for (
unsigned int i = 0; i < vpDetectorDNNOpenCV::COUNT - 1; i++) {
57 list +=
"\"" + dnnResultsParsingTypeToString((vpDetectorDNNOpenCV::DNNResultsParsingType)i) +
"\", ";
59 list +=
"\"" + dnnResultsParsingTypeToString((vpDetectorDNNOpenCV::DNNResultsParsingType)(vpDetectorDNNOpenCV::COUNT - 1)) +
"\"]";
// Convert a DNNResultsParsingType enum value into its human-readable name.
// NOTE(review): only fragments of this function are visible in this chunk —
// the switch over \b type and most of the enum cases are elided here.
72 std::string vpDetectorDNNOpenCV::dnnResultsParsingTypeToString(
const DNNResultsParsingType &type)
// Name returned for the SSD-MobileNet parsing method (presumably the
// SSD_MOBILENET case — TODO confirm against the full file).
95 name =
"ssd-mobilenet";
// Name returned when the user supplies a custom parsing callback.
101 name =
"user-specified";
// Inverse of dnnResultsParsingTypeToString(): map a name back to its enum
// value. Returns COUNT (the initial value of res) when no name matches.
// NOTE(review): interior lines are elided in this chunk; in particular
// `name_lowercase` is defined on an elided line — presumably a lower-cased
// copy of \b name. TODO confirm against the full file.
118 vpDetectorDNNOpenCV::DNNResultsParsingType vpDetectorDNNOpenCV::dnnResultsParsingTypeFromString(
const std::string &name)
// COUNT acts as the "not found" sentinel.
120 vpDetectorDNNOpenCV::DNNResultsParsingType res(COUNT);
121 bool hasFoundMatch =
false;
// Linear scan over all enum values; stop as soon as one matches.
123 for (
int id = 0;
id < COUNT && !hasFoundMatch;
id++) {
124 vpDetectorDNNOpenCV::DNNResultsParsingType temp = (vpDetectorDNNOpenCV::DNNResultsParsingType)
id;
// Compare the canonical name of each enum value with the requested one.
125 if (dnnResultsParsingTypeToString(temp) == name_lowercase) {
// The assignment of res = temp is on an elided line; only the flag is visible.
127 hasFoundMatch =
true;
143 std::vector<std::string> vpDetectorDNNOpenCV::parseClassNamesFile(
const std::string &filename)
145 return NetConfig::parseClassNamesFile(filename);
148 vpDetectorDNNOpenCV::vpDetectorDNNOpenCV()
149 : m_applySizeFilterAfterNMS(false), m_blob(), m_I_color(), m_img(),
150 m_net(), m_netConfig(), m_outNames(), m_dnnRes(),
151 m_parsingMethod(vpDetectorDNNOpenCV::postProcess_unimplemented)
153 setDetectionFilterSizeRatio(m_netConfig.m_filterSizeRatio);
163 vpDetectorDNNOpenCV::vpDetectorDNNOpenCV(
const NetConfig &config,
const DNNResultsParsingType &typeParsingMethod,
void (*parsingMethod)(DetectionCandidates &, std::vector<cv::Mat> &,
const NetConfig &))
164 : m_applySizeFilterAfterNMS(false), m_blob(), m_I_color(), m_img(),
165 m_net(), m_netConfig(config), m_outNames(), m_dnnRes()
167 setDetectionFilterSizeRatio(m_netConfig.m_filterSizeRatio);
168 setParsingMethod(typeParsingMethod, parsingMethod);
169 if (!m_netConfig.m_modelFilename.empty()) {
170 readNet(m_netConfig.m_modelFilename, m_netConfig.m_modelConfigFilename, m_netConfig.m_framework);
174 #ifdef VISP_HAVE_NLOHMANN_JSON
176 using json = nlohmann::json;
184 vpDetectorDNNOpenCV::vpDetectorDNNOpenCV(
const std::string &jsonPath,
void (*parsingMethod)(DetectionCandidates &, std::vector<cv::Mat> &,
const NetConfig &))
185 : m_applySizeFilterAfterNMS(false), m_blob(), m_I_color(), m_img(),
186 m_net(), m_netConfig(), m_outNames(), m_dnnRes()
188 initFromJSON(jsonPath);
189 setDetectionFilterSizeRatio(m_netConfig.m_filterSizeRatio);
190 setParsingMethod(m_netConfig.m_parsingMethodType, parsingMethod);
// Initialize the detector configuration from a JSON file, then load the
// network. NOTE(review): the try block, the throw statements and the
// deserialization of m_netConfig are on elided lines in this chunk.
198 void vpDetectorDNNOpenCV::initFromJSON(
const std::string &jsonPath)
200 std::ifstream file(jsonPath);
// Error message built when the file cannot be opened (the surrounding
// check and throw are elided).
202 std::stringstream ss;
203 ss <<
"Problem opening file " << jsonPath <<
". Make sure it exists and is readable" << std::endl;
// Parse the JSON content; parse errors are reported with their byte offset.
208 j = json::parse(file);
210 catch (json::parse_error &e) {
211 std::stringstream msg;
212 msg <<
"Could not parse JSON file : \n";
214 msg << e.what() << std::endl;
215 msg <<
"Byte position of error: " << e.byte;
// Once m_netConfig is filled (elided), load the network it describes.
220 readNet(m_netConfig.m_modelFilename, m_netConfig.m_modelConfigFilename, m_netConfig.m_framework);
// Serialize the current detector configuration into a JSON file.
// NOTE(review): the lines writing \c j into \c file are elided in this chunk.
228 void vpDetectorDNNOpenCV::saveConfigurationInJSON(
const std::string &jsonPath)
const
230 std::ofstream file(jsonPath);
// Relies on the to_json() ADL overload for vpDetectorDNNOpenCV.
231 const json j = *
this;
240 vpDetectorDNNOpenCV::~vpDetectorDNNOpenCV() { }
// Detect objects in a grayscale ViSP image, filling a flat vector of
// detections. The elided lines presumably convert \b I into the color buffer
// m_I_color (vpImageConvert) before delegating — TODO confirm.
251 bool vpDetectorDNNOpenCV::detect(
const vpImage<unsigned char> &I, std::vector<DetectedFeatures2D> &output)
// Delegate to the vpImage<vpRGBa> overload.
255 return detect(m_I_color, output);
// Detect objects in a grayscale ViSP image, filling a map from class name to
// the detections of that class. The elided lines presumably convert \b I into
// m_I_color before delegating — TODO confirm.
267 bool vpDetectorDNNOpenCV::detect(
const vpImage<unsigned char> &I, std::map< std::string, std::vector<DetectedFeatures2D>> &output)
// Delegate to the vpImage<vpRGBa> overload.
271 return detect(m_I_color, output);
// Detect objects in a grayscale ViSP image, filling a vector of
// (class name, detections) pairs. The elided lines presumably convert \b I
// into m_I_color before delegating — TODO confirm.
283 bool vpDetectorDNNOpenCV::detect(
const vpImage<unsigned char> &I, std::vector< std::pair<std::string, std::vector<DetectedFeatures2D>>> &output)
// Delegate to the vpImage<vpRGBa> overload.
287 return detect(m_I_color, output);
// Detect objects in a color ViSP image, filling a flat vector of detections.
// The elided lines presumably convert \b I into the cv::Mat buffer m_img
// before delegating — TODO confirm.
299 bool vpDetectorDNNOpenCV::detect(
const vpImage<vpRGBa> &I, std::vector<DetectedFeatures2D> &output)
// Delegate to the cv::Mat overload.
303 return detect(m_img, output);
// Detect objects in a color ViSP image, filling a map from class name to the
// detections of that class. The elided lines presumably convert \b I into
// m_img before delegating — TODO confirm.
315 bool vpDetectorDNNOpenCV::detect(
const vpImage<vpRGBa> &I, std::map< std::string, std::vector<DetectedFeatures2D>> &output)
// Delegate to the cv::Mat overload.
319 return detect(m_img, output);
// Detect objects in a color ViSP image, filling a vector of
// (class name, detections) pairs. The elided lines presumably convert \b I
// into m_img before delegating — TODO confirm.
329 bool vpDetectorDNNOpenCV::detect(
const vpImage<vpRGBa> &I, std::vector< std::pair<std::string, std::vector<DetectedFeatures2D>>> &output)
// Delegate to the cv::Mat overload.
333 return detect(m_img, output);
// Core detection on a cv::Mat: run the forward pass, post-process the raw
// outputs, apply NMS and optionally the size filter, and fill \b output with
// the surviving detections. Returns true when at least one object was found.
// NOTE(review): several lines are elided in this chunk (the assignment of
// m_img from \b I, the try keyword, closing braces, and the trailing
// arguments of emplace_back, presumably classname_opt).
343 bool vpDetectorDNNOpenCV::detect(
const cv::Mat &I, std::vector<DetectedFeatures2D> &output)
// Use the configured input size when positive, otherwise the image size.
348 cv::Size inputSize(m_netConfig.m_inputSize.width > 0 ? m_netConfig.m_inputSize.width : m_img.cols,
349 m_netConfig.m_inputSize.height > 0 ? m_netConfig.m_inputSize.height : m_img.rows);
350 cv::dnn::blobFromImage(m_img, m_blob, m_netConfig.m_scaleFactor, inputSize, m_netConfig.m_mean, m_netConfig.m_swapRB,
false);
352 m_net.setInput(m_blob);
354 m_net.forward(m_dnnRes, m_outNames);
// Inference failure (e.g. a broken CUDA setup) triggers a CPU fallback and
// one retry; a second failure propagates.
356 catch (
const cv::Exception &e) {
357 std::cerr <<
"Caught an exception trying to run inference:" << std::endl <<
"\t"
359 <<
"\nCuda and/or GPU driver might not be correctly installed. Setting preferable backend to CPU and trying again." << std::endl;
360 m_net.setPreferableBackend(cv::dnn::DNN_BACKEND_DEFAULT);
361 m_net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
362 m_net.forward(m_dnnRes, m_outNames);
// Parse the raw network outputs into candidate boxes + run NMS (m_indices).
365 DetectionCandidates proposals;
366 postProcess(proposals);
367 size_t nbClassNames = m_netConfig.m_classNames.size();
// Keep only the proposals that survived NMS.
368 for (
size_t i = 0; i < m_indices.size(); ++i) {
369 int idx = m_indices[i];
370 cv::Rect box = proposals.m_boxes[idx];
371 std::optional<std::string> classname_opt;
// Attach the class name only when a class list was configured.
372 if (nbClassNames > 0) {
373 classname_opt = m_netConfig.m_classNames[proposals.m_classIds[idx]];
375 output.emplace_back(box.x, box.x + box.width, box.y, box.y + box.height
376 , proposals.m_classIds[idx], proposals.m_confidences[idx]
// Optional post-NMS filter discarding boxes whose area deviates too much
// from the mean detection area.
381 if (m_applySizeFilterAfterNMS) {
383 output = filterDetectionMultiClassInput(output, m_netConfig.m_filterSizeRatio);
386 return !output.empty();
// Same pipeline as the vector overload, but groups the detections by class
// name in a map. When no class list is configured, the numeric class id is
// used as the map key. NOTE(review): the assignment of m_img from \b I, the
// try keyword, the else keyword and closing braces are on elided lines.
396 bool vpDetectorDNNOpenCV::detect(
const cv::Mat &I, std::map< std::string, std::vector<DetectedFeatures2D>> &output)
// Use the configured input size when positive, otherwise the image size.
401 cv::Size inputSize(m_netConfig.m_inputSize.width > 0 ? m_netConfig.m_inputSize.width : m_img.cols,
402 m_netConfig.m_inputSize.height > 0 ? m_netConfig.m_inputSize.height : m_img.rows);
403 cv::dnn::blobFromImage(m_img, m_blob, m_netConfig.m_scaleFactor, inputSize, m_netConfig.m_mean, m_netConfig.m_swapRB,
false);
405 m_net.setInput(m_blob);
407 m_net.forward(m_dnnRes, m_outNames);
// Inference failure triggers a CPU fallback and one retry.
409 catch (
const cv::Exception &e) {
410 std::cerr <<
"Caught an exception trying to run inference:" << std::endl <<
"\t"
412 <<
"\nCuda and/or GPU driver might not be correctly installed. Setting preferable backend to CPU and trying again." << std::endl;
413 m_net.setPreferableBackend(cv::dnn::DNN_BACKEND_DEFAULT);
414 m_net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
415 m_net.forward(m_dnnRes, m_outNames);
// Parse the raw outputs into candidate boxes + run NMS (m_indices).
418 DetectionCandidates proposals;
419 postProcess(proposals);
420 size_t nbClassNames = m_netConfig.m_classNames.size();
421 for (
size_t i = 0; i < m_indices.size(); ++i) {
422 int idx = m_indices[i];
423 cv::Rect box = proposals.m_boxes[idx];
424 std::string classname;
// Map key: configured class name, or the numeric id as a fallback.
425 if (nbClassNames > 0) {
426 classname = m_netConfig.m_classNames[proposals.m_classIds[idx]];
429 classname = std::to_string(proposals.m_classIds[idx]);
431 std::optional<std::string> classname_opt = std::optional<std::string>(classname);
432 output[classname].emplace_back(box.x, box.x + box.width, box.y, box.y + box.height
433 , proposals.m_classIds[idx], proposals.m_confidences[idx]
// Optional post-NMS size filter, applied per class.
438 if (m_applySizeFilterAfterNMS) {
439 output = filterDetectionMultiClassInput(output, m_netConfig.m_filterSizeRatio);
442 return !output.empty();
452 bool vpDetectorDNNOpenCV::detect(
const cv::Mat &I, std::vector< std::pair<std::string, std::vector<DetectedFeatures2D>>> &output)
454 std::map< std::string, std::vector<DetectedFeatures2D>> map_output;
455 bool returnStatus = detect(I, map_output);
456 for (
auto key_val : map_output) {
457 output.push_back(key_val);
// OpenCV 3.4.3 only: getUnconnectedOutLayersNames() is not available, so the
// output-layer names are reconstructed from their indices.
462 #if (VISP_HAVE_OPENCV_VERSION == 0x030403)
468 std::vector<cv::String> vpDetectorDNNOpenCV::getOutputsNames()
// Function-local static: computed once and reused on later calls.
// NOTE(review): the `if (names.empty())` guard and closing braces appear to
// be on elided lines — TODO confirm. Also note the static makes this
// per-process, not per-instance.
470 static std::vector<cv::String> names;
472 std::vector<int> outLayers = m_net.getUnconnectedOutLayers();
473 std::vector<cv::String> layersNames = m_net.getLayerNames();
474 names.resize(outLayers.size());
// getUnconnectedOutLayers() returns 1-based layer ids, hence the -1.
475 for (
size_t i = 0; i < outLayers.size(); ++i)
476 names[i] = layersNames[outLayers[i] - 1];
// Dispatch the raw network outputs (m_dnnRes) to the parsing routine matching
// the configured parsing type, then run Non-Maximum Suppression to fill
// m_indices. NOTE(review): the case labels and break statements of the switch
// are on elided lines; the pairing of calls to cases below is inferred.
490 void vpDetectorDNNOpenCV::postProcess(DetectionCandidates &proposals)
492 switch (m_netConfig.m_parsingMethodType) {
495 postProcess_YoloV3_V4(proposals, m_dnnRes, m_netConfig);
499 postProcess_YoloV5_V7(proposals, m_dnnRes, m_netConfig);
502 postProcess_YoloV8(proposals, m_dnnRes, m_netConfig);
505 postProcess_FasterRCNN(proposals, m_dnnRes, m_netConfig);
// NOTE(review): line 509 reads like a function declaration, not a call —
// looks suspicious inside a switch; verify against the full file.
508 #if defined(VISP_BUILD_DEPRECATED_FUNCTIONS)
509 void postProcess_SSD_MobileNet(DetectionCandidates & proposals, std::vector<cv::Mat> &dnnRes,
const NetConfig & netConfig);
513 postProcess_ResNet_10(proposals, m_dnnRes, m_netConfig);
517 postProcess_ResNet_10(proposals, m_dnnRes, m_netConfig);
// USER_SPECIFIED case: invoke the user callback.
520 m_parsingMethod(proposals, m_dnnRes, m_netConfig);
// NMS keeps, in m_indices, the proposals that survive the confidence and
// overlap thresholds.
527 cv::dnn::NMSBoxes(proposals.m_boxes, proposals.m_confidences, m_netConfig.m_confThreshold, m_netConfig.m_nmsThreshold, m_indices);
541 std::vector<vpDetectorDNNOpenCV::DetectedFeatures2D>
542 vpDetectorDNNOpenCV::filterDetectionSingleClassInput(
const std::vector<DetectedFeatures2D> &detected_features,
const double minRatioOfAreaOk)
545 double originalNumberOfObj =
static_cast<double>(detected_features.size());
546 double meanFactor = 1. / originalNumberOfObj;
549 for (DetectedFeatures2D feature : detected_features) {
550 meanArea += feature.m_bbox.getArea();
552 meanArea *= meanFactor;
555 std::vector<DetectedFeatures2D> filtered_features;
556 for (DetectedFeatures2D feature : detected_features) {
557 if (feature.m_bbox.getArea() >= minRatioOfAreaOk * meanArea && feature.m_bbox.getArea() < meanArea / minRatioOfAreaOk) {
558 filtered_features.push_back(feature);
562 return filtered_features;
// Filter mixed-class detections: compute a per-class mean bounding-box area,
// then keep only the detections whose area is close enough to the mean of
// their own class.
575 std::vector<vpDetectorDNNOpenCV::DetectedFeatures2D>
576 vpDetectorDNNOpenCV::filterDetectionMultiClassInput(
const std::vector<DetectedFeatures2D> &detected_features,
const double minRatioOfAreaOk)
578 #ifndef DOXYGEN_SHOULD_SKIP_THIS
// Local helper accumulating, per class id, the number of occurrences and the
// total area, from which per-class means are derived.
583 class MeanAreaComputer
// class id -> (occurrence count, summed area)
586 std::map<int, std::pair<int, double>> m_map_id_pairOccurrencesAreas;
// class id -> mean area, filled by computeMeans().
589 std::map<int, double> m_mapMeans;
// Mean area of one class = summed area / occurrence count.
596 double computeMeanArea(
const int &class_id)
598 return m_map_id_pairOccurrencesAreas[class_id].second / (double)m_map_id_pairOccurrencesAreas[class_id].first;
// (Inside computeMeans(), whose signature line is elided here.)
607 for (
const auto &classID_pair : m_map_id_pairOccurrencesAreas) {
608 m_mapMeans[classID_pair.first] = computeMeanArea(classID_pair.first);
// Throws when asked for a class id that was never accumulated.
612 double getMean(
const int &class_id)
614 if (m_map_id_pairOccurrencesAreas.find(class_id) == m_map_id_pairOccurrencesAreas.end()) {
615 throw(
vpException(
vpException::badValue,
"[MeanAreaComputer::getMean] Asking for class_id \"" + std::to_string(class_id) +
"\" that is not present in m_mapMeans. Did you call computeMeans ?"));
617 return m_mapMeans[class_id];
// Functor call: accumulate one detection into the per-class statistics.
625 void operator()(
const DetectedFeatures2D &feature)
627 int class_id = feature.getClassId();
628 double area = feature.getBoundingBox().getArea();
629 if (m_map_id_pairOccurrencesAreas.find(class_id) == m_map_id_pairOccurrencesAreas.end()) {
630 m_map_id_pairOccurrencesAreas[class_id] = std::pair<int, double>(1, area);
633 std::pair<int, double> prev_state = m_map_id_pairOccurrencesAreas[class_id];
634 m_map_id_pairOccurrencesAreas[class_id] = std::pair<int, double>(prev_state.first + 1, prev_state.second + area);
641 MeanAreaComputer meanComputer;
// NOTE(review): std::for_each takes the functor BY VALUE — the accumulation
// happens in a copy and is discarded, leaving meanComputer empty, so
// getMean() below would throw for any class id. Consider
// `meanComputer = std::for_each(...)` or wrapping with std::ref().
// TODO confirm against the full file before changing.
642 std::for_each(detected_features.begin(), detected_features.end(), meanComputer);
643 meanComputer.computeMeans();
// Keep the detections close enough to the mean area of their class.
646 std::vector<DetectedFeatures2D> filtered_features;
647 for (DetectedFeatures2D feature : detected_features) {
648 double meanArea = meanComputer.getMean(feature.getClassId());
649 if (feature.m_bbox.getArea() >= minRatioOfAreaOk * meanArea
650 && feature.m_bbox.getArea() < meanArea / minRatioOfAreaOk) {
651 filtered_features.push_back(feature);
655 return filtered_features;
667 std::map<std::string, std::vector<vpDetectorDNNOpenCV::DetectedFeatures2D>>
668 vpDetectorDNNOpenCV::filterDetectionMultiClassInput(
const std::map< std::string, std::vector<vpDetectorDNNOpenCV::DetectedFeatures2D>> &detected_features,
const double minRatioOfAreaOk)
670 std::map<std::string, std::vector<vpDetectorDNNOpenCV::DetectedFeatures2D>> output;
671 for (
auto keyval : detected_features) {
672 output[keyval.first] = filterDetectionSingleClassInput(detected_features.at(keyval.first), minRatioOfAreaOk);
// Parse raw YOLOv3/YOLOv4 outputs: each row holds [cx, cy, w, h, objectness,
// per-class scores...] with coordinates normalized to [0;1] relative to the
// image, so they are rescaled by m_img size. NOTE(review): the per-row
// pointer advance (presumably `pdata += nout;`) and `row_ind++` are on
// elided lines, as are the closing braces — TODO confirm.
690 void vpDetectorDNNOpenCV::postProcess_YoloV3_V4(DetectionCandidates &proposals, std::vector<cv::Mat> &dnnRes,
const NetConfig &netConfig)
692 size_t nbBatches = dnnRes.size();
694 for (
size_t i = 0; i < nbBatches; i++) {
// Rows = proposals, columns = 4 box coords + objectness + class scores.
697 int num_proposal = dnnRes[i].size[0];
698 int nout = dnnRes[i].size[1];
// Batched (3-D) output: flatten to a 2-D (num_proposal x nout) matrix.
699 if (dnnRes[i].dims > 2) {
700 num_proposal = dnnRes[i].size[1];
701 nout = dnnRes[i].size[2];
702 dnnRes[i] = dnnRes[i].reshape(0, num_proposal);
705 int n = 0, row_ind = 0;
706 float *pdata = (
float *)dnnRes[i].data;
709 for (n = 0; n < num_proposal; n++) {
// pdata[4] is the objectness score of the current row.
710 float box_score = pdata[4];
711 if (box_score > netConfig.m_confThreshold) {
// Class scores start at column 5.
712 cv::Mat scores = dnnRes[i].row(row_ind).colRange(5, nout);
713 cv::Point classIdPoint;
714 double max_class_score;
716 cv::minMaxLoc(scores, 0, &max_class_score, 0, &classIdPoint);
// Final confidence = best class score weighted by objectness.
718 max_class_score *= box_score;
721 if (max_class_score > netConfig.m_confThreshold) {
722 const int class_idx = classIdPoint.x;
// Denormalize the center/size to pixel coordinates.
723 float cx = pdata[0] * m_img.cols;
724 float cy = pdata[1] * m_img.rows;
725 float w = pdata[2] * m_img.cols;
726 float h = pdata[3] * m_img.rows;
// Convert center-based box to top-left corner.
728 int left = int(cx - 0.5 * w);
729 int top = int(cy - 0.5 * h);
731 proposals.m_confidences.push_back((
float)max_class_score);
732 proposals.m_boxes.push_back(cv::Rect(left, top, (
int)(w), (
int)(h)));
733 proposals.m_classIds.push_back(class_idx);
// Parse raw YOLOv5/YOLOv7 outputs. Same row layout as YOLOv3/v4
// ([cx, cy, w, h, objectness, class scores...]) but coordinates are expressed
// in network-input pixels, hence the ratioh/ratiow rescaling to image pixels.
// NOTE(review): the per-row pointer advance and row_ind increment, plus the
// closing braces, are on elided lines — TODO confirm.
753 void vpDetectorDNNOpenCV::postProcess_YoloV5_V7(DetectionCandidates &proposals, std::vector<cv::Mat> &dnnRes,
const NetConfig &netConfig)
// Scale factors from network-input size to the actual image size.
757 float ratioh = (float)m_img.rows / netConfig.m_inputSize.height, ratiow = (
float)m_img.cols / netConfig.m_inputSize.width;
758 size_t nbBatches = dnnRes.size();
760 for (
size_t i = 0; i < nbBatches; i++) {
762 int num_proposal = dnnRes[i].size[0];
763 int nout = dnnRes[i].size[1];
// Batched (3-D) output: flatten to a 2-D (num_proposal x nout) matrix.
764 if (dnnRes[i].dims > 2) {
765 num_proposal = dnnRes[i].size[1];
766 nout = dnnRes[i].size[2];
767 dnnRes[i] = dnnRes[i].reshape(0, num_proposal);
770 int n = 0, row_ind = 0;
771 float *pdata = (
float *)dnnRes[i].data;
774 for (n = 0; n < num_proposal; n++) {
// pdata[4] is the objectness score of the current row.
775 float box_score = pdata[4];
777 if (box_score > netConfig.m_confThreshold) {
778 cv::Mat scores = dnnRes[i].row(row_ind).colRange(5, nout);
779 cv::Point classIdPoint;
780 double max_class_score;
782 cv::minMaxLoc(scores, 0, &max_class_score, 0, &classIdPoint);
// Final confidence = best class score weighted by objectness.
783 max_class_score *= box_score;
786 if (max_class_score > netConfig.m_confThreshold) {
787 const int class_idx = classIdPoint.x;
// Rescale to image pixels and convert center box to top-left corner.
788 float cx = pdata[0] * ratiow;
789 float cy = pdata[1] * ratioh;
790 float w = pdata[2] * ratiow;
791 float h = pdata[3] * ratioh;
793 int left = int(cx - 0.5 * w);
794 int top = int(cy - 0.5 * h);
796 proposals.m_confidences.push_back((
float)max_class_score);
797 proposals.m_boxes.push_back(cv::Rect(left, top, (
int)(w), (
int)(h)));
798 proposals.m_classIds.push_back(class_idx);
// Parse raw YOLOv8 outputs. Unlike v3-v7 the output is transposed
// (columns = proposals) and has NO objectness score: class scores start at
// column 4 after the transpose. Coordinates are in network-input pixels,
// rescaled by ratioh/ratiow. NOTE(review): the per-row pointer advance and
// row_ind increment, plus closing braces, are on elided lines — TODO confirm.
818 void vpDetectorDNNOpenCV::postProcess_YoloV8(DetectionCandidates &proposals, std::vector<cv::Mat> &dnnRes,
const NetConfig &netConfig)
// Scale factors from network-input size to the actual image size.
823 float ratioh = (float)m_img.rows / netConfig.m_inputSize.height, ratiow = (
float)m_img.cols / netConfig.m_inputSize.width;
824 size_t nbBatches = dnnRes.size();
826 for (
size_t i = 0; i < nbBatches; i++) {
// Note the swapped roles compared to v3-v7: proposals along size[1].
828 int num_proposal = dnnRes[i].size[1];
829 int nout = dnnRes[i].size[0];
830 if (dnnRes[i].dims > 2) {
831 num_proposal = dnnRes[i].size[2];
832 nout = dnnRes[i].size[1];
833 dnnRes[i] = dnnRes[i].reshape(0, nout);
// Transpose so each row is one proposal again.
835 cv::transpose(dnnRes[i], dnnRes[i]);
837 int n = 0, row_ind = 0;
838 float *pdata = (
float *)dnnRes[i].data;
841 for (n = 0; n < num_proposal; n++) {
// No objectness in YOLOv8: class scores start directly at column 4.
842 cv::Mat scores = dnnRes[i].row(row_ind).colRange(4, nout);
843 cv::Point classIdPoint;
844 double max_class_score;
846 cv::minMaxLoc(scores, 0, &max_class_score, 0, &classIdPoint);
849 if (max_class_score > netConfig.m_confThreshold) {
850 const int class_idx = classIdPoint.x;
// Rescale to image pixels and convert center box to top-left corner.
851 float cx = pdata[0] * ratiow;
852 float cy = pdata[1] * ratioh;
853 float w = pdata[2] * ratiow;
854 float h = pdata[3] * ratioh;
856 int left = int(cx - 0.5 * w);
857 int top = int(cy - 0.5 * h);
859 proposals.m_confidences.push_back((
float)max_class_score);
860 proposals.m_boxes.push_back(cv::Rect(left, top, (
int)(w), (
int)(h)));
861 proposals.m_classIds.push_back(class_idx);
// Parse raw Faster-RCNN outputs: a flat array of 7-float records
// [batchId, classId, confidence, left, top, right, bottom] with coordinates
// normalized to [0;1], rescaled here to image pixels.
// NOTE(review): closing braces are on elided lines.
881 void vpDetectorDNNOpenCV::postProcess_FasterRCNN(DetectionCandidates &proposals, std::vector<cv::Mat> &dnnRes,
const NetConfig &netConfig)
889 size_t nbBatches = dnnRes.size();
890 for (
size_t j = 0; j < nbBatches; j++) {
891 float *data = (
float *)dnnRes[j].data;
// One detection every 7 floats.
892 for (
size_t i = 0; i < dnnRes[j].total(); i += 7) {
893 float confidence = data[i + 2];
894 if (confidence > netConfig.m_confThreshold) {
// Denormalize the corner coordinates to pixels.
895 int left = (int)(data[i + 3] * m_img.cols);
896 int top = (int)(data[i + 4] * m_img.rows);
897 int right = (int)(data[i + 5] * m_img.cols);
898 int bottom = (int)(data[i + 6] * m_img.rows);
899 int classId = (int)(data[i + 1]);
901 proposals.m_confidences.push_back((
float)confidence);
// +1 makes the rect inclusive of the bottom-right pixel.
902 proposals.m_boxes.push_back(cv::Rect(left, top, right - left + 1, bottom - top + 1));
903 proposals.m_classIds.push_back(classId);
// Deprecated SSD-MobileNet parsing: expects two output blobs, "scores"
// (N x C class scores) and "boxes" (N x 4 normalized corner boxes), matched
// by name through m_outNames. NOTE(review): the update of maxScore/maxClass
// inside the inner loop (and presumably a `continue`) are on elided lines,
// as are the closing braces — TODO confirm.
910 #if defined(VISP_BUILD_DEPRECATED_FUNCTIONS)
923 void vpDetectorDNNOpenCV::postProcess_SSD_MobileNet(DetectionCandidates &proposals, std::vector<cv::Mat> &dnnRes,
const NetConfig &netConfig)
// Locate each blob by its layer name; order is not guaranteed.
930 int scores_index = m_outNames[0] ==
"scores" ? 0 : 1;
931 int boxes_index = m_outNames[0] ==
"boxes" ? 0 : 1;
// N detections, C classes per detection.
933 int N = dnnRes[scores_index].size[1], C = dnnRes[scores_index].size[2];
935 float *confidence = (
float *)dnnRes[scores_index].data;
936 float *bbox = (
float *)dnnRes[boxes_index].data;
939 for (
int i = 0; i < N; i++) {
940 uint32_t maxClass = 0;
941 float maxScore = -1000.0f;
// Search the best class, skipping index 0 (background).
943 for (
int j = 1; j < C; j++)
945 const float score = confidence[i * C + j];
947 if (score < netConfig.m_confThreshold)
950 if (score > maxScore) {
956 if (maxScore > netConfig.m_confThreshold) {
// Denormalize the 4-float corner box to pixel coordinates.
957 int left = (int)(bbox[4 * i] * m_img.cols);
958 int top = (int)(bbox[4 * i + 1] * m_img.rows);
959 int right = (int)(bbox[4 * i + 2] * m_img.cols);
960 int bottom = (int)(bbox[4 * i + 3] * m_img.rows);
961 int width = right - left + 1;
962 int height = bottom - top + 1;
964 int classId = maxClass;
965 proposals.m_confidences.push_back(maxScore);
966 proposals.m_boxes.push_back(cv::Rect(left, top, width, height));
967 proposals.m_classIds.push_back(classId);
984 void vpDetectorDNNOpenCV::postProcess_ResNet_10(DetectionCandidates &proposals, std::vector<cv::Mat> &dnnRes,
const NetConfig &netConfig)
991 CV_Assert(dnnRes.size() == 1);
992 float *data = (
float *)dnnRes[0].data;
993 for (
size_t i = 0; i < dnnRes[0].total(); i += 7) {
994 float confidence = data[i + 2];
995 if (confidence > netConfig.m_confThreshold) {
996 int left = (int)(data[i + 3] * m_img.cols);
997 int top = (int)(data[i + 4] * m_img.rows);
998 int right = (int)(data[i + 5] * m_img.cols);
999 int bottom = (int)(data[i + 6] * m_img.rows);
1000 int classId = (int)(data[i + 1]) - 1;
1002 proposals.m_confidences.push_back((
float)confidence);
1003 proposals.m_boxes.push_back(cv::Rect(left, top, right - left + 1, bottom - top + 1));
1004 proposals.m_classIds.push_back(classId);
// Placeholder parsing method used when no valid parsing type is configured.
// NOTE(review): its body (presumably a throw of vpException) is entirely on
// elided lines in this chunk.
1017 void vpDetectorDNNOpenCV::postProcess_unimplemented(DetectionCandidates &proposals, std::vector<cv::Mat> &dnnRes,
const NetConfig &netConfig)
1044 void vpDetectorDNNOpenCV::readNet(
const std::string &model,
const std::string &config,
const std::string &framework)
1046 m_netConfig.m_modelFilename = model;
1047 m_netConfig.m_modelConfigFilename = config;
1048 m_netConfig.m_framework = framework;
1049 m_net = cv::dnn::readNet(model, config, framework);
1050 #if (VISP_HAVE_OPENCV_VERSION == 0x030403)
1051 m_outNames = getOutputsNames();
1053 m_outNames = m_net.getUnconnectedOutLayersNames();
1063 void vpDetectorDNNOpenCV::setNetConfig(
const NetConfig &config)
1065 m_netConfig = config;
1066 setDetectionFilterSizeRatio(m_netConfig.m_filterSizeRatio);
1067 setParsingMethod(m_netConfig.m_parsingMethodType);
1068 if (!m_netConfig.m_modelFilename.empty()) {
1069 readNet(m_netConfig.m_modelFilename, m_netConfig.m_modelConfigFilename, m_netConfig.m_framework);
1078 void vpDetectorDNNOpenCV::setConfidenceThreshold(
const float &confThreshold) { m_netConfig.m_confThreshold = confThreshold; }
1086 void vpDetectorDNNOpenCV::setNMSThreshold(
const float &nmsThreshold) { m_netConfig.m_nmsThreshold = nmsThreshold; }
1095 void vpDetectorDNNOpenCV::setDetectionFilterSizeRatio(
const double &sizeRatio)
1097 m_netConfig.m_filterSizeRatio = sizeRatio;
1098 if (m_netConfig.m_filterSizeRatio > std::numeric_limits<double>::epsilon()) {
1099 m_applySizeFilterAfterNMS =
true;
1102 m_applySizeFilterAfterNMS =
false;
1112 void vpDetectorDNNOpenCV::setInputSize(
const int &width,
const int &height)
1114 m_netConfig.m_inputSize.width = width;
1115 m_netConfig.m_inputSize.height = height;
1125 void vpDetectorDNNOpenCV::setMean(
const double &meanR,
const double &meanG,
const double &meanB) { m_netConfig.m_mean = cv::Scalar(meanR, meanG, meanB); }
1133 void vpDetectorDNNOpenCV::setPreferableBackend(
const int &backendId) { m_net.setPreferableBackend(backendId); }
1141 void vpDetectorDNNOpenCV::setPreferableTarget(
const int &targetId) { m_net.setPreferableTarget(targetId); }
1146 void vpDetectorDNNOpenCV::setScaleFactor(
const double &scaleFactor)
1148 m_netConfig.m_scaleFactor = scaleFactor;
1149 if ((m_netConfig.m_parsingMethodType == YOLO_V7 || m_netConfig.m_parsingMethodType == YOLO_V8) && m_netConfig.m_scaleFactor != 1 / 255.) {
1150 std::cout <<
"[vpDetectorDNNOpenCV::setParsingMethod] WARNING: scale factor should be 1/255. to normalize pixels value." << std::endl;
1159 void vpDetectorDNNOpenCV::setSwapRB(
const bool &swapRB) { m_netConfig.m_swapRB = swapRB; }
// Select how the raw network outputs are interpreted: the parsing type plus
// an optional user callback (used for the USER_SPECIFIED type). Also forces
// the 1/255 scale factor for YOLOv7/v8 and warns about the deprecated
// SSD-MobileNet parsing. NOTE(review): closing braces and the #endif of the
// deprecated block are on elided lines.
1168 void vpDetectorDNNOpenCV::setParsingMethod(
const DNNResultsParsingType &typeParsingMethod,
void (*parsingMethod)(DetectionCandidates &, std::vector<cv::Mat> &,
const NetConfig &))
1170 m_netConfig.m_parsingMethodType = typeParsingMethod;
1171 m_parsingMethod = parsingMethod;
// YOLOv7/v8 expect pixel values normalized into [0;1]: fix the scale factor.
1172 if ((m_netConfig.m_parsingMethodType == YOLO_V7 || m_netConfig.m_parsingMethodType == YOLO_V8) && m_netConfig.m_scaleFactor != 1 / 255.) {
1173 m_netConfig.m_scaleFactor = 1 / 255.;
1174 std::cout <<
"[vpDetectorDNNOpenCV::setParsingMethod] NB: scale factor changed to 1/255. to normalize pixels value." << std::endl;
// Warn that the deprecated SSD-MobileNet parsing may not match the networks
// documented by ViSP when deprecated functions are built in.
1177 #if defined(VISP_BUILD_DEPRECATED_FUNCTIONS)
1178 if (m_netConfig.m_parsingMethodType == SSD_MOBILENET) {
1179 std::cout <<
"[vpDetectorDNNOpenCV::setParsingMethod] WARNING: The chosen type of network is " << dnnResultsParsingTypeToString(m_netConfig.m_parsingMethodType) <<
" VISP_BUILD_DEPRECATED_FUNCTIONS is set to true." << std::endl;
1180 std::cout <<
"\tThe parsing method that worked with the networks quoted in the ViSP documentation was postProcess_ResNet_10 instead of postProcess_SSD_MobileNet." << std::endl;
1181 std::cout <<
"\tIf the SSD-MobileNet network does not seem to work, please try to recompile ViSP setting VISP_BUILD_DEPRECATED_FUNCTIONS as false." << std::endl << std::flush;
1187 #elif !defined(VISP_BUILD_SHARED_LIBS)
// Dummy symbol emitted when the DNN detector is compiled out, so that this
// translation unit is never empty in a non-shared build.
void dummy_vpDetectorDNN() { }
// NOTE(review): the following lines appear to be documentation-extraction
// residue (fragments of the vpException and vpImageConvert docs) appended to
// this chunk; commented out so they no longer read as code.
// error that can be emitted by ViSP classes.
// @ badValue
// Used to indicate that a value is not in the allowed range.
// @ functionNotImplementedError
// Function not implemented.
// static void convert(const vpImage< unsigned char > &src, vpImage< vpRGBa > &dest)