35 #include <visp3/core/vpConfig.h>
38 #if (VISP_HAVE_OPENCV_VERSION >= 0x030403) && defined(HAVE_OPENCV_DNN) && \
39 ((__cplusplus >= 201703L) || (defined(_MSVC_LANG) && (_MSVC_LANG >= 201703L)))
41 #include <visp3/core/vpImageConvert.h>
42 #include <visp3/detection/vpDetectorDNNOpenCV.h>
43 #include <visp3/core/vpIoTools.h>
/*!
 * Build a human-readable, bracketed list of every parsing-type name,
 * e.g. ["yolov3", ..., "user-specified"], by iterating over the enum.
 * NOTE(review): this excerpt stops after the last concatenation (orig. l.57);
 * the `return list;` and closing brace are missing from this view.
 */
51 std::string vpDetectorDNNOpenCV::getAvailableDnnResultsParsingTypes()
53 std::string list =
"[";
// All entries except the last are followed by a comma separator.
54 for (
unsigned int i = 0; i < vpDetectorDNNOpenCV::COUNT - 1; i++) {
55 list +=
"\"" + dnnResultsParsingTypeToString((vpDetectorDNNOpenCV::DNNResultsParsingType)i) +
"\", ";
// The final entry (COUNT - 1) closes the bracketed list without a trailing comma.
57 list +=
"\"" + dnnResultsParsingTypeToString((vpDetectorDNNOpenCV::DNNResultsParsingType)(vpDetectorDNNOpenCV::COUNT - 1)) +
"\"]";
/*!
 * Map a DNNResultsParsingType enum value to its canonical string name.
 * NOTE(review): only two assignments of the switch-like mapping are visible in
 * this excerpt ("ssd-mobilenet", "user-specified"); the switch statement and
 * the remaining cases appear to be missing from this view.
 */
70 std::string vpDetectorDNNOpenCV::dnnResultsParsingTypeToString(
const DNNResultsParsingType &type)
93 name =
"ssd-mobilenet";
99 name =
"user-specified";
/*!
 * Map a string name back to its DNNResultsParsingType value by linear search
 * over the enum, comparing against the canonical names.
 * Returns COUNT (the initial value of `res`) when no match is found.
 * NOTE(review): `name_lowercase` is referenced but its computation (presumably
 * a lowercase copy of `name`) is missing from this excerpt, as are the match
 * assignment to `res` and the final return — confirm against the full file.
 */
116 vpDetectorDNNOpenCV::DNNResultsParsingType vpDetectorDNNOpenCV::dnnResultsParsingTypeFromString(
const std::string &name)
118 vpDetectorDNNOpenCV::DNNResultsParsingType res(COUNT);
119 bool hasFoundMatch =
false;
// Stop iterating as soon as a match is found.
121 for (
int id = 0;
id < COUNT && !hasFoundMatch;
id++) {
122 vpDetectorDNNOpenCV::DNNResultsParsingType temp = (vpDetectorDNNOpenCV::DNNResultsParsingType)
id;
123 if (dnnResultsParsingTypeToString(temp) == name_lowercase) {
125 hasFoundMatch =
true;
141 std::vector<std::string> vpDetectorDNNOpenCV::parseClassNamesFile(
const std::string &filename)
143 return NetConfig::parseClassNamesFile(filename);
146 vpDetectorDNNOpenCV::vpDetectorDNNOpenCV()
147 : m_applySizeFilterAfterNMS(false), m_blob(), m_I_color(), m_img(),
148 m_net(), m_netConfig(), m_outNames(), m_dnnRes(),
149 m_parsingMethod(vpDetectorDNNOpenCV::postProcess_unimplemented)
151 setDetectionFilterSizeRatio(m_netConfig.m_filterSizeRatio);
161 vpDetectorDNNOpenCV::vpDetectorDNNOpenCV(
const NetConfig &config,
const DNNResultsParsingType &typeParsingMethod,
void (*parsingMethod)(DetectionCandidates &, std::vector<cv::Mat> &,
const NetConfig &))
162 : m_applySizeFilterAfterNMS(false), m_blob(), m_I_color(), m_img(),
163 m_net(), m_netConfig(config), m_outNames(), m_dnnRes()
165 setDetectionFilterSizeRatio(m_netConfig.m_filterSizeRatio);
166 setParsingMethod(typeParsingMethod, parsingMethod);
167 if (!m_netConfig.m_modelFilename.empty()) {
168 readNet(m_netConfig.m_modelFilename, m_netConfig.m_modelConfigFilename, m_netConfig.m_framework);
172 #ifdef VISP_HAVE_NLOHMANN_JSON
174 using json = nlohmann::json;
182 vpDetectorDNNOpenCV::vpDetectorDNNOpenCV(
const std::string &jsonPath,
void (*parsingMethod)(DetectionCandidates &, std::vector<cv::Mat> &,
const NetConfig &))
183 : m_applySizeFilterAfterNMS(false), m_blob(), m_I_color(), m_img(),
184 m_net(), m_netConfig(), m_outNames(), m_dnnRes()
186 initFromJSON(jsonPath);
187 setDetectionFilterSizeRatio(m_netConfig.m_filterSizeRatio);
188 setParsingMethod(m_netConfig.m_parsingMethodType, parsingMethod);
/*!
 * Initialize the detector from a JSON configuration file, then (re)load the
 * network from the configured model files.
 * NOTE(review): in this excerpt the failure check on the opened file, the
 * declaration of `j`, the enclosing try block, the re-thrown parse error, and
 * the assignment of the parsed JSON into this object are missing — confirm
 * against the full file.
 */
196 void vpDetectorDNNOpenCV::initFromJSON(
const std::string &jsonPath)
198 std::ifstream file(jsonPath);
// Error message built when the file cannot be opened.
200 std::stringstream ss;
201 ss <<
"Problem opening file " << jsonPath <<
". Make sure it exists and is readable" << std::endl;
206 j = json::parse(file);
// A malformed JSON file is reported with the parser diagnostic and byte offset.
208 catch (json::parse_error &e) {
209 std::stringstream msg;
210 msg <<
"Could not parse JSON file : \n";
212 msg << e.what() << std::endl;
213 msg <<
"Byte position of error: " << e.byte;
// Reload the network so the detector matches the freshly-read configuration.
218 readNet(m_netConfig.m_modelFilename, m_netConfig.m_modelConfigFilename, m_netConfig.m_framework);
/*!
 * Serialize the detector configuration into a JSON file.
 * NOTE(review): the statement writing `j` into `file` (and the closing brace)
 * is missing from this excerpt — confirm against the full file.
 */
226 void vpDetectorDNNOpenCV::saveConfigurationInJSON(
const std::string &jsonPath)
const
228 std::ofstream file(jsonPath);
// Relies on the to_json conversion defined for vpDetectorDNNOpenCV.
229 const json j = *
this;
238 vpDetectorDNNOpenCV::~vpDetectorDNNOpenCV() { }
/*!
 * Detect objects in a grayscale ViSP image; results as a flat vector.
 * NOTE(review): the excerpt jumps from the signature (orig. l.249) to the
 * return (orig. l.253); the conversion of I into m_I_color appears to be
 * missing from this view — confirm against the full file.
 */
249 bool vpDetectorDNNOpenCV::detect(
const vpImage<unsigned char> &I, std::vector<DetectedFeatures2D> &output)
// Delegates to the color-image overload through the member buffer m_I_color.
253 return detect(m_I_color, output);
/*!
 * Detect objects in a grayscale ViSP image; results grouped by class name.
 * NOTE(review): the conversion of I into m_I_color appears to be missing
 * from this excerpt (orig. l.266-268 absent) — confirm against the full file.
 */
265 bool vpDetectorDNNOpenCV::detect(
const vpImage<unsigned char> &I, std::map< std::string, std::vector<DetectedFeatures2D>> &output)
// Delegates to the color-image overload through the member buffer m_I_color.
269 return detect(m_I_color, output);
/*!
 * Detect objects in a grayscale ViSP image; results as (class, detections) pairs.
 * NOTE(review): the conversion of I into m_I_color appears to be missing
 * from this excerpt (orig. l.282-284 absent) — confirm against the full file.
 */
281 bool vpDetectorDNNOpenCV::detect(
const vpImage<unsigned char> &I, std::vector< std::pair<std::string, std::vector<DetectedFeatures2D>>> &output)
// Delegates to the color-image overload through the member buffer m_I_color.
285 return detect(m_I_color, output);
/*!
 * Detect objects in a color ViSP image; results as a flat vector.
 * NOTE(review): the conversion of I into the cv::Mat member m_img appears to
 * be missing from this excerpt (orig. l.298-300 absent) — confirm against the
 * full file.
 */
297 bool vpDetectorDNNOpenCV::detect(
const vpImage<vpRGBa> &I, std::vector<DetectedFeatures2D> &output)
// Delegates to the cv::Mat overload through the member buffer m_img.
301 return detect(m_img, output);
/*!
 * Detect objects in a color ViSP image; results grouped by class name.
 * NOTE(review): the conversion of I into m_img appears to be missing from
 * this excerpt (orig. l.314-316 absent) — confirm against the full file.
 */
313 bool vpDetectorDNNOpenCV::detect(
const vpImage<vpRGBa> &I, std::map< std::string, std::vector<DetectedFeatures2D>> &output)
// Delegates to the cv::Mat overload through the member buffer m_img.
317 return detect(m_img, output);
/*!
 * Detect objects in a color ViSP image; results as (class, detections) pairs.
 * NOTE(review): the conversion of I into m_img appears to be missing from
 * this excerpt (orig. l.328-330 absent) — confirm against the full file.
 */
327 bool vpDetectorDNNOpenCV::detect(
const vpImage<vpRGBa> &I, std::vector< std::pair<std::string, std::vector<DetectedFeatures2D>>> &output)
// Delegates to the cv::Mat overload through the member buffer m_img.
331 return detect(m_img, output);
/*!
 * Core detection on a cv::Mat: build the input blob, run the forward pass
 * (falling back to CPU on failure), post-process and NMS-filter the raw
 * outputs, then emit the kept detections as a flat vector.
 * Returns true when at least one detection survives.
 * NOTE(review): the assignment of I into m_img, the try-block opening, parts
 * of the emplace_back argument list (orig. l.375-378) and several closing
 * braces are missing from this excerpt — confirm against the full file.
 */
341 bool vpDetectorDNNOpenCV::detect(
const cv::Mat &I, std::vector<DetectedFeatures2D> &output)
// Use the configured input size when set (> 0), otherwise the image size.
346 cv::Size inputSize(m_netConfig.m_inputSize.width > 0 ? m_netConfig.m_inputSize.width : m_img.cols,
347 m_netConfig.m_inputSize.height > 0 ? m_netConfig.m_inputSize.height : m_img.rows);
348 cv::dnn::blobFromImage(m_img, m_blob, m_netConfig.m_scaleFactor, inputSize, m_netConfig.m_mean, m_netConfig.m_swapRB,
false);
350 m_net.setInput(m_blob);
352 m_net.forward(m_dnnRes, m_outNames);
// On inference failure (typically CUDA/GPU issues) retry once on CPU.
354 catch (
const cv::Exception &e) {
355 std::cerr <<
"Caught an exception trying to run inference:" << std::endl <<
"\t"
357 <<
"\nCuda and/or GPU driver might not be correctly installed. Setting preferable backend to CPU and trying again." << std::endl;
358 m_net.setPreferableBackend(cv::dnn::DNN_BACKEND_DEFAULT);
359 m_net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
360 m_net.forward(m_dnnRes, m_outNames);
// Parse raw network outputs into candidate boxes/scores/ids (fills m_indices via NMS).
363 DetectionCandidates proposals;
364 postProcess(proposals);
365 size_t nbClassNames = m_netConfig.m_classNames.size();
// Keep only the proposals retained by NMS (indices stored in m_indices).
366 for (
size_t i = 0; i < m_indices.size(); ++i) {
367 int idx = m_indices[i];
368 cv::Rect box = proposals.m_boxes[idx];
369 std::optional<std::string> classname_opt;
370 if (nbClassNames > 0) {
371 classname_opt = m_netConfig.m_classNames[proposals.m_classIds[idx]];
373 output.emplace_back(box.x, box.x + box.width, box.y, box.y + box.height
374 , proposals.m_classIds[idx], proposals.m_confidences[idx]
// Optional extra filtering on bounding-box size relative to the per-class mean.
379 if (m_applySizeFilterAfterNMS) {
381 output = filterDetectionMultiClassInput(output, m_netConfig.m_filterSizeRatio);
384 return !output.empty();
/*!
 * Core detection on a cv::Mat with results grouped by class name. Same
 * pipeline as the vector overload: blob, forward pass with CPU fallback,
 * post-process + NMS, then bucket each kept detection under its class key.
 * Returns true when at least one detection survives.
 * NOTE(review): the assignment of I into m_img, the try-block opening, the
 * else keyword before orig. l.427, the tail of the emplace_back argument
 * list and several closing braces are missing from this excerpt — confirm
 * against the full file.
 */
394 bool vpDetectorDNNOpenCV::detect(
const cv::Mat &I, std::map< std::string, std::vector<DetectedFeatures2D>> &output)
// Use the configured input size when set (> 0), otherwise the image size.
399 cv::Size inputSize(m_netConfig.m_inputSize.width > 0 ? m_netConfig.m_inputSize.width : m_img.cols,
400 m_netConfig.m_inputSize.height > 0 ? m_netConfig.m_inputSize.height : m_img.rows);
401 cv::dnn::blobFromImage(m_img, m_blob, m_netConfig.m_scaleFactor, inputSize, m_netConfig.m_mean, m_netConfig.m_swapRB,
false);
403 m_net.setInput(m_blob);
405 m_net.forward(m_dnnRes, m_outNames);
// On inference failure (typically CUDA/GPU issues) retry once on CPU.
407 catch (
const cv::Exception &e) {
408 std::cerr <<
"Caught an exception trying to run inference:" << std::endl <<
"\t"
410 <<
"\nCuda and/or GPU driver might not be correctly installed. Setting preferable backend to CPU and trying again." << std::endl;
411 m_net.setPreferableBackend(cv::dnn::DNN_BACKEND_DEFAULT);
412 m_net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
413 m_net.forward(m_dnnRes, m_outNames);
416 DetectionCandidates proposals;
417 postProcess(proposals);
418 size_t nbClassNames = m_netConfig.m_classNames.size();
419 for (
size_t i = 0; i < m_indices.size(); ++i) {
420 int idx = m_indices[i];
421 cv::Rect box = proposals.m_boxes[idx];
// Use the configured class name when available, otherwise the numeric id.
422 std::string classname;
423 if (nbClassNames > 0) {
424 classname = m_netConfig.m_classNames[proposals.m_classIds[idx]];
427 classname = std::to_string(proposals.m_classIds[idx]);
429 std::optional<std::string> classname_opt = std::optional<std::string>(classname);
430 output[classname].emplace_back(box.x, box.x + box.width, box.y, box.y + box.height
431 , proposals.m_classIds[idx], proposals.m_confidences[idx]
// Optional extra filtering on bounding-box size relative to the per-class mean.
436 if (m_applySizeFilterAfterNMS) {
437 output = filterDetectionMultiClassInput(output, m_netConfig.m_filterSizeRatio);
440 return !output.empty();
450 bool vpDetectorDNNOpenCV::detect(
const cv::Mat &I, std::vector< std::pair<std::string, std::vector<DetectedFeatures2D>>> &output)
452 std::map< std::string, std::vector<DetectedFeatures2D>> map_output;
453 bool returnStatus = detect(I, map_output);
454 for (
auto key_val : map_output) {
455 output.push_back(key_val);
460 #if (VISP_HAVE_OPENCV_VERSION == 0x030403)
/*!
 * OpenCV 3.4.3 helper: resolve the names of the network's unconnected output
 * layers (getUnconnectedOutLayersNames() is not available in that version).
 * NOTE(review): `names` is static, which suggests a compute-once cache; the
 * guard (presumably `if (names.empty())`) and the return statement are missing
 * from this excerpt — confirm against the full file. As written the static
 * cache is also not thread-safe to fill.
 */
466 std::vector<cv::String> vpDetectorDNNOpenCV::getOutputsNames()
468 static std::vector<cv::String> names;
470 std::vector<int> outLayers = m_net.getUnconnectedOutLayers();
471 std::vector<cv::String> layersNames = m_net.getLayerNames();
472 names.resize(outLayers.size());
// getUnconnectedOutLayers() returns 1-based layer ids, hence the -1 offset.
473 for (
size_t i = 0; i < outLayers.size(); ++i)
474 names[i] = layersNames[outLayers[i] - 1];
/*!
 * Dispatch the raw network outputs (m_dnnRes) to the parsing routine matching
 * the configured parsing type, then run OpenCV NMS over the resulting
 * proposals, filling m_indices with the indices of the kept boxes.
 * NOTE(review): the case labels of the switch, the SSD/deprecated branch
 * structure around orig. l.506-515, and the default/unknown-type handling are
 * missing from this excerpt — confirm against the full file.
 */
488 void vpDetectorDNNOpenCV::postProcess(DetectionCandidates &proposals)
490 switch (m_netConfig.m_parsingMethodType) {
493 postProcess_YoloV3_V4(proposals, m_dnnRes, m_netConfig);
497 postProcess_YoloV5_V7(proposals, m_dnnRes, m_netConfig);
500 postProcess_YoloV8(proposals, m_dnnRes, m_netConfig);
503 postProcess_FasterRCNN(proposals, m_dnnRes, m_netConfig);
// When deprecated functions are built, the SSD-MobileNet path is declared...
506 #if defined(VISP_BUILD_DEPRECATED_FUNCTIONS)
507 void postProcess_SSD_MobileNet(DetectionCandidates & proposals, std::vector<cv::Mat> &dnnRes,
const NetConfig & netConfig);
// ...while both visible call sites delegate to the ResNet-10 parser.
511 postProcess_ResNet_10(proposals, m_dnnRes, m_netConfig);
515 postProcess_ResNet_10(proposals, m_dnnRes, m_netConfig);
// User-specified parsing goes through the registered callback.
518 m_parsingMethod(proposals, m_dnnRes, m_netConfig);
// Non-maximum suppression keeps only the best box among overlapping ones.
525 cv::dnn::NMSBoxes(proposals.m_boxes, proposals.m_confidences, m_netConfig.m_confThreshold, m_netConfig.m_nmsThreshold, m_indices);
539 std::vector<vpDetectorDNNOpenCV::DetectedFeatures2D>
540 vpDetectorDNNOpenCV::filterDetectionSingleClassInput(
const std::vector<DetectedFeatures2D> &detected_features,
const double minRatioOfAreaOk)
543 double originalNumberOfObj =
static_cast<double>(detected_features.size());
544 double meanFactor = 1. / originalNumberOfObj;
547 for (DetectedFeatures2D feature : detected_features) {
548 meanArea += feature.m_bbox.getArea();
550 meanArea *= meanFactor;
553 std::vector<DetectedFeatures2D> filtered_features;
554 for (DetectedFeatures2D feature : detected_features) {
555 if (feature.m_bbox.getArea() >= minRatioOfAreaOk * meanArea && feature.m_bbox.getArea() < meanArea / minRatioOfAreaOk) {
556 filtered_features.push_back(feature);
560 return filtered_features;
573 std::vector<vpDetectorDNNOpenCV::DetectedFeatures2D>
574 vpDetectorDNNOpenCV::filterDetectionMultiClassInput(
const std::vector<DetectedFeatures2D> &detected_features,
const double minRatioOfAreaOk)
576 #ifndef DOXYGEN_SHOULD_SKIP_THIS
581 class MeanAreaComputer
584 std::map<int, std::pair<int, double>> m_map_id_pairOccurrencesAreas;
587 std::map<int, double> m_mapMeans;
594 double computeMeanArea(
const int &class_id)
596 return m_map_id_pairOccurrencesAreas[class_id].second / (double)m_map_id_pairOccurrencesAreas[class_id].first;
605 for (
const auto &classID_pair : m_map_id_pairOccurrencesAreas) {
606 m_mapMeans[classID_pair.first] = computeMeanArea(classID_pair.first);
610 double getMean(
const int &class_id)
612 if (m_map_id_pairOccurrencesAreas.find(class_id) == m_map_id_pairOccurrencesAreas.end()) {
613 throw(
vpException(
vpException::badValue,
"[MeanAreaComputer::getMean] Asking for class_id \"" + std::to_string(class_id) +
"\" that is not present in m_mapMeans. Did you call computeMeans ?"));
615 return m_mapMeans[class_id];
623 void operator()(
const DetectedFeatures2D &feature)
625 int class_id = feature.getClassId();
626 double area = feature.getBoundingBox().getArea();
627 if (m_map_id_pairOccurrencesAreas.find(class_id) == m_map_id_pairOccurrencesAreas.end()) {
628 m_map_id_pairOccurrencesAreas[class_id] = std::pair<int, double>(1, area);
631 std::pair<int, double> prev_state = m_map_id_pairOccurrencesAreas[class_id];
632 m_map_id_pairOccurrencesAreas[class_id] = std::pair<int, double>(prev_state.first + 1, prev_state.second + area);
639 MeanAreaComputer meanComputer;
640 std::for_each(detected_features.begin(), detected_features.end(), meanComputer);
641 meanComputer.computeMeans();
644 std::vector<DetectedFeatures2D> filtered_features;
645 for (DetectedFeatures2D feature : detected_features) {
646 double meanArea = meanComputer.getMean(feature.getClassId());
647 if (feature.m_bbox.getArea() >= minRatioOfAreaOk * meanArea
648 && feature.m_bbox.getArea() < meanArea / minRatioOfAreaOk) {
649 filtered_features.push_back(feature);
653 return filtered_features;
665 std::map<std::string, std::vector<vpDetectorDNNOpenCV::DetectedFeatures2D>>
666 vpDetectorDNNOpenCV::filterDetectionMultiClassInput(
const std::map< std::string, std::vector<vpDetectorDNNOpenCV::DetectedFeatures2D>> &detected_features,
const double minRatioOfAreaOk)
668 std::map<std::string, std::vector<vpDetectorDNNOpenCV::DetectedFeatures2D>> output;
669 for (
auto keyval : detected_features) {
670 output[keyval.first] = filterDetectionSingleClassInput(detected_features.at(keyval.first), minRatioOfAreaOk);
/*!
 * Parse YoloV3/V4 raw outputs: each row holds [cx, cy, w, h, box_score,
 * class scores...] with coordinates normalized in [0; 1], so boxes are scaled
 * by the image size. Candidates above the confidence threshold are appended
 * to `proposals`.
 * NOTE(review): the per-row pointer advance and row_ind increment
 * (orig. l.732+) and the closing braces are missing from this excerpt —
 * confirm against the full file.
 */
688 void vpDetectorDNNOpenCV::postProcess_YoloV3_V4(DetectionCandidates &proposals, std::vector<cv::Mat> &dnnRes,
const NetConfig &netConfig)
690 size_t nbBatches = dnnRes.size();
692 for (
size_t i = 0; i < nbBatches; i++) {
// Determine the number of proposals and values per proposal; 3D outputs are
// flattened to a 2D (num_proposal x nout) matrix first.
695 int num_proposal = dnnRes[i].size[0];
696 int nout = dnnRes[i].size[1];
697 if (dnnRes[i].dims > 2) {
698 num_proposal = dnnRes[i].size[1];
699 nout = dnnRes[i].size[2];
700 dnnRes[i] = dnnRes[i].reshape(0, num_proposal);
703 int n = 0, row_ind = 0;
704 float *pdata = (
float *)dnnRes[i].data;
707 for (n = 0; n < num_proposal; n++) {
// pdata[4] is the objectness (box) score.
708 float box_score = pdata[4];
709 if (box_score > netConfig.m_confThreshold) {
// Columns 5..nout are the per-class scores; take the best one.
710 cv::Mat scores = dnnRes[i].row(row_ind).colRange(5, nout);
711 cv::Point classIdPoint;
712 double max_class_score;
714 cv::minMaxLoc(scores, 0, &max_class_score, 0, &classIdPoint);
// Final confidence = class score weighted by objectness.
716 max_class_score *= box_score;
719 if (max_class_score > netConfig.m_confThreshold) {
720 const int class_idx = classIdPoint.x;
// Center/size are normalized: scale by the image dimensions.
721 float cx = pdata[0] * m_img.cols;
722 float cy = pdata[1] * m_img.rows;
723 float w = pdata[2] * m_img.cols;
724 float h = pdata[3] * m_img.rows;
// Convert center-based box to top-left-based cv::Rect.
726 int left = int(cx - 0.5 * w);
727 int top = int(cy - 0.5 * h);
729 proposals.m_confidences.push_back((
float)max_class_score);
730 proposals.m_boxes.push_back(cv::Rect(left, top, (
int)(w), (
int)(h)));
731 proposals.m_classIds.push_back(class_idx);
/*!
 * Parse YoloV5/V7 raw outputs: same row layout as V3/V4 ([cx, cy, w, h,
 * box_score, class scores...]) but coordinates are expressed in network-input
 * pixels, so they are rescaled by image-size / input-size ratios.
 * NOTE(review): the per-row pointer advance and row_ind increment and the
 * closing braces are missing from this excerpt — confirm against the full
 * file.
 */
751 void vpDetectorDNNOpenCV::postProcess_YoloV5_V7(DetectionCandidates &proposals, std::vector<cv::Mat> &dnnRes,
const NetConfig &netConfig)
// Scale factors from network-input coordinates back to image coordinates.
755 float ratioh = (float)m_img.rows / netConfig.m_inputSize.height, ratiow = (
float)m_img.cols / netConfig.m_inputSize.width;
756 size_t nbBatches = dnnRes.size();
758 for (
size_t i = 0; i < nbBatches; i++) {
// Determine proposals/values per proposal; flatten 3D outputs to 2D.
760 int num_proposal = dnnRes[i].size[0];
761 int nout = dnnRes[i].size[1];
762 if (dnnRes[i].dims > 2) {
763 num_proposal = dnnRes[i].size[1];
764 nout = dnnRes[i].size[2];
765 dnnRes[i] = dnnRes[i].reshape(0, num_proposal);
768 int n = 0, row_ind = 0;
769 float *pdata = (
float *)dnnRes[i].data;
772 for (n = 0; n < num_proposal; n++) {
// pdata[4] is the objectness (box) score.
773 float box_score = pdata[4];
775 if (box_score > netConfig.m_confThreshold) {
// Columns 5..nout are the per-class scores; take the best one.
776 cv::Mat scores = dnnRes[i].row(row_ind).colRange(5, nout);
777 cv::Point classIdPoint;
778 double max_class_score;
780 cv::minMaxLoc(scores, 0, &max_class_score, 0, &classIdPoint);
781 max_class_score *= box_score;
784 if (max_class_score > netConfig.m_confThreshold) {
785 const int class_idx = classIdPoint.x;
// Rescale from network-input pixels to image pixels.
786 float cx = pdata[0] * ratiow;
787 float cy = pdata[1] * ratioh;
788 float w = pdata[2] * ratiow;
789 float h = pdata[3] * ratioh;
// Convert center-based box to top-left-based cv::Rect.
791 int left = int(cx - 0.5 * w);
792 int top = int(cy - 0.5 * h);
794 proposals.m_confidences.push_back((
float)max_class_score);
795 proposals.m_boxes.push_back(cv::Rect(left, top, (
int)(w), (
int)(h)));
796 proposals.m_classIds.push_back(class_idx);
/*!
 * Parse YoloV8 raw outputs. Unlike V3-V7, the output is transposed (values x
 * proposals) and has no separate objectness score: columns 4..nout of each
 * (transposed) row are directly the class scores.
 * NOTE(review): the per-row pointer advance and row_ind increment and the
 * closing braces are missing from this excerpt — confirm against the full
 * file.
 */
816 void vpDetectorDNNOpenCV::postProcess_YoloV8(DetectionCandidates &proposals, std::vector<cv::Mat> &dnnRes,
const NetConfig &netConfig)
// Scale factors from network-input coordinates back to image coordinates.
821 float ratioh = (float)m_img.rows / netConfig.m_inputSize.height, ratiow = (
float)m_img.cols / netConfig.m_inputSize.width;
822 size_t nbBatches = dnnRes.size();
824 for (
size_t i = 0; i < nbBatches; i++) {
// Note the swapped roles versus V3-V7: size[0] is nout, size[1] proposals.
826 int num_proposal = dnnRes[i].size[1];
827 int nout = dnnRes[i].size[0];
828 if (dnnRes[i].dims > 2) {
829 num_proposal = dnnRes[i].size[2];
830 nout = dnnRes[i].size[1];
831 dnnRes[i] = dnnRes[i].reshape(0, nout);
// Transpose so each row corresponds to one proposal.
833 cv::transpose(dnnRes[i], dnnRes[i]);
835 int n = 0, row_ind = 0;
836 float *pdata = (
float *)dnnRes[i].data;
839 for (n = 0; n < num_proposal; n++) {
// Columns 4..nout are class scores (no objectness term in YoloV8).
840 cv::Mat scores = dnnRes[i].row(row_ind).colRange(4, nout);
841 cv::Point classIdPoint;
842 double max_class_score;
844 cv::minMaxLoc(scores, 0, &max_class_score, 0, &classIdPoint);
847 if (max_class_score > netConfig.m_confThreshold) {
848 const int class_idx = classIdPoint.x;
// Rescale from network-input pixels to image pixels.
849 float cx = pdata[0] * ratiow;
850 float cy = pdata[1] * ratioh;
851 float w = pdata[2] * ratiow;
852 float h = pdata[3] * ratioh;
// Convert center-based box to top-left-based cv::Rect.
854 int left = int(cx - 0.5 * w);
855 int top = int(cy - 0.5 * h);
857 proposals.m_confidences.push_back((
float)max_class_score);
858 proposals.m_boxes.push_back(cv::Rect(left, top, (
int)(w), (
int)(h)));
859 proposals.m_classIds.push_back(class_idx);
879 void vpDetectorDNNOpenCV::postProcess_FasterRCNN(DetectionCandidates &proposals, std::vector<cv::Mat> &dnnRes,
const NetConfig &netConfig)
887 size_t nbBatches = dnnRes.size();
888 for (
size_t j = 0; j < nbBatches; j++) {
889 float *data = (
float *)dnnRes[j].data;
890 for (
size_t i = 0; i < dnnRes[j].total(); i += 7) {
891 float confidence = data[i + 2];
892 if (confidence > netConfig.m_confThreshold) {
893 int left = (int)(data[i + 3] * m_img.cols);
894 int top = (int)(data[i + 4] * m_img.rows);
895 int right = (int)(data[i + 5] * m_img.cols);
896 int bottom = (int)(data[i + 6] * m_img.rows);
897 int classId = (int)(data[i + 1]);
899 proposals.m_confidences.push_back((
float)confidence);
900 proposals.m_boxes.push_back(cv::Rect(left, top, right - left + 1, bottom - top + 1));
901 proposals.m_classIds.push_back(classId);
908 #if defined(VISP_BUILD_DEPRECATED_FUNCTIONS)
/*!
 * Parse SSD-MobileNet raw outputs (deprecated path). The network produces two
 * matrices, "scores" (N x C class confidences) and "boxes" (N x 4 normalized
 * corners); their order in m_outNames determines which index is which.
 * NOTE(review): inside the inner loop, the body of `if (score > maxScore)`
 * (presumably updating maxScore/maxClass) and the early-skip after the
 * threshold test are missing from this excerpt — confirm against the full
 * file.
 */
921 void vpDetectorDNNOpenCV::postProcess_SSD_MobileNet(DetectionCandidates &proposals, std::vector<cv::Mat> &dnnRes,
const NetConfig &netConfig)
// Resolve which output matrix holds the scores and which holds the boxes.
928 int scores_index = m_outNames[0] ==
"scores" ? 0 : 1;
929 int boxes_index = m_outNames[0] ==
"boxes" ? 0 : 1;
// N detections, C classes per detection.
931 int N = dnnRes[scores_index].size[1], C = dnnRes[scores_index].size[2];
933 float *confidence = (
float *)dnnRes[scores_index].data;
934 float *bbox = (
float *)dnnRes[boxes_index].data;
// For each detection, find the best-scoring class (skipping background j=0).
937 for (
int i = 0; i < N; i++) {
938 uint32_t maxClass = 0;
939 float maxScore = -1000.0f;
941 for (
int j = 1; j < C; j++)
943 const float score = confidence[i * C + j];
945 if (score < netConfig.m_confThreshold)
948 if (score > maxScore) {
// Keep the detection when its best class beats the threshold.
954 if (maxScore > netConfig.m_confThreshold) {
// Box corners are normalized: scale them by the image dimensions.
955 int left = (int)(bbox[4 * i] * m_img.cols);
956 int top = (int)(bbox[4 * i + 1] * m_img.rows);
957 int right = (int)(bbox[4 * i + 2] * m_img.cols);
958 int bottom = (int)(bbox[4 * i + 3] * m_img.rows);
959 int width = right - left + 1;
960 int height = bottom - top + 1;
962 int classId = maxClass;
963 proposals.m_confidences.push_back(maxScore);
964 proposals.m_boxes.push_back(cv::Rect(left, top, width, height));
965 proposals.m_classIds.push_back(classId);
982 void vpDetectorDNNOpenCV::postProcess_ResNet_10(DetectionCandidates &proposals, std::vector<cv::Mat> &dnnRes,
const NetConfig &netConfig)
989 CV_Assert(dnnRes.size() == 1);
990 float *data = (
float *)dnnRes[0].data;
991 for (
size_t i = 0; i < dnnRes[0].total(); i += 7) {
992 float confidence = data[i + 2];
993 if (confidence > netConfig.m_confThreshold) {
994 int left = (int)(data[i + 3] * m_img.cols);
995 int top = (int)(data[i + 4] * m_img.rows);
996 int right = (int)(data[i + 5] * m_img.cols);
997 int bottom = (int)(data[i + 6] * m_img.rows);
998 int classId = (int)(data[i + 1]) - 1;
1000 proposals.m_confidences.push_back((
float)confidence);
1001 proposals.m_boxes.push_back(cv::Rect(left, top, right - left + 1, bottom - top + 1));
1002 proposals.m_classIds.push_back(classId);
/*!
 * Placeholder parsing callback installed by the default constructor.
 * NOTE(review): the function body (presumably an exception reporting that no
 * parsing method was configured) is missing from this excerpt — confirm
 * against the full file.
 */
1015 void vpDetectorDNNOpenCV::postProcess_unimplemented(DetectionCandidates &proposals, std::vector<cv::Mat> &dnnRes,
const NetConfig &netConfig)
1042 void vpDetectorDNNOpenCV::readNet(
const std::string &model,
const std::string &config,
const std::string &framework)
1044 m_netConfig.m_modelFilename = model;
1045 m_netConfig.m_modelConfigFilename = config;
1046 m_netConfig.m_framework = framework;
1047 m_net = cv::dnn::readNet(model, config, framework);
1048 #if (VISP_HAVE_OPENCV_VERSION == 0x030403)
1049 m_outNames = getOutputsNames();
1051 m_outNames = m_net.getUnconnectedOutLayersNames();
1061 void vpDetectorDNNOpenCV::setNetConfig(
const NetConfig &config)
1063 m_netConfig = config;
1064 setDetectionFilterSizeRatio(m_netConfig.m_filterSizeRatio);
1065 setParsingMethod(m_netConfig.m_parsingMethodType);
1066 if (!m_netConfig.m_modelFilename.empty()) {
1067 readNet(m_netConfig.m_modelFilename, m_netConfig.m_modelConfigFilename, m_netConfig.m_framework);
1076 void vpDetectorDNNOpenCV::setConfidenceThreshold(
const float &confThreshold) { m_netConfig.m_confThreshold = confThreshold; }
1084 void vpDetectorDNNOpenCV::setNMSThreshold(
const float &nmsThreshold) { m_netConfig.m_nmsThreshold = nmsThreshold; }
1093 void vpDetectorDNNOpenCV::setDetectionFilterSizeRatio(
const double &sizeRatio)
1095 m_netConfig.m_filterSizeRatio = sizeRatio;
1096 if (m_netConfig.m_filterSizeRatio > std::numeric_limits<double>::epsilon()) {
1097 m_applySizeFilterAfterNMS =
true;
1100 m_applySizeFilterAfterNMS =
false;
1110 void vpDetectorDNNOpenCV::setInputSize(
const int &width,
const int &height)
1112 m_netConfig.m_inputSize.width = width;
1113 m_netConfig.m_inputSize.height = height;
1123 void vpDetectorDNNOpenCV::setMean(
const double &meanR,
const double &meanG,
const double &meanB) { m_netConfig.m_mean = cv::Scalar(meanR, meanG, meanB); }
1131 void vpDetectorDNNOpenCV::setPreferableBackend(
const int &backendId) { m_net.setPreferableBackend(backendId); }
1139 void vpDetectorDNNOpenCV::setPreferableTarget(
const int &targetId) { m_net.setPreferableTarget(targetId); }
1144 void vpDetectorDNNOpenCV::setScaleFactor(
const double &scaleFactor)
1146 m_netConfig.m_scaleFactor = scaleFactor;
1147 if ((m_netConfig.m_parsingMethodType == YOLO_V7 || m_netConfig.m_parsingMethodType == YOLO_V8) && m_netConfig.m_scaleFactor != 1 / 255.) {
1148 std::cout <<
"[vpDetectorDNNOpenCV::setParsingMethod] WARNING: scale factor should be 1/255. to normalize pixels value." << std::endl;
1157 void vpDetectorDNNOpenCV::setSwapRB(
const bool &swapRB) { m_netConfig.m_swapRB = swapRB; }
1166 void vpDetectorDNNOpenCV::setParsingMethod(
const DNNResultsParsingType &typeParsingMethod,
void (*parsingMethod)(DetectionCandidates &, std::vector<cv::Mat> &,
const NetConfig &))
1168 m_netConfig.m_parsingMethodType = typeParsingMethod;
1169 m_parsingMethod = parsingMethod;
1170 if ((m_netConfig.m_parsingMethodType == YOLO_V7 || m_netConfig.m_parsingMethodType == YOLO_V8) && m_netConfig.m_scaleFactor != 1 / 255.) {
1171 m_netConfig.m_scaleFactor = 1 / 255.;
1172 std::cout <<
"[vpDetectorDNNOpenCV::setParsingMethod] NB: scale factor changed to 1/255. to normalize pixels value." << std::endl;
1175 #if defined(VISP_BUILD_DEPRECATED_FUNCTIONS)
1176 if (m_netConfig.m_parsingMethodType == SSD_MOBILENET) {
1177 std::cout <<
"[vpDetectorDNNOpenCV::setParsingMethod] WARNING: The chosen type of network is " << dnnResultsParsingTypeToString(m_netConfig.m_parsingMethodType) <<
" VISP_BUILD_DEPRECATED_FUNCTIONS is set to true." << std::endl;
1178 std::cout <<
"\tThe parsing method that worked with the networks quoted in the ViSP documentation was postProcess_ResNet_10 instead of postProcess_SSD_MobileNet." << std::endl;
1179 std::cout <<
"\tIf the SSD-MobileNet network does not seem to work, please try to recompile ViSP setting VISP_BUILD_DEPRECATED_FUNCTIONS as false." << std::endl << std::flush;
1184 #elif !defined(VISP_BUILD_SHARED_LIBS)
// Dummy symbol so this translation unit is never empty when the DNN detector
// is compiled out in a static build (some linkers warn on empty objects).
1186 void dummy_vpDetectorDNN() { };
error that can be emitted by ViSP classes.
@ badValue
Used to indicate that a value is not in the allowed range.
@ functionNotImplementedError
Function not implemented.
static void convert(const vpImage< unsigned char > &src, vpImage< vpRGBa > &dest)