34 #include <visp3/core/vpConfig.h>
37 #if (VISP_HAVE_OPENCV_VERSION >= 0x030403) && defined(HAVE_OPENCV_DNN) && \
38 ((__cplusplus >= 201703L) || (defined(_MSVC_LANG) && (_MSVC_LANG >= 201703L)))
40 #include <visp3/core/vpImageConvert.h>
41 #include <visp3/detection/vpDetectorDNNOpenCV.h>
42 #include <visp3/core/vpIoTools.h>
52 std::string vpDetectorDNNOpenCV::getAvailableDnnResultsParsingTypes()
54 std::string list =
"[";
55 for (
unsigned int i = 0; i < vpDetectorDNNOpenCV::COUNT - 1; i++) {
56 list +=
"\"" + dnnResultsParsingTypeToString((vpDetectorDNNOpenCV::DNNResultsParsingType)i) +
"\", ";
58 list +=
"\"" + dnnResultsParsingTypeToString((vpDetectorDNNOpenCV::DNNResultsParsingType)(vpDetectorDNNOpenCV::COUNT - 1)) +
"\"]";
// Maps a DNNResultsParsingType enum value to its textual (lower-case) name.
// NOTE(review): only two branches of the underlying switch are visible in this
// extract ("ssd-mobilenet", "user-specified"); the remaining cases and the
// final return of `name` lie outside the visible lines — do not infer their
// exact spelling from here.
71 std::string vpDetectorDNNOpenCV::dnnResultsParsingTypeToString(
const DNNResultsParsingType &type)
97 name =
"ssd-mobilenet";
103 name =
"user-specified";
// Inverse of dnnResultsParsingTypeToString: scans every enum value and returns
// the one whose string form matches `name`. `res` starts at COUNT, which acts
// as the "no match" sentinel.
// NOTE(review): the initialization of `name_lowercase` (presumably a
// lower-cased copy of `name`, e.g. via vpIoTools), the assignment of `temp`
// into `res` on match, and the final return are not visible in this extract.
120 vpDetectorDNNOpenCV::DNNResultsParsingType vpDetectorDNNOpenCV::dnnResultsParsingTypeFromString(
const std::string &name)
122 vpDetectorDNNOpenCV::DNNResultsParsingType res(COUNT);
123 bool hasFoundMatch =
false;
// Stop scanning as soon as a match is found.
125 for (
int id = 0;
id < COUNT && !hasFoundMatch;
id++) {
126 vpDetectorDNNOpenCV::DNNResultsParsingType temp = (vpDetectorDNNOpenCV::DNNResultsParsingType)
id;
127 if (dnnResultsParsingTypeToString(temp) == name_lowercase) {
129 hasFoundMatch =
true;
145 std::vector<std::string> vpDetectorDNNOpenCV::parseClassNamesFile(
const std::string &filename)
147 return NetConfig::parseClassNamesFile(filename);
150 vpDetectorDNNOpenCV::vpDetectorDNNOpenCV()
151 : m_applySizeFilterAfterNMS(false), m_blob(), m_I_color(), m_img(),
152 m_net(), m_netConfig(), m_outNames(), m_dnnRes(),
153 m_parsingMethod(vpDetectorDNNOpenCV::postProcess_unimplemented)
155 setDetectionFilterSizeRatio(m_netConfig.m_filterSizeRatio);
165 vpDetectorDNNOpenCV::vpDetectorDNNOpenCV(
const NetConfig &config,
const DNNResultsParsingType &typeParsingMethod,
void (*parsingMethod)(DetectionCandidates &, std::vector<cv::Mat> &,
const NetConfig &))
166 : m_applySizeFilterAfterNMS(false), m_blob(), m_I_color(), m_img(),
167 m_net(), m_netConfig(config), m_outNames(), m_dnnRes()
169 setDetectionFilterSizeRatio(m_netConfig.m_filterSizeRatio);
170 setParsingMethod(typeParsingMethod, parsingMethod);
171 if (!m_netConfig.m_modelFilename.empty()) {
172 readNet(m_netConfig.m_modelFilename, m_netConfig.m_modelConfigFilename, m_netConfig.m_framework);
176 #ifdef VISP_HAVE_NLOHMANN_JSON
178 using json = nlohmann::json;
186 vpDetectorDNNOpenCV::vpDetectorDNNOpenCV(
const std::string &jsonPath,
void (*parsingMethod)(DetectionCandidates &, std::vector<cv::Mat> &,
const NetConfig &))
187 : m_applySizeFilterAfterNMS(false), m_blob(), m_I_color(), m_img(),
188 m_net(), m_netConfig(), m_outNames(), m_dnnRes()
190 initFromJSON(jsonPath);
191 setDetectionFilterSizeRatio(m_netConfig.m_filterSizeRatio);
192 setParsingMethod(m_netConfig.m_parsingMethodType, parsingMethod);
// Load the network configuration from a JSON file, then read the network.
// NOTE(review): the error-handling skeleton (the check that `file` opened, the
// throw using `ss`, the `try` around json::parse, the declaration of `j`, the
// re-throw using `msg`, and the assignment of the parsed config into
// m_netConfig) is not visible in this extract — only the messages are.
200 void vpDetectorDNNOpenCV::initFromJSON(
const std::string &jsonPath)
202 std::ifstream file(jsonPath);
// Error message used when the file cannot be opened.
204 std::stringstream ss;
205 ss <<
"Problem opening file " << jsonPath <<
". Make sure it exists and is readable" << std::endl;
210 j = json::parse(file);
212 catch (json::parse_error &e) {
// Error message used when the JSON content is malformed.
213 std::stringstream msg;
214 msg <<
"Could not parse JSON file : \n";
216 msg << e.what() << std::endl;
217 msg <<
"Byte position of error: " << e.byte;
// Once the configuration is loaded, (re)load the network it describes.
222 readNet(m_netConfig.m_modelFilename, m_netConfig.m_modelConfigFilename, m_netConfig.m_framework);
// Serialize the current detector configuration to a JSON file.
// NOTE(review): the actual write of `j` into `file` (e.g. a dump with
// indentation) is not visible in this extract.
230 void vpDetectorDNNOpenCV::saveConfigurationInJSON(
const std::string &jsonPath)
const
232 std::ofstream file(jsonPath);
// Relies on the to_json() conversion defined for vpDetectorDNNOpenCV.
233 const json j = *
this;
242 vpDetectorDNNOpenCV::~vpDetectorDNNOpenCV() { }
253 bool vpDetectorDNNOpenCV::detect(
const vpImage<unsigned char> &I, std::vector<DetectedFeatures2D> &output)
257 return detect(m_I_color, output);
269 bool vpDetectorDNNOpenCV::detect(
const vpImage<unsigned char> &I, std::map< std::string, std::vector<DetectedFeatures2D>> &output)
273 return detect(m_I_color, output);
285 bool vpDetectorDNNOpenCV::detect(
const vpImage<unsigned char> &I, std::vector< std::pair<std::string, std::vector<DetectedFeatures2D>>> &output)
289 return detect(m_I_color, output);
301 bool vpDetectorDNNOpenCV::detect(
const vpImage<vpRGBa> &I, std::vector<DetectedFeatures2D> &output)
305 return detect(m_img, output);
317 bool vpDetectorDNNOpenCV::detect(
const vpImage<vpRGBa> &I, std::map< std::string, std::vector<DetectedFeatures2D>> &output)
321 return detect(m_img, output);
331 bool vpDetectorDNNOpenCV::detect(
const vpImage<vpRGBa> &I, std::vector< std::pair<std::string, std::vector<DetectedFeatures2D>>> &output)
335 return detect(m_img, output);
// Core detection overload: run the network on a cv::Mat and fill `output`
// with one DetectedFeatures2D per NMS-surviving proposal.
// NOTE(review): the copy/assignment of I into m_img, the opening `try {`
// matching the visible `catch`, the tail of the emplace_back call (closing
// parenthesis, presumably with an optional class name), and several closing
// braces are not visible in this extract.
345 bool vpDetectorDNNOpenCV::detect(
const cv::Mat &I, std::vector<DetectedFeatures2D> &output)
// Fall back to the image size when the configured input size is unset (<= 0).
350 cv::Size inputSize(m_netConfig.m_inputSize.width > 0 ? m_netConfig.m_inputSize.width : m_img.cols,
351 m_netConfig.m_inputSize.height > 0 ? m_netConfig.m_inputSize.height : m_img.rows);
352 cv::dnn::blobFromImage(m_img, m_blob, m_netConfig.m_scaleFactor, inputSize, m_netConfig.m_mean, m_netConfig.m_swapRB,
false);
354 m_net.setInput(m_blob);
356 m_net.forward(m_dnnRes, m_outNames);
// On inference failure (typically CUDA/GPU setup issues) retry once on CPU.
358 catch (
const cv::Exception &e) {
359 std::cerr <<
"Caught an exception trying to run inference:" << std::endl <<
"\t"
361 <<
"\nCuda and/or GPU driver might not be correctly installed. Setting preferable backend to CPU and trying again." << std::endl;
362 m_net.setPreferableBackend(cv::dnn::DNN_BACKEND_DEFAULT);
363 m_net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
364 m_net.forward(m_dnnRes, m_outNames);
// Parse the raw network outputs, then keep only the NMS-selected indices.
367 DetectionCandidates proposals;
368 postProcess(proposals);
369 size_t nbClassNames = m_netConfig.m_classNames.size();
370 for (
size_t i = 0; i < m_indices.size(); ++i) {
371 int idx = m_indices[i];
372 cv::Rect box = proposals.m_boxes[idx];
373 std::optional<std::string> classname_opt;
374 if (nbClassNames > 0) {
375 classname_opt = m_netConfig.m_classNames[proposals.m_classIds[idx]];
377 output.emplace_back(box.x, box.x + box.width, box.y, box.y + box.height
378 , proposals.m_classIds[idx], proposals.m_confidences[idx]
// Optionally discard detections whose area deviates too much from the mean.
383 if (m_applySizeFilterAfterNMS) {
385 output = filterDetectionMultiClassInput(output, m_netConfig.m_filterSizeRatio);
388 return !output.empty();
// Core detection overload producing a map keyed by class name (or by the
// stringified class id when no class names were configured).
// NOTE(review): the copy/assignment of I into m_img, the opening `try {`
// matching the visible `catch`, the `else` introducing the std::to_string
// branch, the tail of the emplace_back call, and several closing braces are
// not visible in this extract.
398 bool vpDetectorDNNOpenCV::detect(
const cv::Mat &I, std::map< std::string, std::vector<DetectedFeatures2D>> &output)
// Fall back to the image size when the configured input size is unset (<= 0).
403 cv::Size inputSize(m_netConfig.m_inputSize.width > 0 ? m_netConfig.m_inputSize.width : m_img.cols,
404 m_netConfig.m_inputSize.height > 0 ? m_netConfig.m_inputSize.height : m_img.rows);
405 cv::dnn::blobFromImage(m_img, m_blob, m_netConfig.m_scaleFactor, inputSize, m_netConfig.m_mean, m_netConfig.m_swapRB,
false);
407 m_net.setInput(m_blob);
409 m_net.forward(m_dnnRes, m_outNames);
// On inference failure (typically CUDA/GPU setup issues) retry once on CPU.
411 catch (
const cv::Exception &e) {
412 std::cerr <<
"Caught an exception trying to run inference:" << std::endl <<
"\t"
414 <<
"\nCuda and/or GPU driver might not be correctly installed. Setting preferable backend to CPU and trying again." << std::endl;
415 m_net.setPreferableBackend(cv::dnn::DNN_BACKEND_DEFAULT);
416 m_net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
417 m_net.forward(m_dnnRes, m_outNames);
420 DetectionCandidates proposals;
421 postProcess(proposals);
422 size_t nbClassNames = m_netConfig.m_classNames.size();
423 for (
size_t i = 0; i < m_indices.size(); ++i) {
424 int idx = m_indices[i];
425 cv::Rect box = proposals.m_boxes[idx];
426 std::string classname;
427 if (nbClassNames > 0) {
428 classname = m_netConfig.m_classNames[proposals.m_classIds[idx]];
431 classname = std::to_string(proposals.m_classIds[idx]);
433 std::optional<std::string> classname_opt = std::optional<std::string>(classname);
434 output[classname].emplace_back(box.x, box.x + box.width, box.y, box.y + box.height
435 , proposals.m_classIds[idx], proposals.m_confidences[idx]
// Optionally discard detections whose area deviates too much from the mean.
440 if (m_applySizeFilterAfterNMS) {
441 output = filterDetectionMultiClassInput(output, m_netConfig.m_filterSizeRatio);
444 return !output.empty();
454 bool vpDetectorDNNOpenCV::detect(
const cv::Mat &I, std::vector< std::pair<std::string, std::vector<DetectedFeatures2D>>> &output)
456 std::map< std::string, std::vector<DetectedFeatures2D>> map_output;
457 bool returnStatus = detect(I, map_output);
458 for (
auto key_val : map_output) {
459 output.push_back(key_val);
464 #if (VISP_HAVE_OPENCV_VERSION == 0x030403)
// OpenCV 3.4.3 fallback: Net::getUnconnectedOutLayersNames() does not exist,
// so resolve the unconnected output layer ids to names manually.
// NOTE(review): `names` is a function-local static; a guard such as
// `if (names.empty())` around the recomputation — and the final return — are
// not visible in this extract, so the memoization behavior cannot be
// confirmed from here.
470 std::vector<cv::String> vpDetectorDNNOpenCV::getOutputsNames()
472 static std::vector<cv::String> names;
474 std::vector<int> outLayers = m_net.getUnconnectedOutLayers();
475 std::vector<cv::String> layersNames = m_net.getLayerNames();
476 names.resize(outLayers.size());
// Layer ids are 1-based indices into layersNames.
477 for (
size_t i = 0; i < outLayers.size(); ++i)
478 names[i] = layersNames[outLayers[i] - 1];
// Dispatch the raw network outputs (m_dnnRes) to the parsing routine matching
// the configured type, then run OpenCV's NMS to fill m_indices.
// NOTE(review): the `case` labels, `break` statements and closing braces of
// the switch are not visible in this extract. The lines at original 511-512
// look like a function *declaration* pasted where a call to
// postProcess_SSD_MobileNet(...) would be expected — verify against the
// upstream file.
492 void vpDetectorDNNOpenCV::postProcess(DetectionCandidates &proposals)
494 switch (m_netConfig.m_parsingMethodType) {
497 postProcess_YoloV3_V4(proposals, m_dnnRes, m_netConfig);
501 postProcess_YoloV5_V7(proposals, m_dnnRes, m_netConfig);
505 postProcess_YoloV8_V11(proposals, m_dnnRes, m_netConfig);
508 postProcess_FasterRCNN(proposals, m_dnnRes, m_netConfig);
511 #if defined(VISP_BUILD_DEPRECATED_FUNCTIONS)
512 void postProcess_SSD_MobileNet(DetectionCandidates & proposals, std::vector<cv::Mat> &dnnRes,
const NetConfig & netConfig);
516 postProcess_ResNet_10(proposals, m_dnnRes, m_netConfig);
520 postProcess_ResNet_10(proposals, m_dnnRes, m_netConfig);
// USER_SPECIFIED case: the user-supplied callback.
523 m_parsingMethod(proposals, m_dnnRes, m_netConfig);
// Non-maximum suppression selects the indices of the proposals to keep.
530 cv::dnn::NMSBoxes(proposals.m_boxes, proposals.m_confidences, m_netConfig.m_confThreshold, m_netConfig.m_nmsThreshold, m_indices);
544 std::vector<vpDetectorDNNOpenCV::DetectedFeatures2D>
545 vpDetectorDNNOpenCV::filterDetectionSingleClassInput(
const std::vector<DetectedFeatures2D> &detected_features,
const double minRatioOfAreaOk)
548 double originalNumberOfObj =
static_cast<double>(detected_features.size());
549 double meanFactor = 1. / originalNumberOfObj;
552 for (DetectedFeatures2D feature : detected_features) {
553 meanArea += feature.m_bbox.getArea();
555 meanArea *= meanFactor;
558 std::vector<DetectedFeatures2D> filtered_features;
559 for (DetectedFeatures2D feature : detected_features) {
560 if (feature.m_bbox.getArea() >= minRatioOfAreaOk * meanArea && feature.m_bbox.getArea() < meanArea / minRatioOfAreaOk) {
561 filtered_features.push_back(feature);
565 return filtered_features;
578 std::vector<vpDetectorDNNOpenCV::DetectedFeatures2D>
579 vpDetectorDNNOpenCV::filterDetectionMultiClassInput(
const std::vector<DetectedFeatures2D> &detected_features,
const double minRatioOfAreaOk)
581 #ifndef DOXYGEN_SHOULD_SKIP_THIS
586 class MeanAreaComputer
589 std::map<int, std::pair<int, double>> m_map_id_pairOccurrencesAreas;
592 std::map<int, double> m_mapMeans;
599 double computeMeanArea(
const int &class_id)
601 return m_map_id_pairOccurrencesAreas[class_id].second / (double)m_map_id_pairOccurrencesAreas[class_id].first;
610 for (
const auto &classID_pair : m_map_id_pairOccurrencesAreas) {
611 m_mapMeans[classID_pair.first] = computeMeanArea(classID_pair.first);
615 double getMean(
const int &class_id)
617 if (m_map_id_pairOccurrencesAreas.find(class_id) == m_map_id_pairOccurrencesAreas.end()) {
618 throw(
vpException(
vpException::badValue,
"[MeanAreaComputer::getMean] Asking for class_id \"" + std::to_string(class_id) +
"\" that is not present in m_mapMeans. Did you call computeMeans ?"));
620 return m_mapMeans[class_id];
628 void operator()(
const DetectedFeatures2D &feature)
630 int class_id = feature.getClassId();
631 double area = feature.getBoundingBox().getArea();
632 if (m_map_id_pairOccurrencesAreas.find(class_id) == m_map_id_pairOccurrencesAreas.end()) {
633 m_map_id_pairOccurrencesAreas[class_id] = std::pair<int, double>(1, area);
636 std::pair<int, double> prev_state = m_map_id_pairOccurrencesAreas[class_id];
637 m_map_id_pairOccurrencesAreas[class_id] = std::pair<int, double>(prev_state.first + 1, prev_state.second + area);
644 MeanAreaComputer meanComputer;
645 std::for_each(detected_features.begin(), detected_features.end(), meanComputer);
646 meanComputer.computeMeans();
649 std::vector<DetectedFeatures2D> filtered_features;
650 for (DetectedFeatures2D feature : detected_features) {
651 double meanArea = meanComputer.getMean(feature.getClassId());
652 if (feature.m_bbox.getArea() >= minRatioOfAreaOk * meanArea
653 && feature.m_bbox.getArea() < meanArea / minRatioOfAreaOk) {
654 filtered_features.push_back(feature);
658 return filtered_features;
670 std::map<std::string, std::vector<vpDetectorDNNOpenCV::DetectedFeatures2D>>
671 vpDetectorDNNOpenCV::filterDetectionMultiClassInput(
const std::map< std::string, std::vector<vpDetectorDNNOpenCV::DetectedFeatures2D>> &detected_features,
const double minRatioOfAreaOk)
673 std::map<std::string, std::vector<vpDetectorDNNOpenCV::DetectedFeatures2D>> output;
674 for (
auto keyval : detected_features) {
675 output[keyval.first] = filterDetectionSingleClassInput(detected_features.at(keyval.first), minRatioOfAreaOk);
// Parse YOLO v3/v4 raw outputs: each row is [cx, cy, w, h, objectness,
// class scores...] with coordinates normalized to [0, 1] (scaled here by the
// image size). Rows passing both thresholds become proposals.
// NOTE(review): the advancement of `pdata`/`row_ind` at the end of each
// iteration and the loops' closing braces are not visible in this extract.
693 void vpDetectorDNNOpenCV::postProcess_YoloV3_V4(DetectionCandidates &proposals, std::vector<cv::Mat> &dnnRes,
const NetConfig &netConfig)
695 size_t nbBatches = dnnRes.size();
697 for (
size_t i = 0; i < nbBatches; i++) {
// Determine the number of proposals and values per proposal; flatten 3-D
// blobs ([batch, rows, cols]) to 2-D.
700 int num_proposal = dnnRes[i].size[0];
701 int nout = dnnRes[i].size[1];
702 if (dnnRes[i].dims > 2) {
703 num_proposal = dnnRes[i].size[1];
704 nout = dnnRes[i].size[2];
705 dnnRes[i] = dnnRes[i].reshape(0, num_proposal);
708 int n = 0, row_ind = 0;
709 float *pdata = (
float *)dnnRes[i].data;
712 for (n = 0; n < num_proposal; n++) {
// pdata[4] is the objectness score of the current row.
713 float box_score = pdata[4];
714 if (box_score > netConfig.m_confThreshold) {
715 cv::Mat scores = dnnRes[i].row(row_ind).colRange(5, nout);
716 cv::Point classIdPoint;
717 double max_class_score;
// Best class = location of the maximum class score.
719 cv::minMaxLoc(scores, 0, &max_class_score, 0, &classIdPoint);
// Final confidence combines objectness and class score.
721 max_class_score *= box_score;
724 if (max_class_score > netConfig.m_confThreshold) {
725 const int class_idx = classIdPoint.x;
// Center/size are normalized; scale by the image dimensions.
726 float cx = pdata[0] * m_img.cols;
727 float cy = pdata[1] * m_img.rows;
728 float w = pdata[2] * m_img.cols;
729 float h = pdata[3] * m_img.rows;
// Convert center-based box to top-left-based cv::Rect.
731 int left = int(cx - 0.5 * w);
732 int top = int(cy - 0.5 * h);
734 proposals.m_confidences.push_back((
float)max_class_score);
735 proposals.m_boxes.push_back(cv::Rect(left, top, (
int)(w), (
int)(h)));
736 proposals.m_classIds.push_back(class_idx);
// Parse YOLO v5/v7 raw outputs: same row layout as v3/v4 ([cx, cy, w, h,
// objectness, class scores...]) but coordinates are expressed in network
// input pixels, hence the ratioh/ratiow rescaling to the image size.
// NOTE(review): the advancement of `pdata`/`row_ind` at the end of each
// iteration and the loops' closing braces are not visible in this extract.
756 void vpDetectorDNNOpenCV::postProcess_YoloV5_V7(DetectionCandidates &proposals, std::vector<cv::Mat> &dnnRes,
const NetConfig &netConfig)
// Scale factors from network input size to the processed image size.
760 float ratioh = (float)m_img.rows / netConfig.m_inputSize.height, ratiow = (
float)m_img.cols / netConfig.m_inputSize.width;
761 size_t nbBatches = dnnRes.size();
763 for (
size_t i = 0; i < nbBatches; i++) {
// Determine the number of proposals and values per proposal; flatten 3-D
// blobs to 2-D.
765 int num_proposal = dnnRes[i].size[0];
766 int nout = dnnRes[i].size[1];
767 if (dnnRes[i].dims > 2) {
768 num_proposal = dnnRes[i].size[1];
769 nout = dnnRes[i].size[2];
770 dnnRes[i] = dnnRes[i].reshape(0, num_proposal);
773 int n = 0, row_ind = 0;
774 float *pdata = (
float *)dnnRes[i].data;
777 for (n = 0; n < num_proposal; n++) {
// pdata[4] is the objectness score of the current row.
778 float box_score = pdata[4];
780 if (box_score > netConfig.m_confThreshold) {
781 cv::Mat scores = dnnRes[i].row(row_ind).colRange(5, nout);
782 cv::Point classIdPoint;
783 double max_class_score;
785 cv::minMaxLoc(scores, 0, &max_class_score, 0, &classIdPoint);
// Final confidence combines objectness and class score.
786 max_class_score *= box_score;
789 if (max_class_score > netConfig.m_confThreshold) {
790 const int class_idx = classIdPoint.x;
// Rescale from network-input pixels to image pixels.
791 float cx = pdata[0] * ratiow;
792 float cy = pdata[1] * ratioh;
793 float w = pdata[2] * ratiow;
794 float h = pdata[3] * ratioh;
796 int left = int(cx - 0.5 * w);
797 int top = int(cy - 0.5 * h);
799 proposals.m_confidences.push_back((
float)max_class_score);
800 proposals.m_boxes.push_back(cv::Rect(left, top, (
int)(w), (
int)(h)));
801 proposals.m_classIds.push_back(class_idx);
// Parse YOLO v8/v11 raw outputs. Unlike v3-v7 there is no separate
// objectness score: each row (after transposition) is [cx, cy, w, h,
// class scores...], with coordinates in network-input pixels.
// NOTE(review): the advancement of `pdata`/`row_ind` at the end of each
// iteration and the loops' closing braces are not visible in this extract.
821 void vpDetectorDNNOpenCV::postProcess_YoloV8_V11(DetectionCandidates &proposals, std::vector<cv::Mat> &dnnRes,
const NetConfig &netConfig)
// Scale factors from network input size to the processed image size.
826 float ratioh = (float)m_img.rows / netConfig.m_inputSize.height, ratiow = (
float)m_img.cols / netConfig.m_inputSize.width;
827 size_t nbBatches = dnnRes.size();
829 for (
size_t i = 0; i < nbBatches; i++) {
// v8/v11 blobs are [values, proposals]; flatten then transpose so that
// each row corresponds to one proposal.
831 int num_proposal = dnnRes[i].size[1];
832 int nout = dnnRes[i].size[0];
833 if (dnnRes[i].dims > 2) {
834 num_proposal = dnnRes[i].size[2];
835 nout = dnnRes[i].size[1];
836 dnnRes[i] = dnnRes[i].reshape(0, nout);
838 cv::transpose(dnnRes[i], dnnRes[i]);
840 int n = 0, row_ind = 0;
841 float *pdata = (
float *)dnnRes[i].data;
844 for (n = 0; n < num_proposal; n++) {
// Class scores start at column 4 (no objectness column here).
845 cv::Mat scores = dnnRes[i].row(row_ind).colRange(4, nout);
846 cv::Point classIdPoint;
847 double max_class_score;
849 cv::minMaxLoc(scores, 0, &max_class_score, 0, &classIdPoint);
852 if (max_class_score > netConfig.m_confThreshold) {
853 const int class_idx = classIdPoint.x;
// Rescale from network-input pixels to image pixels.
854 float cx = pdata[0] * ratiow;
855 float cy = pdata[1] * ratioh;
856 float w = pdata[2] * ratiow;
857 float h = pdata[3] * ratioh;
859 int left = int(cx - 0.5 * w);
860 int top = int(cy - 0.5 * h);
862 proposals.m_confidences.push_back((
float)max_class_score);
863 proposals.m_boxes.push_back(cv::Rect(left, top, (
int)(w), (
int)(h)));
864 proposals.m_classIds.push_back(class_idx);
884 void vpDetectorDNNOpenCV::postProcess_FasterRCNN(DetectionCandidates &proposals, std::vector<cv::Mat> &dnnRes,
const NetConfig &netConfig)
892 size_t nbBatches = dnnRes.size();
893 for (
size_t j = 0; j < nbBatches; j++) {
894 float *data = (
float *)dnnRes[j].data;
895 for (
size_t i = 0; i < dnnRes[j].total(); i += 7) {
896 float confidence = data[i + 2];
897 if (confidence > netConfig.m_confThreshold) {
898 int left = (int)(data[i + 3] * m_img.cols);
899 int top = (int)(data[i + 4] * m_img.rows);
900 int right = (int)(data[i + 5] * m_img.cols);
901 int bottom = (int)(data[i + 6] * m_img.rows);
902 int classId = (int)(data[i + 1]);
904 proposals.m_confidences.push_back((
float)confidence);
905 proposals.m_boxes.push_back(cv::Rect(left, top, right - left + 1, bottom - top + 1));
906 proposals.m_classIds.push_back(classId);
913 #if defined(VISP_BUILD_DEPRECATED_FUNCTIONS)
926 void vpDetectorDNNOpenCV::postProcess_SSD_MobileNet(DetectionCandidates &proposals, std::vector<cv::Mat> &dnnRes,
const NetConfig &netConfig)
933 int scores_index = m_outNames[0] ==
"scores" ? 0 : 1;
934 int boxes_index = m_outNames[0] ==
"boxes" ? 0 : 1;
936 int N = dnnRes[scores_index].size[1], C = dnnRes[scores_index].size[2];
938 float *confidence = (
float *)dnnRes[scores_index].data;
939 float *bbox = (
float *)dnnRes[boxes_index].data;
942 for (
int i = 0; i < N; i++) {
943 uint32_t maxClass = 0;
944 float maxScore = -1000.0f;
946 for (
int j = 1; j < C; j++)
948 const float score = confidence[i * C + j];
950 if (score < netConfig.m_confThreshold)
953 if (score > maxScore) {
959 if (maxScore > netConfig.m_confThreshold) {
960 int left = (int)(bbox[4 * i] * m_img.cols);
961 int top = (int)(bbox[4 * i + 1] * m_img.rows);
962 int right = (int)(bbox[4 * i + 2] * m_img.cols);
963 int bottom = (int)(bbox[4 * i + 3] * m_img.rows);
964 int width = right - left + 1;
965 int height = bottom - top + 1;
967 int classId = maxClass;
968 proposals.m_confidences.push_back(maxScore);
969 proposals.m_boxes.push_back(cv::Rect(left, top, width, height));
970 proposals.m_classIds.push_back(classId);
987 void vpDetectorDNNOpenCV::postProcess_ResNet_10(DetectionCandidates &proposals, std::vector<cv::Mat> &dnnRes,
const NetConfig &netConfig)
994 CV_Assert(dnnRes.size() == 1);
995 float *data = (
float *)dnnRes[0].data;
996 for (
size_t i = 0; i < dnnRes[0].total(); i += 7) {
997 float confidence = data[i + 2];
998 if (confidence > netConfig.m_confThreshold) {
999 int left = (int)(data[i + 3] * m_img.cols);
1000 int top = (int)(data[i + 4] * m_img.rows);
1001 int right = (int)(data[i + 5] * m_img.cols);
1002 int bottom = (int)(data[i + 6] * m_img.rows);
1003 int classId = (int)(data[i + 1]) - 1;
1005 proposals.m_confidences.push_back((
float)confidence);
1006 proposals.m_boxes.push_back(cv::Rect(left, top, right - left + 1, bottom - top + 1));
1007 proposals.m_classIds.push_back(classId);
// Placeholder parsing method installed by the default constructor.
// NOTE(review): the body is not visible in this extract — presumably it
// raises an error (e.g. vpException::functionNotImplementedError) to signal
// that no parsing method was configured; confirm against the upstream file.
1020 void vpDetectorDNNOpenCV::postProcess_unimplemented(DetectionCandidates &proposals, std::vector<cv::Mat> &dnnRes,
const NetConfig &netConfig)
1047 void vpDetectorDNNOpenCV::readNet(
const std::string &model,
const std::string &config,
const std::string &framework)
1049 m_netConfig.m_modelFilename = model;
1050 m_netConfig.m_modelConfigFilename = config;
1051 m_netConfig.m_framework = framework;
1052 m_net = cv::dnn::readNet(model, config, framework);
1053 #if (VISP_HAVE_OPENCV_VERSION == 0x030403)
1054 m_outNames = getOutputsNames();
1056 m_outNames = m_net.getUnconnectedOutLayersNames();
1066 void vpDetectorDNNOpenCV::setNetConfig(
const NetConfig &config)
1068 m_netConfig = config;
1069 setDetectionFilterSizeRatio(m_netConfig.m_filterSizeRatio);
1070 setParsingMethod(m_netConfig.m_parsingMethodType);
1071 if (!m_netConfig.m_modelFilename.empty()) {
1072 readNet(m_netConfig.m_modelFilename, m_netConfig.m_modelConfigFilename, m_netConfig.m_framework);
1081 void vpDetectorDNNOpenCV::setConfidenceThreshold(
const float &confThreshold) { m_netConfig.m_confThreshold = confThreshold; }
1089 void vpDetectorDNNOpenCV::setNMSThreshold(
const float &nmsThreshold) { m_netConfig.m_nmsThreshold = nmsThreshold; }
1098 void vpDetectorDNNOpenCV::setDetectionFilterSizeRatio(
const double &sizeRatio)
1100 m_netConfig.m_filterSizeRatio = sizeRatio;
1101 if (m_netConfig.m_filterSizeRatio > std::numeric_limits<double>::epsilon()) {
1102 m_applySizeFilterAfterNMS =
true;
1105 m_applySizeFilterAfterNMS =
false;
1115 void vpDetectorDNNOpenCV::setInputSize(
const int &width,
const int &height)
1117 m_netConfig.m_inputSize.width = width;
1118 m_netConfig.m_inputSize.height = height;
1128 void vpDetectorDNNOpenCV::setMean(
const double &meanR,
const double &meanG,
const double &meanB) { m_netConfig.m_mean = cv::Scalar(meanR, meanG, meanB); }
1136 void vpDetectorDNNOpenCV::setPreferableBackend(
const int &backendId) { m_net.setPreferableBackend(backendId); }
1144 void vpDetectorDNNOpenCV::setPreferableTarget(
const int &targetId) { m_net.setPreferableTarget(targetId); }
1149 void vpDetectorDNNOpenCV::setScaleFactor(
const double &scaleFactor)
1151 m_netConfig.m_scaleFactor = scaleFactor;
1152 if ((m_netConfig.m_parsingMethodType == YOLO_V7 || m_netConfig.m_parsingMethodType == YOLO_V8 || m_netConfig.m_parsingMethodType == YOLO_V11) && m_netConfig.m_scaleFactor != 1 / 255.) {
1153 std::cout <<
"[vpDetectorDNNOpenCV::setParsingMethod] WARNING: scale factor should be 1/255. to normalize pixels value." << std::endl;
1162 void vpDetectorDNNOpenCV::setSwapRB(
const bool &swapRB) { m_netConfig.m_swapRB = swapRB; }
1171 void vpDetectorDNNOpenCV::setParsingMethod(
const DNNResultsParsingType &typeParsingMethod,
void (*parsingMethod)(DetectionCandidates &, std::vector<cv::Mat> &,
const NetConfig &))
1173 m_netConfig.m_parsingMethodType = typeParsingMethod;
1174 m_parsingMethod = parsingMethod;
1175 if ((m_netConfig.m_parsingMethodType == YOLO_V7 || m_netConfig.m_parsingMethodType == YOLO_V8 || m_netConfig.m_parsingMethodType == YOLO_V11) && m_netConfig.m_scaleFactor != 1 / 255.) {
1176 m_netConfig.m_scaleFactor = 1 / 255.;
1177 std::cout <<
"[vpDetectorDNNOpenCV::setParsingMethod] NB: scale factor changed to 1/255. to normalize pixels value." << std::endl;
1180 #if defined(VISP_BUILD_DEPRECATED_FUNCTIONS)
1181 if (m_netConfig.m_parsingMethodType == SSD_MOBILENET) {
1182 std::cout <<
"[vpDetectorDNNOpenCV::setParsingMethod] WARNING: The chosen type of network is " << dnnResultsParsingTypeToString(m_netConfig.m_parsingMethodType) <<
" VISP_BUILD_DEPRECATED_FUNCTIONS is set to true." << std::endl;
1183 std::cout <<
"\tThe parsing method that worked with the networks quoted in the ViSP documentation was postProcess_ResNet_10 instead of postProcess_SSD_MobileNet." << std::endl;
1184 std::cout <<
"\tIf the SSD-MobileNet network does not seem to work, please try to recompile ViSP setting VISP_BUILD_DEPRECATED_FUNCTIONS as false." << std::endl << std::flush;
1190 #elif !defined(VISP_BUILD_SHARED_LIBS)
// Empty symbol emitted when the DNN detector is compiled out and the library
// is built static, so that the translation unit is never entirely empty.
1192 void dummy_vpDetectorDNN() { };
error that can be emitted by ViSP classes.
@badValue
Used to indicate that a value is not in the allowed range.
@functionNotImplementedError
Function not implemented.
static void convert(const vpImage< unsigned char > &src, vpImage< vpRGBa > &dest)