35 #include <visp3/core/vpConfig.h>
37 #if (VISP_HAVE_OPENCV_VERSION >= 0x030403) && defined(VISP_HAVE_OPENCV_DNN)
38 #include <visp3/core/vpImageConvert.h>
39 #include <visp3/detection/vpDetectorDNN.h>
42 : m_blob(), m_boxes(), m_classIds(), m_confidences(), m_confidenceThreshold(0.5), m_I_color(), m_img(),
43 m_inputSize(300, 300), m_mean(127.5, 127.5, 127.5), m_net(), m_nmsThreshold(0.4f), m_outNames(), m_outs(),
44 m_scaleFactor(2.0 / 255.0), m_swapRB(true)
62 std::vector<vpRect> boundingBoxes;
63 return detect(m_I_color, boundingBoxes);
77 cv::Size inputSize(m_inputSize.width > 0 ? m_inputSize.width : m_img.cols,
78 m_inputSize.height > 0 ? m_inputSize.height : m_img.rows);
79 cv::dnn::blobFromImage(m_img, m_blob, m_scaleFactor, inputSize, m_mean, m_swapRB,
false);
81 m_net.setInput(m_blob);
82 m_net.forward(m_outs, m_outNames);
86 boundingBoxes.resize(m_boxesNMS.size());
87 for (
size_t i = 0; i < m_boxesNMS.size(); i++) {
88 cv::Rect box = m_boxesNMS[i];
89 boundingBoxes[i] =
vpRect(box.x, box.y, box.width, box.height);
95 for (
size_t i = 0; i < boundingBoxes.size(); i++) {
96 std::vector<vpImagePoint> polygon;
98 double x = boundingBoxes[i].getLeft();
99 double y = boundingBoxes[i].getTop();
100 double w = boundingBoxes[i].getWidth();
101 double h = boundingBoxes[i].getHeight();
110 std::ostringstream oss;
111 oss << m_classIds[i] <<
" ; " << m_confidences[i] <<
" ; " << m_boxes[i];
115 return !boundingBoxes.empty();
125 std::vector<vpRect> bbs;
127 bbs.reserve(m_boxesNMS.size());
128 for (
size_t i = 0; i < m_boxesNMS.size(); i++) {
129 cv::Rect box = m_boxes[i];
130 bbs.push_back(
vpRect(box.x, box.y, box.width, box.height));
133 bbs.reserve(m_boxes.size());
134 for (
size_t i = 0; i < m_boxes.size(); i++) {
135 cv::Rect box = m_boxes[i];
136 bbs.push_back(
vpRect(box.x, box.y, box.width, box.height));
151 std::vector<int> classIds;
152 for (
size_t i = 0; i < m_indices.size(); i++) {
153 int idx = m_indices[i];
154 classIds.push_back(m_classIds[idx]);
168 std::vector<float> confidences;
169 for (
size_t i = 0; i < m_indices.size(); i++) {
170 int idx = m_indices[i];
171 confidences.push_back(m_confidences[idx]);
176 return m_confidences;
179 #if (VISP_HAVE_OPENCV_VERSION == 0x030403)
180 std::vector<cv::String> vpDetectorDNN::getOutputsNames()
182 static std::vector<cv::String> names;
184 std::vector<int> outLayers = m_net.getUnconnectedOutLayers();
185 std::vector<cv::String> layersNames = m_net.getLayerNames();
186 names.resize(outLayers.size());
187 for (
size_t i = 0; i < outLayers.size(); ++i)
188 names[i] = layersNames[outLayers[i] - 1];
194 void vpDetectorDNN::postProcess()
197 static std::vector<int> outLayers = m_net.getUnconnectedOutLayers();
198 static std::string outLayerType = m_net.getLayer(outLayers[0])->type;
201 m_confidences.clear();
203 if (m_net.getLayer(0)->outputNameToIndex(
"im_info") != -1)
208 CV_Assert(m_outs.size() == 1);
209 float *data = (
float *)m_outs[0].data;
210 for (
size_t i = 0; i < m_outs[0].total(); i += 7) {
211 float confidence = data[i + 2];
212 if (confidence > m_confidenceThreshold) {
213 int left = (int)data[i + 3];
214 int top = (int)data[i + 4];
215 int right = (int)data[i + 5];
216 int bottom = (int)data[i + 6];
217 int width = right - left + 1;
218 int height = bottom - top + 1;
219 m_classIds.push_back((
int)(data[i + 1]) - 1);
220 m_boxes.push_back(cv::Rect(left, top, width, height));
221 m_confidences.push_back(confidence);
224 }
else if (outLayerType ==
"DetectionOutput") {
228 CV_Assert(m_outs.size() == 1);
229 float *data = (
float *)m_outs[0].data;
230 for (
size_t i = 0; i < m_outs[0].total(); i += 7) {
231 float confidence = data[i + 2];
232 if (confidence > m_confidenceThreshold) {
233 int left = (int)(data[i + 3] * m_img.cols);
234 int top = (int)(data[i + 4] * m_img.rows);
235 int right = (int)(data[i + 5] * m_img.cols);
236 int bottom = (int)(data[i + 6] * m_img.rows);
237 int width = right - left + 1;
238 int height = bottom - top + 1;
239 m_classIds.push_back((
int)(data[i + 1]) - 1);
240 m_boxes.push_back(cv::Rect(left, top, width, height));
241 m_confidences.push_back(confidence);
244 }
else if (outLayerType ==
"Region") {
245 for (
size_t i = 0; i < m_outs.size(); ++i) {
249 float *data = (
float *)m_outs[i].data;
250 for (
int j = 0; j < m_outs[i].rows; ++j, data += m_outs[i].cols) {
251 cv::Mat scores = m_outs[i].row(j).colRange(5, m_outs[i].cols);
252 cv::Point classIdPoint;
254 cv::minMaxLoc(scores, 0, &confidence, 0, &classIdPoint);
255 if (confidence > m_confidenceThreshold) {
256 int centerX = (int)(data[0] * m_img.cols);
257 int centerY = (int)(data[1] * m_img.rows);
258 int width = (int)(data[2] * m_img.cols);
259 int height = (int)(data[3] * m_img.rows);
260 int left = centerX - width / 2;
261 int top = centerY - height / 2;
263 m_classIds.push_back(classIdPoint.x);
264 m_confidences.push_back((
float)confidence);
265 m_boxes.push_back(cv::Rect(left, top, width, height));
269 }
else if (outLayerType ==
"Identity" || outLayerType ==
"Softmax") {
278 int scores_index = m_outNames[0] ==
"scores" ? 0 : 1;
279 int boxes_index = m_outNames[0] ==
"boxes" ? 0 : 1;
281 int N = m_outs[scores_index].size[1], C = m_outs[scores_index].size[2];
283 float *confidence = (
float *)m_outs[scores_index].data;
284 float *bbox = (
float *)m_outs[boxes_index].data;
287 for (
int i = 0; i < N; i++) {
288 uint32_t maxClass = 0;
289 float maxScore = -1000.0f;
291 for (
int j = 1; j < C; j++)
293 const float score = confidence[i * C + j];
295 if (score < m_confidenceThreshold)
298 if (score > maxScore) {
304 if (maxScore > m_confidenceThreshold) {
305 int left = (int)(bbox[4 * i] * m_img.cols);
306 int top = (int)(bbox[4 * i + 1] * m_img.rows);
307 int right = (int)(bbox[4 * i + 2] * m_img.cols);
308 int bottom = (int)(bbox[4 * i + 3] * m_img.rows);
309 int width = right - left + 1;
310 int height = bottom - top + 1;
312 m_boxes.push_back(cv::Rect(left, top, width, height));
313 m_classIds.push_back(maxClass);
314 m_confidences.push_back(maxScore);
318 CV_Error(cv::Error::StsNotImplemented,
"Unknown output layer type: " + outLayerType);
320 cv::dnn::NMSBoxes(m_boxes, m_confidences, m_confidenceThreshold, m_nmsThreshold, m_indices);
321 m_boxesNMS.resize(m_indices.size());
322 for (
size_t i = 0; i < m_indices.size(); ++i) {
323 int idx = m_indices[i];
324 m_boxesNMS[i] = m_boxes[idx];
349 m_net = cv::dnn::readNet(model, config, framework);
350 #if (VISP_HAVE_OPENCV_VERSION == 0x030403)
351 m_outNames = getOutputsNames();
353 m_outNames = m_net.getUnconnectedOutLayersNames();
372 m_inputSize.width = width;
373 m_inputSize.height = height;
421 #elif !defined(VISP_BUILD_SHARED_LIBS)
424 void dummy_vpDetectorDNN(){};
std::vector< std::string > m_message
Message attached to each object.
std::vector< std::vector< vpImagePoint > > m_polygon
For each object, defines the polygon that contains the object.
size_t m_nb_objects
Number of detected objects.
void setPreferableBackend(int backendId)
std::vector< vpRect > getDetectionBBs(bool afterNMS=true) const
std::vector< float > getDetectionConfidence(bool afterNMS=true) const
void setMean(double meanR, double meanG, double meanB)
void setPreferableTarget(int targetId)
void setInputSize(int width, int height)
void setScaleFactor(double scaleFactor)
void readNet(const std::string &model, const std::string &config="", const std::string &framework="")
void setNMSThreshold(float nmsThreshold)
void setConfidenceThreshold(float confThreshold)
void setSwapRB(bool swapRB)
std::vector< int > getDetectionClassIds(bool afterNMS=true) const
virtual bool detect(const vpImage< unsigned char > &I)
static void convert(const vpImage< unsigned char > &src, vpImage< vpRGBa > &dest)
Class that defines a 2D point in an image. This class is useful for image processing and stores only ...
Defines a rectangle in the plane.