#include <visp3/core/vpConfig.h>

#include <fstream>
#include <iostream>
#include <memory>
#include <sys/stat.h>

#if defined(VISP_HAVE_TENSORRT) && defined(VISP_HAVE_OPENCV)
#include <opencv2/opencv_modules.hpp>
#if defined(HAVE_OPENCV_CUDEV) && defined(HAVE_OPENCV_CUDAWARPING) && defined(HAVE_OPENCV_CUDAARITHM) && \
    defined(VISP_HAVE_OPENCV_DNN)
#include <visp3/core/vpImageConvert.h>
#include <visp3/core/vpIoTools.h>
#include <visp3/gui/vpDisplayX.h>

#include <opencv2/core/cuda.hpp>
#include <opencv2/cudaarithm.hpp>
#include <opencv2/cudawarping.hpp>
#include <opencv2/dnn.hpp>

#include <cuda_runtime_api.h>

#include <NvInfer.h>
#include <NvOnnxParser.h>

//! Preprocess the input image on the GPU and copy it to the network input buffer.
void preprocessImage(cv::Mat &img, float *gpu_input, const nvinfer1::Dims &dims, float meanR, float meanG, float meanB)
{
  if (img.empty()) {
    std::cerr << "Image is empty." << std::endl;
    return;
  }

  cv::cuda::GpuMat gpu_frame;
  // Upload the image to the GPU
  gpu_frame.upload(img);

  // The input binding is in NCHW order
  auto input_width = dims.d[3];
  auto input_height = dims.d[2];
  auto channels = dims.d[1];
  auto input_size = cv::Size(input_width, input_height);

  // Resize to the network input size
  cv::cuda::GpuMat resized;
  cv::cuda::resize(gpu_frame, resized, input_size, 0, 0, cv::INTER_NEAREST);

  // Normalize: (pixel - mean) / 127.5
  cv::cuda::GpuMat flt_image;
  resized.convertTo(flt_image, CV_32FC3);
  cv::cuda::subtract(flt_image, cv::Scalar(meanR, meanG, meanB), flt_image, cv::noArray(), -1);
  cv::cuda::divide(flt_image, cv::Scalar(127.5f, 127.5f, 127.5f), flt_image, 1, -1);

  // HWC to CHW: split the channels directly into the input buffer
  std::vector<cv::cuda::GpuMat> chw;
  for (int i = 0; i < channels; ++i)
    chw.emplace_back(cv::cuda::GpuMat(input_size, CV_32FC1, gpu_input + i * input_width * input_height));
  cv::cuda::split(flt_image, chw);
}
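// Note on the planar copy above: gpu_input is assumed to point to the (already allocated)
// input binding, large enough to hold channels * input_height * input_width floats.
// Constructing each GpuMat plane on gpu_input + i * input_width * input_height lets
// cv::cuda::split() write the CHW tensor in place, without an extra copy.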
//! Return the number of elements of a tensor with the given dimensions.
size_t getSizeByDim(const nvinfer1::Dims &dims)
{
  size_t size = 1;
  for (int i = 0; i < dims.nbDims; ++i)
    size *= dims.d[i];
  return size;
}
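// The post-processing below assumes an SSD-style detection head with two output bindings:
// the first output holds per-candidate class scores with shape (batch, N, C) and the second
// holds the matching boxes as normalized [x_min, y_min, x_max, y_max] with shape (batch, N, 4).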
std::vector<cv::Rect> postprocessResults(std::vector<void *> buffers, const std::vector<nvinfer1::Dims> &output_dims,
                                         int batch_size, int image_width, int image_height, float confThresh,
                                         float nmsThresh, std::vector<int> &classIds)
{
  std::vector<cv::Rect> m_boxes, m_boxesNMS;
  std::vector<int> m_classIds;
  std::vector<float> m_confidences;
  std::vector<int> m_indices;

  // Copy the output buffers from the GPU to the host
  std::vector<std::vector<float> > cpu_outputs;
  for (size_t i = 0; i < output_dims.size(); i++) {
    cpu_outputs.push_back(std::vector<float>(getSizeByDim(output_dims[i]) * batch_size));
    cudaMemcpy(cpu_outputs[i].data(), (float *)buffers[1 + i], cpu_outputs[i].size() * sizeof(float),
               cudaMemcpyDeviceToHost);
  }

  // N candidate detections, each with C class scores
  int N = output_dims[0].d[1], C = output_dims[0].d[2];
  for (int i = 0; i < N; i++) {
    uint32_t maxClass = 0;
    float maxScore = -1000.0f;

    // Keep the best-scoring class; j starts at 1 since index 0 is assumed to be the background class
    for (int j = 1; j < C; j++) {
      const float score = cpu_outputs[0][i * C + j];
      if (score < confThresh)
        continue;

      if (score > maxScore) {
        maxScore = score;
        maxClass = j;
      }
    }

    if (maxScore > confThresh) {
      // Box coordinates are normalized in [0, 1]
      int left = (int)(cpu_outputs[1][4 * i] * image_width);
      int top = (int)(cpu_outputs[1][4 * i + 1] * image_height);
      int right = (int)(cpu_outputs[1][4 * i + 2] * image_width);
      int bottom = (int)(cpu_outputs[1][4 * i + 3] * image_height);
      int width = right - left + 1;
      int height = bottom - top + 1;

      m_boxes.push_back(cv::Rect(left, top, width, height));
      m_classIds.push_back(maxClass);
      m_confidences.push_back(maxScore);
    }
  }

  // Non-maximum suppression to remove overlapping detections
  cv::dnn::NMSBoxes(m_boxes, m_confidences, confThresh, nmsThresh, m_indices);
  m_boxesNMS.resize(m_indices.size());
  for (size_t i = 0; i < m_indices.size(); ++i) {
    int idx = m_indices[i];
    m_boxesNMS[i] = m_boxes[idx];
  }

  classIds = m_classIds;
  return m_boxesNMS;
}
//! Logger required by the TensorRT API; only errors and verbose messages are printed.
class Logger : public nvinfer1::ILogger
{
public:
  void log(Severity severity, const char *msg) noexcept
  {
    if ((severity == Severity::kERROR) || (severity == Severity::kINTERNAL_ERROR) || (severity == Severity::kVERBOSE))
      std::cout << msg << std::endl;
  }
} gLogger;
//! Deleter that releases TensorRT objects through their destroy() method.
struct TRTDestroy
{
  template <class T> void operator()(T *obj) const
  {
    if (obj)
      obj->destroy();
  }
};

template <class T> using TRTUniquePtr = std::unique_ptr<T, TRTDestroy>;
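// parseOnnxModel() first looks for a serialized engine cached next to the model
// ("<model>.engine"); if found, it is deserialized directly. Otherwise the ONNX file is
// parsed, the engine is built (with FP16 enabled when the platform supports it) and the
// serialized engine is written back to disk so that later runs skip the expensive build.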
bool parseOnnxModel(const std::string &model_path, TRTUniquePtr<nvinfer1::ICudaEngine> &engine,
                    TRTUniquePtr<nvinfer1::IExecutionContext> &context)
{
  // The serialized engine is cached next to the ONNX model as "<model>.engine"
  char cache_prefix[FILENAME_MAX];
  char cache_path[FILENAME_MAX];

  sprintf(cache_prefix, "%s", model_path.c_str());
  sprintf(cache_path, "%s.engine", cache_prefix);

  std::cout << "attempting to open engine cache file " << cache_path << std::endl;

  if (vpIoTools::checkFilename(cache_path)) {
    char *engineStream = NULL;
    size_t engineSize = 0;

    // Determine the size of the serialized engine
    struct stat filestat;
    stat(cache_path, &filestat);
    engineSize = filestat.st_size;

    // Allocate memory to hold the engine
    engineStream = (char *)malloc(engineSize);

    // Read the serialized engine from disk
    FILE *cacheFile = NULL;
    cacheFile = fopen(cache_path, "rb");

    const size_t bytesRead = fread(engineStream, 1, engineSize, cacheFile);
    if (bytesRead != engineSize) {
      std::cerr << "Error reading serialized engine into memory." << std::endl;
      free(engineStream);
      fclose(cacheFile);
      return false;
    }
    fclose(cacheFile);

    // Deserialize the engine and create the execution context
    TRTUniquePtr<nvinfer1::IRuntime> infer{nvinfer1::createInferRuntime(gLogger)};
    engine.reset(infer->deserializeCudaEngine(engineStream, engineSize, NULL));
    context.reset(engine->createExecutionContext());
    free(engineStream);

    return true;
  }
  // No cached engine: the ONNX model has to be parsed and the engine built
  if (!vpIoTools::checkFilename(model_path)) {
    std::cerr << "Could not parse ONNX model. File not found" << std::endl;
    return false;
  }

  TRTUniquePtr<nvinfer1::IBuilder> builder{nvinfer1::createInferBuilder(gLogger)};
  TRTUniquePtr<nvinfer1::INetworkDefinition> network{
      builder->createNetworkV2(1U << (uint32_t)nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH)};
  TRTUniquePtr<nvonnxparser::IParser> parser{nvonnxparser::createParser(*network, gLogger)};

  // Parse the ONNX model
  if (!parser->parseFromFile(model_path.c_str(), static_cast<int>(nvinfer1::ILogger::Severity::kINFO))) {
    std::cerr << "ERROR: could not parse the model." << std::endl;
    return false;
  }

  TRTUniquePtr<nvinfer1::IBuilderConfig> config{builder->createBuilderConfig()};
  // Allow TensorRT to use up to 32 MB of GPU memory for tactic selection
  config->setMaxWorkspaceSize(32 << 20);
  // Use FP16 mode if the platform supports it
  if (builder->platformHasFastFp16()) {
    config->setFlag(nvinfer1::BuilderFlag::kFP16);
  }
  builder->setMaxBatchSize(1);

  engine.reset(builder->buildEngineWithConfig(*network, *config));
  context.reset(engine->createExecutionContext());

  // Serialize the engine and cache it on disk for the next run
  TRTUniquePtr<nvinfer1::IHostMemory> serMem{engine->serialize()};
  if (!serMem) {
    std::cout << "Failed to serialize CUDA engine." << std::endl;
    return true; // the engine is still usable, only caching is skipped
  }

  const char *serData = (char *)serMem->data();
  const size_t serSize = serMem->size();

  // Allocate memory to store the bitstream
  char *engineMemory = (char *)malloc(serSize);
  if (!engineMemory) {
    std::cout << "Failed to allocate memory to store CUDA engine." << std::endl;
    return true; // the engine is still usable, only caching is skipped
  }

  memcpy(engineMemory, serData, serSize);

  // Write the engine cache file
  FILE *cacheFile = NULL;
  cacheFile = fopen(cache_path, "wb");

  fwrite(engineMemory, 1, serSize, cacheFile);
  fclose(cacheFile);
  free(engineMemory);

  return true;
}
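// Typical usage of parseOnnxModel() (the model file name below is illustrative):
//
//   TRTUniquePtr<nvinfer1::ICudaEngine> engine{nullptr};
//   TRTUniquePtr<nvinfer1::IExecutionContext> context{nullptr};
//   if (!parseOnnxModel("ssd-mobilenet.onnx", engine, context))
//     return EXIT_FAILURE;
//   // engine/context are then used to allocate the binding buffers and call context->enqueue()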
int main(int argc, char **argv)
{
  int opt_device = 0;
  unsigned int opt_scale = 1;
  std::string input = "";
  std::string modelFile = "";
  std::string labelFile = "";
  std::string config = "";
  float meanR = 127.5f, meanG = 127.5f, meanB = 127.5f;
  float confThresh = 0.5f;
  float nmsThresh = 0.4f;

  // Parse the command line options
  for (int i = 1; i < argc; i++) {
    if (std::string(argv[i]) == "--device" && i + 1 < argc) {
      opt_device = atoi(argv[i + 1]);
    }
    else if (std::string(argv[i]) == "--input" && i + 1 < argc) {
      input = std::string(argv[i + 1]);
    }
    else if (std::string(argv[i]) == "--model" && i + 1 < argc) {
      modelFile = std::string(argv[i + 1]);
    }
    else if (std::string(argv[i]) == "--config" && i + 1 < argc) {
      config = std::string(argv[i + 1]);
    }
    else if (std::string(argv[i]) == "--input-scale" && i + 1 < argc) {
      opt_scale = atoi(argv[i + 1]);
    }
    else if (std::string(argv[i]) == "--mean" && i + 3 < argc) {
      meanR = atof(argv[i + 1]);
      meanG = atof(argv[i + 2]);
      meanB = atof(argv[i + 3]);
    }
    else if (std::string(argv[i]) == "--confThresh" && i + 1 < argc) {
      confThresh = (float)atof(argv[i + 1]);
    }
    else if (std::string(argv[i]) == "--nmsThresh" && i + 1 < argc) {
      nmsThresh = (float)atof(argv[i + 1]);
    }
    else if (std::string(argv[i]) == "--labels" && i + 1 < argc) {
      labelFile = std::string(argv[i + 1]);
    }
    else if (std::string(argv[i]) == "--help" || std::string(argv[i]) == "-h") {
      std::cout << argv[0]
                << " [--device <camera device number>] [--input <path to image or video>"
                   " (camera is used if input is empty)] [--model <path to net trained weights>]"
                   " [--config <path to net config file>]"
                   " [--input-scale <input scale factor>] [--mean <meanR meanG meanB>]"
                   " [--confThresh <confidence threshold>]"
                   " [--nmsThresh <NMS threshold>] [--labels <path to label file>]"
                << std::endl;
      return EXIT_SUCCESS;
    }
  }

  std::string model_path(modelFile);
  int batch_size = 1;

  // Read the class labels, one per line
  std::vector<std::string> labels;
  if (!labelFile.empty()) {
    std::ifstream f_label(labelFile);
    std::string line;
    while (std::getline(f_label, line)) {
      labels.push_back(line);
    }
  }

  // Build (or load from cache) the TensorRT engine and its execution context
  TRTUniquePtr<nvinfer1::ICudaEngine> engine{nullptr};
  TRTUniquePtr<nvinfer1::IExecutionContext> context{nullptr};
  if (!parseOnnxModel(model_path, engine, context)) {
    std::cout << "Make sure the model file exists. To see available models, please visit: "
                 "\n\twww.github.com/lagadic/visp-images/dnn/object_detection/"
              << std::endl;
    return EXIT_FAILURE;
  }

  std::vector<nvinfer1::Dims> input_dims;
  std::vector<nvinfer1::Dims> output_dims;
  std::vector<void *> buffers(engine->getNbBindings()); // buffers for input and output data
  // Allocate a GPU buffer for each binding and record the input/output dimensions
  for (int i = 0; i < engine->getNbBindings(); ++i) {
    auto binding_size = getSizeByDim(engine->getBindingDimensions(i)) * batch_size * sizeof(float);
    cudaMalloc(&buffers[i], binding_size);

    if (engine->bindingIsInput(i)) {
      input_dims.emplace_back(engine->getBindingDimensions(i));
    }
    else {
      output_dims.emplace_back(engine->getBindingDimensions(i));
    }
  }

  if (input_dims.empty() || output_dims.empty()) {
    std::cerr << "Expect at least one input and one output for network" << std::endl;
    return EXIT_FAILURE;
  }
  // Open the camera, or the image/video file given with --input
  cv::VideoCapture capture;
  if (input.empty()) {
    capture.open(opt_device);
  }
  else {
    capture.open(input);
  }

  if (!capture.isOpened()) {
    std::cout << "Failed to open the camera" << std::endl;
    return EXIT_FAILURE;
  }

  int cap_width = (int)capture.get(cv::CAP_PROP_FRAME_WIDTH);
  int cap_height = (int)capture.get(cv::CAP_PROP_FRAME_HEIGHT);
  capture.set(cv::CAP_PROP_FRAME_WIDTH, cap_width / opt_scale);
  capture.set(cv::CAP_PROP_FRAME_HEIGHT, cap_height / opt_scale);
  vpImage<vpRGBa> I;
  cv::Mat frame;
  int i = 0;
  // Skip up to 20 frames to let the camera warm up
  while ((i++ < 20) && !capture.read(frame)) {
  }

  vpImageConvert::convert(frame, I);
  int width = I.getWidth(), height = I.getHeight();
  std::cout << "Image size: " << width << " x " << height << std::endl;

  vpDisplayX d(I);

  std::vector<cv::Rect> boxesNMS;
  std::vector<int> classIds;

  while (capture.read(frame)) {
    vpImageConvert::convert(frame, I);
    vpDisplay::display(I);

    // Preprocess the frame directly into the GPU input buffer, run inference, then post-process
    preprocessImage(frame, (float *)buffers[0], input_dims[0], meanR, meanG, meanB);
    context->enqueue(batch_size, buffers.data(), 0, nullptr);
    boxesNMS = postprocessResults(buffers, output_dims, batch_size, width, height, confThresh, nmsThresh, classIds);

    // Display the detections that survived NMS
    for (unsigned int i = 0; i < boxesNMS.size(); i++) {
      vpDisplay::displayRectangle(I, vpImagePoint(boxesNMS[i].y, boxesNMS[i].x), boxesNMS[i].width,
                                  boxesNMS[i].height, vpColor::red, false, 2);
    }

    vpDisplay::flush(I);
    if (vpDisplay::getClick(I, false)) // stop on mouse click
      break;
  }

  // Release the GPU buffers
  for (void *buf : buffers)
    cudaFree(buf);

  return EXIT_SUCCESS;
}

#else
int main() { std::cout << "OpenCV is not built with CUDA." << std::endl; }
#endif

#else
int main() { std::cout << "ViSP is not built with TensorRT." << std::endl; }
#endif