Introduction
Since ViSP 3.6.0, a 3rd-party library has been introduced to allow seamless use of JSON (JavaScript Object Notation) in ViSP. The library used is JSON for modern C++. To install it, follow the JSON for modern C++ installation instructions for your system.
With the inclusion of the JSON for modern C++ 3rd-party library into ViSP, it is now possible to store and load a variety of ViSP datatypes in JSON format.
A case of particular interest is the configuration of the generic model-based tracker. As this tracker is complex, providing an easy-to-use configuration file is essential. Loading the configuration for each camera is already possible with XML (as used in Tutorial: Markerless generic model-based tracking using a RGB-D camera). However, this does not load the full state of the tracker but only the configuration of each camera, and combining everything is left to the user.
With JSON serialization, a single file stores the full configuration of the generic tracker: the configuration of each camera, their associated names, as well as their transformations with respect to the reference camera. Moreover, the path to the 3D model can be given in the JSON file, in which case the model is loaded directly.
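As a quick illustration, here is a minimal sketch (assuming ViSP was built with the JSON 3rd party enabled; the file names are placeholders) showing how the whole tracker can be configured from a single file and written back:

#include <visp3/mbt/vpMbGenericTracker.h>

int main()
{
  vpMbGenericTracker tracker;
  // Load cameras, feature types, per-camera transformations and (optionally) the 3D model
  tracker.loadConfigFile("realsense-color-and-depth.json");
  // ... configure or run the tracker ...
  // The current tracker settings can also be written back to a JSON file
  tracker.saveConfigFile("tracker-settings.json");
  return 0;
}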
JSON file structure
Let us first examine the structure of a JSON file used to load a vpMbGenericTracker, given below:
{
"referenceCameraName": "Color",
"trackers": {
"Color": {
"angleAppear": 65.0,
"angleDisappear": 75.00000000000001,
"camTref": {
"cols": 4,
"data": [
1.0,
0.0,
0.0,
0.0,
0.0,
1.0,
0.0,
0.0,
0.0,
0.0,
1.0,
0.0,
0.0,
0.0,
0.0,
1.0
],
"rows": 4,
"type": "vpHomogeneousMatrix"
},
"camera": {
"model": "perspectiveWithoutDistortion",
"px": 605.146728515625,
"py": 604.79150390625,
"u0": 325.53253173828125,
"v0": 244.95083618164063
},
"clipping": {
"far": 0.9,
"flags": [
"all"
],
"near": 0.1
},
"display": {
"features": true,
"projectionError": true
},
"edge": {
"maskSign": 0,
"maskSize": 5,
"minSampleStep": 4.0,
"mu": [
0.5,
0.5
],
"nMask": 180,
"ntotalSample": 0,
"pointsToTrack": 500,
"range": 7,
"sampleStep": 4.0,
"strip": 2,
"thresholdType": "old",
"threshold": 5000.0
},
"klt": {
"blockSize": 3,
"harris": 0.01,
"maskBorder": 5,
"maxFeatures": 300,
"minDistance": 5.0,
"pyramidLevels": 3,
"quality": 0.01,
"windowSize": 5
},
"lod": {
"minLineLengthThresholdGeneral": 50.0,
"minPolygonAreaThresholdGeneral": 2500.0,
"useLod": false
},
"type": [
"edge",
"klt"
],
"visibilityTest": {
"ogre": false,
"scanline": true
}
},
"Depth": {
"angleAppear": 70.0,
"angleDisappear": 80.0,
"camTref": {
"cols": 4,
"data": [
0.999972403049469,
-0.006713358219712973,
-0.003179509425535798,
-0.01465611346065998,
0.006699616555124521,
0.9999682307243347,
-0.004313052631914616,
9.024870814755559e-05,
0.003208363428711891,
0.00429163221269846,
0.9999856352806091,
-0.0004482123476918787,
0.0,
0.0,
0.0,
1.0
],
"rows": 4,
"type": "vpHomogeneousMatrix"
},
"camera": {
"model": "perspectiveWithoutDistortion",
"px": 381.7528076171875,
"py": 381.7528076171875,
"u0": 323.3261413574219,
"v0": 236.82505798339844
},
"clipping": {
"far": 2.0,
"flags": [
"all"
],
"near": 0.01
},
"dense": {
"sampling": {
"x": 1,
"y": 1
}
},
"normals": {
"featureEstimationMethod": "robust",
"pcl": {
"method": 2,
"ransacMaxIter": 200,
"ransacThreshold": 0.001
},
"sampling": {
"x": 2,
"y": 2
}
},
"type": [
"depthDense",
"depthNormal"
],
"display": {
"features": true,
"projectionError": true
},
"lod": {
"minLineLengthThresholdGeneral": 50.0,
"minPolygonAreaThresholdGeneral": 2500.0,
"useLod": false
},
"visibilityTest": {
"ogre": false,
"scanline": true
}
}
},
"vvs" :{
"lambda": 1.0,
"maxIter": 30,
"initialMu": 0.01
},
"version": "1.0"
}
Many settings are optional: if a setting is not present in the .json file, the value already set in the tracker is kept. This means that the .json file can contain a partial configuration, while the rest of the configuration is done in code, as sketched below. Beware, however, that previously defined cameras that are not present in the .json file are removed.
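For instance, a sketch of combining a partial JSON file with in-code settings (the file name and values are only illustrative):

#include <visp3/core/vpMath.h>
#include <visp3/mbt/vpMbGenericTracker.h>

int main()
{
  vpMbGenericTracker tracker;
  // The JSON file may only define the cameras, their feature types and intrinsics...
  tracker.loadConfigFile("partial-config.json");
  // ...while the remaining settings are done in code (values already set are kept otherwise)
  tracker.setAngleAppear(vpMath::rad(65));
  tracker.setAngleDisappear(vpMath::rad(75));
  tracker.setNearClippingDistance(0.1);
  tracker.setFarClippingDistance(2.0);
  return 0;
}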
As this file is fairly dense, let us examine each part separately so that you can tweak it for your use case.
Global tracker settings
Let us first examine the global settings, located at the root of the JSON structure. Some of them are optional, such as:
"model": "../models/cube/cube.cao",
"display": {
"features": true,
"projectionError": true
},
"visibilityTest": {
"ogre": false,
"scanline": true
}
If they are defined globally, they take precedence over the values defined per tracker. Some settings, such as the "vvs" options, can only be defined globally.
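For reference, the "vvs" block controls the virtual visual servoing optimization; assuming the usual vpMbTracker setters, the in-code equivalent would be roughly:

// Equivalent of "vvs": { "lambda": 1.0, "maxIter": 30, "initialMu": 0.01 }
tracker.setLambda(1.0);     // VVS gain
tracker.setMaxIter(30);     // maximum number of VVS iterations
tracker.setInitialMu(0.01); // initial Levenberg-Marquardt parameter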
Individual camera tracker settings
Each camera has a name (the key in the "trackers" dictionary, e.g., "Color", "Depth") and is associated with a combination of tracker types.
First, consider the tracker for the key "Color":
"Color": {
"angleAppear": 65.0,
"angleDisappear": 75.00000000000001,
//! [Transformation]
"camTref": {
"cols": 4,
"data": [
1.0,
0.0,
0.0,
0.0,
0.0,
1.0,
0.0,
0.0,
0.0,
0.0,
1.0,
0.0,
0.0,
0.0,
0.0,
1.0
],
"rows": 4,
"type": "vpHomogeneousMatrix"
},
//! [Transformation]
//! [Camera]
"camera": {
"model": "perspectiveWithoutDistortion",
"px": 605.146728515625,
"py": 604.79150390625,
"u0": 325.53253173828125,
"v0": 244.95083618164063
},
//! [Camera]
"clipping": {
"far": 0.9,
"flags": [
"all"
],
"near": 0.1
},
"display": {
"features": true,
"projectionError": true
},
//! [Edge]
"edge": {
"maskSign": 0,
"maskSize": 5,
"minSampleStep": 4.0,
"mu": [
0.5,
0.5
],
"nMask": 180,
"ntotalSample": 0,
"pointsToTrack": 500,
"range": 7,
"sampleStep": 4.0,
"strip": 2,
"thresholdType": "old",
"threshold": 5000.0
},
//! [Edge]
//! [KLT]
"klt": {
"blockSize": 3,
"harris": 0.01,
"maskBorder": 5,
"maxFeatures": 300,
"minDistance": 5.0,
"pyramidLevels": 3,
"quality": 0.01,
"windowSize": 5
},
//! [KLT]
"lod": {
"minLineLengthThresholdGeneral": 50.0,
"minPolygonAreaThresholdGeneral": 2500.0,
"useLod": false
},
//! [Features]
"type": [
"edge",
"klt"
],
//! [Features]
"visibilityTest": {
"ogre": false,
"scanline": true
}
},
This JSON object contains multiple components which we'll go through one by one.
First, each camera-specific tracker definition must contain its type. Here, it is defined as:
"type": [
"edge",
"klt"
],
stating that this tracker uses both edge (see the vpMe class) and KLT (see vpKltOpencv) features. Other possible values are "depthDense" and "depthNormal" for depth-related features.
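When configuring the tracker in code rather than in JSON, the same combination is obtained by OR-ing the tracker type flags, for instance:

#include <vector>
#include <visp3/mbt/vpMbGenericTracker.h>

// One camera combining edge and KLT features, a second one using dense depth;
// this mirrors "type": ["edge", "klt"] and "type": ["depthDense"] in the JSON file
std::vector<int> tracker_types;
tracker_types.push_back(vpMbGenericTracker::EDGE_TRACKER | vpMbGenericTracker::KLT_TRACKER);
tracker_types.push_back(vpMbGenericTracker::DEPTH_DENSE_TRACKER);
vpMbGenericTracker tracker(tracker_types);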
The next important definition is:
"camTref": {
"cols": 4,
"data": [
1.0,
0.0,
0.0,
0.0,
0.0,
1.0,
0.0,
0.0,
0.0,
0.0,
1.0,
0.0,
0.0,
0.0,
0.0,
1.0
],
"rows": 4,
"type": "vpHomogeneousMatrix"
},
This describes the transformation between this camera and the reference camera. It can also be given as a vpPoseVector JSON representation. If the current camera is the reference camera, then "camTref" may be omitted or set to the identity transformation.
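In code, the same information is provided through a map from camera names to transformations; a sketch (here depth_M_color would come from calibration or from the sensor SDK):

#include <map>
#include <string>
#include <visp3/core/vpHomogeneousMatrix.h>

// "Color" is the reference camera, so its transformation is the identity;
// "Depth" stores the transformation with respect to the reference camera
std::map<std::string, vpHomogeneousMatrix> mapOfCameraTransformations;
mapOfCameraTransformations["Color"] = vpHomogeneousMatrix(); // identity
mapOfCameraTransformations["Depth"] = depth_M_color;         // e.g. from the RealSense SDK
tracker.setCameraTransformationMatrix(mapOfCameraTransformations);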
Next, we must define the camera intrinsics (see vpCameraParameters):
"camera": {
"model": "perspectiveWithoutDistortion",
"px": 605.146728515625,
"py": 604.79150390625,
"u0": 325.53253173828125,
"v0": 244.95083618164063
},
This JSON object follows the convention defined in vpCameraParameters::from_json().
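For comparison, the same intrinsics could be built in code with vpCameraParameters:

#include <visp3/core/vpCameraParameters.h>

// Equivalent of the "camera" block above (perspective projection without distortion)
vpCameraParameters cam_color;
cam_color.initPersProjWithoutDistortion(605.146728515625, 604.79150390625,
                                        325.53253173828125, 244.95083618164063);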
Other common settings include:
Key | Type | Description | Reference | Optional
angleAppear | float | Angle at which a face is considered as appearing, defined in degrees. | vpMbTracker::setAngleAppear() | Yes
angleDisappear | float | Angle at which a face is considered as disappearing, defined in degrees. | vpMbTracker::setAngleDisappear() | Yes
clipping (optional) | | | |
flags | list of vpPolygon3D::vpPolygon3DClippingType | The clipping flags used for this tracker: a list that can combine values from ["none", "all", "fov", "left", "right", "top", "down", "near", "far"]. | vpPolygon3D, vpMbTracker::setClipping() | Yes
near | float | Near plane clipping distance. | vpMbTracker::setNearClippingDistance() | Yes
far | float | Far plane clipping distance. | vpMbTracker::setFarClippingDistance() | Yes
lod (optional) | | | |
useLod | boolean | Whether to use level of detail (LOD). | vpMbTracker::setLod() | Yes
minLineLengthThresholdGeneral | float | Minimum line length to be considered as visible in LOD. | vpMbTracker::setMinLineLengthThresh() | Yes
minPolygonAreaThresholdGeneral | float | Minimum polygon area to be considered as visible in LOD. | vpMbTracker::setMinPolygonAreaThresh() | Yes
display (optional) | | See Global tracker settings | |
visibilityTest (optional) | | See Global tracker settings | |
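As an illustration, here is a rough in-code equivalent of the clipping and LOD settings listed above (note that the setters expect angles in radians, whereas the JSON file uses degrees):

#include <visp3/core/vpMath.h>
#include <visp3/core/vpPolygon3D.h>
#include <visp3/mbt/vpMbGenericTracker.h>

vpMbGenericTracker tracker;
tracker.setAngleAppear(vpMath::rad(65.0));
tracker.setAngleDisappear(vpMath::rad(75.0));
// "clipping": { "flags": ["all"], "near": 0.1, "far": 0.9 }
tracker.setClipping(vpPolygon3D::ALL_CLIPPING);
tracker.setNearClippingDistance(0.1);
tracker.setFarClippingDistance(0.9);
// "lod": { "useLod": false, ... }
tracker.setLod(false);
tracker.setMinLineLengthThresh(50.0);
tracker.setMinPolygonAreaThresh(2500.0);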
Tracker Feature settings
Each type of tracked feature can be customized in the JSON file.
Edge
Edge feature parameters are defined by the conversion vpMe::from_json(). All parameters are optional: if not filled in the JSON file, then the value that is already set will be used.
Example configuration (all parameters):
"edge": {
"maskSign": 0,
"maskSize": 5,
"minSampleStep": 4.0,
"mu": [
0.5,
0.5
],
"nMask": 180,
"ntotalSample": 0,
"pointsToTrack": 500,
"range": 7,
"sampleStep": 4.0,
"strip": 2,
"thresholdType": "old",
"threshold": 5000.0
},
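For comparison, most of the moving-edge settings above can also be applied in code through vpMe before handing it to the tracker (partial sketch; tracker is the vpMbGenericTracker instance):

#include <visp3/me/vpMe.h>

vpMe me;
me.setMaskSize(5);
me.setMaskNumber(180);
me.setRange(7);
me.setThreshold(5000);
me.setMu1(0.5);
me.setMu2(0.5);
me.setSampleStep(4);
tracker.setMovingEdge(me);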
KLT
Each parameter of the KLT tracker (see vpKltOpencv) is optional: if not present it will be set to a fixed default value.
The default configuration is:
"klt": {
"blockSize": 3,
"harris": 0.01,
"maskBorder": 5,
"maxFeatures": 300,
"minDistance": 5.0,
"pyramidLevels": 3,
"quality": 0.01,
"windowSize": 5
},
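The in-code equivalent relies on vpKltOpencv (OpenCV is required); a sketch:

#include <visp3/klt/vpKltOpencv.h>

vpKltOpencv klt;
klt.setBlockSize(3);
klt.setHarrisFreeParameter(0.01);
klt.setMaxFeatures(300);
klt.setMinDistance(5.0);
klt.setPyramidLevels(3);
klt.setQuality(0.01);
klt.setWindowSize(5);
tracker.setKltOpencv(klt);   // tracker is the vpMbGenericTracker instance
tracker.setKltMaskBorder(5); // "maskBorder" is handled by the tracker itself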
Depth normals
The configuration for the depth normal features is the following:
"normals": {
"featureEstimationMethod": "robust",
"pcl": {
"method": 2,
"ransacMaxIter": 200,
"ransacThreshold": 0.001
},
"sampling": {
"x": 2,
"y": 2
}
},
the "featureEstimationMethod" must be one of "robust", "robustSVD" or "pcl". The latter requires PCL to be installed following Point Cloud Library (PCL) installation instructions.
See vpMbDepthNormalTracker for more details.
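In code, these options map to the depth-normal setters of vpMbGenericTracker; a sketch (the PCL-related calls are only meaningful when "featureEstimationMethod" is "pcl"):

#include <visp3/mbt/vpMbGenericTracker.h>

// tracker is the vpMbGenericTracker instance
tracker.setDepthNormalFeatureEstimationMethod(vpMbtFaceDepthNormal::ROBUST_FEATURE_ESTIMATION);
tracker.setDepthNormalPclPlaneEstimationMethod(2);
tracker.setDepthNormalPclPlaneEstimationRansacMaxIter(200);
tracker.setDepthNormalPclPlaneEstimationRansacThreshold(0.001);
tracker.setDepthNormalSamplingStep(2, 2);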
Dense depth
The configuration for the dense depth features only contains the sampling step, as shown below:
"dense": {
"sampling": {
"x": 1,
"y": 1
}
},
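The in-code equivalent is a single call:

// tracker is the vpMbGenericTracker instance; sample every pixel in x and y
tracker.setDepthDenseSamplingStep(1, 1);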
Example: Initializing a tracker from a JSON file
Let's now look at an example of how to use JSON to track an object.
This example is similar to tutorial-mb-generic-tracker-rgbd-realsense.cpp, where an XML configuration file is used to configure the tracker camera by camera. Here, instead, we use a single JSON file to fully configure the tracker.
The full code can be found in tutorial-mb-generic-tracker-rgbd-realsense-json.cpp.
To run the example:
# If no CAD model is defined in the JSON file
$ cd $VISP_WS/visp-build/tutorial/tracking/model-based/generic-rgbd
$ ./tutorial-mb-generic-tracker-rgbd-realsense-json \
    --config model/cube/realsense-color-and-depth.json \
    --model model/cube/cube.cao

# If the CAD model is defined in the JSON file
$ ./tutorial-mb-generic-tracker-rgbd-realsense-json \
    --config model/cube/realsense-color-and-depth.json
The main difference between tutorial-mb-generic-tracker-rgbd-realsense.cpp and tutorial-mb-generic-tracker-rgbd-realsense-json.cpp is that in the latter we specify which features to use in the JSON config file.
First, we initialize the camera (here, a vpRealSense2 camera), query the camera intrinsics, and declare the vpImage objects that we will use to store the data feed and display it:
vpRealSense2 realsense;
int width = 640, height = 480;
int fps = 30;
rs2::config config;
config.enable_stream(RS2_STREAM_COLOR, width, height, RS2_FORMAT_RGBA8, fps);
config.enable_stream(RS2_STREAM_DEPTH, width, height, RS2_FORMAT_Z16, fps);

try {
  realsense.open(config);
}
catch (const vpException &e) {
  std::cout << "Catch an exception: " << e.what() << std::endl;
  std::cout << "Check if the Realsense camera is connected..." << std::endl;
  return EXIT_SUCCESS;
}

vpCameraParameters cam_color =
    realsense.getCameraParameters(RS2_STREAM_COLOR, vpCameraParameters::perspectiveProjWithoutDistortion);
vpCameraParameters cam_depth =
    realsense.getCameraParameters(RS2_STREAM_DEPTH, vpCameraParameters::perspectiveProjWithoutDistortion);
// Transformation from the color camera frame to the depth camera frame, provided by the SDK
vpHomogeneousMatrix depth_M_color = realsense.getTransformation(RS2_STREAM_COLOR, RS2_STREAM_DEPTH);

std::cout << "Sensor internal camera parameters for color camera: " << cam_color << std::endl;
std::cout << "Sensor internal camera parameters for depth camera: " << cam_depth << std::endl;

vpImage<vpRGBa> I_color(height, width);
vpImage<unsigned char> I_gray(height, width);
vpImage<unsigned char> I_depth(height, width);
vpImage<uint16_t> I_depth_raw(height, width);
std::vector<vpColVector> pointcloud;
To load the JSON configuration, we simply call:
vpMbGenericTracker tracker;
tracker.loadConfigFile(config_file); // config_file holds the path to the .json file given with --config
We then query which features are used by the tracker and update its inputs accordingly:
std::map<std::string, vpHomogeneousMatrix> mapOfCameraTransformations;
std::map<std::string, const vpImage<unsigned char> *> mapOfImages;
std::map<std::string, std::string> mapOfInitFiles;
std::map<std::string, const std::vector<vpColVector> *> mapOfPointclouds;
std::map<std::string, unsigned int> mapOfWidths, mapOfHeights;
std::map<std::string, vpCameraParameters> mapOfCameraIntrinsics;

std::string color_key = "", depth_key = "";
for (const auto &tracker_type : tracker.getCameraTrackerTypes()) {
  std::cout << "tracker key == " << tracker_type.first << std::endl;

  // Color-related data for the camera that uses edge and/or KLT features
  if (tracker_type.second & (vpMbGenericTracker::EDGE_TRACKER | vpMbGenericTracker::KLT_TRACKER)) {
    color_key = tracker_type.first;
    mapOfImages[color_key] = &I_gray;
    mapOfInitFiles[color_key] = init_file; // path to the .init file used for initialization by click
    mapOfWidths[color_key] = width;
    mapOfHeights[color_key] = height;
    mapOfCameraIntrinsics[color_key] = cam_color;
  }
  // Depth-related data for the camera that uses dense depth and/or depth normal features
  if (tracker_type.second & (vpMbGenericTracker::DEPTH_DENSE_TRACKER | vpMbGenericTracker::DEPTH_NORMAL_TRACKER)) {
    depth_key = tracker_type.first;
    mapOfImages[depth_key] = &I_depth;
    mapOfWidths[depth_key] = width;
    mapOfHeights[depth_key] = height;
    mapOfCameraIntrinsics[depth_key] = cam_depth;
    mapOfCameraTransformations[depth_key] = depth_M_color;
    mapOfPointclouds[depth_key] = &pointcloud;
  }
}
const bool use_depth = !depth_key.empty();
const bool use_color = !color_key.empty();
To finish the configuration, we load the 3D CAD model if it was not already defined in the JSON file. If no model is given by the user either, an exception is thrown.
// If the JSON file did not specify a 3D model, load the one given on the command line;
// loadModel() throws a vpException if the model file cannot be found
if (tracker.getNbPolygon() == 0) {
  tracker.loadModel(model); // model comes from the --model command line option
}
To ensure the best performance, we may still prefer to use the camera intrinsics and the color-to-depth transformation provided by the RealSense SDK:
std::cout << "Updating configuration with parameters provided by RealSense SDK..." << std::endl;
if (use_color && use_depth) {
}
virtual void setCameraParameters(const vpCameraParameters &camera) VP_OVERRIDE
virtual void setCameraTransformationMatrix(const std::string &cameraName, const vpHomogeneousMatrix &cameraTransformationMatrix)
We then initialize tracking by click:
tracker.initClick(mapOfImages, mapOfInitFiles, true);
Finally, we can start tracking:
std::vector<double> times_vec;
try {
  bool quit = false;
  double loop_t = 0;
  vpHomogeneousMatrix cMo;

  while (!quit) {
    double t = vpTime::measureTimeMs();
    bool tracking_failed = false;

    // Acquire images and point cloud from the Realsense camera
    realsense.acquire((unsigned char *)I_color.bitmap, (unsigned char *)I_depth_raw.bitmap, &pointcloud,
                      nullptr, nullptr);

    if (use_color) {
      vpImageConvert::convert(I_color, I_gray);
      vpDisplay::display(I_gray);
    }
    if (use_depth) {
      vpImageConvert::createDepthHistogram(I_depth_raw, I_depth);
      vpDisplay::display(I_depth);
    }

    // Update the tracker input data
    if (use_color && use_depth) {
      mapOfImages[color_key] = &I_gray;
      mapOfPointclouds[depth_key] = &pointcloud;
      mapOfWidths[depth_key] = width;
      mapOfHeights[depth_key] = height;
    }
    else if (use_color) {
      mapOfImages[color_key] = &I_gray;
    }
    else if (use_depth) {
      mapOfPointclouds[depth_key] = &pointcloud;
    }

    // Run the tracker
    try {
      if (use_color && use_depth) {
        tracker.track(mapOfImages, mapOfPointclouds, mapOfWidths, mapOfHeights);
      }
      else if (use_color) {
        tracker.track(I_gray);
      }
      else if (use_depth) {
        tracker.track(mapOfImages, mapOfPointclouds, mapOfWidths, mapOfHeights);
      }
    }
    catch (const vpException &e) {
      std::cout << "Tracker exception: " << e.getStringMessage() << std::endl;
      tracking_failed = true;
    }

    // Get the object pose and check the projection error
    tracker.getPose(cMo);
    double proj_error = 0;
    if (tracker.getTrackerType() & vpMbGenericTracker::EDGE_TRACKER) {
      // Already computed during the tracking step when edge features are used
      proj_error = tracker.getProjectionError();
    }
    else {
      proj_error = tracker.computeCurrentProjectionError(I_gray, cMo, cam_color);
    }

    // proj_error_threshold is a user-defined threshold (command line option)
    if (proj_error > proj_error_threshold) {
      std::cout << "Tracker needs to restart (projection error detected: " << proj_error << ")" << std::endl;
    }

    // Display the tracking results
    if (!tracking_failed) {
      if (use_color && use_depth) {
        tracker.display(I_gray, I_depth, cMo, depth_M_color * cMo, cam_color, cam_depth, vpColor::red, 3);
        vpDisplay::displayFrame(I_gray, cMo, cam_color, 0.05, vpColor::none, 3);
        vpDisplay::displayFrame(I_depth, depth_M_color * cMo, cam_depth, 0.05, vpColor::none, 3);
      }
      else if (use_color) {
        tracker.display(I_gray, cMo, cam_color, vpColor::red, 3);
        vpDisplay::displayFrame(I_gray, cMo, cam_color, 0.05, vpColor::none, 3);
      }
      else if (use_depth) {
        tracker.display(I_depth, cMo, cam_depth, vpColor::red, 3);
        vpDisplay::displayFrame(I_depth, cMo, cam_depth, 0.05, vpColor::none, 3);
      }
      // (the full tutorial also displays the number of tracked features of each type here)
    }

    // Measure and display the loop time; a click in one of the windows stops the demo
    std::stringstream ss;
    loop_t = vpTime::measureTimeMs() - t;
    times_vec.push_back(loop_t);
    ss << "Loop time: " << loop_t << " ms";
    if (use_color) {
      vpDisplay::displayText(I_gray, 20, 20, ss.str(), vpColor::red);
      vpDisplay::flush(I_gray);
      if (vpDisplay::getClick(I_gray, false)) {
        quit = true;
      }
    }
    if (use_depth) {
      vpDisplay::displayText(I_depth, 20, 20, ss.str(), vpColor::red);
      vpDisplay::flush(I_depth);
      if (vpDisplay::getClick(I_depth, false)) {
        quit = true;
      }
    }
  }
}
catch (const vpException &e) {
  std::cout << "Caught an exception: " << e.what() << std::endl;
}

if (!times_vec.empty()) {
  std::cout << "Processing time, Mean: " << vpMath::getMean(times_vec)
            << " ms ; Median: " << vpMath::getMedian(times_vec)
            << " ms ; Std: " << vpMath::getStdev(times_vec) << " ms" << std::endl;
}
Next tutorial
If you want to introduce JSON usage in your code, you can continue with Tutorial: Using JSON serialization to save your data and read program arguments.