Introduction
Since ViSP 3.6.0, a 3rd-party library has been introduced to allow seamless use of JSON (JavaScript Object Notation) in ViSP. The library used is JSON for modern C++. To install it, follow the JSON for modern C++ installation instructions for your system.
With the inclusion of the JSON for modern C++ 3rd-party library into ViSP, it is now possible to store and load a variety of ViSP datatypes in JSON format.
A case of particular interest is the configuration of the generic model-based tracker. As this tracker is complex, providing an easy-to-use configuration file is essential. Loading the configuration for each camera is already possible with XML (as used in Tutorial: Markerless generic model-based tracking using a RGB-D camera). However, this does not load the full state of the tracker but only the configuration of each camera, and combining everything is left to the user.
With JSON serialization, a single file stores the full configuration of the generic tracker: the configuration of each camera, their associated names, as well as their transformations with respect to the reference camera. Moreover, the path to the 3D model can be given in the JSON file, in which case the model is loaded directly.
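As a quick illustration, here is a minimal sketch (assuming ViSP was built with the JSON 3rd party enabled; the file names are placeholders) showing how the whole tracker can be configured from a single file and written back:

#include <visp3/mbt/vpMbGenericTracker.h>

int main()
{
  vpMbGenericTracker tracker;
  // Load cameras, feature types, per-camera transformations and (optionally) the 3D model
  tracker.loadConfigFile("realsense-color-and-depth.json");
  // ... configure or run the tracker ...
  // The current tracker settings can also be written back to a JSON file
  tracker.saveConfigFile("tracker-settings.json");
  return 0;
}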
JSON file structure
Let us first examine the structure of a JSON file used to load a vpMbGenericTracker, given below:
{
"referenceCameraName": "Color",
"trackers": {
"Color": {
"angleAppear": 65.0,
"angleDisappear": 75.00000000000001,
"camTref": {
"cols": 4,
"data": [
1.0,
0.0,
0.0,
0.0,
0.0,
1.0,
0.0,
0.0,
0.0,
0.0,
1.0,
0.0,
0.0,
0.0,
0.0,
1.0
],
"rows": 4,
"type": "vpHomogeneousMatrix"
},
"camera": {
"model": "perspectiveWithoutDistortion",
"px": 605.146728515625,
"py": 604.79150390625,
"u0": 325.53253173828125,
"v0": 244.95083618164063
},
"clipping": {
"far": 0.9,
"flags": [
"all"
],
"near": 0.1
},
"display": {
"features": true,
"projectionError": true
},
"edge": {
"maskSign": 0,
"maskSize": 5,
"minSampleStep": 4.0,
"mu": [
0.5,
0.5
],
"nMask": 180,
"ntotalSample": 0,
"pointsToTrack": 500,
"range": 7,
"sampleStep": 4.0,
"strip": 2,
"thresholdType": "old",
"threshold": 5000.0
},
"klt": {
"blockSize": 3,
"harris": 0.01,
"maskBorder": 5,
"maxFeatures": 300,
"minDistance": 5.0,
"pyramidLevels": 3,
"quality": 0.01,
"windowSize": 5
},
"lod": {
"minLineLengthThresholdGeneral": 50.0,
"minPolygonAreaThresholdGeneral": 2500.0,
"useLod": false
},
"type": [
"edge",
"klt"
],
"visibilityTest": {
"ogre": false,
"scanline": true
}
},
"Depth": {
"angleAppear": 70.0,
"angleDisappear": 80.0,
"camTref": {
"cols": 4,
"data": [
0.999972403049469,
-0.006713358219712973,
-0.003179509425535798,
-0.01465611346065998,
0.006699616555124521,
0.9999682307243347,
-0.004313052631914616,
9.024870814755559e-05,
0.003208363428711891,
0.00429163221269846,
0.9999856352806091,
-0.0004482123476918787,
0.0,
0.0,
0.0,
1.0
],
"rows": 4,
"type": "vpHomogeneousMatrix"
},
"camera": {
"model": "perspectiveWithoutDistortion",
"px": 381.7528076171875,
"py": 381.7528076171875,
"u0": 323.3261413574219,
"v0": 236.82505798339844
},
"clipping": {
"far": 2.0,
"flags": [
"all"
],
"near": 0.01
},
"dense": {
"sampling": {
"x": 1,
"y": 1
}
},
"normals": {
"featureEstimationMethod": "robust",
"pcl": {
"method": 2,
"ransacMaxIter": 200,
"ransacThreshold": 0.001
},
"sampling": {
"x": 2,
"y": 2
}
},
"type": [
"depthDense",
"depthNormal"
],
"display": {
"features": true,
"projectionError": true
},
"lod": {
"minLineLengthThresholdGeneral": 50.0,
"minPolygonAreaThresholdGeneral": 2500.0,
"useLod": false
},
"visibilityTest": {
"ogre": false,
"scanline": true
}
}
},
"vvs" :{
"lambda": 1.0,
"maxIter": 30,
"initialMu": 0.01
},
"version": "1.0"
}
Many settings are optional: if a setting is not present in the .json file, the value already set in the tracker is kept. This means that the .json file can contain a partial configuration, while the rest of the configuration is done in code, as sketched below. Beware, however, that previously defined cameras that are not present in the .json file are removed.
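For instance, a sketch of combining a partial JSON file with in-code settings (the file name and values are only illustrative):

#include <visp3/core/vpMath.h>
#include <visp3/mbt/vpMbGenericTracker.h>

int main()
{
  vpMbGenericTracker tracker;
  // The JSON file may only define the cameras, their feature types and intrinsics...
  tracker.loadConfigFile("partial-config.json");
  // ...while the remaining settings are done in code (values already set are kept otherwise)
  tracker.setAngleAppear(vpMath::rad(65));
  tracker.setAngleDisappear(vpMath::rad(75));
  tracker.setNearClippingDistance(0.1);
  tracker.setFarClippingDistance(2.0);
  return 0;
}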
As this file is fairly dense, let us examine each part separately so that you can tweak it for your use case.
Global tracker settings
Let us first examine the global settings, located at the root of the JSON structure. Some of them are optional, such as:
"model": "../models/cube/cube.cao",
"display": {
"features": true,
"projectionError": true
},
"visibilityTest": {
"ogre": false,
"scanline": true
}
If they are defined globally, they take precedence over the values defined per tracker. Some settings, such as the "vvs" options, can only be defined globally.
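For reference, the "vvs" block controls the virtual visual servoing optimization; assuming the usual vpMbTracker setters, the in-code equivalent would be roughly:

// Equivalent of "vvs": { "lambda": 1.0, "maxIter": 30, "initialMu": 0.01 }
tracker.setLambda(1.0);     // VVS gain
tracker.setMaxIter(30);     // maximum number of VVS iterations
tracker.setInitialMu(0.01); // initial Levenberg-Marquardt parameter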
Individual camera tracker settings
Each camera has a name (the key in the "trackers" dictionary, e.g., "Color", "Depth") and is associated with a combination of tracker types.
First, consider the tracker for the key "Color":
"Color": {
"angleAppear": 65.0,
"angleDisappear": 75.00000000000001,
//! [Transformation]
"camTref": {
"cols": 4,
"data": [
1.0,
0.0,
0.0,
0.0,
0.0,
1.0,
0.0,
0.0,
0.0,
0.0,
1.0,
0.0,
0.0,
0.0,
0.0,
1.0
],
"rows": 4,
"type": "vpHomogeneousMatrix"
},
//! [Transformation]
//! [Camera]
"camera": {
"model": "perspectiveWithoutDistortion",
"px": 605.146728515625,
"py": 604.79150390625,
"u0": 325.53253173828125,
"v0": 244.95083618164063
},
//! [Camera]
"clipping": {
"far": 0.9,
"flags": [
"all"
],
"near": 0.1
},
"display": {
"features": true,
"projectionError": true
},
//! [Edge]
"edge": {
"maskSign": 0,
"maskSize": 5,
"minSampleStep": 4.0,
"mu": [
0.5,
0.5
],
"nMask": 180,
"ntotalSample": 0,
"pointsToTrack": 500,
"range": 7,
"sampleStep": 4.0,
"strip": 2,
"thresholdType": "old",
"threshold": 5000.0
},
//! [Edge]
//! [KLT]
"klt": {
"blockSize": 3,
"harris": 0.01,
"maskBorder": 5,
"maxFeatures": 300,
"minDistance": 5.0,
"pyramidLevels": 3,
"quality": 0.01,
"windowSize": 5
},
//! [KLT]
"lod": {
"minLineLengthThresholdGeneral": 50.0,
"minPolygonAreaThresholdGeneral": 2500.0,
"useLod": false
},
//! [Features]
"type": [
"edge",
"klt"
],
//! [Features]
"visibilityTest": {
"ogre": false,
"scanline": true
}
},
This JSON object contains multiple components which we'll go through one by one.
First, each camera-specific tracker definition must contain its type. Here, it is defined as:
"type": [
"edge",
"klt"
],
stating that this tracker uses both edge (see the vpMe class) and KLT (see vpKltOpencv) features. Other possible values are "depthDense" and "depthNormal" for depth-related features.
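When configuring the tracker in code rather than in JSON, the same combination is obtained by OR-ing the tracker type flags, for instance:

#include <vector>
#include <visp3/mbt/vpMbGenericTracker.h>

// One camera combining edge and KLT features, a second one using dense depth;
// this mirrors "type": ["edge", "klt"] and "type": ["depthDense"] in the JSON file
std::vector<int> tracker_types;
tracker_types.push_back(vpMbGenericTracker::EDGE_TRACKER | vpMbGenericTracker::KLT_TRACKER);
tracker_types.push_back(vpMbGenericTracker::DEPTH_DENSE_TRACKER);
vpMbGenericTracker tracker(tracker_types);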
The next important definition is:
"camTref": {
"cols": 4,
"data": [
1.0,
0.0,
0.0,
0.0,
0.0,
1.0,
0.0,
0.0,
0.0,
0.0,
1.0,
0.0,
0.0,
0.0,
0.0,
1.0
],
"rows": 4,
"type": "vpHomogeneousMatrix"
},
This describes the transformation between this camera and the reference camera. It can also be given as a vpPoseVector JSON representation. If the current camera is the reference camera, then "camTref" may be omitted or set to the identity transformation.
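In code, the same information is provided through a map from camera names to transformations; a sketch (here depth_M_color would come from calibration or from the sensor SDK):

#include <map>
#include <string>
#include <visp3/core/vpHomogeneousMatrix.h>

// "Color" is the reference camera, so its transformation is the identity;
// "Depth" stores the transformation with respect to the reference camera
std::map<std::string, vpHomogeneousMatrix> mapOfCameraTransformations;
mapOfCameraTransformations["Color"] = vpHomogeneousMatrix(); // identity
mapOfCameraTransformations["Depth"] = depth_M_color;         // e.g. from the RealSense SDK
tracker.setCameraTransformationMatrix(mapOfCameraTransformations);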
Next, we must define the camera intrinsics (see vpCameraParameters):
"camera": {
"model": "perspectiveWithoutDistortion",
"px": 605.146728515625,
"py": 604.79150390625,
"u0": 325.53253173828125,
"v0": 244.95083618164063
},
This JSON object follows the convention defined in vpCameraParameters::from_json().
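For comparison, the same intrinsics could be built in code with vpCameraParameters:

#include <visp3/core/vpCameraParameters.h>

// Equivalent of the "camera" block above (perspective projection without distortion)
vpCameraParameters cam_color;
cam_color.initPersProjWithoutDistortion(605.146728515625, 604.79150390625,
                                        325.53253173828125, 244.95083618164063);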
Other common settings include:
Key | Type | Description | Reference | Optional
angleAppear | float | Angle at which a face is considered as appearing, defined in degrees. | vpMbTracker::setAngleAppear() | Yes
angleDisappear | float | Angle at which a face is considered as disappearing, defined in degrees. | vpMbTracker::setAngleDisappear() | Yes
clipping (optional) | | | |
flags | list of vpPolygon3D::vpPolygon3DClippingType | The clipping flags used for this tracker: a list that can combine values from ["none", "all", "fov", "left", "right", "top", "down", "near", "far"]. | vpPolygon3D, vpMbTracker::setClipping() | Yes
near | float | Near plane clipping distance. | vpMbTracker::setNearClippingDistance() | Yes
far | float | Far plane clipping distance. | vpMbTracker::setFarClippingDistance() | Yes
lod (optional) | | | |
useLod | boolean | Whether to use level of detail (LOD). | vpMbTracker::setLod() | Yes
minLineLengthThresholdGeneral | float | Minimum line length to be considered as visible in LOD. | vpMbTracker::setMinLineLengthThresh() | Yes
minPolygonAreaThresholdGeneral | float | Minimum polygon area to be considered as visible in LOD. | vpMbTracker::setMinPolygonAreaThresh() | Yes
display (optional) | | See Global tracker settings | |
visibilityTest (optional) | | See Global tracker settings | |
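As an illustration, here is a rough in-code equivalent of the clipping and LOD settings listed above (note that the setters expect angles in radians, whereas the JSON file uses degrees):

#include <visp3/core/vpMath.h>
#include <visp3/core/vpPolygon3D.h>
#include <visp3/mbt/vpMbGenericTracker.h>

vpMbGenericTracker tracker;
tracker.setAngleAppear(vpMath::rad(65.0));
tracker.setAngleDisappear(vpMath::rad(75.0));
// "clipping": { "flags": ["all"], "near": 0.1, "far": 0.9 }
tracker.setClipping(vpPolygon3D::ALL_CLIPPING);
tracker.setNearClippingDistance(0.1);
tracker.setFarClippingDistance(0.9);
// "lod": { "useLod": false, ... }
tracker.setLod(false);
tracker.setMinLineLengthThresh(50.0);
tracker.setMinPolygonAreaThresh(2500.0);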
Tracker Feature settings
Each type of tracked feature can be customized in the JSON file.
Edge
Edge feature parameters are defined by the conversion vpMe::from_json(). All parameters are optional: if not filled in the JSON file, then the value that is already set will be used.
Example configuration (all parameters):
"edge": {
"maskSign": 0,
"maskSize": 5,
"minSampleStep": 4.0,
"mu": [
0.5,
0.5
],
"nMask": 180,
"ntotalSample": 0,
"pointsToTrack": 500,
"range": 7,
"sampleStep": 4.0,
"strip": 2,
"thresholdType": "old",
"threshold": 5000.0
},
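For comparison, most of the moving-edge settings above can also be applied in code through vpMe before handing it to the tracker (partial sketch; tracker is the vpMbGenericTracker instance):

#include <visp3/me/vpMe.h>

vpMe me;
me.setMaskSize(5);
me.setMaskNumber(180);
me.setRange(7);
me.setThreshold(5000);
me.setMu1(0.5);
me.setMu2(0.5);
me.setSampleStep(4);
tracker.setMovingEdge(me);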
KLT
Each parameter of the KLT tracker (see vpKltOpencv) is optional: if not present it will be set to a fixed default value.
The default configuration is:
"klt": {
"blockSize": 3,
"harris": 0.01,
"maskBorder": 5,
"maxFeatures": 300,
"minDistance": 5.0,
"pyramidLevels": 3,
"quality": 0.01,
"windowSize": 5
},
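The in-code equivalent relies on vpKltOpencv (OpenCV is required); a sketch:

#include <visp3/klt/vpKltOpencv.h>

vpKltOpencv klt;
klt.setBlockSize(3);
klt.setHarrisFreeParameter(0.01);
klt.setMaxFeatures(300);
klt.setMinDistance(5.0);
klt.setPyramidLevels(3);
klt.setQuality(0.01);
klt.setWindowSize(5);
tracker.setKltOpencv(klt);   // tracker is the vpMbGenericTracker instance
tracker.setKltMaskBorder(5); // "maskBorder" is handled by the tracker itself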
Depth normals
The configuration for the depth normal features is the following:
"normals": {
"featureEstimationMethod": "robust",
"pcl": {
"method": 2,
"ransacMaxIter": 200,
"ransacThreshold": 0.001
},
"sampling": {
"x": 2,
"y": 2
}
},
the "featureEstimationMethod" must be one of "robust", "robustSVD" or "pcl". The latter requires PCL to be installed following Point Cloud Library (PCL) installation instructions.
See vpMbDepthNormalTracker for more details.
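In code, these options map to the depth-normal setters of vpMbGenericTracker; a sketch (the PCL-related calls are only meaningful when "featureEstimationMethod" is "pcl"):

#include <visp3/mbt/vpMbGenericTracker.h>

// tracker is the vpMbGenericTracker instance
tracker.setDepthNormalFeatureEstimationMethod(vpMbtFaceDepthNormal::ROBUST_FEATURE_ESTIMATION);
tracker.setDepthNormalPclPlaneEstimationMethod(2);
tracker.setDepthNormalPclPlaneEstimationRansacMaxIter(200);
tracker.setDepthNormalPclPlaneEstimationRansacThreshold(0.001);
tracker.setDepthNormalSamplingStep(2, 2);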
Dense depth
The configuration for the dense depth features only contains the sampling step, as shown below:
"dense": {
"sampling": {
"x": 1,
"y": 1
}
},
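The in-code equivalent is a single call:

// tracker is the vpMbGenericTracker instance; sample every pixel in x and y
tracker.setDepthDenseSamplingStep(1, 1);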
Example: Initializing a tracker from a JSON file
Let's now look at an example of how to use JSON to track an object.
This example is similar to tutorial-mb-generic-tracker-rgbd-realsense.cpp, where an XML configuration file is used to configure the tracker camera by camera. Here, instead, we use a single JSON file to fully configure the tracker.
The full code can be found in tutorial-mb-generic-tracker-rgbd-realsense-json.cpp.
To run the example:
# If no CAD model is defined in the JSON file
$ cd $VISP_WS/visp-build/tutorial/tracking/model-based/generic-rgbd
$ ./tutorial-mb-generic-tracker-rgbd-realsense-json \
    --config model/cube/realsense-color-and-depth.json \
    --model model/cube/cube.cao

# If the CAD model is defined in the JSON file
$ ./tutorial-mb-generic-tracker-rgbd-realsense-json \
    --config model/cube/realsense-color-and-depth.json
The main difference between tutorial-mb-generic-tracker-rgbd-realsense.cpp and tutorial-mb-generic-tracker-rgbd-realsense-json.cpp is that in the latter we specify which features to use in the JSON config file.
First, we initialize the camera (here, a vpRealSense2 camera), query the camera intrinsics, and declare the vpImage objects that we will use to store the data feed and display it:
vpRealSense2 realsense;
int width = 640, height = 480;
int fps = 30;
rs2::config config;
config.enable_stream(RS2_STREAM_COLOR, width, height, RS2_FORMAT_RGBA8, fps);
config.enable_stream(RS2_STREAM_DEPTH, width, height, RS2_FORMAT_Z16, fps);

try {
  realsense.open(config);
}
catch (const vpException &e) {
  std::cout << "Catch an exception: " << e.what() << std::endl;
  std::cout << "Check if the Realsense camera is connected..." << std::endl;
  return EXIT_SUCCESS;
}

vpCameraParameters cam_color =
    realsense.getCameraParameters(RS2_STREAM_COLOR, vpCameraParameters::perspectiveProjWithoutDistortion);
vpCameraParameters cam_depth =
    realsense.getCameraParameters(RS2_STREAM_DEPTH, vpCameraParameters::perspectiveProjWithoutDistortion);
// Transformation from the color camera frame to the depth camera frame, provided by the SDK
vpHomogeneousMatrix depth_M_color = realsense.getTransformation(RS2_STREAM_COLOR, RS2_STREAM_DEPTH);

std::cout << "Sensor internal camera parameters for color camera: " << cam_color << std::endl;
std::cout << "Sensor internal camera parameters for depth camera: " << cam_depth << std::endl;

vpImage<vpRGBa> I_color(height, width);
vpImage<unsigned char> I_gray(height, width);
vpImage<unsigned char> I_depth(height, width);
vpImage<uint16_t> I_depth_raw(height, width);
std::vector<vpColVector> pointcloud;
To load the JSON configuration, we simply call:
vpMbGenericTracker tracker;
tracker.loadConfigFile(config_file); // config_file holds the path to the .json file given with --config
We then query which features are used by the tracker and update its inputs accordingly:
std::map<std::string, vpHomogeneousMatrix> mapOfCameraTransformations;
std::map<std::string, const vpImage<unsigned char> *> mapOfImages;
std::map<std::string, std::string> mapOfInitFiles;
std::map<std::string, const std::vector<vpColVector> *> mapOfPointclouds;
std::map<std::string, unsigned int> mapOfWidths, mapOfHeights;
std::map<std::string, vpCameraParameters> mapOfCameraIntrinsics;

std::string color_key = "", depth_key = "";
for (const auto &tracker_type : tracker.getCameraTrackerTypes()) {
  std::cout << "tracker key == " << tracker_type.first << std::endl;

  // Color-related data for the camera that uses edge and/or KLT features
  if (tracker_type.second & (vpMbGenericTracker::EDGE_TRACKER | vpMbGenericTracker::KLT_TRACKER)) {
    color_key = tracker_type.first;
    mapOfImages[color_key] = &I_gray;
    mapOfInitFiles[color_key] = init_file; // path to the .init file used for initialization by click
    mapOfWidths[color_key] = width;
    mapOfHeights[color_key] = height;
    mapOfCameraIntrinsics[color_key] = cam_color;
  }
  // Depth-related data for the camera that uses dense depth and/or depth normal features
  if (tracker_type.second & (vpMbGenericTracker::DEPTH_DENSE_TRACKER | vpMbGenericTracker::DEPTH_NORMAL_TRACKER)) {
    depth_key = tracker_type.first;
    mapOfImages[depth_key] = &I_depth;
    mapOfWidths[depth_key] = width;
    mapOfHeights[depth_key] = height;
    mapOfCameraIntrinsics[depth_key] = cam_depth;
    mapOfCameraTransformations[depth_key] = depth_M_color;
    mapOfPointclouds[depth_key] = &pointcloud;
  }
}
const bool use_depth = !depth_key.empty();
const bool use_color = !color_key.empty();
To finish the configuration, we load the 3D CAD model if it was not already defined in the JSON file. If no model is given by the user either, an exception is thrown.
// If the JSON file did not specify a 3D model, load the one given on the command line;
// loadModel() throws a vpException if the model file cannot be found
if (tracker.getNbPolygon() == 0) {
  tracker.loadModel(model); // model comes from the --model command line option
}
To ensure the best performance, we may still prefer to use the camera intrinsics and the color-to-depth transformation provided by the RealSense SDK:
std::cout << "Updating configuration with parameters provided by RealSense SDK..." << std::endl;
if (use_color && use_depth) {
}
virtual void setCameraParameters(const vpCameraParameters &camera) VP_OVERRIDE
virtual void setCameraTransformationMatrix(const std::string &cameraName, const vpHomogeneousMatrix &cameraTransformationMatrix)
We then initialize tracking by click:
tracker.initClick(mapOfImages, mapOfInitFiles, true);
Finally, we can start tracking:
std::vector<double> times_vec;
try {
  bool quit = false;
  double loop_t = 0;
  vpHomogeneousMatrix cMo;

  while (!quit) {
    double t = vpTime::measureTimeMs();
    bool tracking_failed = false;

    // Acquire images and point cloud from the Realsense camera
    realsense.acquire((unsigned char *)I_color.bitmap, (unsigned char *)I_depth_raw.bitmap, &pointcloud,
                      nullptr, nullptr);

    if (use_color) {
      vpImageConvert::convert(I_color, I_gray);
      vpDisplay::display(I_gray);
    }
    if (use_depth) {
      vpImageConvert::createDepthHistogram(I_depth_raw, I_depth);
      vpDisplay::display(I_depth);
    }

    // Update the tracker input data
    if (use_color && use_depth) {
      mapOfImages[color_key] = &I_gray;
      mapOfPointclouds[depth_key] = &pointcloud;
      mapOfWidths[depth_key] = width;
      mapOfHeights[depth_key] = height;
    }
    else if (use_color) {
      mapOfImages[color_key] = &I_gray;
    }
    else if (use_depth) {
      mapOfPointclouds[depth_key] = &pointcloud;
    }

    // Run the tracker
    try {
      if (use_color && use_depth) {
        tracker.track(mapOfImages, mapOfPointclouds, mapOfWidths, mapOfHeights);
      }
      else if (use_color) {
        tracker.track(I_gray);
      }
      else if (use_depth) {
        tracker.track(mapOfImages, mapOfPointclouds, mapOfWidths, mapOfHeights);
      }
    }
    catch (const vpException &e) {
      std::cout << "Tracker exception: " << e.getStringMessage() << std::endl;
      tracking_failed = true;
    }

    // Get the object pose and check the projection error
    tracker.getPose(cMo);
    double proj_error = 0;
    if (tracker.getTrackerType() & vpMbGenericTracker::EDGE_TRACKER) {
      // Already computed during the tracking step when edge features are used
      proj_error = tracker.getProjectionError();
    }
    else {
      proj_error = tracker.computeCurrentProjectionError(I_gray, cMo, cam_color);
    }

    // proj_error_threshold is a user-defined threshold (command line option)
    if (proj_error > proj_error_threshold) {
      std::cout << "Tracker needs to restart (projection error detected: " << proj_error << ")" << std::endl;
    }

    // Display the tracking results
    if (!tracking_failed) {
      if (use_color && use_depth) {
        tracker.display(I_gray, I_depth, cMo, depth_M_color * cMo, cam_color, cam_depth, vpColor::red, 3);
        vpDisplay::displayFrame(I_gray, cMo, cam_color, 0.05, vpColor::none, 3);
        vpDisplay::displayFrame(I_depth, depth_M_color * cMo, cam_depth, 0.05, vpColor::none, 3);
      }
      else if (use_color) {
        tracker.display(I_gray, cMo, cam_color, vpColor::red, 3);
        vpDisplay::displayFrame(I_gray, cMo, cam_color, 0.05, vpColor::none, 3);
      }
      else if (use_depth) {
        tracker.display(I_depth, cMo, cam_depth, vpColor::red, 3);
        vpDisplay::displayFrame(I_depth, cMo, cam_depth, 0.05, vpColor::none, 3);
      }
      // (the full tutorial also displays the number of tracked features of each type here)
    }

    // Measure and display the loop time; a click in one of the windows stops the demo
    std::stringstream ss;
    loop_t = vpTime::measureTimeMs() - t;
    times_vec.push_back(loop_t);
    ss << "Loop time: " << loop_t << " ms";
    if (use_color) {
      vpDisplay::displayText(I_gray, 20, 20, ss.str(), vpColor::red);
      vpDisplay::flush(I_gray);
      if (vpDisplay::getClick(I_gray, false)) {
        quit = true;
      }
    }
    if (use_depth) {
      vpDisplay::displayText(I_depth, 20, 20, ss.str(), vpColor::red);
      vpDisplay::flush(I_depth);
      if (vpDisplay::getClick(I_depth, false)) {
        quit = true;
      }
    }
  }
}
catch (const vpException &e) {
  std::cout << "Caught an exception: " << e.what() << std::endl;
}

if (!times_vec.empty()) {
  std::cout << "Processing time, Mean: " << vpMath::getMean(times_vec)
            << " ms ; Median: " << vpMath::getMedian(times_vec)
            << " ms ; Std: " << vpMath::getStdev(times_vec) << " ms" << std::endl;
}
Next tutorial
If you want to introduce JSON usage in your code, you can continue with Tutorial: Using JSON serialization to save your data and read program arguments.