Introduction

An alternative and very elegant solution to pose estimation from points has been proposed in [3] [6]. This algorithm is called POSIT.

Source code

The following source code that uses ViSP is also available in pose-dementhon-visp.cpp file. It allows to compute the pose of the camera from points.

#include <visp/vpColVector.h>
#include <visp/vpHomogeneousMatrix.h>
#include <visp/vpMatrix.h>
vpHomogeneousMatrix pose_dementhon(const std::vector< vpColVector > &wX, const std::vector< vpColVector > &x)
{
  int npoints = (int)wX.size();
  vpColVector r1, r2, r3;
  vpMatrix A(npoints, 4);
  for(int i = 0; i < npoints; i++) {
    for (int j = 0; j < 4; j++) {
      A[i][j] = wX[i][j];
    }
  }
  vpMatrix Ap = A.pseudoInverse();
  vpColVector eps(npoints);
  eps = 0; // Initialize epsilon_i = 0
  vpColVector Bx(npoints);
  vpColVector By(npoints);
  double tx, ty, tz;
  vpMatrix I, J;
  vpColVector Istar(3), Jstar(3);
  // POSIT loop
  for(unsigned int iter = 0; iter < 20; iter ++) {
    for(int i = 0; i < npoints; i++) {
      Bx[i] = x[i][0] * (eps[i] + 1.);
      By[i] = x[i][1] * (eps[i] + 1.);
    }
    I = Ap * Bx; // Notice that the pseudo inverse
    J = Ap * By; // of matrix A is a constant that has been precompiled.
    for (int i = 0; i < 3; i++) {
      Istar[i] = I[i][0];
      Jstar[i] = J[i][0];
    }
    // Estimation of the rotation matrix
    double normI = sqrt( Istar.sumSquare() );
    double normJ = sqrt( Jstar.sumSquare() );
    r1 = Istar / normI;
    r2 = Jstar / normJ;
    r3 = vpColVector::crossProd(r1, r2);
    // Estimation of the translation
    tz = 1/normI;
    tx = tz * I[3][0];
    ty = tz * J[3][0];
    // Update epsilon_i
    for(int i = 0; i < npoints; i++)
      eps[i] = (r3[0] * wX[i][0] + r3[1] * wX[i][1] + r3[2] * wX[i][2]) / tz;
  }
  vpHomogeneousMatrix cTw;
  // Update translation vector
  cTw[0][3] = tx;
  cTw[1][3] = ty;
  cTw[2][3] = tz;
  cTw[3][3] = 1.;
  // update rotation matrix
  for(int i = 0; i < 3; i++) {
    cTw[0][i] = r1[i];
    cTw[1][i] = r2[i];
    cTw[2][i] = r3[i];
  }
  return cTw;
}
int main()
{
  int npoints = 4;
  std::vector< vpColVector > wX(npoints); // 3D points
  std::vector< vpColVector >  x(npoints); // Their 2D coordinates in the image plane
  for (int i = 0; i < npoints; i++) {
    wX[i].resize(4);
    x[i].resize(3);
  }
  // Ground truth pose used to generate the data
  vpHomogeneousMatrix cTw_truth(-0.1, 0.1, 0.5, vpMath::rad(5), vpMath::rad(0), vpMath::rad(45));
  // Input data: 3D coordinates of at least 4 non coplanar points
  double L = 0.2;
  wX[0][0] =   -L; wX[0][1] = -L; wX[0][2] =  0;   wX[0][3] = 1; // wX_0 ( -L, -L,  0,   1)^T
  wX[1][0] =    L; wX[1][1] = -L; wX[1][2] =  0.2; wX[1][3] = 1; // wX_1 (  L, -L,  0.2, 1)^T
  wX[2][0] =    L; wX[2][1] =  L; wX[2][2] = -0.1; wX[2][3] = 1; // wX_2 (  L,  L, -0.1, 1)^T
  wX[3][0] =   -L; wX[3][1] =  L; wX[3][2] =  0;   wX[3][3] = 1; // wX_3 ( -L,  L,  0,   1)^T
  // Input data: 2D coordinates of the points on the image plane
  for(int i = 0; i < npoints; i++) {
    vpColVector cX = cTw_truth * wX[i];     // Update cX, cY, cZ
    x[i][0] = cX[0] / cX[2]; // x = cX/cZ
    x[i][1] = cX[1] / cX[2]; // y = cY/cZ
    x[i][2] = 1.;
  }
  vpHomogeneousMatrix cTw = pose_dementhon(wX, x);
  std::cout << "cTw (ground truth):\n" << cTw_truth << std::endl;
  std::cout << "cTw (from Dementhon):\n" << cTw << std::endl;
  return 0;
}

Source code explained

First of all we include ViSP headers that are requested to manipulate vectors and matrices.

#include <visp/vpColVector.h>
#include <visp/vpHomogeneousMatrix.h>
#include <visp/vpMatrix.h>

Then we introduce the function that does the pose estimation. It takes as input parameters ${^w}{\bf X} = (X,Y,Z,1)^T$ the 3D coordinates of the points in the world frame and ${\bf x} = (x,y,1)^T$ their normalized coordinates in the image plane. It returns the estimated pose as an homogeneous matrix transformation.

vpHomogeneousMatrix pose_dementhon(const std::vector< vpColVector > &wX, const std::vector< vpColVector > &x)

The implementation of the POSIT algorithm is done next.

  int npoints = (int)wX.size();
  vpColVector r1, r2, r3;
  vpMatrix A(npoints, 4);
  for(int i = 0; i < npoints; i++) {
    for (int j = 0; j < 4; j++) {
      A[i][j] = wX[i][j];
    }
  }
  vpMatrix Ap = A.pseudoInverse();
  vpColVector eps(npoints);
  eps = 0; // Initialize epsilon_i = 0
  vpColVector Bx(npoints);
  vpColVector By(npoints);
  double tx, ty, tz;
  vpMatrix I, J;
  vpColVector Istar(3), Jstar(3);
  // POSIT loop
  for(unsigned int iter = 0; iter < 20; iter ++) {
    for(int i = 0; i < npoints; i++) {
      Bx[i] = x[i][0] * (eps[i] + 1.);
      By[i] = x[i][1] * (eps[i] + 1.);
    }
    I = Ap * Bx; // Notice that the pseudo inverse
    J = Ap * By; // of matrix A is a constant that has been precompiled.
    for (int i = 0; i < 3; i++) {
      Istar[i] = I[i][0];
      Jstar[i] = J[i][0];
    }
    // Estimation of the rotation matrix
    double normI = sqrt( Istar.sumSquare() );
    double normJ = sqrt( Jstar.sumSquare() );
    r1 = Istar / normI;
    r2 = Jstar / normJ;
    r3 = vpColVector::crossProd(r1, r2);
    // Estimation of the translation
    tz = 1/normI;
    tx = tz * I[3][0];
    ty = tz * J[3][0];
    // Update epsilon_i
    for(int i = 0; i < npoints; i++)
      eps[i] = (r3[0] * wX[i][0] + r3[1] * wX[i][1] + r3[2] * wX[i][2]) / tz;
  }

After a minimal number of iterations, all the parameters are estimated and can be used to update the value of the homogenous transformation.

  vpHomogeneousMatrix cTw;
  // Update translation vector
  cTw[0][3] = tx;
  cTw[1][3] = ty;
  cTw[2][3] = tz;
  cTw[3][3] = 1.;
  // update rotation matrix
  for(int i = 0; i < 3; i++) {
    cTw[0][i] = r1[i];
    cTw[1][i] = r2[i];
    cTw[2][i] = r3[i];
  }

Finally we define the main function in which we will initialize the input data before calling the previous function and computing the pose using Dementhon POSIT algorithm.

int main()

First in the main we create the data structures that will contain the 3D points coordinates wX in the world frame, and their coordinates in the image plane x obtained after prerspective projection. Note here that at least 4 non coplanar points are requested to estimate the pose.

  int npoints = 4;
  std::vector< vpColVector > wX(npoints); // 3D points
  std::vector< vpColVector >  x(npoints); // Their 2D coordinates in the image plane
  for (int i = 0; i < npoints; i++) {
    wX[i].resize(4);
    x[i].resize(3);
  }

For our simulation we then initialize the input data from a ground truth pose cTw_truth. For each point we set in wX[i] the 3D coordinates in the world frame (wX, wY, wZ, 1) and compute in cX their 3D coordinates (cX, cY, cZ, 1) in the camera frame. Then in x[i] we update their coordinates (x, y) in the image plane, obtained by perspective projection.

  // Ground truth pose used to generate the data
  vpHomogeneousMatrix cTw_truth(-0.1, 0.1, 0.5, vpMath::rad(5), vpMath::rad(0), vpMath::rad(45));
  // Input data: 3D coordinates of at least 4 non coplanar points
  double L = 0.2;
  wX[0][0] =   -L; wX[0][1] = -L; wX[0][2] =  0;   wX[0][3] = 1; // wX_0 ( -L, -L,  0,   1)^T
  wX[1][0] =    L; wX[1][1] = -L; wX[1][2] =  0.2; wX[1][3] = 1; // wX_1 (  L, -L,  0.2, 1)^T
  wX[2][0] =    L; wX[2][1] =  L; wX[2][2] = -0.1; wX[2][3] = 1; // wX_2 (  L,  L, -0.1, 1)^T
  wX[3][0] =   -L; wX[3][1] =  L; wX[3][2] =  0;   wX[3][3] = 1; // wX_3 ( -L,  L,  0,   1)^T
  // Input data: 2D coordinates of the points on the image plane
  for(int i = 0; i < npoints; i++) {
    vpColVector cX = cTw_truth * wX[i];     // Update cX, cY, cZ
    x[i][0] = cX[0] / cX[2]; // x = cX/cZ
    x[i][1] = cX[1] / cX[2]; // y = cY/cZ
    x[i][2] = 1.;
  }

From here we have initialized ${^w}{\bf X} = (X,Y,Z,1)^T$ and ${\bf x} = (x,y,1)^T$ . We are now ready to call the function that does the pose estimation.

vpHomogeneousMatrix cTw = pose_dementhon(wX, x);

Resulting pose estimation

If you run the previous code, it we produce the following result that shows that the estimated pose is very close to the ground truth one used to generate the input data:

cTw (ground truth):
7072945484  -0.706170438  0.03252282796  -0.1  
706170438  0.7036809008  -0.078463382  0.1  
03252282796  0.078463382  0.9963863524  0.5  
0  0  1  
cTw (from Dementhon):
6172726698  -0.7813181607  0.09228425059  -0.1070274014  
690659622  0.4249461799  -0.5851581245  0.174123354  
4179788298  0.4249391234  0.8019325685  0.5236967119  
0  0  1 

Table of Contents

Introduction

Source code

Source code explained

Resulting pose estimation