Introduction
An alternative and elegant solution to pose estimation from points has been proposed in [3] [6]. This algorithm, called POSIT (Pose from Orthography and Scaling with ITerations), iteratively refines a scaled orthographic approximation of the perspective projection.
Source code
The following source code, which uses OpenCV, is also available in the pose-dementhon-opencv.cpp file. It computes the pose of the camera from point correspondences.
#include <iostream>
#include <cmath>

#include <opencv2/core/core.hpp>
#include <opencv2/calib3d/calib3d.hpp>

void pose_dementhon(const std::vector< cv::Point3d > &wX,
                    const std::vector< cv::Point2d > &x,
                    cv::Mat &ctw, cv::Mat &cRw)
{
  int npoints = (int)wX.size();
  cv::Mat r1, r2, r3;

  // Stack the homogeneous world coordinates of the points in A
  cv::Mat A(npoints, 4, CV_64F);
  for (int i = 0; i < npoints; i++) {
    A.at<double>(i, 0) = wX[i].x;
    A.at<double>(i, 1) = wX[i].y;
    A.at<double>(i, 2) = wX[i].z;
    A.at<double>(i, 3) = 1;
  }
  cv::Mat Ap = A.inv(cv::DECOMP_SVD); // Pseudo-inverse of A

  cv::Mat eps(npoints, 1, CV_64F, cv::Scalar(0)); // Initialize epsilon_i = 0
  cv::Mat Bx(npoints, 1, CV_64F);
  cv::Mat By(npoints, 1, CV_64F);
  double tx, ty, tz;
  cv::Mat I, J;
  cv::Mat Istar(3, 1, CV_64F), Jstar(3, 1, CV_64F);

  for (unsigned int iter = 0; iter < 20; iter++) {
    // Update the right-hand sides with the current epsilon_i
    for (int i = 0; i < npoints; i++) {
      Bx.at<double>(i, 0) = x[i].x * (eps.at<double>(i, 0) + 1.);
      By.at<double>(i, 0) = x[i].y * (eps.at<double>(i, 0) + 1.);
    }
    // Solve A * I = Bx and A * J = By in the least-squares sense
    I = Ap * Bx;
    J = Ap * By;

    // Extract the first three components of I and J
    for (int i = 0; i < 3; i++) {
      Istar.at<double>(i, 0) = I.at<double>(i, 0);
      Jstar.at<double>(i, 0) = J.at<double>(i, 0);
    }
    double normI = 0, normJ = 0;
    for (int i = 0; i < 3; i++) {
      normI += Istar.at<double>(i, 0) * Istar.at<double>(i, 0);
      normJ += Jstar.at<double>(i, 0) * Jstar.at<double>(i, 0);
    }
    normI = std::sqrt(normI);
    normJ = std::sqrt(normJ);

    // First two rows of the rotation matrix; the third is their cross product
    r1 = Istar / normI;
    r2 = Jstar / normJ;
    r3 = r1.cross(r2);

    // Translation
    tz = 1 / normI;
    tx = tz * I.at<double>(3, 0);
    ty = tz * J.at<double>(3, 0);

    // Update epsilon_i = (r3 . wX_i) / tz
    for (int i = 0; i < npoints; i++) {
      eps.at<double>(i, 0) = (r3.at<double>(0, 0) * wX[i].x
                              + r3.at<double>(1, 0) * wX[i].y
                              + r3.at<double>(2, 0) * wX[i].z) / tz;
    }
  }

  ctw.at<double>(0, 0) = tx;
  ctw.at<double>(1, 0) = ty;
  ctw.at<double>(2, 0) = tz;
  for (int i = 0; i < 3; i++) {
    // The rows of cRw are r1, r2 and r3
    cRw.at<double>(0, i) = r1.at<double>(i, 0);
    cRw.at<double>(1, i) = r2.at<double>(i, 0);
    cRw.at<double>(2, i) = r3.at<double>(i, 0);
  }
}

int main()
{
  std::vector< cv::Point3d > wX; // 3D points in the world frame
  std::vector< cv::Point2d > x;  // Their normalized coordinates in the image plane

  // Ground truth pose used to generate the input data
  cv::Mat ctw_truth = (cv::Mat_<double>(3, 1) << -0.1, 0.1, 0.5);
  cv::Mat crw_truth = (cv::Mat_<double>(3, 1) << CV_PI/180*(5), CV_PI/180*(0), CV_PI/180*(45));
  cv::Mat cRw_truth(3, 3, cv::DataType<double>::type);
  cv::Rodrigues(crw_truth, cRw_truth);

  // Four non-coplanar 3D points
  double L = 0.2;
  wX.push_back(cv::Point3d(-L, -L,    0));
  wX.push_back(cv::Point3d( L, -L,  0.2));
  wX.push_back(cv::Point3d( L,  L, -0.1));
  wX.push_back(cv::Point3d(-L,  L,    0));

  // Project the points in the image plane with the ground truth pose
  for (size_t i = 0; i < wX.size(); i++) {
    cv::Mat cX = cRw_truth * cv::Mat(wX[i]) + ctw_truth; // Coordinates in the camera frame
    x.push_back(cv::Point2d(cX.at<double>(0, 0) / cX.at<double>(2, 0),
                            cX.at<double>(1, 0) / cX.at<double>(2, 0)));
  }

  cv::Mat ctw(3, 1, CV_64F); // Estimated translation vector
  cv::Mat cRw(3, 3, CV_64F); // Estimated rotation matrix

  pose_dementhon(wX, x, ctw, cRw);

  std::cout << "ctw (ground truth):\n" << ctw_truth << std::endl;
  std::cout << "ctw (computed with Dementhon):\n" << ctw << std::endl;
  std::cout << "cRw (ground truth):\n" << cRw_truth << std::endl;
  std::cout << "cRw (computed with Dementhon):\n" << cRw << std::endl;

  return 0;
}
Source code explained
First of all we include the headers required by the example: iostream for console output, cmath for std::sqrt(), and the OpenCV headers used to manipulate vectors and matrices.
#include <iostream>
#include <cmath>

#include <opencv2/core/core.hpp>
#include <opencv2/calib3d/calib3d.hpp>
Then we introduce the function that does the pose estimation. It takes as input the 3D coordinates of the points in the world frame and their normalized coordinates in the image plane. It returns the estimated pose as a translation vector in ctw and a rotation matrix in cRw.
void pose_dementhon(const std::vector< cv::Point3d > &wX,
                    const std::vector< cv::Point2d > &x,
                    cv::Mat &ctw, cv::Mat &cRw)
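Note that x must contain normalized image-plane coordinates, not pixel coordinates. If your measurements come as pixels, they first have to be normalized with the camera intrinsics. A minimal sketch, assuming hypothetical intrinsic values px, py, u0 and v0 (these are not part of the tutorial code and must come from your own camera calibration):
// Sketch: convert a pixel coordinate (u, v) into a normalized image-plane
// coordinate (x, y) using the camera intrinsics. The numerical values
// below are hypothetical placeholders for your calibration results.
cv::Point2d pixel_to_normalized(const cv::Point2d &uv)
{
  double px = 600., py = 600.; // Focal length in pixels (assumed)
  double u0 = 320., v0 = 240.; // Principal point (assumed)
  return cv::Point2d((uv.x - u0) / px, (uv.y - v0) / py);
}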
The POSIT algorithm is implemented next. We first build the matrix A that stacks the homogeneous world coordinates of the points and compute its pseudo-inverse Ap. Then, at each iteration, we solve the linear systems A I = Bx and A J = By, recover the first two rows r1 and r2 of the rotation matrix by normalizing I and J, obtain the third row r3 as their cross product, deduce the translation (tx, ty, tz), and finally update the correction terms eps that refine the scaled orthographic approximation of the perspective projection.
  int npoints = (int)wX.size();
  cv::Mat r1, r2, r3;

  // Stack the homogeneous world coordinates of the points in A
  cv::Mat A(npoints, 4, CV_64F);
  for (int i = 0; i < npoints; i++) {
    A.at<double>(i, 0) = wX[i].x;
    A.at<double>(i, 1) = wX[i].y;
    A.at<double>(i, 2) = wX[i].z;
    A.at<double>(i, 3) = 1;
  }
  cv::Mat Ap = A.inv(cv::DECOMP_SVD); // Pseudo-inverse of A

  cv::Mat eps(npoints, 1, CV_64F, cv::Scalar(0)); // Initialize epsilon_i = 0
  cv::Mat Bx(npoints, 1, CV_64F);
  cv::Mat By(npoints, 1, CV_64F);
  double tx, ty, tz;
  cv::Mat I, J;
  cv::Mat Istar(3, 1, CV_64F), Jstar(3, 1, CV_64F);

  for (unsigned int iter = 0; iter < 20; iter++) {
    // Update the right-hand sides with the current epsilon_i
    for (int i = 0; i < npoints; i++) {
      Bx.at<double>(i, 0) = x[i].x * (eps.at<double>(i, 0) + 1.);
      By.at<double>(i, 0) = x[i].y * (eps.at<double>(i, 0) + 1.);
    }
    // Solve A * I = Bx and A * J = By in the least-squares sense
    I = Ap * Bx;
    J = Ap * By;

    // Extract the first three components of I and J
    for (int i = 0; i < 3; i++) {
      Istar.at<double>(i, 0) = I.at<double>(i, 0);
      Jstar.at<double>(i, 0) = J.at<double>(i, 0);
    }
    double normI = 0, normJ = 0;
    for (int i = 0; i < 3; i++) {
      normI += Istar.at<double>(i, 0) * Istar.at<double>(i, 0);
      normJ += Jstar.at<double>(i, 0) * Jstar.at<double>(i, 0);
    }
    normI = std::sqrt(normI);
    normJ = std::sqrt(normJ);

    // First two rows of the rotation matrix; the third is their cross product
    r1 = Istar / normI;
    r2 = Jstar / normJ;
    r3 = r1.cross(r2);

    // Translation
    tz = 1 / normI;
    tx = tz * I.at<double>(3, 0);
    ty = tz * J.at<double>(3, 0);

    // Update epsilon_i = (r3 . wX_i) / tz
    for (int i = 0; i < npoints; i++) {
      eps.at<double>(i, 0) = (r3.at<double>(0, 0) * wX[i].x
                              + r3.at<double>(1, 0) * wX[i].y
                              + r3.at<double>(2, 0) * wX[i].z) / tz;
    }
  }
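Note that the loop normalizes Istar and Jstar independently, so nothing enforces r1 and r2 to be exactly orthogonal; the recovered matrix is only approximately a rotation. The tutorial keeps the raw estimate, but if a true rotation matrix is needed, one possible post-processing step (not part of the original code) is to project onto the closest rotation using an SVD:
// Sketch: project an approximate rotation matrix onto the closest true
// rotation (in the Frobenius-norm sense) using an SVD. Assumes the input
// is already close to a rotation, so that det(U * Vt) = +1. This helper
// is not part of the original tutorial code.
void orthogonalize_rotation(cv::Mat &R)
{
  cv::SVD svd(R);     // Decompose R = U * W * Vt
  R = svd.u * svd.vt; // Closest orthogonal matrix: U * Vt
}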
Once the loop completes (here after a fixed number of 20 iterations), all the parameters are estimated and can be used to fill the homogeneous transformation: first ctw for the translation vector, then cRw for the rotation matrix.
  ctw.at<double>(0, 0) = tx;
  ctw.at<double>(1, 0) = ty;
  ctw.at<double>(2, 0) = tz;
  for (int i = 0; i < 3; i++) {
    // The rows of cRw are r1, r2 and r3
    cRw.at<double>(0, i) = r1.at<double>(i, 0);
    cRw.at<double>(1, i) = r2.at<double>(i, 0);
    cRw.at<double>(2, i) = r3.at<double>(i, 0);
  }
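If a single 4x4 homogeneous matrix cTw is preferred over the separate pair (cRw, ctw), it can be assembled from the two outputs. A small helper, given as an illustration only and not part of the original tutorial code:
// Sketch: stack cRw and ctw into a 4x4 homogeneous transformation cTw.
cv::Mat make_homogeneous(const cv::Mat &cRw, const cv::Mat &ctw)
{
  cv::Mat cTw = cv::Mat::eye(4, 4, CV_64F);
  cRw.copyTo(cTw(cv::Rect(0, 0, 3, 3))); // Rotation block
  ctw.copyTo(cTw(cv::Rect(3, 0, 1, 3))); // Translation column
  return cTw;
}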
Finally we define the main function, in which we initialize the input data before calling the previous function to compute the pose using Dementhon's POSIT algorithm.
First, in main() we create the data structures that will contain the 3D point coordinates wX in the world frame, and their coordinates x in the image plane obtained after perspective projection. Note that at least four non-coplanar points are required to estimate the pose.
  std::vector< cv::Point3d > wX; // 3D points in the world frame
  std::vector< cv::Point2d > x;  // Their normalized coordinates in the image plane
For our simulation we then initialize the input data from a ground truth pose given by the translation ctw_truth and the rotation matrix cRw_truth. For each point we set in wX[i] its 3D coordinates in the world frame and compute in cX its 3D coordinates in the camera frame. Then in x[i] we store its coordinates (x, y) in the image plane, obtained by perspective projection.
  // Ground truth pose used to generate the input data
  cv::Mat ctw_truth = (cv::Mat_<double>(3, 1) << -0.1, 0.1, 0.5);
  cv::Mat crw_truth = (cv::Mat_<double>(3, 1) << CV_PI/180*(5), CV_PI/180*(0), CV_PI/180*(45));
  cv::Mat cRw_truth(3, 3, cv::DataType<double>::type);
  cv::Rodrigues(crw_truth, cRw_truth);

  // Four non-coplanar 3D points
  double L = 0.2;
  wX.push_back(cv::Point3d(-L, -L,    0));
  wX.push_back(cv::Point3d( L, -L,  0.2));
  wX.push_back(cv::Point3d( L,  L, -0.1));
  wX.push_back(cv::Point3d(-L,  L,    0));

  // Project the points in the image plane with the ground truth pose
  for (size_t i = 0; i < wX.size(); i++) {
    cv::Mat cX = cRw_truth * cv::Mat(wX[i]) + ctw_truth; // Coordinates in the camera frame
    x.push_back(cv::Point2d(cX.at<double>(0, 0) / cX.at<double>(2, 0),
                            cX.at<double>(1, 0) / cX.at<double>(2, 0)));
  }
From here we have initialized wX and x. We are now ready to call the function that does the pose estimation.
  cv::Mat ctw(3, 1, CV_64F); // Estimated translation vector
  cv::Mat cRw(3, 3, CV_64F); // Estimated rotation matrix

  pose_dementhon(wX, x, ctw, cRw);
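To quantify the quality of the estimate, one can reproject the 3D points with the estimated pose and compare them to the input image points. A small check, not part of the original code, that could be placed after the call to pose_dementhon():
  // Sketch: mean reprojection error of the estimated pose.
  // Assumes wX, x, cRw and ctw as defined above.
  double error = 0;
  for (size_t i = 0; i < wX.size(); i++) {
    cv::Mat cX = cRw * cv::Mat(wX[i]) + ctw; // Point in the estimated camera frame
    double xp = cX.at<double>(0, 0) / cX.at<double>(2, 0);
    double yp = cX.at<double>(1, 0) / cX.at<double>(2, 0);
    double dx = x[i].x - xp, dy = x[i].y - yp;
    error += std::sqrt(dx * dx + dy * dy); // Distance in the normalized image plane
  }
  std::cout << "Mean reprojection error: " << error / wX.size() << std::endl;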
Resulting pose estimation
If you run the previous code, it will produce the following result, where the estimated pose can be compared to the ground truth used to generate the input data:
ctw (ground truth):
[-0.1;
0.1;
0.5]
ctw (computed with Dementhon):
[-0.1070274014258891;
0.1741233539654255;
0.5236967119016803]
cRw (ground truth):
[0.7072945483755065, -0.7061704379962989, 0.03252282795827704;
0.7061704379962989, 0.7036809008245869, -0.07846338199958876;
0.03252282795827704, 0.07846338199958876, 0.9963863524490802]
cRw (computed with Dementhon):
[0.6172726698299887, -0.7813181606576134, 0.09228425059326956;
0.6906596219977137, 0.4249461799313228, -0.5851581245302429;
0.4179788297943932, 0.4249391234325819, 0.8019325685199985]
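As a cross-check, OpenCV's built-in cv::solvePnP() can be run on the same data. Since x already holds normalized coordinates, the camera matrix reduces to the identity and there is no distortion to pass. A possible sketch, to be added at the end of main():
  // Sketch: cross-check with OpenCV's built-in PnP solver. Since x holds
  // normalized image-plane coordinates, the camera matrix is the identity
  // and the distortion coefficients are left empty.
  cv::Mat K = cv::Mat::eye(3, 3, CV_64F);
  cv::Mat rvec, tvec;
  cv::solvePnP(wX, x, K, cv::Mat(), rvec, tvec);
  cv::Mat R;
  cv::Rodrigues(rvec, R); // Convert the rotation vector into a matrix
  std::cout << "ctw (computed with solvePnP):\n" << tvec << std::endl;
  std::cout << "cRw (computed with solvePnP):\n" << R << std::endl;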