ソースを参照

add detection_ndt_pclomp. not complete.

yuchuli 1 年間 前
コミット
7dcd2c980c

+ 13 - 0
src/detection/detection_ndt_pclomp/detection_ndt_matching_gpu_multi.xml

@@ -0,0 +1,13 @@
+<xml>	
+	<node name="detection_ndt_matching_gpu_multi.xml">
+		<param name="lidarmsg" value="lidarpc_center" />
+		<param name="gpsmsg" value="ins550d_gpsimu" />
+		<param name="ndtposmsg" value="ndtpos" />
+		<param name="ndtgpsposmsg" value="ndtgpspos" />
+		<param name="AngleCorrect" value="0.0" />
+		<param name="HeightCorrect" value="0.0" />
+		<param name="SaveLastPos" value="true" />
+		<param name="Arm_LidarGPSBase_x" value="0.9" />
+		<param name="Arm_LidarGPSBase_y" value="0.0" />
+	</node>
+</xml>

+ 103 - 0
src/detection/detection_ndt_pclomp/detection_ndt_pclomp.pro

@@ -0,0 +1,103 @@
+QT -= gui
+QT += network
+CONFIG += console c++17
+CONFIG -= app_bundle
+
+QMAKE_CXXFLAGS += -std=gnu++17
+
+QMAKE_LFLAGS += -no-pie
+
+QMAKE_CXXFLAGS +=  -g
+
+# The following define makes your compiler emit warnings if you use
+# any feature of Qt which as been marked deprecated (the exact warnings
+# depend on your compiler). Please consult the documentation of the
+# deprecated API in order to know how to port your code away from it.
+DEFINES += QT_DEPRECATED_WARNINGS
+
+# You can also make your code fail to compile if you use deprecated APIs.
+# In order to do so, uncomment the following line.
+# You can also select to disable deprecated APIs only up to a certain version of Qt.
+#DEFINES += QT_DISABLE_DEPRECATED_BEFORE=0x060000    # disables all the APIs deprecated before Qt 6.0.0
+
+SOURCES += main.cpp \
+    ndt_matching.cpp \
+    ../../include/msgtype/ndtpos.pb.cc \
+    gnss_coordinate_convert.cpp \
+    ../../include/msgtype/imu.pb.cc \
+    ../../include/msgtype/gpsimu.pb.cc \
+    ../../include/msgtype/gps.pb.cc \
+    ../../include/msgtype/ndtgpspos.pb.cc \
+    pclomp/ndt_omp.cpp \
+    pclomp/voxel_grid_covariance_omp.cpp
+
+HEADERS += \
+    ndt_matching.h \
+    ../../include/msgtype/ndtpos.pb.h \
+    gnss_coordinate_convert.h \
+    ../../include/msgtype/imu.pb.h \
+    ../../include/msgtype/gpsimu.pb.h \
+    ../../include/msgtype/gps.pb.h \
+    ../../include/msgtype/ndtgpspos.pb.h \
+    pclomp/ndt_omp_impl.hpp \
+    pclomp/voxel_grid_covariance_omp_impl.hpp \
+    pclomp/pclomp/ndt_omp.h \
+    pclomp/pclomp/voxel_grid_covariance_omp.h
+
+INCLUDEPATH += /opt/ros/melodic/include
+INCLUDEPATH += /opt/ros/kinetic/include
+INCLUDEPATH += /usr/include/pcl-1.8
+INCLUDEPATH += /usr/include/pcl-1.7
+INCLUDEPATH += /usr/include/eigen3
+unix:INCLUDEPATH += /usr/include/pcl-1.10
+INCLUDEPATH += /opt/ros/noetic/include
+
+INCLUDEPATH += $$PWD/../../include/msgtype/
+
+INCLUDEPATH += $$PWD/pclomp
+
+INCLUDEPATH += /usr/local/cuda-10.0/targets/aarch64-linux/include
+INCLUDEPATH += /usr/local/cuda-10.2/targets/aarch64-linux/include
+INCLUDEPATH += /usr/local/cuda-10.2/targets/x86_64-linux/include
+INCLUDEPATH += /usr/local/cuda/targets/aarch64-linux/include
+
+DEFINES += CUDA_FOUND
+
+DEFINES += _OPENMP
+#DEFINES += USE_PCL_OPENMP
+
+unix:LIBS += -lboost_thread -lboost_system -lprotobuf
+
+unix:LIBS +=  -lpcl_common\
+        -lpcl_features\
+        -lpcl_filters\
+        -lpcl_io\
+        -lpcl_io_ply\
+        -lpcl_kdtree\
+        -lpcl_keypoints\
+        -lpcl_octree\
+        -lpcl_outofcore\
+        -lpcl_people\
+        -lpcl_recognition\
+        -lpcl_registration\
+        -lpcl_sample_consensus\
+        -lpcl_search\
+        -lpcl_segmentation\
+        -lpcl_surface\
+        -lpcl_tracking\
+        -lpcl_visualization
+
+
+INCLUDEPATH += $$PWD/../../common/ndt_gpu/
+
+
+!include(../../../include/common.pri ) {
+    error( "Couldn't find the common.pri file!" )
+}
+
+
+INCLUDEPATH += $$PWD/../../../include/
+LIBS += -L$$PWD/../../../bin/ -lxmlparam -lmodulecomm -livlog -livfault -lndt_gpu  -livexit -livbacktrace
+
+LIBS += -lmpi -lopen-pal -lgomp
+

+ 153 - 0
src/detection/detection_ndt_pclomp/gnss_coordinate_convert.cpp

@@ -0,0 +1,153 @@
+#include <gnss_coordinate_convert.h>
+
+void gps2xy(double J4, double K4, double *x, double *y)
+{
+    int L4 = (int)((K4 - 1.5) / 3) + 1;
+    double M4 = K4 - (L4 * 3);
+    double N4 = sin(J4 * 3.1415926536 / 180);
+    double O4 = cos(J4 * 3.1415926536 / 180);
+    double P4 = tan(J4 * 3.1415926536 / 180);
+    double Q4 = 111134.8611 * J4 - N4 * O4 * (32005.7799 + 133.9238 * N4 * N4 + 0.6973 * N4 * N4 * N4 * N4 + 0.0039 * N4 * N4 * N4 * N4 * N4 * N4);
+    double R4 = sqrt(0.006738525414683) * O4;
+    double S4 = sqrt(1 + R4 * R4);
+    double T4 = 6399698.901783 / S4;
+    double U4 = (T4 / S4) / S4;
+    double V4 = O4 * M4 * 3.1415926536 / 180;
+    double W4 = 0.5 + (5 - P4 * P4 + 9 * R4 * R4 + 4 * R4 * R4 * R4 * R4) * V4 * V4 / 24;
+    double X4 = V4 * V4 * V4 * V4 / 720 * (61 + (P4 * P4 - 58) * P4 * P4);
+    double Y4 = 1 + V4 * V4 * (1 - P4 * P4 + R4 * R4) / 6;
+    double Z4 = V4 * V4 * V4 * V4 * (5 - 18 * P4 * P4 * P4 * P4 * P4 * P4 + 14 * R4 * R4 - 58 * R4 * R4 * P4 * P4) / 120;
+
+    *y = Q4 + T4 * P4 * V4 * V4 * (W4 + X4);
+    *x = 500000 + T4 * V4 * (Y4 + Z4);
+}
+
+//高斯投影由经纬度(Unit:DD)反算大地坐标(含带号,Unit:Metres)
+void GaussProjCal(double longitude, double latitude, double *X, double *Y)
+{
+    int ProjNo = 0; int ZoneWide; ////带宽
+    double longitude1, latitude1, longitude0, latitude0, X0, Y0, xval, yval;
+    double a, f, e2, ee, NN, T, C, A, M, iPI;
+    iPI = 0.0174532925199433; ////3.1415926535898/180.0;
+    ZoneWide = 6; ////6度带宽
+    a = 6378245.0; f = 1.0 / 298.3; //54年北京坐标系参数
+                                    ////a=6378140.0; f=1/298.257; //80年西安坐标系参数
+    ProjNo = (int)(longitude / ZoneWide);
+    longitude0 = ProjNo * ZoneWide + ZoneWide / 2;
+    longitude0 = longitude0 * iPI;
+    latitude0 = 0;
+    longitude1 = longitude * iPI; //经度转换为弧度
+    latitude1 = latitude * iPI; //纬度转换为弧度
+    e2 = 2 * f - f * f;
+    ee = e2 * (1.0 - e2);
+    NN = a / sqrt(1.0 - e2 * sin(latitude1)*sin(latitude1));
+    T = tan(latitude1)*tan(latitude1);
+    C = ee * cos(latitude1)*cos(latitude1);
+    A = (longitude1 - longitude0)*cos(latitude1);
+    M = a * ((1 - e2 / 4 - 3 * e2*e2 / 64 - 5 * e2*e2*e2 / 256)*latitude1 - (3 * e2 / 8 + 3 * e2*e2 / 32 + 45 * e2*e2
+        *e2 / 1024)*sin(2 * latitude1)
+        + (15 * e2*e2 / 256 + 45 * e2*e2*e2 / 1024)*sin(4 * latitude1) - (35 * e2*e2*e2 / 3072)*sin(6 * latitude1));
+    xval = NN * (A + (1 - T + C)*A*A*A / 6 + (5 - 18 * T + T * T + 72 * C - 58 * ee)*A*A*A*A*A / 120);
+    yval = M + NN * tan(latitude1)*(A*A / 2 + (5 - T + 9 * C + 4 * C*C)*A*A*A*A / 24
+        + (61 - 58 * T + T * T + 600 * C - 330 * ee)*A*A*A*A*A*A / 720);
+    X0 = 1000000L * (ProjNo + 1) + 500000L;
+    Y0 = 0;
+    xval = xval + X0; yval = yval + Y0;
+    *X = xval;
+    *Y = yval;
+}
+
+
+
+//=======================zhaobo0904
+#define PI  3.14159265358979
+void ZBGaussProjCal(double lon, double lat, double *X, double *Y)
+{
+    // 1975 年国际椭球体长半轴 a, 第一离心率 e2, 第二离心率 ep2
+    double a = 6378140.0;
+    double e2 = 0.006694384999588;
+    double ep2 = e2/(1-e2);
+
+    // 原点所在经度
+    double lon_origin = 6.0*int(lon/6) + 3.0;
+    lon_origin *= PI / 180.0;
+
+    double k0 = 0.9996;
+
+    // 角度转弧度
+    double lat1 = lat * PI / 180.0;
+    double lon1 = lon * PI / 180.0;
+
+
+    // 经线在该点处的曲率半径,
+    double N = a / sqrt(1 - e2*sin(lat1)*sin(lat1));
+
+
+    // 赤道到该点的经线长度近似值 M, 使用泰勒展开逐项积分然后取前四项.
+    // 这个近似值是将 N 作为纬度 \phi 的函数展开为泰勒计数, 然后在区间 [0, lat1] 上积分得到的.
+    // 首先计算前四项的系数 a1~a4.
+    double a1 = 1 - e2/4 - (3*e2*e2)/64 - (5*e2*e2*e2)/256;
+    double a2 = (3*e2)/8 + (3*e2*e2)/32 + (45*e2*e2*e2)/1024;
+    double a3 = (15*e2*e2)/256 + (45*e2*e2*e2)/1024;
+    double a4 = (35*e2*e2*e2)/3072;
+    double M = a * (a1*lat1 - a2*sin(2*lat1) + a3*sin(4*lat1) - a4*sin(6*lat1));
+
+    // 辅助量
+    double T = tan(lat1)*tan(lat1);
+    double C = ep2*cos(lat1)*cos(lat1);
+    double A = (lon1 - lon_origin)*cos(lat1);
+
+    *X = 500000.0 + k0 * N * (A + (1 - T + C)*(A*A*A)/6 + (5 - 18*T + T*T + 72*C - 58*ep2)*(A*A*A*A*A)/120);
+    *Y = M + N * tan(lat1) * ((A*A)/2 +
+                              (5 - T + 9*C + 4*C*C)*(A*A*A*A)/24 +
+                              (61 - 58*T + T*T + 600*C - 330*ep2)*(A*A*A*A*A*A)/720);
+
+
+    *Y *= k0;
+    return;
+}
+//==========================================================
+
+
+
+
+
+//高斯投影由大地坐标(Unit:Metres)反算经纬度(Unit:DD)
+void GaussProjInvCal(double X, double Y, double *longitude, double *latitude)
+{
+    int ProjNo; int ZoneWide; ////带宽
+    double longitude1, latitude1, longitude0, latitude0, X0, Y0, xval, yval;
+    double e1, e2, f, a, ee, NN, T, C, M, D, R, u, fai, iPI;
+    iPI = 0.0174532925199433; ////3.1415926535898/180.0;
+    a = 6378245.0; f = 1.0 / 298.3; //54年北京坐标系参数
+    ////a=6378140.0; f=1/298.257; //80年西安坐标系参数
+    ZoneWide = 6; ////6度带宽
+    ProjNo = (int)(X / 1000000L); //查找带号
+    longitude0 = (ProjNo - 1) * ZoneWide + ZoneWide / 2;
+    longitude0 = longitude0 * iPI; //中央经线
+    X0 = ProjNo * 1000000L + 500000L;
+    Y0 = 0;
+    xval = X - X0; yval = Y - Y0; //带内大地坐标
+    e2 = 2 * f - f * f;
+    e1 = (1.0 - sqrt(1 - e2)) / (1.0 + sqrt(1 - e2));
+    ee = e2 / (1 - e2);
+    M = yval;
+    u = M / (a*(1 - e2 / 4 - 3 * e2*e2 / 64 - 5 * e2*e2*e2 / 256));
+    fai = u + (3 * e1 / 2 - 27 * e1*e1*e1 / 32)*sin(2 * u) + (21 * e1*e1 / 16 - 55 * e1*e1*e1*e1 / 32)*sin(
+                4 * u)
+            + (151 * e1*e1*e1 / 96)*sin(6 * u) + (1097 * e1*e1*e1*e1 / 512)*sin(8 * u);
+    C = ee * cos(fai)*cos(fai);
+    T = tan(fai)*tan(fai);
+    NN = a / sqrt(1.0 - e2 * sin(fai)*sin(fai));
+    R = a * (1 - e2) / sqrt((1 - e2 * sin(fai)*sin(fai))*(1 - e2 * sin(fai)*sin(fai))*(1 - e2 * sin
+                                                                                       (fai)*sin(fai)));
+    D = xval / NN;
+    //计算经度(Longitude) 纬度(Latitude)
+    longitude1 = longitude0 + (D - (1 + 2 * T + C)*D*D*D / 6 + (5 - 2 * C + 28 * T - 3 * C*C + 8 * ee + 24 * T*T)*D
+                               *D*D*D*D / 120) / cos(fai);
+    latitude1 = fai - (NN*tan(fai) / R)*(D*D / 2 - (5 + 3 * T + 10 * C - 4 * C*C - 9 * ee)*D*D*D*D / 24
+                                         + (61 + 90 * T + 298 * C + 45 * T*T - 256 * ee - 3 * C*C)*D*D*D*D*D*D / 720);
+    //转换为度 DD
+    *longitude = longitude1 / iPI;
+    *latitude = latitude1 / iPI;
+}

+ 27 - 0
src/detection/detection_ndt_pclomp/gnss_coordinate_convert.h

@@ -0,0 +1,27 @@
+#pragma once
+#ifndef _IV_PERCEPTION_GNSS_CONVERT_
+#define _IV_PERCEPTION_GNSS_CONVERT_
+
+#include <math.h>
+//double nmeaConvert2Lat(string lat)
+//{
+//	Console.WriteLine(lat);
+//	double nmea_d = double.Parse(lat.Substring(0, 2));
+//	double nmea_m = double.Parse(lat.Substring(2, 6));
+//	return nmea_d + nmea_m / 60;
+//}
+//
+//double nmeaConvert2Lon(string lon)
+//{
+//	Console.WriteLine(lon);
+//	double nmea_d = double.Parse(lon.Substring(0, 3));
+//	double nmea_m = double.Parse(lon.Substring(3, 7));
+//	return nmea_d + nmea_m / 60;
+//}
+
+void gps2xy(double , double , double *, double *);
+void GaussProjCal(double longitude, double latitude, double *X, double *Y);
+void ZBGaussProjCal(double longitude, double latitude, double *X, double *Y);
+void GaussProjInvCal(double X, double Y, double *longitude, double *latitude);
+
+#endif // !_IV_PERCEPTION_GNSS_CONVERT_

+ 590 - 0
src/detection/detection_ndt_pclomp/main.cpp

@@ -0,0 +1,590 @@
+#include <QCoreApplication>
+
+#include "modulecomm.h"
+
+#include <pcl/io/pcd_io.h>
+#include <pcl/point_types.h>
+
+#include <QTime>
+#include <QDir>
+#include <QFile>
+
+#include <thread>
+
+#include "ivfault.h"
+#include "ivlog.h"
+#include "ndt_matching.h"
+#include "gpsimu.pb.h"
+
+#include "ivexit.h"
+#include "ivbacktrace.h"
+#include "xmlparam.h"
+#include "ivversion.h"
+
+
+QTime gTime;    //a Time function. use calculate elapsed.
+
+int gndttime;    //ndt calculate time
+
+void * gpa,* gpb ,* gpc, * gpd;   //Modulecomm pointer
+
+iv::Ivfault *gfault = nullptr;   //fault diag
+iv::Ivlog *givlog = nullptr;   //log
+
+std::vector<iv::ndtmaptrace> gvector_trace;  //trace and map origin. use shift map
+
+iv::gpspos glastndtgpspos;    //last ndt pos(convert to gps)
+iv::gpspos gcurndtgpspos;     //cur ndt pos(conver to gps)
+iv::gpspos gcurgpsgpspos;    //cur gps pos
+bool gbGPSFix = false;   //GPS is Fix. If 300 times report rtk 6.
+int64_t gnLastGPSTime = 0; //ms
+
+std::thread * gpthread;   //State thread pointer.
+
+std::string gstr_lidarmsgname;   //lidar message name
+std::string gstr_gpsmsgname;   // gps message name
+std::string gstr_ndtposmsgname;  //ndtpos message name
+std::string gstr_ndtgpsposmsgname;   //gpspos message name
+std::string gstr_ndtmainsensormsgname;
+std::string gstr_arm_x;
+std::string gstr_arm_y;
+
+std::string gstr_yawcorrect;   //lidar yaw correct.
+std::string gstr_heightcorrect;  //gps height correct.
+double gyawcorrect = 0.0;        //lidar yaw correct.
+double gheightcorrect = 0.0;      //gps height correct.
+
+std::string gstr_savelastpos; //set save last pos. default true
+bool gbSaveLastPos = true;
+
+double garm_x = 0.0;
+double garm_y = 0.0;
+
+QFile * gpFileLastPos = 0;//Save Last Positin
+
+/**
+ * @brief readtrace read trace
+ * @param pFile
+ * @return
+ */
+std::vector<iv::ndttracepoint> readtrace(QFile * pFile)
+{
+    std::vector<iv::ndttracepoint> ntp;
+    QByteArray ba;
+    ba = pFile->readLine();
+
+    do
+    {
+        QString str;
+        str = ba;
+        QStringList strlist;
+        strlist = str.split("\t");
+        if(strlist.size() == 6)
+        {
+            iv::ndttracepoint tp;
+            QString strx;
+            int j;
+            double fv[6];
+            for(j=0;j<6;j++)
+            {
+                strx = strlist.at(j);
+                fv[j] = strx.toDouble();
+            }
+            tp.x = fv[0];
+            tp.y = fv[1];
+            tp.z = fv[2];
+            tp.pitch = fv[3];
+            tp.roll = fv[4];
+            tp.yaw = fv[5];
+            ntp.push_back(tp);
+        }
+        else
+        {
+            qDebug("len is %d, %d",ba.length(),strlist.size());
+        }
+        ba = pFile->readLine();
+    }while(ba.length()>0);
+
+    return ntp;
+}
+
+/**
+ * @brief readndtorigin read .pcd file's origin gps position.
+ * @param pFile
+ * @param pnori
+ * @return
+ */
+int readndtorigin(QFile * pFile,iv::gpspos * pnori)
+{
+    int nrtn = -1;
+    QByteArray ba;
+    ba = pFile->readLine();
+    QString str;
+    str = ba;
+    QStringList strlist;
+    strlist = str.split("\t");
+    if(strlist.size() == 6)
+    {
+        QString xstr[6];
+        int i;
+        for(i=0;i<6;i++)xstr[i] = strlist.at(i);
+        pnori->lon = xstr[0].toDouble();
+        pnori->lat = xstr[1].toDouble();
+        pnori->height = xstr[2].toDouble();
+        pnori->heading = xstr[3].toDouble();
+        pnori->pitch = xstr[4].toDouble();
+        pnori->roll = xstr[5].toDouble();
+        nrtn = 0;
+    }
+    return nrtn;
+
+}
+
+/**
+ * @brief LoadLastPos Load last position
+ * @param strlastposfilepath
+ */
+static void LoadLastPos(const char * strlastposfilepath)
+{
+    QFile * xFile = new QFile();
+    xFile->setFileName(strlastposfilepath);
+    if(xFile->open(QIODevice::ReadWrite))
+    {
+        int nrtn = readndtorigin(xFile,&glastndtgpspos);
+        if(nrtn < 0)
+        {
+            givlog->warn("load last pos fail.");
+        }
+        if(gbSaveLastPos)gpFileLastPos = xFile;
+        else
+        {
+            gpFileLastPos = 0;
+            xFile->close();
+        }
+    }
+}
+
+/**
+ * @brief LoadTrace
+ */
+static void LoadTrace()
+{
+    std::string strpath = getenv("HOME");
+    strpath = strpath + "/map/";
+
+    std::string strlastposfile = strpath;
+    strlastposfile = strlastposfile + "lastpos.txt";
+    LoadLastPos(strlastposfile.data());
+
+    QString strpathdir = strpath.data();
+    QDir dir(strpath.data());
+    QStringList xfilter;
+    xfilter<<"*.pcd";
+    foreach(QString strfilename,dir.entryList(xfilter,QDir::Files))
+    {
+        qDebug(strfilename.toLatin1().data());
+        QString stroripath;
+        QString strtracepath;
+        QString strpcdpath;
+        stroripath = strfilename;
+        stroripath = stroripath.left(stroripath.length() - 4);
+        stroripath = strpathdir +  stroripath + ".ori";
+        strtracepath = strfilename;
+        strtracepath = strtracepath.left(strtracepath.length() - 4);
+        strtracepath = strpathdir +  strtracepath + ".txt";
+        strpcdpath = strfilename;
+        strpcdpath = strpathdir + strpcdpath;
+        QFile xFileori,xFile,xFilepcd;
+
+        xFileori.setFileName(stroripath);
+        xFilepcd.setFileName(strpcdpath);
+        xFile.setFileName(strtracepath);
+        if(xFileori.exists() && xFile.exists())
+        {
+            qDebug("file name ok.");
+            if(xFile.open(QIODevice::ReadOnly) && xFileori.open(QIODevice::ReadOnly))
+            {
+                std::vector<iv::ndttracepoint> xv =  readtrace(&xFile);
+                iv::gpspos xnori;
+                int nrtn = readndtorigin(&xFileori,&xnori);
+                if((xv.size() > 0)&&(nrtn == 0))
+                {
+                    iv::ndtmaptrace nmt;
+                    nmt.mvector_trace = xv;
+                    nmt.mstrpcdpath = strpcdpath.toLatin1().data();
+                    nmt.mndtorigin = xnori;
+                    gvector_trace.push_back(nmt);
+                    qDebug("this is ok.");
+                }
+
+            }
+            xFile.close();
+            xFileori.close();
+
+        }
+        else
+        {
+            qDebug(" file not ok.");
+        }
+    }
+    return;
+}
+
+int gnothavedatatime = 0;
+/**
+ * @brief ListenPointCloud
+ * @param strdata
+ * @param nSize
+ * @param index
+ * @param dt
+ * @param strmemname
+ */
+void ListenPointCloud(const char * strdata,const unsigned int nSize,const unsigned int index,const QDateTime * dt,const char * strmemname)
+{
+
+    QTime xTime;
+    xTime.start();
+
+    gnothavedatatime = 0;
+    if(nSize <=16)return;
+    unsigned int * pHeadSize = (unsigned int *)strdata;
+    if(*pHeadSize > nSize)
+    {
+        std::cout<<"ListenPointCloud data is small headsize ="<<*pHeadSize<<"  data size is"<<nSize<<std::endl;
+    }
+
+
+    pcl::PointCloud<pcl::PointXYZ>::Ptr point_cloud(
+                new pcl::PointCloud<pcl::PointXYZ>());
+    int nNameSize;
+    nNameSize = *pHeadSize - 4-4-8;
+    char * strName = new char[nNameSize+1];strName[nNameSize] = 0;
+    memcpy(strName,(char *)((char *)strdata +4),nNameSize);
+    point_cloud->header.frame_id = strName;
+    memcpy(&point_cloud->header.seq,(char *)strdata+4+nNameSize,4);
+    memcpy(&point_cloud->header.stamp,(char *)strdata+4+nNameSize+4,8);
+    int nPCount = (nSize - *pHeadSize)/sizeof(pcl::PointXYZI);
+    int i;
+    pcl::PointXYZI * p;
+    p = (pcl::PointXYZI *)((char *)strdata + *pHeadSize);
+    for(i=0;i<nPCount;i++)
+    {
+        pcl::PointXYZ xp;
+        xp.x = p->y;
+        xp.y = p->x * (-1.0);
+        xp.z = p->z;
+
+        if(gyawcorrect != 0.0)
+        {
+            double yaw =  gyawcorrect;
+            double x1,y1;
+            x1 = xp.x;
+            y1 = xp.y;
+            xp.x = x1*cos(yaw) -y1*sin(yaw);
+            xp.y = x1*sin(yaw) + y1*cos(yaw);
+
+        }
+
+ //       xp.intensity = p->intensity;
+        point_cloud->push_back(xp);
+        p++;
+    }
+
+    givlog->verbose("point cloud size is %d",
+                    point_cloud->size());
+
+
+
+    point_ndt(point_cloud);
+    gndttime = xTime.elapsed();
+    givlog->verbose("ndt time is %d, gtime is %d", xTime.elapsed(), gTime.elapsed());
+ //   return;
+
+
+ //   gw->UpdatePointCloud(point_cloud);
+ //   DBSCAN_PC(point_cloud);
+
+    gfault->SetFaultState(0, 0, "ok");
+}
+
+/**
+ * @brief ListenMapUpdate
+ * @param strdata
+ * @param nSize
+ * @param index
+ * @param dt
+ * @param strmemname
+ */
+void ListenMapUpdate(const char * strdata,const unsigned int nSize,const unsigned int index,const QDateTime * dt,const char * strmemname)
+{
+    char * strpath = new char[nSize+1];
+    memcpy(strpath,strdata,nSize);
+    strpath[nSize] = 0;
+
+    pcl::PointCloud<pcl::PointXYZ>::Ptr mapx = boost::shared_ptr<pcl::PointCloud<pcl::PointXYZ>>(new pcl::PointCloud<pcl::PointXYZ>);;
+    if(0 == pcl::io::loadPCDFile(strpath,*mapx))
+    {
+        int size = mapx->size();
+        givlog->verbose("mapx size = %d", size);
+        ndt_match_SetMap(mapx);
+
+    }
+
+    gfault->SetFaultState(0, 0, "ok");
+    delete strpath;
+}
+
+/**
+ * @brief ListenNDTRunstate
+ * @param strdata
+ * @param nSize
+ * @param index
+ * @param dt
+ * @param strmemname
+ */
+void ListenNDTRunstate(const char * strdata,const unsigned int nSize,const unsigned int index,const QDateTime * dt,const char * strmemname)
+{
+    bool bState;
+    if(nSize < sizeof(bool))
+    {
+        gfault->SetFaultState(1, 0, "ListenNDTRunstate data size is small");
+        givlog->error("ListenNDTRunstate data size is small");
+        return;
+    }
+    memcpy(&bState,strdata,sizeof(bool));
+
+    SetRunState(bState);
+    gfault->SetFaultState(0, 0, "ok");
+}
+
+/**
+ * @brief pausecomm
+ */
+void pausecomm()
+{
+    iv::modulecomm::PauseComm(gpa);
+    iv::modulecomm::PauseComm(gpb);
+    iv::modulecomm::PauseComm(gpc);
+    iv::modulecomm::PauseComm(gpd);
+}
+
+/**
+ * @brief continuecomm
+ */
+void continuecomm()
+{
+    iv::modulecomm::ContintuComm(gpa);
+    iv::modulecomm::ContintuComm(gpb);
+    iv::modulecomm::ContintuComm(gpc);
+    iv::modulecomm::ContintuComm(gpd);
+}
+
+/**
+ * @brief ListenRaw
+ * @param strdata
+ * @param nSize
+ * @param index
+ * @param dt
+ * @param strmemname
+ */
+void ListenRaw(const char * strdata,const unsigned int nSize,const unsigned int index,const QDateTime * dt,const char * strmemname)
+{
+    static unsigned int nFixCount = 0;
+    iv::gps::gpsimu xgpsimu;
+    if(!xgpsimu.ParseFromArray(strdata,nSize))
+    {
+        std::cout<<"ListenRaw Parse error."<<std::endl;
+    }
+
+    gcurgpsgpspos.lat = xgpsimu.lat();
+    gcurgpsgpspos.lon = xgpsimu.lon();
+    gcurgpsgpspos.height = xgpsimu.height() + gheightcorrect;
+    gcurgpsgpspos.heading = xgpsimu.heading();
+    gcurgpsgpspos.pitch = xgpsimu.pitch();
+    gcurgpsgpspos.roll = xgpsimu.roll();
+    gcurgpsgpspos.ve = xgpsimu.ve();
+    gcurgpsgpspos.vn = xgpsimu.vn();
+
+    givlog->verbose("gps lat:%11.7f lon:%11.7f heading:%11.7f height:%6.3f rtk:%d",
+                    xgpsimu.lat(),xgpsimu.lon(),xgpsimu.heading(),
+                    xgpsimu.height(),xgpsimu.rtk_state());
+
+    if(xgpsimu.has_acce_x())
+    {
+        setuseimu(true);
+        imu_update(xgpsimu.time(),
+                   xgpsimu.roll(),xgpsimu.pitch(),
+                   xgpsimu.heading(),xgpsimu.acce_x(),
+                   xgpsimu.acce_y(),xgpsimu.acce_z());
+    }
+
+
+    if(xgpsimu.rtk_state() == 6)
+    {
+        nFixCount++;
+    }
+    else
+    {
+        nFixCount = 0;
+    }
+
+    if(nFixCount < 300)gbGPSFix = false;
+    else gbGPSFix = true;
+
+    gnLastGPSTime = std::chrono::system_clock::now().time_since_epoch().count()/1000000;
+
+}
+
+
+bool gbstate = true;
+
+/**
+ * @brief statethread Monitor ndt state thread.
+ */
+void statethread()
+{
+    int nstate = 0;
+    int nlaststate = 0;
+    while(gbstate)
+    {
+        std::this_thread::sleep_for(std::chrono::milliseconds(10));
+        if(gnothavedatatime < 100000) gnothavedatatime++;
+
+        if(gnothavedatatime < 100)
+        {
+            nstate = 0;
+        }
+        if(gnothavedatatime > 1000)
+        {
+            nstate = 1;
+        }
+        if(gnothavedatatime > 6000)
+        {
+            nstate = 2;
+        }
+        if(nstate != nlaststate)
+        {
+            nlaststate = nstate;
+            switch (nstate) {
+            case 0:
+                givlog->info("ndt matching  is ok");
+                gfault->SetFaultState(0,0,"data is ok.");
+                break;
+            case 1:
+                givlog->warn("more than 10 seconds not have lidar pointcloud.");
+                gfault->SetFaultState(1,1,"more than 10 seconds not have lidar pointcloud.");
+                break;
+            case 2:
+                givlog->error("more than 60 seconds not have lidar pointcloud.");
+                gfault->SetFaultState(2,2,"more than 60 seconds not have lidar pointcloud.");
+                break;
+            default:
+                break;
+            }
+        }
+    }
+}
+
+/**
+ * @brief exitfunc when receive exit command.
+ */
+void exitfunc()
+{
+    std::cout<<"enter exitfunc."<<std::endl;
+    gbstate = false;
+    gpthread->join();
+    iv::modulecomm::Unregister(gpa);
+    iv::modulecomm::Unregister(gpb);
+    iv::modulecomm::Unregister(gpc);
+    iv::modulecomm::Unregister(gpd);
+    if(gpFileLastPos != 0)gpFileLastPos->close();
+    std::cout<<"Complete exitfunc."<<std::endl;
+}
+
+int main(int argc, char *argv[])
+{
+    showversion("detection_ndt_matching_gpu_multi");
+    QCoreApplication a(argc, argv);
+
+    QString strpath = QCoreApplication::applicationDirPath();
+
+   if(argc < 2)
+       strpath = strpath + "/detection_ndt_matching_gpu_multi.xml";
+   else
+       strpath = argv[1];
+   std::cout<<strpath.toStdString()<<std::endl;
+
+   iv::xmlparam::Xmlparam xparam(strpath.toStdString());
+
+   gstr_lidarmsgname = xparam.GetParam("lidarmsg","lidar_pc");
+   gstr_gpsmsgname = xparam.GetParam("gpsmsg","hcp2_gpsimu");
+   gstr_ndtposmsgname = xparam.GetParam("ndtposmsg","ndtpos");
+   gstr_ndtgpsposmsgname = xparam.GetParam("ndtgpsposmsg","ndtgpspos");
+   gstr_ndtmainsensormsgname = xparam.GetParam("mainsensor","gps");
+
+   gstr_yawcorrect = xparam.GetParam("AngleCorrect","0.0");
+   gstr_heightcorrect = xparam.GetParam("HeightCorrect","0.0");
+   gstr_savelastpos = xparam.GetParam("SaveLastPos","true");
+
+   gstr_arm_x = xparam.GetParam("Arm_LidarGPSBase_x","0.0");
+   gstr_arm_y = xparam.GetParam("Arm_LidarGPSBase_y","0.0");
+
+   gyawcorrect = atof(gstr_yawcorrect.data()) * M_PI/180.0;
+   gheightcorrect = atof(gstr_heightcorrect.data());
+
+   if(gstr_savelastpos == "true")gbSaveLastPos = true;
+   else gbSaveLastPos = false;
+
+   garm_x = atof(gstr_arm_x.data());
+   garm_y = atof(gstr_arm_y.data());
+
+
+   std::cout<<"lidar message is "<<gstr_lidarmsgname<<std::endl;
+   std::cout<<"gps message is "<<gstr_gpsmsgname<<std::endl;
+   std::cout<<"ndtpos message is "<<gstr_ndtposmsgname<<std::endl;
+   std::cout<<"ndtgpspos message is "<<gstr_ndtgpsposmsgname<<std::endl;
+   std::cout<<"AngleCorrect  is "<<gstr_yawcorrect<<std::endl;
+   std::cout<<"HeightCorrect is "<<gstr_heightcorrect<<std::endl;
+   std::cout<<"SaveLastPos is "<<gstr_savelastpos<<std::endl;
+   std::cout<<"Arm_LidarGPSBase_x is "<<gstr_arm_x<<std::endl;
+   std::cout<<"Arm_LidarGPSBase_x is "<<gstr_arm_y<<std::endl;
+
+
+    gfault = new iv::Ivfault("detection_ndt_matching_gpu");
+    givlog = new iv::Ivlog("detection_ndt_matching_gpu");
+
+    gfault->SetFaultState(0,0,"Initialize.");
+
+    glastndtgpspos.lat = 39;
+    glastndtgpspos.lon = 119;
+    glastndtgpspos.heading = 0;
+    glastndtgpspos.height = 0;
+    glastndtgpspos.pitch = 0;
+    glastndtgpspos.roll = 0;
+    gcurndtgpspos = glastndtgpspos;
+
+    iv::gpspos xpos = glastndtgpspos;
+    xpos.lat = 39.1;
+    xpos.heading = 90;
+    pose xp = CalcPose(glastndtgpspos,xpos);
+    iv::gpspos xx = PoseToGPS(glastndtgpspos,xp);
+    LoadTrace();
+    std::cout<<"trace size  is "<<gvector_trace.size()<<std::endl;
+
+    gpa = iv::modulecomm::RegisterRecv(gstr_lidarmsgname.data(),ListenPointCloud);
+    gpb = iv::modulecomm::RegisterRecv(gstr_gpsmsgname.data(),ListenRaw);
+    gpc = iv::modulecomm::RegisterSend(gstr_ndtposmsgname.data(),10000,1);
+    gpd = iv::modulecomm::RegisterSend(gstr_ndtgpsposmsgname.data(),10000,1);
+    ndt_match_Init_nomap();
+
+    iv::ivexit::RegIVExitCall(exitfunc);
+    RegisterIVBackTrace();
+
+    gpthread = new std::thread(statethread);
+//    gpb = iv::modulecomm::RegisterRecv("ndt_mappath",ListenMapUpdate);
+ //   gpc = iv::modulecomm::RegisterRecv("ndt_runstate",ListenNDTRunstate);
+
+
+
+    return a.exec();
+}

+ 1882 - 0
src/detection/detection_ndt_pclomp/ndt_matching.cpp

@@ -0,0 +1,1882 @@
+/*
+ * Copyright 2015-2019 Autoware Foundation. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ Localization program using Normal Distributions Transform
+
+ Yuki KITSUKAWA
+ */
+
+#include <pthread.h>
+#include <chrono>
+#include <fstream>
+#include <iostream>
+#include <memory>
+#include <sstream>
+#include <string>
+
+#include <thread>
+
+#include <boost/filesystem.hpp>
+
+
+#include <tf/tf.h>
+#include <tf/transform_broadcaster.h>
+#include <tf/transform_datatypes.h>
+#include <tf/transform_listener.h>
+
+#include <pcl/io/io.h>
+#include <pcl/io/pcd_io.h>
+#include <pcl/point_types.h>
+#include <pcl_conversions/pcl_conversions.h>
+
+//#include <ndt_cpu/NormalDistributionsTransform.h>
+#include <pcl/registration/ndt.h>
+
+
+#include <pcl/io/pcd_io.h>
+#include <pcl/point_types.h>
+#include <pcl/point_cloud.h>
+#include <pcl/registration/ndt.h>
+#include <pcl/registration/gicp.h>
+#include <pcl/filters/voxel_grid.h>
+//#include <pcl/visualization/pcl_visualizer.h>
+
+#include <pclomp/ndt_omp.h>
+
+#include <QFile>
+
+#include <ndt_gpu/NormalDistributionsTransform.h>
+#include "ndtpos.pb.h"
+#include "ndtgpspos.pb.h"
+
+#ifdef USE_PCL_OPENMP
+#include <pcl_omp_registration/ndt.h>
+#endif
+
+#include <pcl_ros/point_cloud.h>
+#include <pcl_ros/transforms.h>
+
+
+
+#include "ndt_matching.h"
+
+#include "modulecomm.h"
+
+
+
+#include "gnss_coordinate_convert.h"
+
+//the following are UBUNTU/LINUX ONLY terminal color codes.
+#define RESET   "\033[0m"
+#define BLACK   "\033[30m"      /* Black */
+#define RED     "\033[31m"      /* Red */
+#define GREEN   "\033[32m"      /* Green */
+#define YELLOW  "\033[33m"      /* Yellow */
+#define BLUE    "\033[34m"      /* Blue */
+#define MAGENTA "\033[35m"      /* Magenta */
+#define CYAN    "\033[36m"      /* Cyan */
+#define WHITE   "\033[37m"      /* White */
+#define BOLDBLACK   "\033[1m\033[30m"      /* Bold Black */
+#define BOLDRED     "\033[1m\033[31m"      /* Bold Red */
+#define BOLDGREEN   "\033[1m\033[32m"      /* Bold Green */
+#define BOLDYELLOW  "\033[1m\033[33m"      /* Bold Yellow */
+#define BOLDBLUE    "\033[1m\033[34m"      /* Bold Blue */
+#define BOLDMAGENTA "\033[1m\033[35m"      /* Bold Magenta */
+#define BOLDCYAN    "\033[1m\033[36m"      /* Bold Cyan */
+#define BOLDWHITE   "\033[1m\033[37m"      /* Bold White */
+
+
+#define PREDICT_POSE_THRESHOLD 0.5
+
+#define Wa 0.4
+#define Wb 0.3
+#define Wc 0.3
+
+#include "ivfault.h"
+#include "ivlog.h"
+
+extern iv::Ivfault *gfault ;
+extern iv::Ivlog *givlog;
+
+static int gindex = 0;
+iv::lidar_pose glidar_pose;
+
+void * gpset;
+void * gppose;
+
+static bool g_hasmap = false;
+
+enum class MethodType
+{
+  PCL_GENERIC = 0,
+  PCL_ANH = 1,
+  PCL_ANH_GPU = 2,
+  PCL_OPENMP = 3,
+};
+static MethodType _method_type = MethodType::PCL_GENERIC;
+
+static pose initial_pose, predict_pose, predict_pose_imu, predict_pose_odom, predict_pose_imu_odom, previous_pose,
+    ndt_pose, current_pose, current_pose_imu, current_pose_odom, current_pose_imu_odom, localizer_pose;
+
+static double offset_x, offset_y, offset_z, offset_yaw;  // current_pos - previous_pose
+static double offset_imu_x, offset_imu_y, offset_imu_z, offset_imu_roll, offset_imu_pitch, offset_imu_yaw;
+static double offset_odom_x, offset_odom_y, offset_odom_z, offset_odom_roll, offset_odom_pitch, offset_odom_yaw;
+static double offset_imu_odom_x, offset_imu_odom_y, offset_imu_odom_z, offset_imu_odom_roll, offset_imu_odom_pitch,
+    offset_imu_odom_yaw;
+
+// Can't load if typed "pcl::PointCloud<pcl::PointXYZRGB> map, add;"
+static pcl::PointCloud<pcl::PointXYZ> map, add;
+
+// If the map is loaded, map_loaded will be 1.
+static int map_loaded = 0;
+static int _use_gnss = 1;
+static int init_pos_set = 0;
+
+static pcl::NormalDistributionsTransform<pcl::PointXYZ, pcl::PointXYZ> ndt;
+//static cpu::NormalDistributionsTransform<pcl::PointXYZ, pcl::PointXYZ> anh_ndt;
+#ifdef CUDA_FOUND
+static std::shared_ptr<gpu::GNormalDistributionsTransform> anh_gpu_ndt_ptr =
+    std::make_shared<gpu::GNormalDistributionsTransform>();
+#endif
+#ifdef USE_PCL_OPENMP
+static pcl_omp::NormalDistributionsTransform<pcl::PointXYZ, pcl::PointXYZ> omp_ndt;
+#endif
+
+pclomp::NormalDistributionsTransform<pcl::PointXYZ, pcl::PointXYZ>::Ptr ndt_omp(new pclomp::NormalDistributionsTransform<pcl::PointXYZ, pcl::PointXYZ>());
+
+// Default values
+static int max_iter = 30;        // Maximum iterations
+static float ndt_res = 1.0;      // Resolution
+static double step_size = 0.1;   // Step size
+static double trans_eps = 0.01;  // Transformation epsilon
+
+
+static double exe_time = 0.0;
+static bool has_converged;
+static int iteration = 0;
+static double fitness_score = 0.0;
+static double trans_probability = 0.0;
+
+static double diff = 0.0;
+static double diff_x = 0.0, diff_y = 0.0, diff_z = 0.0, diff_yaw;
+
+static double current_velocity = 0.0, previous_velocity = 0.0, previous_previous_velocity = 0.0;  // [m/s]
+static double current_velocity_x = 0.0, previous_velocity_x = 0.0;
+static double current_velocity_y = 0.0, previous_velocity_y = 0.0;
+static double current_velocity_z = 0.0, previous_velocity_z = 0.0;
+// static double current_velocity_yaw = 0.0, previous_velocity_yaw = 0.0;
+static double current_velocity_smooth = 0.0;
+
+static double current_velocity_imu_x = 0.0;
+static double current_velocity_imu_y = 0.0;
+static double current_velocity_imu_z = 0.0;
+
+static double current_accel = 0.0, previous_accel = 0.0;  // [m/s^2]
+static double current_accel_x = 0.0;
+static double current_accel_y = 0.0;
+static double current_accel_z = 0.0;
+// static double current_accel_yaw = 0.0;
+
+static double angular_velocity = 0.0;
+
+static int use_predict_pose = 0;
+
+
+static std::chrono::time_point<std::chrono::system_clock> matching_start, matching_end;
+
+
+static int _queue_size = 1000;
+
+
+static double predict_pose_error = 0.0;
+
+static double _tf_x, _tf_y, _tf_z, _tf_roll, _tf_pitch, _tf_yaw;
+static Eigen::Matrix4f tf_btol;
+
+static std::string _localizer = "velodyne";
+static std::string _offset = "linear";  // linear, zero, quadratic
+
+
+
+static bool _get_height = false;
+static bool _use_local_transform = false;
+static bool _use_imu = false;
+static bool _use_odom = false;
+static bool _imu_upside_down = false;
+static bool _output_log_data = false;
+
+static std::string _imu_topic = "/imu_raw";
+
+static std::ofstream ofs;
+static std::string filename;
+
+//static sensor_msgs::Imu imu;
+//static nav_msgs::Odometry odom;
+
+// static tf::TransformListener local_transform_listener;
+static tf::StampedTransform local_transform;
+
+static unsigned int points_map_num = 0;
+
+pthread_mutex_t mutex;
+
+pthread_mutex_t mutex_read;
+
+pthread_mutex_t mutex_pose;
+
+bool gbNeedGPSUpdateNDT = false;
+
+pcl::PointCloud<pcl::PointXYZ>::Ptr local_map_ptr;
+pcl::PointCloud<pcl::PointXYZ>::Ptr global_map_ptr;
+
+pose glastmappose;
+
+static double imu_angular_velocity_x=0;
+static double imu_angular_velocity_y=0;
+static double imu_angular_velocity_z=0;
+static double imu_linear_acceleration_x=0;
+static double imu_linear_acceleration_y=0;
+static double imu_linear_acceleration_z =0;
+
+extern QFile * gpFileLastPos;//Save Last Positin
+static bool gbFileNDTUpdate;
+
+extern double garm_x ;
+extern double garm_y ;
+
+#include <QDateTime>
+
+//cv::Mat gmatimage;
+void ListenPoseSet(const char * strdata,const unsigned int nSize,const unsigned int index,const QDateTime * dt,const char * strmemname)
+{
+    iv::lidar::ndtpos pos;
+
+    if(false == pos.ParseFromArray(strdata,nSize))
+    {
+        std::cout<<" ndtpos parse error."<<std::endl;
+        return;
+    }
+
+    SetCurPose(pos.pose_x(),pos.pose_y(),pos.pose_yaw(),pos.pose_z(),pos.pose_pitch(),pos.pose_roll());
+
+
+}
+
+static void SetLocalMap()
+{
+    pcl::PointCloud<pcl::PointXYZ>::Ptr local_ptr(new pcl::PointCloud<pcl::PointXYZ>());
+
+    int nSize = global_map_ptr->size();
+
+    int i;
+    for(i=0;i<nSize;i++)
+    {
+        pcl::PointXYZ xp = global_map_ptr->at(i);
+        if(sqrt(pow(xp.x - current_pose.x,2)+pow(xp.y-current_pose.y,2)+pow(xp.z-current_pose.z,2))<30)
+        {
+            local_ptr->push_back(xp);
+        }
+    }
+
+    glastmappose = current_pose;
+
+    local_map_ptr = local_ptr;
+    std::cout<<"current map size is "<<local_map_ptr->size()<<std::endl;
+}
+
+static bool gbNDTRun = true;
+
+static bool gbGFRun = true;
+static bool gbLMRun= true;
+static std::thread * gpmapthread, * gpgpsfixthread,*gpsavelastthread;
+static bool gbNeedUpdate = false;
+static std::shared_ptr<gpu::GNormalDistributionsTransform> glocal_ndt;
+
+extern iv::gpspos glastndtgpspos;
+extern iv::gpspos gcurndtgpspos;
+extern iv::gpspos gcurgpsgpspos;
+extern bool gbGPSFix;
+extern int64_t gnLastGPSTime;
+
+static bool gbgpsupdatendt = false;
+
+static int gusemapindex = -1;
+static int gcurmapindex = -1;
+
+extern std::vector<iv::ndtmaptrace> gvector_trace;
+
+extern void * gpa,* gpb ,* gpc, * gpd;
+
+iv::gpspos PoseToGPS(iv::gpspos xori,pose xpose)
+{
+    double x_o,y_o;
+    GaussProjCal(xori.lon,xori.lat,&x_o,&y_o);
+    double lon,lat;
+    double hdg_o = (90 - xori.heading)*M_PI/180.0;
+    double rel_x = xpose.x * cos(hdg_o) - xpose.y * sin(hdg_o);
+    double rel_y = xpose.x * sin(hdg_o) + xpose.y * cos(hdg_o);
+    GaussProjInvCal(x_o + rel_x,y_o + rel_y,&lon,&lat);
+    double hdg_c = hdg_o + xpose.yaw;
+
+    hdg_c = M_PI/2.0 - hdg_c;
+    double heading = hdg_c * 180.0/M_PI;
+    while(heading < 0)heading = heading + 360;
+    while(heading >360)heading = heading - 360;
+    iv::gpspos xgpspos;
+    xgpspos.lon = lon;
+    xgpspos.lat = lat;
+    xgpspos.height = xpose.z + xori.height;
+    xgpspos.heading = heading;
+    xgpspos.pitch = xpose.pitch + xori.pitch;
+    xgpspos.roll  = xpose.roll + xori.roll;
+    xgpspos.ve = xpose.vx * cos(hdg_o) - xpose.vy * sin(hdg_o);
+    xgpspos.vn = xpose.vx * sin(hdg_o) + xpose.vy * cos(hdg_o);
+
+    if((garm_x != 0)||(garm_y != 0)) //arm use to Convert pos to GPS Posision
+    {
+        GaussProjCal(xgpspos.lon,xgpspos.lat,&x_o,&y_o);
+        hdg_o = (90 - xgpspos.heading)*M_PI/180.0;
+        rel_x = garm_x *(-1) * cos(hdg_o) - garm_y *(-1) * sin(hdg_o);
+        rel_y = garm_x *(-1) * sin(hdg_o) + garm_y *(-1) * cos(hdg_o);
+        GaussProjInvCal(x_o + rel_x,y_o + rel_y,&lon,&lat);
+        xgpspos.lon = lon;
+        xgpspos.lat = lat;
+    }
+    return xgpspos;
+
+}
+
+pose CalcPose(iv::gpspos xori, iv::gpspos xcur)
+{
+    double lon,lat;
+    if((garm_x != 0)||(garm_y != 0)) //arm use to Convert pos to GPS Posision
+    {
+        double x_o,y_o;
+        GaussProjCal(xcur.lon,xcur.lat,&x_o,&y_o);
+        double hdg_o = (90 - xcur.heading)*M_PI/180.0;
+        double rel_x = garm_x *(1) * cos(hdg_o) - garm_y *(1) * sin(hdg_o);
+        double rel_y = garm_x *(1) * sin(hdg_o) + garm_y *(1) * cos(hdg_o);
+        GaussProjInvCal(x_o + rel_x,y_o + rel_y,&lon,&lat);
+        xcur.lon = lon;
+        xcur.lat = lat;
+    }
+    double x_o,y_o,hdg_o;
+    double x_c,y_c,hdg_c;
+    GaussProjCal(xori.lon,xori.lat,&x_o,&y_o);
+    GaussProjCal(xcur.lon,xcur.lat,&x_c,&y_c);
+    double dis = sqrt(pow(x_c-x_o,2)+ pow(y_c-y_o,2));
+    double rel_x0,rel_y0;
+    rel_x0 = x_c -x_o;
+    rel_y0 = y_c -y_o;
+    double rel_x,rel_y;
+
+    hdg_o = (90 - xori.heading)*M_PI/180.0;
+    hdg_c = (90 - xcur.heading)*M_PI/180.0;
+    rel_x = rel_x0 * cos(-hdg_o) - rel_y0 * sin(-hdg_o);
+    rel_y = rel_x0 * sin(-hdg_o) + rel_y0 * cos(-hdg_o);
+
+
+    double rel_hdg;
+    rel_hdg = hdg_c - hdg_o;
+    pose posex;
+    posex.x = rel_x;
+    posex.y = rel_y;
+    posex.z = xcur.height - xori.height;
+    posex.pitch = xcur.pitch - xori.pitch;
+    posex.roll = xcur.roll - xori.roll;
+    posex.yaw = rel_hdg;
+
+    posex.vx = xcur.ve * cos(-hdg_o) - xcur.vn * sin(-hdg_o);
+    posex.vy = xcur.ve * sin(-hdg_o) + xcur.vn * cos(-hdg_o);
+
+    return posex;
+
+}
+
+static double gettracedis(int index,pose posex)
+{
+    double fdismin = 1000000.;
+    double zdiff = 0;
+    int neari;
+    if(index < 0)return 1000000.0;
+    if(index>= gvector_trace.size())
+    {
+        return 1000000.0;
+    }
+    int i;
+    std::vector<iv::ndttracepoint> vt = gvector_trace.at(index).mvector_trace;
+    int nsize = vt.size();
+//    std::cout<<"size is "<<nsize<<std::endl;
+    for(i=0;i<nsize;i++)
+    {
+        double fdis = sqrt(pow(posex.x - vt.at(i).x,2) + pow(posex.y - vt.at(i).y,2));
+//        std::cout<<"fdis is "<<fdis<<std::endl;
+        if((fdis < fdismin) &&(fabs(posex.z - vt.at(i).z)<5))
+        {
+            fdismin = fdis;
+            neari = i;
+            zdiff = fabs(posex.z - vt.at(i).z);
+        }
+    }
+
+    return fdismin;
+}
+
+static void getmindistrace(int & index, double & ftracedis)
+{
+    double fdismin = 1000000.0;
+    int xindexmin = -1;
+    int i;
+    int nsize = gvector_trace.size();
+
+
+    for(i=0;i<nsize;i++)
+    {
+        pose posex = CalcPose(gvector_trace[i].mndtorigin,gcurndtgpspos);
+        double fdis = gettracedis(i,posex);
+        if(fdis<fdismin)
+        {
+            fdismin = fdis;
+            xindexmin = i;
+        }
+    }
+
+    if(xindexmin != -1)
+    {
+        ftracedis = fdismin;
+        index = xindexmin;
+    }
+    else
+    {
+        index = -1;
+        ftracedis = 100000.0;
+    }
+}
+
+std::vector<std::shared_ptr<gpu::GNormalDistributionsTransform>> gvectoranh;
+#include <QTime>
+
+extern void pausecomm();
+extern void continuecomm();
+static void UseMap(int index)
+{
+    pcl::PointCloud<pcl::PointXYZ>::Ptr xmap;
+
+    xmap = boost::shared_ptr<pcl::PointCloud<pcl::PointXYZ>>(new pcl::PointCloud<pcl::PointXYZ>);
+
+
+    QTime xTime;
+    xTime.start();
+    if(0 == pcl::io::loadPCDFile(gvector_trace.at(index).mstrpcdpath.data(),*xmap))
+    {
+
+    }
+    else
+    {
+        std::cout<<"map size is 0"<<std::endl;
+        return;
+    }
+
+    qDebug("load map time is %d",xTime.elapsed());
+
+    pausecomm();   //Because cuda can pause process, avoid mutex comm, stop this process's comm
+    std::this_thread::sleep_for(std::chrono::milliseconds(100));
+    pcl::PointCloud<pcl::PointXYZ>::Ptr map_ptr(new pcl::PointCloud<pcl::PointXYZ>(*xmap));
+
+    std::shared_ptr<gpu::GNormalDistributionsTransform> new_anh_gpu_ndt_ptr =
+        std::make_shared<gpu::GNormalDistributionsTransform>();
+    new_anh_gpu_ndt_ptr->setResolution(ndt_res);
+    new_anh_gpu_ndt_ptr->setInputTarget(map_ptr);
+    new_anh_gpu_ndt_ptr->setMaximumIterations(max_iter);
+    new_anh_gpu_ndt_ptr->setStepSize(step_size);
+    new_anh_gpu_ndt_ptr->setTransformationEpsilon(trans_eps);
+
+    pcl::PointCloud<pcl::PointXYZ>::Ptr dummy_scan_ptr(new pcl::PointCloud<pcl::PointXYZ>());
+    pcl::PointXYZ dummy_point;
+    dummy_scan_ptr->push_back(dummy_point);
+    new_anh_gpu_ndt_ptr->setInputSource(dummy_scan_ptr);
+
+    new_anh_gpu_ndt_ptr->align(Eigen::Matrix4f::Identity());
+
+    pthread_mutex_lock(&mutex);
+    glocal_ndt = new_anh_gpu_ndt_ptr;
+    pthread_mutex_unlock(&mutex);
+
+    continuecomm();
+//    gvectoranh.push_back(new_anh_gpu_ndt_ptr);
+
+    qDebug("ndt load finish time is %d",xTime.elapsed());
+    gbNeedUpdate = true;
+    gcurmapindex = index;
+
+    ndt_omp->setResolution(ndt_res);
+    ndt_omp->setInputTarget(map_ptr);
+    ndt_omp->setMaximumIterations(max_iter);
+    ndt_omp->setStepSize(step_size);
+    ndt_omp->setTransformationEpsilon(trans_eps);
+    std::cout<<" ndt_omp init success. "<<std::endl;
+
+
+    std::cout<<"change map, index is "<<index<<std::endl;
+}
+
+void LocalMapUpdate(int n)
+{
+    std::cout<<"LocalMap n is "<<n<<std::endl;
+
+    int index;
+    double ftracedis;
+    int ncurindex = -1;
+
+    int i;
+    for(i=0;i<gvector_trace.size();i++)
+    {
+//        UseMap(i);
+//        std::this_thread::sleep_for(std::chrono::milliseconds(1000));
+    }
+    while(gbLMRun)
+    {
+
+        getmindistrace(index,ftracedis);
+
+        if(g_hasmap == false)
+        {
+
+            if(index >= 0)
+            {
+                if(ftracedis < 30)
+                {
+                    UseMap(index);
+                    ncurindex = index;
+                    g_hasmap = true;
+                }
+            }
+        }
+        else
+        {
+           if(index != ncurindex)
+           {
+                pose posex = CalcPose(gvector_trace[ncurindex].mndtorigin,gcurndtgpspos);
+               double fnowdis = gettracedis(ncurindex,posex);
+               if((fnowdis - ftracedis)>5)
+               {
+                   UseMap(index);
+                   ncurindex = index;
+                   g_hasmap = true;
+               }
+           }
+        }
+
+        if(ftracedis > 50)
+        {
+            std::cout<<std::chrono::system_clock::now().time_since_epoch().count()/1000000<<" trace dis is "<<ftracedis<<std::endl;
+            std::cout<<" Out Range."<<std::endl;
+            g_hasmap = false;
+        }
+
+
+        std::this_thread::sleep_for(std::chrono::milliseconds(100));
+    }
+}
+
+
+void SaveMonitor(bool * pbRun)
+{
+    iv::gpspos xoldgpspos;
+    xoldgpspos.lat = 39;
+    xoldgpspos.lon = 117;
+    while(*pbRun)
+    {
+       if(gpFileLastPos != 0)
+       {
+           if(gbFileNDTUpdate)
+           {
+               if((fabs(xoldgpspos.lat - gcurndtgpspos.lat)>0.0000005)||((fabs(xoldgpspos.lon - gcurndtgpspos.lon)>0.0000005)))
+               {
+                   xoldgpspos = gcurndtgpspos;
+                   char str[1000];
+                   snprintf(str,1000,"%11.7f\t%11.7f\t%9.3f\t%6.2f\t%6.2f\t%6.2f\n                ",
+                            xoldgpspos.lon,xoldgpspos.lat,xoldgpspos.height,
+                            xoldgpspos.heading,xoldgpspos.pitch,xoldgpspos.roll);
+                   gpFileLastPos->seek(0);
+                   gpFileLastPos->write(str,strnlen(str,1000));
+                   gpFileLastPos->flush();
+               }
+               gbFileNDTUpdate = false;
+           }
+       }
+       std::this_thread::sleep_for(std::chrono::milliseconds(100));
+    }
+}
+
+void GPSPosMonitor(bool * pbRun)
+{
+    if(gbGPSFix == false)
+    {
+        gcurndtgpspos =  glastndtgpspos;
+    }
+    while(*pbRun)
+    {
+        int64_t nmsnow = std::chrono::system_clock::now().time_since_epoch().count()/1000000;
+        if(abs(nmsnow - gnLastGPSTime)>1000)
+        {
+            std::this_thread::sleep_for(std::chrono::milliseconds(10));
+            continue;
+        }
+        if(gbGPSFix == true)
+        {
+            double x0,y0;
+            double x,y;
+            GaussProjCal(gcurndtgpspos.lon,gcurndtgpspos.lat,&x0,&y0);
+            GaussProjCal(gcurgpsgpspos.lon,gcurgpsgpspos.lat,&x,&y);
+            double dis = sqrt(pow(x-x0,2)+ pow(y-y0,2));
+            double headdiff = fabs(gcurgpsgpspos.heading - gcurndtgpspos.heading);
+            double zdiff = fabs(gcurgpsgpspos.height - gcurndtgpspos.height);
+            if((dis> 10)||((headdiff>10)&&(headdiff<350))||(zdiff>5)||(g_hasmap == false)||(gbNeedGPSUpdateNDT))
+            {
+                givlog->info("NDTFIX","gps fix ndt pos. dis:%f headdiff:%f zdiff:%f map %d ndt %d",
+                             dis,headdiff,zdiff,g_hasmap,gbNeedGPSUpdateNDT);
+//                gcurndtgpspos = gcurgpsgpspos;
+                  gbgpsupdatendt = true;
+//                current_velocity_x = 0;
+//                current_velocity_y = 0;
+//                current_velocity_z = 0;
+//                angular_velocity = 0;
+//                gbNeedGPSUpdateNDT = false;
+                if(g_hasmap == true)
+                {
+                    int index = gcurmapindex;
+                    if((index >=0)&&(index < gvector_trace.size()))
+                    {
+
+                    }
+                }
+            }
+//            else
+//            {
+//                if(gbgpsupdatendt)
+//                {
+//                    gcurndtgpspos = gcurgpsgpspos;
+//                    gbgpsupdatendt = true;
+//                    current_velocity_x = 0;
+//                    current_velocity_y = 0;
+//                    current_velocity_z = 0;
+//                    angular_velocity = 0;
+//                }
+//            }
+
+        }
+
+        std::this_thread::sleep_for(std::chrono::milliseconds(10));
+    }
+}
+
+
+void ndt_match_Init(pcl::PointCloud<pcl::PointXYZ>::Ptr mappcd)
+{
+    _tf_x = 0;
+    _tf_y = 0;
+    _tf_z = 0;
+    _tf_roll = 0;
+    _tf_pitch = 0;
+    _tf_yaw = 0;
+
+    Eigen::Translation3f tl_btol(_tf_x, _tf_y, _tf_z);                 // tl: translation
+    Eigen::AngleAxisf rot_x_btol(_tf_roll, Eigen::Vector3f::UnitX());  // rot: rotation
+    Eigen::AngleAxisf rot_y_btol(_tf_pitch, Eigen::Vector3f::UnitY());
+    Eigen::AngleAxisf rot_z_btol(_tf_yaw, Eigen::Vector3f::UnitZ());
+    tf_btol = (tl_btol * rot_z_btol * rot_y_btol * rot_x_btol).matrix();
+
+
+
+    init_pos_set = 1;
+
+    initial_pose.x = 0;
+    initial_pose.y = 0;
+    initial_pose.z = 0;
+    initial_pose.roll = 0;
+    initial_pose.pitch = 0;
+    initial_pose.yaw = 0;
+
+    localizer_pose.x = initial_pose.x;
+    localizer_pose.y = initial_pose.y;
+    localizer_pose.z = initial_pose.z;
+    localizer_pose.roll = initial_pose.roll;
+    localizer_pose.pitch = initial_pose.pitch;
+    localizer_pose.yaw = initial_pose.yaw;
+
+    previous_pose.x = initial_pose.x;
+    previous_pose.y = initial_pose.y;
+    previous_pose.z = initial_pose.z;
+    previous_pose.roll = initial_pose.roll;
+    previous_pose.pitch = initial_pose.pitch;
+    previous_pose.yaw = initial_pose.yaw;
+
+    current_pose.x = initial_pose.x;
+    current_pose.y = initial_pose.y;
+    current_pose.z = initial_pose.z;
+    current_pose.roll = initial_pose.roll;
+    current_pose.pitch = initial_pose.pitch;
+    current_pose.yaw = initial_pose.yaw;
+
+    current_velocity = 0;
+    current_velocity_x = 0;
+    current_velocity_y = 0;
+    current_velocity_z = 0;
+    angular_velocity = 0;
+
+
+//    ndt.setResolution(ndt_res);
+//    ndt.setStepSize(step_size);
+//    ndt.setTransformationEpsilon(trans_eps);
+//    ndt.setMaximumIterations(max_iter);
+
+  //  anh_gpu_ndt_ptr->setResolution();
+
+    anh_gpu_ndt_ptr->setResolution(ndt_res);
+    anh_gpu_ndt_ptr->setStepSize(step_size);
+    anh_gpu_ndt_ptr->setTransformationEpsilon(trans_eps);
+    anh_gpu_ndt_ptr->setMaximumIterations(max_iter);
+
+
+//    ndt.align(*output_cloud, Eigen::Matrix4f::Identity());
+
+
+    pcl::PointCloud<pcl::PointXYZ>::Ptr map_ptr(new pcl::PointCloud<pcl::PointXYZ>(*mappcd));
+
+
+    std::cout<<"map size is"<<mappcd->size()<<std::endl;
+    std::cout<<"ptr size is "<<map_ptr->size()<<std::endl;;
+
+    global_map_ptr = map_ptr;
+
+ //   SetLocalMap();
+
+
+//    pcl::PointCloud<pcl::PointXYZ>::Ptr filtered_scan(new pcl::PointCloud<pcl::PointXYZ>());
+
+//    pcl::VoxelGrid<pcl::PointXYZ> voxel_grid_filter;
+//    voxel_grid_filter.setLeafSize(1.0,1.0,1.0);
+//    voxel_grid_filter.setInputCloud(map_ptr);
+//    voxel_grid_filter.filter(*filtered_scan);
+
+//    std::cout<<"filter map  size is "<<filtered_scan->size()<<std::endl;;
+
+ //   ndt.setInputTarget(map_ptr);
+//    pcl::PointCloud<pcl::PointXYZ>::Ptr output_cloud(new pcl::PointCloud<pcl::PointXYZ>);
+//    ndt.align(*output_cloud, Eigen::Matrix4f::Identity());
+
+
+    gpset = iv::modulecomm::RegisterRecv("ndtposeset",ListenPoseSet);
+    gppose = iv::modulecomm::RegisterSend("ndtpose",10000,3);
+
+    if(map_ptr->size() == 0)
+    {
+        gbNDTRun = false;
+        return;
+    }
+
+    std::shared_ptr<gpu::GNormalDistributionsTransform> new_anh_gpu_ndt_ptr =
+        std::make_shared<gpu::GNormalDistributionsTransform>();
+    new_anh_gpu_ndt_ptr->setResolution(ndt_res);
+    new_anh_gpu_ndt_ptr->setInputTarget(map_ptr);
+    new_anh_gpu_ndt_ptr->setMaximumIterations(max_iter);
+    new_anh_gpu_ndt_ptr->setStepSize(step_size);
+    new_anh_gpu_ndt_ptr->setTransformationEpsilon(trans_eps);
+
+    pcl::PointCloud<pcl::PointXYZ>::Ptr dummy_scan_ptr(new pcl::PointCloud<pcl::PointXYZ>());
+    pcl::PointXYZ dummy_point;
+    dummy_scan_ptr->push_back(dummy_point);
+    new_anh_gpu_ndt_ptr->setInputSource(dummy_scan_ptr);
+
+    new_anh_gpu_ndt_ptr->align(Eigen::Matrix4f::Identity());
+
+    pthread_mutex_lock(&mutex);
+    anh_gpu_ndt_ptr = new_anh_gpu_ndt_ptr;
+    pthread_mutex_unlock(&mutex);
+
+    ndt_omp->setResolution(ndt_res);
+    ndt_omp->setInputTarget(map_ptr);
+    ndt_omp->setMaximumIterations(max_iter);
+    ndt_omp->setStepSize(step_size);
+    ndt_omp->setTransformationEpsilon(trans_eps);
+    std::cout<<" ndt_omp init success. "<<std::endl;
+
+
+
+    if(map_ptr->size()>0)map_loaded = 1;
+
+ //   gpmapthread = new std::thread(LocalMapUpdate,1);
+
+
+
+
+}
+
+void ndt_match_Init_nomap()
+{
+    _tf_x = 0;
+    _tf_y = 0;
+    _tf_z = 0;
+    _tf_roll = 0;
+    _tf_pitch = 0;
+    _tf_yaw = 0;
+
+    Eigen::Translation3f tl_btol(_tf_x, _tf_y, _tf_z);                 // tl: translation
+    Eigen::AngleAxisf rot_x_btol(_tf_roll, Eigen::Vector3f::UnitX());  // rot: rotation
+    Eigen::AngleAxisf rot_y_btol(_tf_pitch, Eigen::Vector3f::UnitY());
+    Eigen::AngleAxisf rot_z_btol(_tf_yaw, Eigen::Vector3f::UnitZ());
+    tf_btol = (tl_btol * rot_z_btol * rot_y_btol * rot_x_btol).matrix();
+
+
+
+    init_pos_set = 1;
+
+
+
+
+    initial_pose.x = 0;
+    initial_pose.y = 0;
+    initial_pose.z = 0;
+    initial_pose.roll = 0;
+    initial_pose.pitch = 0;
+    initial_pose.yaw = 0;
+
+    localizer_pose.x = initial_pose.x;
+    localizer_pose.y = initial_pose.y;
+    localizer_pose.z = initial_pose.z;
+    localizer_pose.roll = initial_pose.roll;
+    localizer_pose.pitch = initial_pose.pitch;
+    localizer_pose.yaw = initial_pose.yaw;
+
+    previous_pose.x = initial_pose.x;
+    previous_pose.y = initial_pose.y;
+    previous_pose.z = initial_pose.z;
+    previous_pose.roll = initial_pose.roll;
+    previous_pose.pitch = initial_pose.pitch;
+    previous_pose.yaw = initial_pose.yaw;
+
+    current_pose.x = initial_pose.x;
+    current_pose.y = initial_pose.y;
+    current_pose.z = initial_pose.z;
+    current_pose.roll = initial_pose.roll;
+    current_pose.pitch = initial_pose.pitch;
+    current_pose.yaw = initial_pose.yaw;
+
+    current_velocity = 0;
+    current_velocity_x = 0;
+    current_velocity_y = 0;
+    current_velocity_z = 0;
+    angular_velocity = 0;
+
+    anh_gpu_ndt_ptr->setResolution(ndt_res);
+    anh_gpu_ndt_ptr->setStepSize(step_size);
+    anh_gpu_ndt_ptr->setTransformationEpsilon(trans_eps);
+    anh_gpu_ndt_ptr->setMaximumIterations(max_iter);
+
+    gpmapthread = new std::thread(LocalMapUpdate,1);
+    gbGFRun = true;
+    gpgpsfixthread = new std::thread(GPSPosMonitor,&gbGFRun);
+    gpsavelastthread = new std::thread(SaveMonitor,&gbGFRun);
+
+
+
+
+}
+
+
+void ndt_match_SetMap(pcl::PointCloud<pcl::PointXYZ>::Ptr mappcd)
+{
+    if(mappcd->size() == 0 )return;
+
+    pcl::PointCloud<pcl::PointXYZ>::Ptr map_ptr(new pcl::PointCloud<pcl::PointXYZ>(*mappcd));
+
+
+    std::shared_ptr<gpu::GNormalDistributionsTransform> new_anh_gpu_ndt_ptr =
+        std::make_shared<gpu::GNormalDistributionsTransform>();
+    new_anh_gpu_ndt_ptr->setResolution(ndt_res);
+    new_anh_gpu_ndt_ptr->setInputTarget(map_ptr);
+    new_anh_gpu_ndt_ptr->setMaximumIterations(max_iter);
+    new_anh_gpu_ndt_ptr->setStepSize(step_size);
+    new_anh_gpu_ndt_ptr->setTransformationEpsilon(trans_eps);
+
+    pcl::PointCloud<pcl::PointXYZ>::Ptr dummy_scan_ptr(new pcl::PointCloud<pcl::PointXYZ>());
+    pcl::PointXYZ dummy_point;
+    dummy_scan_ptr->push_back(dummy_point);
+    new_anh_gpu_ndt_ptr->setInputSource(dummy_scan_ptr);
+
+    new_anh_gpu_ndt_ptr->align(Eigen::Matrix4f::Identity());
+
+    pthread_mutex_lock(&mutex);
+    anh_gpu_ndt_ptr = new_anh_gpu_ndt_ptr;
+    pthread_mutex_unlock(&mutex);
+
+
+    ndt_omp->setResolution(ndt_res);
+    ndt_omp->setInputTarget(map_ptr);
+    ndt_omp->setMaximumIterations(max_iter);
+    ndt_omp->setStepSize(step_size);
+    ndt_omp->setTransformationEpsilon(trans_eps);
+
+    ndt_omp->setNumThreads(6);
+    ndt_omp->setNeighborhoodSearchMethod(pclomp::DIRECT7);
+    std::cout<<" ndt_omp init success. "<<std::endl;
+
+
+
+    if(map_ptr->size()>0)map_loaded = 1;
+}
+
+
+
+static double wrapToPm(double a_num, const double a_max)
+{
+  if (a_num >= a_max)
+  {
+    a_num -= 2.0 * a_max;
+  }
+  return a_num;
+}
+
+static double wrapToPmPi(const double a_angle_rad)
+{
+  return wrapToPm(a_angle_rad, M_PI);
+}
+
+static double calcDiffForRadian(const double lhs_rad, const double rhs_rad)
+{
+  double diff_rad = lhs_rad - rhs_rad;
+  if (diff_rad >= M_PI)
+    diff_rad = diff_rad - 2 * M_PI;
+  else if (diff_rad < -M_PI)
+    diff_rad = diff_rad + 2 * M_PI;
+  return diff_rad;
+}
+
+
+static unsigned int g_seq_old = 0;
+
+#include <QTime>
+
+
+
+bool isfirst = true;
+
+#include <QMutex>
+std::vector<iv::imudata> gvectorimu;
+QMutex gMuteximu;
+
+static void lidar_imu_calc(qint64 current_lidar_time)
+{
+    int nsize;
+
+    int i;
+    gMuteximu.lock();
+    nsize = gvectorimu.size();
+    for(i=0;i<nsize;i++)
+    {
+        iv::imudata ximudata = gvectorimu[i];
+        double ximu_angular_velocity_x,ximu_angular_velocity_y,ximu_angular_velocity_z;
+        double ximu_linear_acceleration_x,ximu_linear_acceleration_y,ximu_linear_acceleration_z;
+        if(current_lidar_time >  ximudata.imutime)
+        {
+            ximu_angular_velocity_x = ximudata.imu_angular_velocity_x;
+            ximu_angular_velocity_y = ximudata.imu_angular_velocity_y;
+            ximu_angular_velocity_z = ximudata.imu_angular_velocity_z;
+            ximu_linear_acceleration_x = ximudata.imu_linear_acceleration_x;
+            ximu_linear_acceleration_y = ximudata.imu_linear_acceleration_y;
+            ximu_linear_acceleration_z = ximudata.imu_linear_acceleration_z;
+            qint64 current_time_imu = ximudata.imutime;
+//            givlog->verbose("NDT","imu time is %ld",current_time_imu);
+            static qint64 previous_time_imu = current_time_imu;
+            double diff_time = (current_time_imu - previous_time_imu);
+            diff_time = diff_time/1000.0;
+
+            if(diff_time < 0)diff_time = 0.000001;
+            if(diff_time > 0.1)diff_time = 0.1;
+
+            if(current_time_imu < previous_time_imu)
+            {
+
+                std::cout<<"current time is old "<<current_time_imu<<std::endl;
+                previous_time_imu = current_time_imu;
+                continue;
+            }
+
+            double diff_imu_roll = ximu_angular_velocity_x * diff_time;
+            double diff_imu_pitch = ximu_angular_velocity_y * diff_time;
+            double diff_imu_yaw = ximu_angular_velocity_z * diff_time;
+
+            current_pose_imu.roll += diff_imu_roll;
+            current_pose_imu.pitch += diff_imu_pitch;
+            current_pose_imu.yaw += diff_imu_yaw;
+
+            double accX1 = ximu_linear_acceleration_x;
+            double accY1 = std::cos(current_pose_imu.roll) * ximu_linear_acceleration_y -
+                           std::sin(current_pose_imu.roll) * ximu_linear_acceleration_z;
+            double accZ1 = std::sin(current_pose_imu.roll) * ximu_linear_acceleration_y +
+                           std::cos(current_pose_imu.roll) * ximu_linear_acceleration_z;
+
+            double accX2 = std::sin(current_pose_imu.pitch) * accZ1 + std::cos(current_pose_imu.pitch) * accX1;
+            double accY2 = accY1;
+            double accZ2 = std::cos(current_pose_imu.pitch) * accZ1 - std::sin(current_pose_imu.pitch) * accX1;
+
+            double accX = std::cos(current_pose_imu.yaw) * accX2 - std::sin(current_pose_imu.yaw) * accY2;
+            double accY = std::sin(current_pose_imu.yaw) * accX2 + std::cos(current_pose_imu.yaw) * accY2;
+            double accZ = accZ2;
+
+            offset_imu_x += current_velocity_imu_x * diff_time + accX * diff_time * diff_time / 2.0;
+            offset_imu_y += current_velocity_imu_y * diff_time + accY * diff_time * diff_time / 2.0;
+            offset_imu_z += current_velocity_imu_z * diff_time + accZ * diff_time * diff_time / 2.0;
+
+            current_velocity_imu_x += accX * diff_time;
+            current_velocity_imu_y += accY * diff_time;
+            current_velocity_imu_z += accZ * diff_time;
+
+            offset_imu_roll += diff_imu_roll;
+            offset_imu_pitch += diff_imu_pitch;
+            offset_imu_yaw += diff_imu_yaw;
+
+          //  guess_pose_imu.x = previous_pose.x + offset_imu_x;
+          //  guess_pose_imu.y = previous_pose.y + offset_imu_y;
+          //  guess_pose_imu.z = previous_pose.z + offset_imu_z;
+          //  guess_pose_imu.roll = previous_pose.roll + offset_imu_roll;
+          //  guess_pose_imu.pitch = previous_pose.pitch + offset_imu_pitch;
+          //  guess_pose_imu.yaw = previous_pose.yaw + offset_imu_yaw;
+
+            predict_pose_imu.x = previous_pose.x + offset_imu_x;
+            predict_pose_imu.y = previous_pose.y + offset_imu_y;
+            predict_pose_imu.z = previous_pose.z + offset_imu_z;
+            predict_pose_imu.roll = previous_pose.roll + offset_imu_roll;
+            predict_pose_imu.pitch = previous_pose.pitch + offset_imu_pitch;
+            predict_pose_imu.yaw = previous_pose.yaw + offset_imu_yaw;
+
+            givlog->verbose("NDT","imu x:%f y:%f z:%f yaw:%f",offset_imu_x,
+                            offset_imu_y,offset_imu_z,offset_imu_yaw);
+
+            previous_time_imu = current_time_imu;
+
+
+        }
+        else
+        {
+            break;;
+        }
+
+    }
+
+    if(i>0)
+    {
+        gvectorimu.erase(gvectorimu.begin(),gvectorimu.begin()+i);
+    }
+
+    gMuteximu.unlock();
+
+//    for(i=0;i<gvectorimu.size();i++)
+//    {
+//        iv::imudata ximudata = gvectorimu[i];
+//        double ximu_angular_velocity_x,ximu_angular_velocity_y,ximu_angular_velocity_z;
+//        double ximu_linear_acceleration_x,ximu_linear_acceleration_y,ximu_linear_acceleration_z;
+//        if(current_lidar_time >  ximudata.imutime)
+//        {
+
+//            gvectorimu.erase(gvectorimu.begin())
+//        }
+//    }
+}
+
+
+static void imu_calc(qint64 current_time_imu)
+{
+
+  static qint64 previous_time_imu = current_time_imu;
+  double diff_time = (current_time_imu - previous_time_imu);
+  diff_time = diff_time/1000.0;
+
+  if(current_time_imu < previous_time_imu)
+  {
+      std::cout<<"current time is old "<<current_time_imu<<std::endl;
+      return;
+  }
+
+  double diff_imu_roll = imu_angular_velocity_x * diff_time;
+  double diff_imu_pitch = imu_angular_velocity_y * diff_time;
+  double diff_imu_yaw = imu_angular_velocity_z * diff_time;
+
+  current_pose_imu.roll += diff_imu_roll;
+  current_pose_imu.pitch += diff_imu_pitch;
+  current_pose_imu.yaw += diff_imu_yaw;
+
+  double accX1 = imu_linear_acceleration_x;
+  double accY1 = std::cos(current_pose_imu.roll) * imu_linear_acceleration_y -
+                 std::sin(current_pose_imu.roll) * imu_linear_acceleration_z;
+  double accZ1 = std::sin(current_pose_imu.roll) * imu_linear_acceleration_y +
+                 std::cos(current_pose_imu.roll) * imu_linear_acceleration_z;
+
+  double accX2 = std::sin(current_pose_imu.pitch) * accZ1 + std::cos(current_pose_imu.pitch) * accX1;
+  double accY2 = accY1;
+  double accZ2 = std::cos(current_pose_imu.pitch) * accZ1 - std::sin(current_pose_imu.pitch) * accX1;
+
+  double accX = std::cos(current_pose_imu.yaw) * accX2 - std::sin(current_pose_imu.yaw) * accY2;
+  double accY = std::sin(current_pose_imu.yaw) * accX2 + std::cos(current_pose_imu.yaw) * accY2;
+  double accZ = accZ2;
+
+  offset_imu_x += current_velocity_imu_x * diff_time + accX * diff_time * diff_time / 2.0;
+  offset_imu_y += current_velocity_imu_y * diff_time + accY * diff_time * diff_time / 2.0;
+  offset_imu_z += current_velocity_imu_z * diff_time + accZ * diff_time * diff_time / 2.0;
+
+  current_velocity_imu_x += accX * diff_time;
+  current_velocity_imu_y += accY * diff_time;
+  current_velocity_imu_z += accZ * diff_time;
+
+  offset_imu_roll += diff_imu_roll;
+  offset_imu_pitch += diff_imu_pitch;
+  offset_imu_yaw += diff_imu_yaw;
+
+//  guess_pose_imu.x = previous_pose.x + offset_imu_x;
+//  guess_pose_imu.y = previous_pose.y + offset_imu_y;
+//  guess_pose_imu.z = previous_pose.z + offset_imu_z;
+//  guess_pose_imu.roll = previous_pose.roll + offset_imu_roll;
+//  guess_pose_imu.pitch = previous_pose.pitch + offset_imu_pitch;
+//  guess_pose_imu.yaw = previous_pose.yaw + offset_imu_yaw;
+
+  predict_pose_imu.x = previous_pose.x + offset_imu_x;
+  predict_pose_imu.y = previous_pose.y + offset_imu_y;
+  predict_pose_imu.z = previous_pose.z + offset_imu_z;
+  predict_pose_imu.roll = previous_pose.roll + offset_imu_roll;
+  predict_pose_imu.pitch = previous_pose.pitch + offset_imu_pitch;
+  predict_pose_imu.yaw = previous_pose.yaw + offset_imu_yaw;
+
+  givlog->verbose("NDT","imu x:%f y:%f z:%f yaw:%f",offset_imu_x,
+                  offset_imu_y,offset_imu_z,offset_imu_yaw);
+
+  previous_time_imu = current_time_imu;
+}
+
+
+void imu_update(qint64 current_time_imu,double imu_roll,double imu_pitch,double imu_yaw,
+                       double acceleration_x,double acceleration_y,
+                       double acceleration_z)
+{
+  // std::cout << __func__ << std::endl;
+
+// double imu_angular_velocity_x,imu_angular_velocity_y,imu_angular_velocity_z;
+
+  static double previous_time_imu = current_time_imu;
+  double diff_time = (current_time_imu - previous_time_imu);
+  diff_time = diff_time/1000.0;
+
+
+  imu_roll = imu_roll * M_PI/180.0;
+  imu_pitch = imu_pitch * M_PI/180.0;
+  imu_yaw = (-1.0)*imu_yaw * M_PI/180.0;
+
+  imu_yaw = imu_yaw + 2.0*M_PI;
+
+//  imu_pitch = 0;
+//  imu_roll = 0;
+
+  imu_roll = wrapToPmPi(imu_roll);
+  imu_pitch = wrapToPmPi(imu_pitch);
+  imu_yaw = wrapToPmPi(imu_yaw);
+
+  static double previous_imu_roll = imu_roll, previous_imu_pitch = imu_pitch, previous_imu_yaw = imu_yaw;
+  const double diff_imu_roll = imu_roll - previous_imu_roll;
+
+  const double diff_imu_pitch = imu_pitch - previous_imu_pitch;
+
+  double diff_imu_yaw;
+  if (fabs(imu_yaw - previous_imu_yaw) > M_PI)
+  {
+    if (imu_yaw > 0)
+      diff_imu_yaw = (imu_yaw - previous_imu_yaw) - M_PI * 2;
+    else
+      diff_imu_yaw = -M_PI * 2 - (imu_yaw - previous_imu_yaw);
+  }
+  else
+    diff_imu_yaw = imu_yaw - previous_imu_yaw;
+
+
+  imu_linear_acceleration_x = acceleration_x;
+//  imu_linear_acceleration_y = acceleration_y;
+//  imu_linear_acceleration_z = acceleration_z;
+  imu_linear_acceleration_y = 0;
+  imu_linear_acceleration_z = 0;
+
+  if (diff_time != 0)
+  {
+    imu_angular_velocity_x = diff_imu_roll / diff_time;
+    imu_angular_velocity_y = diff_imu_pitch / diff_time;
+    imu_angular_velocity_z = diff_imu_yaw / diff_time;
+  }
+  else
+  {
+    imu_angular_velocity_x = 0;
+    imu_angular_velocity_y = 0;
+    imu_angular_velocity_z = 0;
+  }
+
+  iv::imudata ximudata;
+  ximudata.imutime = current_time_imu;
+  ximudata.imu_linear_acceleration_x = imu_linear_acceleration_x;
+  ximudata.imu_linear_acceleration_y = imu_linear_acceleration_y;
+  ximudata.imu_linear_acceleration_z = imu_linear_acceleration_z;
+  ximudata.imu_angular_velocity_x = imu_angular_velocity_x;
+  ximudata.imu_angular_velocity_y = imu_angular_velocity_y;
+  ximudata.imu_angular_velocity_z = imu_angular_velocity_z;
+
+  gMuteximu.lock();
+  gvectorimu.push_back(ximudata);
+  gMuteximu.unlock();
+//  imu_calc(current_time_imu);
+
+  previous_time_imu = current_time_imu;
+  previous_imu_roll = imu_roll;
+  previous_imu_pitch = imu_pitch;
+  previous_imu_yaw = imu_yaw;
+}
+
+
+
+void point_ndt(pcl::PointCloud<pcl::PointXYZ>::Ptr raw_scan)
+{
+
+    static bool bNeedUpdateVel = false; //if gps update ndt, need update velocity
+    qint64 current_scan_time = raw_scan->header.stamp;
+    static qint64 old_scan_time = current_scan_time;
+    if(gbNDTRun == false)return;
+
+    bool bNotChangev = false;
+
+    if(gbgpsupdatendt == true)
+    {
+
+        std::cout<<std::chrono::system_clock::now().time_since_epoch().count()/1000000<<" update ndt pos use gps pos. "<<std::endl;
+        gcurndtgpspos = gcurgpsgpspos;
+        gbgpsupdatendt = false;
+        gbNeedGPSUpdateNDT = false;
+        current_velocity_x = 0;
+        current_velocity_y = 0;
+        current_velocity_z = 0;
+        angular_velocity = 0;
+        bNeedUpdateVel = true;
+
+//
+//
+//        gcurndtgpspos = gcurgpsgpspos;
+//        current_pose = CalcPose(gvector_trace[gusemapindex].mndtorigin,gcurndtgpspos);
+//       predict_pose_for_ndt = current_pose;
+        current_velocity_imu_x = 0;
+        current_velocity_imu_y = 0;
+        current_velocity_imu_z = 0;
+        gMuteximu.lock();
+        gvectorimu.erase(gvectorimu.begin(),gvectorimu.begin() + gvectorimu.size());
+        gMuteximu.unlock();
+        bNotChangev = true;
+        return;
+    }
+//    gbgpsupdatendt = false;
+
+    if(g_hasmap == false)
+    {
+        return;
+    }
+    if(gbNeedUpdate)
+    {
+        pthread_mutex_lock(&mutex);
+//        ndt = glocal_ndt;
+        anh_gpu_ndt_ptr = glocal_ndt;
+        pthread_mutex_unlock(&mutex);
+        gusemapindex = gcurmapindex;
+        gbNeedUpdate = false;
+
+    }
+
+
+
+
+
+    previous_pose = CalcPose(gvector_trace[gusemapindex].mndtorigin,gcurndtgpspos);
+    if(bNeedUpdateVel == true)
+    {
+        bNeedUpdateVel = false;
+        current_velocity_x = previous_pose.vx;
+        current_velocity_y = previous_pose.vy;
+        current_velocity_imu_x = current_velocity_x;
+        current_velocity_imu_y = current_velocity_y;
+    }
+    givlog->verbose("previos pose is %f %f",previous_pose.x,previous_pose.y);
+
+//    if(map_loaded == 0)
+//    {
+//        std::cout<<BOLDRED<<" point_ndt not set map."<<RESET<<std::endl;
+//        return;
+//    }
+
+
+
+    pthread_mutex_lock(&mutex_pose);
+
+    QTime xTime;
+    xTime.start();
+//    std::cout<<"time is "<<xTime.elapsed()<<std::endl;
+    double voxel_leaf_size = 2.0;
+    double measurement_range = 200.0;
+
+    pcl::PointCloud<pcl::PointXYZ>::Ptr filtered_scan(new pcl::PointCloud<pcl::PointXYZ>());
+
+    pcl::VoxelGrid<pcl::PointXYZ> voxel_grid_filter;
+    voxel_grid_filter.setLeafSize(voxel_leaf_size, voxel_leaf_size, voxel_leaf_size);
+    voxel_grid_filter.setInputCloud(raw_scan);
+    voxel_grid_filter.filter(*filtered_scan);
+
+//    std::cout<<"voxed time is "<<xTime.elapsed()<<std::endl;
+
+//    std::cout<<" seq is "<<raw_scan->header.seq<<std::endl;
+  //  qDebug("seq = %d stamp = %d ",raw_scan->header.seq,raw_scan->header.stamp);
+  //  std::cout<<"raw scan size is "<<raw_scan->size()<<"  filtered scan size is "<<filtered_scan->size()<<std::endl;
+
+ //   return;
+    matching_start = std::chrono::system_clock::now();
+
+    pcl::PointCloud<pcl::PointXYZ>::Ptr filtered_scan_ptr(new pcl::PointCloud<pcl::PointXYZ>(*filtered_scan));
+
+    int scan_points_num = filtered_scan_ptr->size();
+
+    Eigen::Matrix4f t(Eigen::Matrix4f::Identity());   // base_link
+    Eigen::Matrix4f t2(Eigen::Matrix4f::Identity());  // localizer
+
+    std::chrono::time_point<std::chrono::system_clock> align_start, align_end, getFitnessScore_start,
+        getFitnessScore_end;
+    static double align_time, getFitnessScore_time = 0.0;
+
+//    std::cout<<"run hear"<<std::endl;
+    pthread_mutex_lock(&mutex);
+
+//    if (_method_type == MethodType::PCL_GENERIC)
+ //   ndt.setInputSource(filtered_scan_ptr);
+
+
+    ndt_omp->setInputSource(filtered_scan_ptr);
+
+    // Guess the initial gross estimation of the transformation
+//    double diff_time = (current_scan_time - previous_scan_time).toSec();
+
+
+    double diff_time = 0.0;
+
+    if(g_seq_old != 0)
+    {
+        if((raw_scan->header.seq - g_seq_old)>0)
+        {
+            diff_time = raw_scan->header.seq - g_seq_old;
+            diff_time = diff_time * 0.1;
+        }
+    }
+
+    g_seq_old = raw_scan->header.seq;
+
+    diff_time = current_scan_time -old_scan_time;
+    diff_time = diff_time/1000.0;
+    old_scan_time = current_scan_time;
+
+    if(diff_time<=0)diff_time = 0.1;
+    if(diff_time>1.0)diff_time = 0.1;
+
+//    std::cout<<"diff time is "<<diff_time<<std::endl;
+
+//    std::cout<<" diff time is "<<diff_time<<std::endl;
+
+//    diff_time = 0.0;
+
+//    if (_offset == "linear")
+//    {
+
+      if(diff_time<0.1)diff_time = 0.1;
+      offset_x = current_velocity_x * diff_time;
+      offset_y = current_velocity_y * diff_time;
+      offset_z = current_velocity_z * diff_time;
+      offset_yaw = angular_velocity * diff_time;
+//    }
+      predict_pose.x = previous_pose.x + offset_x;
+      predict_pose.y = previous_pose.y + offset_y;
+      predict_pose.z = previous_pose.z + offset_z;
+      predict_pose.roll = previous_pose.roll;
+      predict_pose.pitch = previous_pose.pitch;
+      predict_pose.yaw = previous_pose.yaw + offset_yaw;
+
+      pose predict_pose_for_ndt;
+
+
+      givlog->verbose("NDT","previous x:%f y:%f z:%f yaw:%f",previous_pose.x,
+                      previous_pose.y,previous_pose.z,previous_pose.yaw);
+//      if (_use_imu == true && _use_odom == true)
+//         imu_odom_calc(current_scan_time);
+       if (_use_imu == true && _use_odom == false)
+       {
+         lidar_imu_calc((current_scan_time-0));
+       }
+//       if (_use_imu == false && _use_odom == true)
+//         odom_calc(current_scan_time);
+
+//       if (_use_imu == true && _use_odom == true)
+//         predict_pose_for_ndt = predict_pose_imu_odom;
+//       else
+//           if (_use_imu == true && _use_odom == false)
+//         predict_pose_for_ndt = predict_pose_imu;
+//       else if (_use_imu == false && _use_odom == true)
+//         predict_pose_for_ndt = predict_pose_odom;
+//       else
+//         predict_pose_for_ndt = predict_pose;
+
+      if (_use_imu == true && _use_odom == false)
+      {
+         predict_pose_for_ndt = predict_pose_imu;
+ //         predict_pose_for_ndt = predict_pose;
+ //         predict_pose_for_ndt.yaw = predict_pose_imu.yaw;
+      }
+      else
+      {
+          predict_pose_for_ndt = predict_pose;
+      }
+
+      predict_pose_for_ndt = predict_pose; //not use predict_pose_imu in shelan.
+
+      if(fabs(predict_pose_for_ndt.x -previous_pose.x)>10)
+      {
+          predict_pose_for_ndt = previous_pose;
+      }
+
+      if(fabs(predict_pose_for_ndt.y -previous_pose.y)>10)
+      {
+          predict_pose_for_ndt = previous_pose;
+      }
+
+ //     predict_pose_for_ndt = predict_pose;
+
+
+
+
+      givlog->verbose("NDT","pre x:%f y:%f z:%f yaw:%f",predict_pose_for_ndt.x,
+                      predict_pose_for_ndt.y,predict_pose_for_ndt.z,predict_pose_for_ndt.yaw);
+//      predict_pose_for_ndt = predict_pose;
+
+      Eigen::Translation3f init_translation(predict_pose_for_ndt.x, predict_pose_for_ndt.y, predict_pose_for_ndt.z);
+      Eigen::AngleAxisf init_rotation_x(predict_pose_for_ndt.roll, Eigen::Vector3f::UnitX());
+      Eigen::AngleAxisf init_rotation_y(predict_pose_for_ndt.pitch, Eigen::Vector3f::UnitY());
+      Eigen::AngleAxisf init_rotation_z(predict_pose_for_ndt.yaw, Eigen::Vector3f::UnitZ());
+      Eigen::Matrix4f init_guess = (init_translation * init_rotation_z * init_rotation_y * init_rotation_x) * tf_btol;
+
+      pcl::PointCloud<pcl::PointXYZ>::Ptr output_cloud(new pcl::PointCloud<pcl::PointXYZ>);
+
+ //     std::cout<<"before ndt time is "<<xTime.elapsed()<<std::endl;
+//      std::cout<<"time is "<<xTime.elapsed()<<std::endl;
+      pcl::PointCloud<pcl::PointXYZ>::Ptr aligned(new pcl::PointCloud<pcl::PointXYZ>());
+      align_start = std::chrono::system_clock::now();
+      ndt_omp->align(*aligned,init_guess);
+      align_end = std::chrono::system_clock::now();
+
+      if(xTime.elapsed()<90)
+      std::cout<<GREEN<<"after ndt time is "<<xTime.elapsed()<<RESET<<std::endl;
+      else
+          std::cout<<RED<<"after ndt time is "<<xTime.elapsed()<<RESET<<std::endl;
+
+      has_converged = ndt_omp->hasConverged();
+
+      t = ndt_omp->getFinalTransformation();
+      iteration = ndt_omp->getFinalNumIteration();
+
+      getFitnessScore_start = std::chrono::system_clock::now();
+      fitness_score = ndt_omp->getFitnessScore();
+      getFitnessScore_end = std::chrono::system_clock::now();
+
+      trans_probability = ndt_omp->getTransformationProbability();
+
+      std::cout<<"score: "<<fitness_score<<" trans_probability:"<<trans_probability<<std::endl;
+  //    std::cout<<" scoure is "<<fitness_score<<std::endl;
+
+//      std::cout<<" iter is "<<iteration<<std::endl;
+
+      t2 = t * tf_btol.inverse();
+
+      getFitnessScore_time =
+          std::chrono::duration_cast<std::chrono::microseconds>(getFitnessScore_end - getFitnessScore_start).count() /
+          1000.0;
+
+      pthread_mutex_unlock(&mutex);
+
+      tf::Matrix3x3 mat_l;  // localizer
+      mat_l.setValue(static_cast<double>(t(0, 0)), static_cast<double>(t(0, 1)), static_cast<double>(t(0, 2)),
+                     static_cast<double>(t(1, 0)), static_cast<double>(t(1, 1)), static_cast<double>(t(1, 2)),
+                     static_cast<double>(t(2, 0)), static_cast<double>(t(2, 1)), static_cast<double>(t(2, 2)));
+
+      // Update localizer_pose
+      localizer_pose.x = t(0, 3);
+      localizer_pose.y = t(1, 3);
+      localizer_pose.z = t(2, 3);
+      mat_l.getRPY(localizer_pose.roll, localizer_pose.pitch, localizer_pose.yaw, 1);
+
+
+//     std::cout<<" local pose x is "<<localizer_pose.x<<std::endl;
+
+      tf::Matrix3x3 mat_b;  // base_link
+      mat_b.setValue(static_cast<double>(t2(0, 0)), static_cast<double>(t2(0, 1)), static_cast<double>(t2(0, 2)),
+                     static_cast<double>(t2(1, 0)), static_cast<double>(t2(1, 1)), static_cast<double>(t2(1, 2)),
+                     static_cast<double>(t2(2, 0)), static_cast<double>(t2(2, 1)), static_cast<double>(t2(2, 2)));
+
+      // Update ndt_pose
+      ndt_pose.x = t2(0, 3);
+      ndt_pose.y = t2(1, 3);
+      ndt_pose.z = t2(2, 3);
+
+//      ndt_pose.x = localizer_pose.x;
+//      ndt_pose.y = localizer_pose.y;
+//      ndt_pose.z = localizer_pose.z;
+      mat_b.getRPY(ndt_pose.roll, ndt_pose.pitch, ndt_pose.yaw, 1);
+
+
+      givlog->verbose("NDT","calc x:%f y:%f z:%f yaw:%f",ndt_pose.x,
+                      ndt_pose.y,ndt_pose.z,ndt_pose.yaw);
+
+
+      // Calculate the difference between ndt_pose and predict_pose
+      predict_pose_error = sqrt((ndt_pose.x - predict_pose_for_ndt.x) * (ndt_pose.x - predict_pose_for_ndt.x) +
+                                (ndt_pose.y - predict_pose_for_ndt.y) * (ndt_pose.y - predict_pose_for_ndt.y) +
+                                (ndt_pose.z - predict_pose_for_ndt.z) * (ndt_pose.z - predict_pose_for_ndt.z));
+
+
+ //     std::cout<<" pose error is "<<predict_pose_error<<std::endl;
+ //     std::cout<<"ndt pose x is "<<ndt_pose.x<<std::endl;
+      if (predict_pose_error <= (PREDICT_POSE_THRESHOLD*10*diff_time))
+      {
+        use_predict_pose = 0;
+      }
+      else
+      {
+        use_predict_pose = 1;
+      }
+      use_predict_pose = 0;
+
+      if (use_predict_pose == 0)
+      {
+        current_pose.x = ndt_pose.x;
+        current_pose.y = ndt_pose.y;
+        current_pose.z = ndt_pose.z;
+        current_pose.roll = ndt_pose.roll;
+        current_pose.pitch = ndt_pose.pitch;
+        current_pose.yaw = ndt_pose.yaw;
+
+      }
+      else
+      {
+          givlog->verbose("NDT","USE PREDICT POSE");
+        current_pose.x = predict_pose_for_ndt.x;
+        current_pose.y = predict_pose_for_ndt.y;
+        current_pose.z = predict_pose_for_ndt.z;
+        current_pose.roll = predict_pose_for_ndt.roll;
+        current_pose.pitch = predict_pose_for_ndt.pitch;
+        current_pose.yaw = predict_pose_for_ndt.yaw;
+      }
+
+ //     std::cout<<" current pose x is "<<current_pose.x<<std::endl;
+
+      // Compute the velocity and acceleration
+      diff_x = current_pose.x - previous_pose.x;
+      diff_y = current_pose.y - previous_pose.y;
+      diff_z = current_pose.z - previous_pose.z;
+      diff_yaw = calcDiffForRadian(current_pose.yaw, previous_pose.yaw);
+      diff = sqrt(diff_x * diff_x + diff_y * diff_y + diff_z * diff_z);
+
+      current_velocity = (diff_time > 0) ? (diff / diff_time) : 0;
+      current_velocity_x = (diff_time > 0) ? (diff_x / diff_time) : 0;
+      current_velocity_y = (diff_time > 0) ? (diff_y / diff_time) : 0;
+      current_velocity_z = (diff_time > 0) ? (diff_z / diff_time) : 0;
+      angular_velocity = (diff_time > 0) ? (diff_yaw / diff_time) : 0;
+
+      current_pose.vx = current_velocity_x;
+      current_pose.vy = current_velocity_y;
+
+      current_velocity_smooth = (current_velocity + previous_velocity + previous_previous_velocity) / 3.0;
+      if (current_velocity_smooth < 0.2)
+      {
+        current_velocity_smooth = 0.0;
+      }
+
+ //     bNotChangev = true;
+      if((diff_time >  0)&&(bNotChangev == false))
+      {
+          double aa = (current_velocity -previous_velocity)/diff_time;
+          if(fabs(aa)>5.0)
+          {
+              givlog->verbose("NDT","aa is %f",aa);
+              aa = fabs(5.0/aa);
+              current_velocity = previous_velocity + aa*(current_velocity -previous_velocity);
+              current_velocity_x = previous_velocity_x + aa *((current_velocity_x -previous_velocity_x));
+              current_velocity_y = previous_velocity_y + aa *((current_velocity_y -previous_velocity_y));
+              current_velocity_z = previous_velocity_z + aa *((current_velocity_z -previous_velocity_z));
+
+          }
+      }
+
+      current_accel = (diff_time > 0) ? ((current_velocity - previous_velocity) / diff_time) : 0;
+      current_accel_x = (diff_time > 0) ? ((current_velocity_x - previous_velocity_x) / diff_time) : 0;
+      current_accel_y = (diff_time > 0) ? ((current_velocity_y - previous_velocity_y) / diff_time) : 0;
+      current_accel_z = (diff_time > 0) ? ((current_velocity_z - previous_velocity_z) / diff_time) : 0;
+
+ //   std::cout<<"fit time is "<<getFitnessScore_time<<std::endl;
+
+      gcurndtgpspos = PoseToGPS(gvector_trace[gcurmapindex].mndtorigin,current_pose);
+//      std::cout<<" pre x:"<<previous_pose.x<<" pre y:"<<previous_pose.y<<std::endl;
+    std::cout<<" x: "<<current_pose.x<<" y: "<<current_pose.y
+            <<" z: "<<current_pose.z<<" yaw:"<<current_pose.yaw
+           <<" vel: "<<current_velocity<<std::endl;
+
+
+    std::cout<<"NDT ve:"<<gcurndtgpspos.ve<<" vn:"<<gcurndtgpspos.vn
+            <<" GPS ve:"<<gcurgpsgpspos.ve<<" vn:"<<gcurgpsgpspos.vn<<std::endl;
+
+    gbFileNDTUpdate = true;
+
+//    gw->Updatexyyaw(current_pose.x,current_pose.y,current_pose.yaw);
+
+    current_pose_imu.x = current_pose.x;
+    current_pose_imu.y = current_pose.y;
+    current_pose_imu.z = current_pose.z;
+    current_pose_imu.roll = current_pose.roll;
+    current_pose_imu.pitch = current_pose.pitch;
+    current_pose_imu.yaw = current_pose.yaw;
+
+    current_velocity_imu_x = current_velocity_x;
+    current_velocity_imu_y = current_velocity_y;
+    current_velocity_imu_z = current_velocity_z;
+
+    current_velocity_imu_z = 0;
+
+    offset_imu_x = 0.0;
+    offset_imu_y = 0.0;
+    offset_imu_z = 0.0;
+    offset_imu_roll = 0.0;
+    offset_imu_pitch = 0.0;
+    offset_imu_yaw = 0.0;
+
+//    std::cout<<" vx is "<<current_velocity_x<<" vy is "<<current_velocity_y<<std::endl;
+    previous_pose.x = current_pose.x;
+    previous_pose.y = current_pose.y;
+    previous_pose.z = current_pose.z;
+    previous_pose.roll = current_pose.roll;
+    previous_pose.pitch = current_pose.pitch;
+    previous_pose.yaw = current_pose.yaw;
+
+//    previous_scan_time = current_scan_time;
+
+    previous_previous_velocity = previous_velocity;
+    previous_velocity = current_velocity;
+    previous_velocity_x = current_velocity_x;
+    previous_velocity_y = current_velocity_y;
+    previous_velocity_z = current_velocity_z;
+    previous_accel = current_accel;
+
+    pthread_mutex_lock(&mutex_read);
+    gindex++;
+    glidar_pose.accel = current_accel;
+    glidar_pose.accel_x = current_accel_x;
+    glidar_pose.accel_y = current_accel_y;
+    glidar_pose.accel_z = current_accel_z;
+    glidar_pose.vel = current_velocity;
+    glidar_pose.vel_x = current_velocity_x;
+    glidar_pose.vel_y = current_velocity_y;
+    glidar_pose.vel_z = current_velocity_z;
+    glidar_pose.mpose = current_pose;
+    pthread_mutex_unlock(&mutex_read);
+
+    pthread_mutex_unlock(&mutex_pose);
+
+
+
+
+    double hdg_o = (90 - gvector_trace[gcurmapindex].mndtorigin.heading)*M_PI/180.0;
+    double ndt_vn = current_velocity_x * cos(hdg_o) - current_velocity_y * sin(hdg_o);
+    double ndt_ve = current_velocity_x * sin(hdg_o) + current_velocity_y * cos(hdg_o);
+    double ndt_vd = current_accel_z;
+
+
+
+    std::cout<<std::setprecision(9)<<"gps lat:"<<gcurndtgpspos.lat<<" lon:"
+            <<gcurndtgpspos.lon<<" z:"<<gcurndtgpspos.height<<" heading:"
+           <<gcurndtgpspos.heading<<std::endl;
+    std::cout<<std::setprecision(6)<<std::endl;
+
+
+
+
+    double x0,y0;
+    double x,y;
+    GaussProjCal(gcurndtgpspos.lon,gcurndtgpspos.lat,&x0,&y0);
+    GaussProjCal(gcurgpsgpspos.lon,gcurgpsgpspos.lat,&x,&y);
+    double dis = sqrt(pow(x-x0,2)+ pow(y-y0,2));
+    double headdiff = fabs(gcurgpsgpspos.heading - gcurndtgpspos.heading);
+    double zdiff = fabs(gcurgpsgpspos.height - gcurndtgpspos.height);
+    std::cout<<" dis:"<<dis<<" headdiff:"<<headdiff<<" zdiff:"<<zdiff<<std::endl;
+
+    iv::lidar::ndtpos ndtpos;
+    ndtpos.set_lidartime(raw_scan->header.stamp/1000);
+    ndtpos.set_ndt_time(QDateTime::currentMSecsSinceEpoch());
+    ndtpos.set_accel(current_accel);
+    ndtpos.set_accel_x(current_accel_x);
+    ndtpos.set_accel_y(current_accel_y);
+    ndtpos.set_accel_z(current_accel_z);
+    ndtpos.set_vel(current_velocity);
+    ndtpos.set_vel_x(current_velocity_x);
+    ndtpos.set_vel_y(current_velocity_y);
+    ndtpos.set_vel_z(current_velocity_z);
+    ndtpos.set_fitness_score(fitness_score);
+    ndtpos.set_has_converged(has_converged);
+    ndtpos.set_id(0);
+    ndtpos.set_iteration(iteration);
+    ndtpos.set_trans_probability(trans_probability);
+    ndtpos.set_pose_pitch(current_pose.pitch);
+    ndtpos.set_pose_roll(current_pose.roll);
+    ndtpos.set_pose_yaw(current_pose.yaw);
+    ndtpos.set_pose_x(current_pose.x);
+    ndtpos.set_pose_y(current_pose.y);
+    ndtpos.set_pose_z(current_pose.z);
+    ndtpos.set_comp_time(xTime.elapsed());
+
+
+    int ndatasize = ndtpos.ByteSize();
+    char * strser = new char[ndatasize];
+    bool bSer = ndtpos.SerializeToArray(strser,ndatasize);
+    if(bSer)
+    {
+        iv::modulecomm::ModuleSendMsg(gpc,strser,ndatasize);
+    }
+    else
+    {
+        std::cout<<BOLDRED<<"ndtpos serialize error."<<RESET<<std::endl;
+    }
+    delete strser;
+
+    iv::lidar::ndtgpspos xndtgpspos;
+    xndtgpspos.set_lat(gcurndtgpspos.lat);
+    xndtgpspos.set_lon(gcurndtgpspos.lon);
+    xndtgpspos.set_heading(gcurndtgpspos.heading);
+    xndtgpspos.set_height(gcurndtgpspos.height);
+    xndtgpspos.set_pitch(gcurndtgpspos.pitch);
+    xndtgpspos.set_roll(gcurndtgpspos.roll);
+    xndtgpspos.set_tras_prob(trans_probability);
+    xndtgpspos.set_score(fitness_score);
+    xndtgpspos.set_vn(ndt_vn);
+    xndtgpspos.set_ve(ndt_ve);
+    xndtgpspos.set_vd(ndt_vd);
+
+    givlog->debug("ndtgpspos","lat:%11.7f lon:%11.7f height:%11.3f heading:%6.3f pitch:%6.3f roll:%6.3f vn:%6.3f ve:%6.3f vd:%6.3f trans_prob:%6.3f score:%6.3f",
+                  xndtgpspos.lat(),xndtgpspos.lon(),xndtgpspos.height(),
+                  xndtgpspos.heading(),xndtgpspos.pitch(),xndtgpspos.roll(),
+                  xndtgpspos.vn(),xndtgpspos.ve(),xndtgpspos.vd(),
+                  xndtgpspos.tras_prob(),xndtgpspos.score());
+
+    ndatasize = xndtgpspos.ByteSize();
+    strser = new char[ndatasize];
+    bSer = xndtgpspos.SerializeToArray(strser,ndatasize);
+    if(bSer)
+    {
+        std::cout<<"share msg."<<std::endl;
+        iv::modulecomm::ModuleSendMsg(gpd,strser,ndatasize);
+    }
+    else
+    {
+        std::cout<<BOLDRED<<"ndtgpspos serialize error."<<RESET<<std::endl;
+    }
+    delete strser;
+
+    if(trans_probability < 0.5)gbNeedGPSUpdateNDT = true;
+    else gbNeedGPSUpdateNDT = false;
+
+
+}
+
+
+int GetPose(int & curindex,iv::lidar_pose & xlidar_pose)
+{
+     pthread_mutex_lock(&mutex_read);
+    if(curindex == gindex)
+    {
+        pthread_mutex_unlock(&mutex_read);
+        return 0;
+    }
+    curindex = gindex;
+    xlidar_pose = glidar_pose;
+    pthread_mutex_unlock(&mutex_read);
+    return 1;
+}
+
+void SetCurPose(double x0, double y0, double yaw0, double z0, double pitch0, double roll0)
+{
+
+    std::cout<<"set pose"<<std::endl;
+    pthread_mutex_lock(&mutex_pose);
+
+    initial_pose.x = x0;
+    initial_pose.y = y0;
+    initial_pose.z = z0;
+    initial_pose.roll = roll0;
+    initial_pose.pitch = pitch0;
+    initial_pose.yaw = yaw0;
+
+    localizer_pose.x = initial_pose.x;
+    localizer_pose.y = initial_pose.y;
+    localizer_pose.z = initial_pose.z;
+    localizer_pose.roll = initial_pose.roll;
+    localizer_pose.pitch = initial_pose.pitch;
+    localizer_pose.yaw = initial_pose.yaw;
+
+    previous_pose.x = initial_pose.x;
+    previous_pose.y = initial_pose.y;
+    previous_pose.z = initial_pose.z;
+    previous_pose.roll = initial_pose.roll;
+    previous_pose.pitch = initial_pose.pitch;
+    previous_pose.yaw = initial_pose.yaw;
+
+    current_pose.x = initial_pose.x;
+    current_pose.y = initial_pose.y;
+    current_pose.z = initial_pose.z;
+    current_pose.roll = initial_pose.roll;
+    current_pose.pitch = initial_pose.pitch;
+    current_pose.yaw = initial_pose.yaw;
+
+    current_velocity = 0;
+    current_velocity_x = 0;
+    current_velocity_y = 0;
+    current_velocity_z = 0;
+    angular_velocity = 0;
+
+    pthread_mutex_unlock(&mutex_pose);
+
+
+}
+
+void SetRunState(bool bRun)
+{
+    std::cout<<"set state."<<std::endl;
+    gbNDTRun = bRun;
+}
+
+void setuseimu(bool bUse)
+{
+    _use_imu = bUse;
+}
+

+ 112 - 0
src/detection/detection_ndt_pclomp/ndt_matching.h

@@ -0,0 +1,112 @@
+#ifndef NDT_MATCHING_H
+#define NDT_MATCHING_H
+#include<QDateTime>
+#include <QMutex>
+
+struct pose
+{
+  double x;
+  double y;
+  double z;
+  double roll;
+  double pitch;
+  double yaw;
+  double vx;
+  double vy;
+};
+
+
+namespace  iv {
+struct lidar_pose
+{
+    lidar_pose() {}
+
+    double vel_x;
+    double vel_y;
+    double vel_z;
+    double vel;
+    double accel_x;
+    double accel_y;
+    double accel_z;
+    double accel;
+    struct pose mpose;
+};
+
+}
+
+namespace iv {
+struct imudata
+{
+  qint64 imutime;
+  double imu_linear_acceleration_x;
+  double imu_linear_acceleration_y;
+  double imu_linear_acceleration_z;
+  double imu_angular_velocity_x;
+  double imu_angular_velocity_y;
+  double imu_angular_velocity_z;
+};
+}
+
+
+void ndt_match_Init_nomap();
+
+void ndt_match_Init(pcl::PointCloud<pcl::PointXYZ>::Ptr mappcd);
+
+void point_ndt(pcl::PointCloud<pcl::PointXYZ>::Ptr raw_scan);
+
+void point_ndt_test(pcl::PointCloud<pcl::PointXYZ>::Ptr raw_scan);
+
+int GetPose(int & curindex,iv::lidar_pose & xlidar_pose);
+
+void SetCurPose(double x0,double y0,double yaw0,double z0 = 0,double pitch0 = 0,double roll0 = 0);
+
+void ndt_match_SetMap(pcl::PointCloud<pcl::PointXYZ>::Ptr mappcd);
+
+void SetRunState(bool bRun);
+
+namespace iv {
+struct ndttracepoint
+{
+    double x;
+    double y;
+    double z;
+    double pitch;
+    double roll;
+    double yaw;
+};
+}
+
+namespace iv {
+struct gpspos
+{
+    double lat;
+    double lon;
+    double height;
+    double heading;
+    double pitch;
+    double roll;
+    double ve;
+    double vn;
+};
+}
+
+iv::gpspos PoseToGPS(iv::gpspos xori,pose xpose);
+pose CalcPose(iv::gpspos xori,iv::gpspos xcur);
+
+namespace iv {
+struct ndtmaptrace
+{
+    ndtmaptrace() {}
+    std::vector<ndttracepoint> mvector_trace;
+    std::string mstrpcdpath;
+    iv::gpspos mndtorigin;
+};
+
+}
+
+void setuseimu(bool bUse);
+void imu_update(qint64 current_time_imu,double imu_roll,double imu_pitch,double imu_yaw,
+                       double acceleration_x,double acceleration_y,
+                       double acceleration_z);
+
+#endif // NDT_MATCHING_H

+ 5 - 0
src/detection/detection_ndt_pclomp/pclomp/ndt_omp.cpp

@@ -0,0 +1,5 @@
+#include <pclomp/ndt_omp.h>
+#include <ndt_omp_impl.hpp>
+
+template class pclomp::NormalDistributionsTransform<pcl::PointXYZ, pcl::PointXYZ>;
+template class pclomp::NormalDistributionsTransform<pcl::PointXYZI, pcl::PointXYZI>;

+ 985 - 0
src/detection/detection_ndt_pclomp/pclomp/ndt_omp_impl.hpp

@@ -0,0 +1,985 @@
+#include <pclomp/ndt_omp.h>
+/*
+ * Software License Agreement (BSD License)
+ *
+ *  Point Cloud Library (PCL) - www.pointclouds.org
+ *  Copyright (c) 2010-2011, Willow Garage, Inc.
+ *  Copyright (c) 2012-, Open Perception, Inc.
+ *
+ *  All rights reserved.
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions
+ *  are met:
+ *
+ *   * Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above
+ *     copyright notice, this list of conditions and the following
+ *     disclaimer in the documentation and/or other materials provided
+ *     with the distribution.
+ *   * Neither the name of the copyright holder(s) nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ *  FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ *  COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ *  BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ *  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ *  CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ *  LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ *  ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ *  POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $Id$
+ *
+ */
+
+#ifndef PCL_REGISTRATION_NDT_OMP_IMPL_H_
+#define PCL_REGISTRATION_NDT_OMP_IMPL_H_
+
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+template<typename PointSource, typename PointTarget>
+pclomp::NormalDistributionsTransform<PointSource, PointTarget>::NormalDistributionsTransform ()
+  : target_cells_ ()
+  , resolution_ (1.0f)
+  , step_size_ (0.1)
+  , outlier_ratio_ (0.55)
+  , gauss_d1_ ()
+  , gauss_d2_ ()
+  , gauss_d3_ ()
+  , trans_probability_ ()
+  , j_ang_a_ (), j_ang_b_ (), j_ang_c_ (), j_ang_d_ (), j_ang_e_ (), j_ang_f_ (), j_ang_g_ (), j_ang_h_ ()
+  , h_ang_a2_ (), h_ang_a3_ (), h_ang_b2_ (), h_ang_b3_ (), h_ang_c2_ (), h_ang_c3_ (), h_ang_d1_ (), h_ang_d2_ ()
+  , h_ang_d3_ (), h_ang_e1_ (), h_ang_e2_ (), h_ang_e3_ (), h_ang_f1_ (), h_ang_f2_ (), h_ang_f3_ ()
+{
+  reg_name_ = "NormalDistributionsTransform";
+
+  double gauss_c1, gauss_c2;
+
+  // Initializes the gaussian fitting parameters (eq. 6.8) [Magnusson 2009]
+  gauss_c1 = 10.0 * (1 - outlier_ratio_);
+  gauss_c2 = outlier_ratio_ / pow (resolution_, 3);
+  gauss_d3_ = -log (gauss_c2);
+  gauss_d1_ = -log ( gauss_c1 + gauss_c2 ) - gauss_d3_;
+  gauss_d2_ = -2 * log ((-log ( gauss_c1 * exp ( -0.5 ) + gauss_c2 ) - gauss_d3_) / gauss_d1_);
+
+  transformation_epsilon_ = 0.1;
+  max_iterations_ = 35;
+
+  search_method = DIRECT7;
+  num_threads_ = omp_get_max_threads();
+}
+
+
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+template<typename PointSource, typename PointTarget> void
+pclomp::NormalDistributionsTransform<PointSource, PointTarget>::computeTransformation (PointCloudSource &output, const Eigen::Matrix4f &guess)
+{
+  nr_iterations_ = 0;
+  converged_ = false;
+
+  double gauss_c1, gauss_c2;
+
+  // Initializes the gaussian fitting parameters (eq. 6.8) [Magnusson 2009]
+  gauss_c1 = 10 * (1 - outlier_ratio_);
+  gauss_c2 = outlier_ratio_ / pow (resolution_, 3);
+  gauss_d3_ = -log (gauss_c2);
+  gauss_d1_ = -log ( gauss_c1 + gauss_c2 ) - gauss_d3_;
+  gauss_d2_ = -2 * log ((-log ( gauss_c1 * exp ( -0.5 ) + gauss_c2 ) - gauss_d3_) / gauss_d1_);
+
+  if (guess != Eigen::Matrix4f::Identity ())
+  {
+    // Initialise final transformation to the guessed one
+    final_transformation_ = guess;
+    // Apply guessed transformation prior to search for neighbours
+    transformPointCloud (output, output, guess);
+  }
+
+  Eigen::Transform<float, 3, Eigen::Affine, Eigen::ColMajor> eig_transformation;
+  eig_transformation.matrix () = final_transformation_;
+
+  // Convert initial guess matrix to 6 element transformation vector
+  Eigen::Matrix<double, 6, 1> p, delta_p, score_gradient;
+  Eigen::Vector3f init_translation = eig_transformation.translation ();
+  Eigen::Vector3f init_rotation = eig_transformation.rotation ().eulerAngles (0, 1, 2);
+  p << init_translation (0), init_translation (1), init_translation (2),
+  init_rotation (0), init_rotation (1), init_rotation (2);
+
+  Eigen::Matrix<double, 6, 6> hessian;
+
+  double score = 0;
+  double delta_p_norm;
+
+  // Calculate derivatives of initial transform vector, subsequent derivative calculations are done in the step length determination.
+  score = computeDerivatives (score_gradient, hessian, output, p);
+
+  while (!converged_)
+  {
+    // Store previous transformation
+    previous_transformation_ = transformation_;
+
+    // Solve for decent direction using newton method, line 23 in Algorithm 2 [Magnusson 2009]
+    Eigen::JacobiSVD<Eigen::Matrix<double, 6, 6> > sv (hessian, Eigen::ComputeFullU | Eigen::ComputeFullV);
+    // Negative for maximization as opposed to minimization
+    delta_p = sv.solve (-score_gradient);
+
+    //Calculate step length with guaranteed sufficient decrease [More, Thuente 1994]
+    delta_p_norm = delta_p.norm ();
+
+    if (delta_p_norm == 0 || delta_p_norm != delta_p_norm)
+    {
+      trans_probability_ = score / static_cast<double> (input_->points.size ());
+      converged_ = delta_p_norm == delta_p_norm;
+      return;
+    }
+
+    delta_p.normalize ();
+    delta_p_norm = computeStepLengthMT (p, delta_p, delta_p_norm, step_size_, transformation_epsilon_ / 2, score, score_gradient, hessian, output);
+    delta_p *= delta_p_norm;
+
+
+    transformation_ = (Eigen::Translation<float, 3> (static_cast<float> (delta_p (0)), static_cast<float> (delta_p (1)), static_cast<float> (delta_p (2))) *
+                       Eigen::AngleAxis<float> (static_cast<float> (delta_p (3)), Eigen::Vector3f::UnitX ()) *
+                       Eigen::AngleAxis<float> (static_cast<float> (delta_p (4)), Eigen::Vector3f::UnitY ()) *
+                       Eigen::AngleAxis<float> (static_cast<float> (delta_p (5)), Eigen::Vector3f::UnitZ ())).matrix ();
+
+
+    p = p + delta_p;
+
+    // Update Visualizer (untested)
+    if (update_visualizer_ != 0)
+      update_visualizer_ (output, std::vector<int>(), *target_, std::vector<int>() );
+
+    if (nr_iterations_ > max_iterations_ ||
+        (nr_iterations_ && (std::fabs (delta_p_norm) < transformation_epsilon_)))
+    {
+      converged_ = true;
+    }
+
+    nr_iterations_++;
+
+  }
+
+  // Store transformation probability. The relative differences within each scan registration are accurate
+  // but the normalization constants need to be modified for it to be globally accurate
+  trans_probability_ = score / static_cast<double> (input_->points.size ());
+}
+
+#ifndef _OPENMP
+int omp_get_max_threads() { return 1; }
+int omp_get_thread_num() { return 0; }
+#endif
+
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+template<typename PointSource, typename PointTarget> double
+pclomp::NormalDistributionsTransform<PointSource, PointTarget>::computeDerivatives(Eigen::Matrix<double, 6, 1> &score_gradient,
+	Eigen::Matrix<double, 6, 6> &hessian,
+	PointCloudSource &trans_cloud,
+	Eigen::Matrix<double, 6, 1> &p,
+	bool compute_hessian)
+{
+	score_gradient.setZero();
+	hessian.setZero();
+	double score = 0;
+
+  std::vector<double> scores(input_->points.size());
+  std::vector<Eigen::Matrix<double, 6, 1>, Eigen::aligned_allocator<Eigen::Matrix<double, 6, 1>>> score_gradients(input_->points.size());
+  std::vector<Eigen::Matrix<double, 6, 6>, Eigen::aligned_allocator<Eigen::Matrix<double, 6, 6>>> hessians(input_->points.size());
+  for (std::size_t i = 0; i < input_->points.size(); i++) {
+		scores[i] = 0;
+		score_gradients[i].setZero();
+		hessians[i].setZero();
+	}
+
+	// Precompute Angular Derivatives (eq. 6.19 and 6.21)[Magnusson 2009]
+	computeAngleDerivatives(p);
+
+  std::vector<std::vector<TargetGridLeafConstPtr>> neighborhoods(num_threads_);
+  std::vector<std::vector<float>> distancess(num_threads_);
+
+	// Update gradient and hessian for each point, line 17 in Algorithm 2 [Magnusson 2009]
+#pragma omp parallel for num_threads(num_threads_) schedule(guided, 8)
+	for (std::size_t idx = 0; idx < input_->points.size(); idx++)
+	{
+		int thread_n = omp_get_thread_num();
+
+		// Original Point and Transformed Point
+		PointSource x_pt, x_trans_pt;
+		// Original Point and Transformed Point (for math)
+		Eigen::Vector3d x, x_trans;
+		// Occupied Voxel
+		TargetGridLeafConstPtr cell;
+		// Inverse Covariance of Occupied Voxel
+		Eigen::Matrix3d c_inv;
+
+		// Initialize Point Gradient and Hessian
+		Eigen::Matrix<float, 4, 6> point_gradient_;
+		Eigen::Matrix<float, 24, 6> point_hessian_;
+		point_gradient_.setZero();
+		point_gradient_.block<3, 3>(0, 0).setIdentity();
+		point_hessian_.setZero();
+
+		x_trans_pt = trans_cloud.points[idx];
+
+		auto& neighborhood = neighborhoods[thread_n];
+		auto& distances = distancess[thread_n];
+
+		// Find neighbors (Radius search has been experimentally faster than direct neighbor checking.
+		switch (search_method) {
+		case KDTREE:
+			target_cells_.radiusSearch(x_trans_pt, resolution_, neighborhood, distances);
+			break;
+		case DIRECT26:
+			target_cells_.getNeighborhoodAtPoint(x_trans_pt, neighborhood);
+			break;
+		default:
+		case DIRECT7:
+			target_cells_.getNeighborhoodAtPoint7(x_trans_pt, neighborhood);
+			break;
+		case DIRECT1:
+			target_cells_.getNeighborhoodAtPoint1(x_trans_pt, neighborhood);
+			break;
+		}
+
+		double score_pt = 0;
+		Eigen::Matrix<double, 6, 1> score_gradient_pt = Eigen::Matrix<double, 6, 1>::Zero();
+		Eigen::Matrix<double, 6, 6> hessian_pt = Eigen::Matrix<double, 6, 6>::Zero();
+
+		for (typename std::vector<TargetGridLeafConstPtr>::iterator neighborhood_it = neighborhood.begin(); neighborhood_it != neighborhood.end(); neighborhood_it++)
+		{
+			cell = *neighborhood_it;
+			x_pt = input_->points[idx];
+			x = Eigen::Vector3d(x_pt.x, x_pt.y, x_pt.z);
+
+			x_trans = Eigen::Vector3d(x_trans_pt.x, x_trans_pt.y, x_trans_pt.z);
+
+			// Denorm point, x_k' in Equations 6.12 and 6.13 [Magnusson 2009]
+			x_trans -= cell->getMean();
+			// Uses precomputed covariance for speed.
+			c_inv = cell->getInverseCov();
+
+			// Compute derivative of transform function w.r.t. transform vector, J_E and H_E in Equations 6.18 and 6.20 [Magnusson 2009]
+			computePointDerivatives(x, point_gradient_, point_hessian_);
+			// Update score, gradient and hessian, lines 19-21 in Algorithm 2, according to Equations 6.10, 6.12 and 6.13, respectively [Magnusson 2009]
+			score_pt += updateDerivatives(score_gradient_pt, hessian_pt, point_gradient_, point_hessian_, x_trans, c_inv, compute_hessian);
+		}
+
+		scores[idx] = score_pt;
+		score_gradients[idx].noalias() = score_gradient_pt;
+		hessians[idx].noalias() = hessian_pt;
+	}
+
+  // Ensure that the result is invariant against the summing up order
+  for (std::size_t i = 0; i < input_->points.size(); i++) {
+		score += scores[i];
+		score_gradient += score_gradients[i];
+		hessian += hessians[i];
+	}
+
+	return (score);
+}
+
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+template<typename PointSource, typename PointTarget> void
+pclomp::NormalDistributionsTransform<PointSource, PointTarget>::computeAngleDerivatives(Eigen::Matrix<double, 6, 1> &p, bool compute_hessian)
+{
+	// Simplified math for near 0 angles
+	double cx, cy, cz, sx, sy, sz;
+	if (fabs(p(3)) < 10e-5)
+	{
+		//p(3) = 0;
+		cx = 1.0;
+		sx = 0.0;
+	}
+	else
+	{
+		cx = cos(p(3));
+		sx = sin(p(3));
+	}
+	if (fabs(p(4)) < 10e-5)
+	{
+		//p(4) = 0;
+		cy = 1.0;
+		sy = 0.0;
+	}
+	else
+	{
+		cy = cos(p(4));
+		sy = sin(p(4));
+	}
+
+	if (fabs(p(5)) < 10e-5)
+	{
+		//p(5) = 0;
+		cz = 1.0;
+		sz = 0.0;
+	}
+	else
+	{
+		cz = cos(p(5));
+		sz = sin(p(5));
+	}
+
+	// Precomputed angular gradiant components. Letters correspond to Equation 6.19 [Magnusson 2009]
+	j_ang_a_ << (-sx * sz + cx * sy * cz), (-sx * cz - cx * sy * sz), (-cx * cy);
+	j_ang_b_ << (cx * sz + sx * sy * cz), (cx * cz - sx * sy * sz), (-sx * cy);
+	j_ang_c_ << (-sy * cz), sy * sz, cy;
+	j_ang_d_ << sx * cy * cz, (-sx * cy * sz), sx * sy;
+	j_ang_e_ << (-cx * cy * cz), cx * cy * sz, (-cx * sy);
+	j_ang_f_ << (-cy * sz), (-cy * cz), 0;
+	j_ang_g_ << (cx * cz - sx * sy * sz), (-cx * sz - sx * sy * cz), 0;
+	j_ang_h_ << (sx * cz + cx * sy * sz), (cx * sy * cz - sx * sz), 0;
+
+	j_ang.setZero();
+	j_ang.row(0).noalias() = Eigen::Vector4f((-sx * sz + cx * sy * cz), (-sx * cz - cx * sy * sz), (-cx * cy), 0.0f);
+	j_ang.row(1).noalias() = Eigen::Vector4f((cx * sz + sx * sy * cz), (cx * cz - sx * sy * sz), (-sx * cy), 0.0f);
+	j_ang.row(2).noalias() = Eigen::Vector4f((-sy * cz), sy * sz, cy, 0.0f);
+	j_ang.row(3).noalias() = Eigen::Vector4f(sx * cy * cz, (-sx * cy * sz), sx * sy, 0.0f);
+	j_ang.row(4).noalias() = Eigen::Vector4f((-cx * cy * cz), cx * cy * sz, (-cx * sy), 0.0f);
+	j_ang.row(5).noalias() = Eigen::Vector4f((-cy * sz), (-cy * cz), 0, 0.0f);
+	j_ang.row(6).noalias() = Eigen::Vector4f((cx * cz - sx * sy * sz), (-cx * sz - sx * sy * cz), 0, 0.0f);
+	j_ang.row(7).noalias() = Eigen::Vector4f((sx * cz + cx * sy * sz), (cx * sy * cz - sx * sz), 0, 0.0f);
+
+	if (compute_hessian)
+	{
+		// Precomputed angular hessian components. Letters correspond to Equation 6.21 and numbers correspond to row index [Magnusson 2009]
+		h_ang_a2_ << (-cx * sz - sx * sy * cz), (-cx * cz + sx * sy * sz), sx * cy;
+		h_ang_a3_ << (-sx * sz + cx * sy * cz), (-cx * sy * sz - sx * cz), (-cx * cy);
+
+		h_ang_b2_ << (cx * cy * cz), (-cx * cy * sz), (cx * sy);
+		h_ang_b3_ << (sx * cy * cz), (-sx * cy * sz), (sx * sy);
+
+		// The sign of 'sx * sz' in c2 is incorrect in [Magnusson 2009], and it is fixed here.
+		h_ang_c2_ << (-sx * cz - cx * sy * sz), (sx * sz - cx * sy * cz), 0;
+		h_ang_c3_ << (cx * cz - sx * sy * sz), (-sx * sy * cz - cx * sz), 0;
+
+		h_ang_d1_ << (-cy * cz), (cy * sz), (-sy);
+		h_ang_d2_ << (-sx * sy * cz), (sx * sy * sz), (sx * cy);
+		h_ang_d3_ << (cx * sy * cz), (-cx * sy * sz), (-cx * cy);
+
+		h_ang_e1_ << (sy * sz), (sy * cz), 0;
+		h_ang_e2_ << (-sx * cy * sz), (-sx * cy * cz), 0;
+		h_ang_e3_ << (cx * cy * sz), (cx * cy * cz), 0;
+
+		h_ang_f1_ << (-cy * cz), (cy * sz), 0;
+		h_ang_f2_ << (-cx * sz - sx * sy * cz), (-cx * cz + sx * sy * sz), 0;
+		h_ang_f3_ << (-sx * sz + cx * sy * cz), (-cx * sy * sz - sx * cz), 0;
+
+		h_ang.setZero();
+		h_ang.row(0).noalias() = Eigen::Vector4f((-cx * sz - sx * sy * cz), (-cx * cz + sx * sy * sz), sx * cy, 0.0f);		// a2
+		h_ang.row(1).noalias() = Eigen::Vector4f((-sx * sz + cx * sy * cz), (-cx * sy * sz - sx * cz), (-cx * cy), 0.0f);	// a3
+
+		h_ang.row(2).noalias() = Eigen::Vector4f((cx * cy * cz), (-cx * cy * sz), (cx * sy), 0.0f);							// b2
+		h_ang.row(3).noalias() = Eigen::Vector4f((sx * cy * cz), (-sx * cy * sz), (sx * sy), 0.0f);							// b3
+
+		h_ang.row(4).noalias() = Eigen::Vector4f((-sx * cz - cx * sy * sz), (sx * sz - cx * sy * cz), 0, 0.0f);				// c2
+		h_ang.row(5).noalias() = Eigen::Vector4f((cx * cz - sx * sy * sz), (-sx * sy * cz - cx * sz), 0, 0.0f);				// c3
+
+		h_ang.row(6).noalias() = Eigen::Vector4f((-cy * cz), (cy * sz), (sy), 0.0f);										// d1
+		h_ang.row(7).noalias() = Eigen::Vector4f((-sx * sy * cz), (sx * sy * sz), (sx * cy), 0.0f);							// d2
+		h_ang.row(8).noalias() = Eigen::Vector4f((cx * sy * cz), (-cx * sy * sz), (-cx * cy), 0.0f);						// d3
+
+		h_ang.row(9).noalias() = Eigen::Vector4f((sy * sz), (sy * cz), 0, 0.0f);											// e1
+		h_ang.row(10).noalias() = Eigen::Vector4f ((-sx * cy * sz), (-sx * cy * cz), 0, 0.0f);								// e2
+		h_ang.row(11).noalias() = Eigen::Vector4f ((cx * cy * sz), (cx * cy * cz), 0, 0.0f);								// e3
+
+		h_ang.row(12).noalias() = Eigen::Vector4f ((-cy * cz), (cy * sz), 0, 0.0f);											// f1
+		h_ang.row(13).noalias() = Eigen::Vector4f ((-cx * sz - sx * sy * cz), (-cx * cz + sx * sy * sz), 0, 0.0f);			// f2
+		h_ang.row(14).noalias() = Eigen::Vector4f ((-sx * sz + cx * sy * cz), (-cx * sy * sz - sx * cz), 0, 0.0f);			// f3
+	}
+}
+
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+template<typename PointSource, typename PointTarget> void
+pclomp::NormalDistributionsTransform<PointSource, PointTarget>::computePointDerivatives(Eigen::Vector3d &x, Eigen::Matrix<float, 4, 6>& point_gradient_, Eigen::Matrix<float, 24, 6>& point_hessian_, bool compute_hessian) const
+{
+	Eigen::Vector4f x4(x[0], x[1], x[2], 0.0f);
+
+	// Calculate first derivative of Transformation Equation 6.17 w.r.t. transform vector p.
+	// Derivative w.r.t. ith element of transform vector corresponds to column i, Equation 6.18 and 6.19 [Magnusson 2009]
+	Eigen::Matrix<float, 8, 1> x_j_ang = j_ang * x4;
+
+	point_gradient_(1, 3) = x_j_ang[0];
+	point_gradient_(2, 3) = x_j_ang[1];
+	point_gradient_(0, 4) = x_j_ang[2];
+	point_gradient_(1, 4) = x_j_ang[3];
+	point_gradient_(2, 4) = x_j_ang[4];
+	point_gradient_(0, 5) = x_j_ang[5];
+	point_gradient_(1, 5) = x_j_ang[6];
+	point_gradient_(2, 5) = x_j_ang[7];
+
+	if (compute_hessian)
+	{
+		Eigen::Matrix<float, 16, 1> x_h_ang = h_ang * x4;
+
+		// Vectors from Equation 6.21 [Magnusson 2009]
+		Eigen::Vector4f a (0, x_h_ang[0], x_h_ang[1], 0.0f);
+		Eigen::Vector4f b (0, x_h_ang[2], x_h_ang[3], 0.0f);
+		Eigen::Vector4f c (0, x_h_ang[4], x_h_ang[5], 0.0f);
+		Eigen::Vector4f d (x_h_ang[6], x_h_ang[7], x_h_ang[8], 0.0f);
+		Eigen::Vector4f e (x_h_ang[9], x_h_ang[10], x_h_ang[11], 0.0f);
+		Eigen::Vector4f f (x_h_ang[12], x_h_ang[13], x_h_ang[14], 0.0f);
+
+		// Calculate second derivative of Transformation Equation 6.17 w.r.t. transform vector p.
+		// Derivative w.r.t. ith and jth elements of transform vector corresponds to the 3x1 block matrix starting at (3i,j), Equation 6.20 and 6.21 [Magnusson 2009]
+		point_hessian_.block<4, 1>((9/3)*4, 3) = a;
+		point_hessian_.block<4, 1>((12/3)*4, 3) = b;
+		point_hessian_.block<4, 1>((15/3)*4, 3) = c;
+		point_hessian_.block<4, 1>((9/3)*4, 4) = b;
+		point_hessian_.block<4, 1>((12/3)*4, 4) = d;
+		point_hessian_.block<4, 1>((15/3)*4, 4) = e;
+		point_hessian_.block<4, 1>((9/3)*4, 5) = c;
+		point_hessian_.block<4, 1>((12/3)*4, 5) = e;
+		point_hessian_.block<4, 1>((15/3)*4, 5) = f;
+	}
+}
+
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+template<typename PointSource, typename PointTarget> void
+pclomp::NormalDistributionsTransform<PointSource, PointTarget>::computePointDerivatives(Eigen::Vector3d &x, Eigen::Matrix<double, 3, 6>& point_gradient_, Eigen::Matrix<double, 18, 6>& point_hessian_, bool compute_hessian) const
+{
+	// Calculate first derivative of Transformation Equation 6.17 w.r.t. transform vector p.
+	// Derivative w.r.t. ith element of transform vector corresponds to column i, Equation 6.18 and 6.19 [Magnusson 2009]
+	point_gradient_(1, 3) = x.dot(j_ang_a_);
+	point_gradient_(2, 3) = x.dot(j_ang_b_);
+	point_gradient_(0, 4) = x.dot(j_ang_c_);
+	point_gradient_(1, 4) = x.dot(j_ang_d_);
+	point_gradient_(2, 4) = x.dot(j_ang_e_);
+	point_gradient_(0, 5) = x.dot(j_ang_f_);
+	point_gradient_(1, 5) = x.dot(j_ang_g_);
+	point_gradient_(2, 5) = x.dot(j_ang_h_);
+
+	if (compute_hessian)
+	{
+		// Vectors from Equation 6.21 [Magnusson 2009]
+		Eigen::Vector3d a, b, c, d, e, f;
+
+		a << 0, x.dot(h_ang_a2_), x.dot(h_ang_a3_);
+		b << 0, x.dot(h_ang_b2_), x.dot(h_ang_b3_);
+		c << 0, x.dot(h_ang_c2_), x.dot(h_ang_c3_);
+		d << x.dot(h_ang_d1_), x.dot(h_ang_d2_), x.dot(h_ang_d3_);
+		e << x.dot(h_ang_e1_), x.dot(h_ang_e2_), x.dot(h_ang_e3_);
+		f << x.dot(h_ang_f1_), x.dot(h_ang_f2_), x.dot(h_ang_f3_);
+
+		// Calculate second derivative of Transformation Equation 6.17 w.r.t. transform vector p.
+		// Derivative w.r.t. ith and jth elements of transform vector corresponds to the 3x1 block matrix starting at (3i,j), Equation 6.20 and 6.21 [Magnusson 2009]
+		point_hessian_.block<3, 1>(9, 3) = a;
+		point_hessian_.block<3, 1>(12, 3) = b;
+		point_hessian_.block<3, 1>(15, 3) = c;
+		point_hessian_.block<3, 1>(9, 4) = b;
+		point_hessian_.block<3, 1>(12, 4) = d;
+		point_hessian_.block<3, 1>(15, 4) = e;
+		point_hessian_.block<3, 1>(9, 5) = c;
+		point_hessian_.block<3, 1>(12, 5) = e;
+		point_hessian_.block<3, 1>(15, 5) = f;
+	}
+}
+
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+template<typename PointSource, typename PointTarget> double
+pclomp::NormalDistributionsTransform<PointSource, PointTarget>::updateDerivatives(Eigen::Matrix<double, 6, 1> &score_gradient,
+	Eigen::Matrix<double, 6, 6> &hessian,
+	const Eigen::Matrix<float, 4, 6> &point_gradient4,
+	const Eigen::Matrix<float, 24, 6> &point_hessian_,
+	const Eigen::Vector3d &x_trans, const Eigen::Matrix3d &c_inv,
+	bool compute_hessian) const
+{
+	Eigen::Matrix<float, 1, 4> x_trans4( x_trans[0], x_trans[1], x_trans[2], 0.0f );
+	Eigen::Matrix4f c_inv4 = Eigen::Matrix4f::Zero();
+	c_inv4.topLeftCorner(3, 3) = c_inv.cast<float>();
+
+	float gauss_d2 = gauss_d2_;
+
+	// e^(-d_2/2 * (x_k - mu_k)^T Sigma_k^-1 (x_k - mu_k)) Equation 6.9 [Magnusson 2009]
+	float e_x_cov_x = exp(-gauss_d2 * x_trans4.dot(x_trans4 * c_inv4) * 0.5f);
+	// Calculate probability of transformed points existence, Equation 6.9 [Magnusson 2009]
+	float score_inc = -gauss_d1_ * e_x_cov_x;
+
+	e_x_cov_x = gauss_d2 * e_x_cov_x;
+
+	// Error checking for invalid values.
+	if (e_x_cov_x > 1 || e_x_cov_x < 0 || e_x_cov_x != e_x_cov_x)
+		return (0);
+
+	// Reusable portion of Equation 6.12 and 6.13 [Magnusson 2009]
+	e_x_cov_x *= gauss_d1_;
+
+	Eigen::Matrix<float, 4, 6> c_inv4_x_point_gradient4 = c_inv4 * point_gradient4;
+	Eigen::Matrix<float, 6, 1> x_trans4_dot_c_inv4_x_point_gradient4 = x_trans4 * c_inv4_x_point_gradient4;
+
+	score_gradient.noalias() += (e_x_cov_x * x_trans4_dot_c_inv4_x_point_gradient4).cast<double>();
+
+	if (compute_hessian) {
+		Eigen::Matrix<float, 1, 4> x_trans4_x_c_inv4 = x_trans4 * c_inv4;
+		Eigen::Matrix<float, 6, 6> point_gradient4_colj_dot_c_inv4_x_point_gradient4_col_i = point_gradient4.transpose() * c_inv4_x_point_gradient4;
+		Eigen::Matrix<float, 6, 1> x_trans4_dot_c_inv4_x_ext_point_hessian_4ij;
+
+		for (int i = 0; i < 6; i++) {
+			// Sigma_k^-1 d(T(x,p))/dpi, Reusable portion of Equation 6.12 and 6.13 [Magnusson 2009]
+			// Update gradient, Equation 6.12 [Magnusson 2009]
+			x_trans4_dot_c_inv4_x_ext_point_hessian_4ij.noalias() = x_trans4_x_c_inv4 * point_hessian_.block<4, 6>(i * 4, 0);
+
+			for (int j = 0; j < hessian.cols(); j++) {
+				// Update hessian, Equation 6.13 [Magnusson 2009]
+				hessian(i, j) += e_x_cov_x * (-gauss_d2 * x_trans4_dot_c_inv4_x_point_gradient4(i) * x_trans4_dot_c_inv4_x_point_gradient4(j) +
+					x_trans4_dot_c_inv4_x_ext_point_hessian_4ij(j) +
+					point_gradient4_colj_dot_c_inv4_x_point_gradient4_col_i(j, i));
+			}
+		}
+	}
+
+	return (score_inc);
+}
+
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+template<typename PointSource, typename PointTarget> void
+pclomp::NormalDistributionsTransform<PointSource, PointTarget>::computeHessian (Eigen::Matrix<double, 6, 6> &hessian,
+                                                                             PointCloudSource &trans_cloud, Eigen::Matrix<double, 6, 1> &)
+{
+  // Original Point and Transformed Point
+  PointSource x_pt, x_trans_pt;
+  // Original Point and Transformed Point (for math)
+  Eigen::Vector3d x, x_trans;
+  // Occupied Voxel
+  TargetGridLeafConstPtr cell;
+  // Inverse Covariance of Occupied Voxel
+  Eigen::Matrix3d c_inv;
+
+  // Initialize Point Gradient and Hessian
+  Eigen::Matrix<double, 3, 6> point_gradient_;
+  Eigen::Matrix<double, 18, 6> point_hessian_;
+  point_gradient_.setZero();
+  point_gradient_.block<3, 3>(0, 0).setIdentity();
+  point_hessian_.setZero();
+
+  hessian.setZero ();
+
+  // Precompute Angular Derivatives unnecessary because only used after regular derivative calculation
+
+  // Update hessian for each point, line 17 in Algorithm 2 [Magnusson 2009]
+  for (size_t idx = 0; idx < input_->points.size (); idx++)
+  {
+    x_trans_pt = trans_cloud.points[idx];
+
+    // Find neighbors (Radius search has been experimentally faster than direct neighbor checking.
+    std::vector<TargetGridLeafConstPtr> neighborhood;
+    std::vector<float> distances;
+		switch (search_method) {
+		case KDTREE:
+			target_cells_.radiusSearch(x_trans_pt, resolution_, neighborhood, distances);
+			break;
+		case DIRECT26:
+			target_cells_.getNeighborhoodAtPoint(x_trans_pt, neighborhood);
+			break;
+		default:
+		case DIRECT7:
+			target_cells_.getNeighborhoodAtPoint7(x_trans_pt, neighborhood);
+			break;
+		case DIRECT1:
+			target_cells_.getNeighborhoodAtPoint1(x_trans_pt, neighborhood);
+			break;
+		}
+
+    for (typename std::vector<TargetGridLeafConstPtr>::iterator neighborhood_it = neighborhood.begin (); neighborhood_it != neighborhood.end (); neighborhood_it++)
+    {
+      cell = *neighborhood_it;
+
+      {
+        x_pt = input_->points[idx];
+        x = Eigen::Vector3d (x_pt.x, x_pt.y, x_pt.z);
+
+        x_trans = Eigen::Vector3d (x_trans_pt.x, x_trans_pt.y, x_trans_pt.z);
+
+        // Denorm point, x_k' in Equations 6.12 and 6.13 [Magnusson 2009]
+        x_trans -= cell->getMean ();
+        // Uses precomputed covariance for speed.
+        c_inv = cell->getInverseCov ();
+
+        // Compute derivative of transform function w.r.t. transform vector, J_E and H_E in Equations 6.18 and 6.20 [Magnusson 2009]
+        computePointDerivatives (x, point_gradient_, point_hessian_);
+        // Update hessian, lines 21 in Algorithm 2, according to Equations 6.10, 6.12 and 6.13, respectively [Magnusson 2009]
+        updateHessian (hessian, point_gradient_, point_hessian_, x_trans, c_inv);
+      }
+    }
+  }
+}
+
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+template<typename PointSource, typename PointTarget> void
+pclomp::NormalDistributionsTransform<PointSource, PointTarget>::updateHessian (Eigen::Matrix<double, 6, 6> &hessian,
+	const Eigen::Matrix<double, 3, 6> &point_gradient_,
+	const Eigen::Matrix<double, 18, 6> &point_hessian_,
+	const Eigen::Vector3d &x_trans,
+	const Eigen::Matrix3d &c_inv) const
+{
+  Eigen::Vector3d cov_dxd_pi;
+  // e^(-d_2/2 * (x_k - mu_k)^T Sigma_k^-1 (x_k - mu_k)) Equation 6.9 [Magnusson 2009]
+  double e_x_cov_x = gauss_d2_ * exp (-gauss_d2_ * x_trans.dot (c_inv * x_trans) / 2);
+
+  // Error checking for invalid values.
+  if (e_x_cov_x > 1 || e_x_cov_x < 0 || e_x_cov_x != e_x_cov_x)
+    return;
+
+  // Reusable portion of Equation 6.12 and 6.13 [Magnusson 2009]
+  e_x_cov_x *= gauss_d1_;
+
+  for (int i = 0; i < 6; i++)
+  {
+    // Sigma_k^-1 d(T(x,p))/dpi, Reusable portion of Equation 6.12 and 6.13 [Magnusson 2009]
+    cov_dxd_pi = c_inv * point_gradient_.col (i);
+
+    for (int j = 0; j < hessian.cols (); j++)
+    {
+      // Update hessian, Equation 6.13 [Magnusson 2009]
+      hessian (i, j) += e_x_cov_x * (-gauss_d2_ * x_trans.dot (cov_dxd_pi) * x_trans.dot (c_inv * point_gradient_.col (j)) +
+                                  x_trans.dot (c_inv * point_hessian_.block<3, 1>(3 * i, j)) +
+                                  point_gradient_.col (j).dot (cov_dxd_pi) );
+    }
+  }
+
+}
+
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+template<typename PointSource, typename PointTarget> bool
+pclomp::NormalDistributionsTransform<PointSource, PointTarget>::updateIntervalMT (double &a_l, double &f_l, double &g_l,
+                                                                               double &a_u, double &f_u, double &g_u,
+                                                                               double a_t, double f_t, double g_t)
+{
+  // Case U1 in Update Algorithm and Case a in Modified Update Algorithm [More, Thuente 1994]
+  if (f_t > f_l)
+  {
+    a_u = a_t;
+    f_u = f_t;
+    g_u = g_t;
+    return (false);
+  }
+  // Case U2 in Update Algorithm and Case b in Modified Update Algorithm [More, Thuente 1994]
+  else
+  if (g_t * (a_l - a_t) > 0)
+  {
+    a_l = a_t;
+    f_l = f_t;
+    g_l = g_t;
+    return (false);
+  }
+  // Case U3 in Update Algorithm and Case c in Modified Update Algorithm [More, Thuente 1994]
+  else
+  if (g_t * (a_l - a_t) < 0)
+  {
+    a_u = a_l;
+    f_u = f_l;
+    g_u = g_l;
+
+    a_l = a_t;
+    f_l = f_t;
+    g_l = g_t;
+    return (false);
+  }
+  // Interval Converged
+  else
+    return (true);
+}
+
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+template<typename PointSource, typename PointTarget> double
+pclomp::NormalDistributionsTransform<PointSource, PointTarget>::trialValueSelectionMT (double a_l, double f_l, double g_l,
+                                                                                    double a_u, double f_u, double g_u,
+                                                                                    double a_t, double f_t, double g_t)
+{
+  // Case 1 in Trial Value Selection [More, Thuente 1994]
+  if (f_t > f_l)
+  {
+    // Calculate the minimizer of the cubic that interpolates f_l, f_t, g_l and g_t
+    // Equation 2.4.52 [Sun, Yuan 2006]
+    double z = 3 * (f_t - f_l) / (a_t - a_l) - g_t - g_l;
+    double w = std::sqrt (z * z - g_t * g_l);
+    // Equation 2.4.56 [Sun, Yuan 2006]
+    double a_c = a_l + (a_t - a_l) * (w - g_l - z) / (g_t - g_l + 2 * w);
+
+    // Calculate the minimizer of the quadratic that interpolates f_l, f_t and g_l
+    // Equation 2.4.2 [Sun, Yuan 2006]
+    double a_q = a_l - 0.5 * (a_l - a_t) * g_l / (g_l - (f_l - f_t) / (a_l - a_t));
+
+    if (std::fabs (a_c - a_l) < std::fabs (a_q - a_l))
+      return (a_c);
+    else
+      return (0.5 * (a_q + a_c));
+  }
+  // Case 2 in Trial Value Selection [More, Thuente 1994]
+  else
+  if (g_t * g_l < 0)
+  {
+    // Calculate the minimizer of the cubic that interpolates f_l, f_t, g_l and g_t
+    // Equation 2.4.52 [Sun, Yuan 2006]
+    double z = 3 * (f_t - f_l) / (a_t - a_l) - g_t - g_l;
+    double w = std::sqrt (z * z - g_t * g_l);
+    // Equation 2.4.56 [Sun, Yuan 2006]
+    double a_c = a_l + (a_t - a_l) * (w - g_l - z) / (g_t - g_l + 2 * w);
+
+    // Calculate the minimizer of the quadratic that interpolates f_l, g_l and g_t
+    // Equation 2.4.5 [Sun, Yuan 2006]
+    double a_s = a_l - (a_l - a_t) / (g_l - g_t) * g_l;
+
+    if (std::fabs (a_c - a_t) >= std::fabs (a_s - a_t))
+      return (a_c);
+    else
+      return (a_s);
+  }
+  // Case 3 in Trial Value Selection [More, Thuente 1994]
+  else
+  if (std::fabs (g_t) <= std::fabs (g_l))
+  {
+    // Calculate the minimizer of the cubic that interpolates f_l, f_t, g_l and g_t
+    // Equation 2.4.52 [Sun, Yuan 2006]
+    double z = 3 * (f_t - f_l) / (a_t - a_l) - g_t - g_l;
+    double w = std::sqrt (z * z - g_t * g_l);
+    double a_c = a_l + (a_t - a_l) * (w - g_l - z) / (g_t - g_l + 2 * w);
+
+    // Calculate the minimizer of the quadratic that interpolates g_l and g_t
+    // Equation 2.4.5 [Sun, Yuan 2006]
+    double a_s = a_l - (a_l - a_t) / (g_l - g_t) * g_l;
+
+    double a_t_next;
+
+    if (std::fabs (a_c - a_t) < std::fabs (a_s - a_t))
+      a_t_next = a_c;
+    else
+      a_t_next = a_s;
+
+    if (a_t > a_l)
+      return (std::min (a_t + 0.66 * (a_u - a_t), a_t_next));
+    else
+      return (std::max (a_t + 0.66 * (a_u - a_t), a_t_next));
+  }
+  // Case 4 in Trial Value Selection [More, Thuente 1994]
+  else
+  {
+    // Calculate the minimizer of the cubic that interpolates f_u, f_t, g_u and g_t
+    // Equation 2.4.52 [Sun, Yuan 2006]
+    double z = 3 * (f_t - f_u) / (a_t - a_u) - g_t - g_u;
+    double w = std::sqrt (z * z - g_t * g_u);
+    // Equation 2.4.56 [Sun, Yuan 2006]
+    return (a_u + (a_t - a_u) * (w - g_u - z) / (g_t - g_u + 2 * w));
+  }
+}
+
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+template<typename PointSource, typename PointTarget> double
+pclomp::NormalDistributionsTransform<PointSource, PointTarget>::computeStepLengthMT (const Eigen::Matrix<double, 6, 1> &x, Eigen::Matrix<double, 6, 1> &step_dir, double step_init, double step_max,
+                                                                                  double step_min, double &score, Eigen::Matrix<double, 6, 1> &score_gradient, Eigen::Matrix<double, 6, 6> &hessian,
+                                                                                  PointCloudSource &trans_cloud)
+{
+  // Set the value of phi(0), Equation 1.3 [More, Thuente 1994]
+  double phi_0 = -score;
+  // Set the value of phi'(0), Equation 1.3 [More, Thuente 1994]
+  double d_phi_0 = -(score_gradient.dot (step_dir));
+
+  Eigen::Matrix<double, 6, 1>  x_t;
+
+  if (d_phi_0 >= 0)
+  {
+    // Not a decent direction
+    if (d_phi_0 == 0)
+      return 0;
+    else
+    {
+      // Reverse step direction and calculate optimal step.
+      d_phi_0 *= -1;
+      step_dir *= -1;
+
+    }
+  }
+
+  // The Search Algorithm for T(mu) [More, Thuente 1994]
+
+  int max_step_iterations = 10;
+  int step_iterations = 0;
+
+  // Sufficient decrease constant, Equation 1.1 [More, Thuete 1994]
+  double mu = 1.e-4;
+  // Curvature condition constant, Equation 1.2 [More, Thuete 1994]
+  double nu = 0.9;
+
+  // Initial endpoints of Interval I,
+  double a_l = 0, a_u = 0;
+
+  // Auxiliary function psi is used until I is determined ot be a closed interval, Equation 2.1 [More, Thuente 1994]
+  double f_l = auxiliaryFunction_PsiMT (a_l, phi_0, phi_0, d_phi_0, mu);
+  double g_l = auxiliaryFunction_dPsiMT (d_phi_0, d_phi_0, mu);
+
+  double f_u = auxiliaryFunction_PsiMT (a_u, phi_0, phi_0, d_phi_0, mu);
+  double g_u = auxiliaryFunction_dPsiMT (d_phi_0, d_phi_0, mu);
+
+  // Check used to allow More-Thuente step length calculation to be skipped by making step_min == step_max
+  bool interval_converged = (step_max - step_min) < 0, open_interval = true;
+
+  double a_t = step_init;
+  a_t = std::min (a_t, step_max);
+  a_t = std::max (a_t, step_min);
+
+  x_t = x + step_dir * a_t;
+
+  final_transformation_ = (Eigen::Translation<float, 3>(static_cast<float> (x_t (0)), static_cast<float> (x_t (1)), static_cast<float> (x_t (2))) *
+                           Eigen::AngleAxis<float> (static_cast<float> (x_t (3)), Eigen::Vector3f::UnitX ()) *
+                           Eigen::AngleAxis<float> (static_cast<float> (x_t (4)), Eigen::Vector3f::UnitY ()) *
+                           Eigen::AngleAxis<float> (static_cast<float> (x_t (5)), Eigen::Vector3f::UnitZ ())).matrix ();
+
+  // New transformed point cloud
+  transformPointCloud (*input_, trans_cloud, final_transformation_);
+
+  // Updates score, gradient and hessian.  Hessian calculation is unnecessary but testing showed that most step calculations use the
+  // initial step suggestion and recalculation the reusable portions of the hessian would intail more computation time.
+  score = computeDerivatives (score_gradient, hessian, trans_cloud, x_t, true);
+
+  // Calculate phi(alpha_t)
+  double phi_t = -score;
+  // Calculate phi'(alpha_t)
+  double d_phi_t = -(score_gradient.dot (step_dir));
+
+  // Calculate psi(alpha_t)
+  double psi_t = auxiliaryFunction_PsiMT (a_t, phi_t, phi_0, d_phi_0, mu);
+  // Calculate psi'(alpha_t)
+  double d_psi_t = auxiliaryFunction_dPsiMT (d_phi_t, d_phi_0, mu);
+
+  // Iterate until max number of iterations, interval convergence or a value satisfies the sufficient decrease, Equation 1.1, and curvature condition, Equation 1.2 [More, Thuente 1994]
+  while (!interval_converged && step_iterations < max_step_iterations && !(psi_t <= 0 /*Sufficient Decrease*/ && d_phi_t <= -nu * d_phi_0 /*Curvature Condition*/))
+  {
+    // Use auxiliary function if interval I is not closed
+    if (open_interval)
+    {
+      a_t = trialValueSelectionMT (a_l, f_l, g_l,
+                                   a_u, f_u, g_u,
+                                   a_t, psi_t, d_psi_t);
+    }
+    else
+    {
+      a_t = trialValueSelectionMT (a_l, f_l, g_l,
+                                   a_u, f_u, g_u,
+                                   a_t, phi_t, d_phi_t);
+    }
+
+    a_t = std::min (a_t, step_max);
+    a_t = std::max (a_t, step_min);
+
+    x_t = x + step_dir * a_t;
+
+    final_transformation_ = (Eigen::Translation<float, 3> (static_cast<float> (x_t (0)), static_cast<float> (x_t (1)), static_cast<float> (x_t (2))) *
+                             Eigen::AngleAxis<float> (static_cast<float> (x_t (3)), Eigen::Vector3f::UnitX ()) *
+                             Eigen::AngleAxis<float> (static_cast<float> (x_t (4)), Eigen::Vector3f::UnitY ()) *
+                             Eigen::AngleAxis<float> (static_cast<float> (x_t (5)), Eigen::Vector3f::UnitZ ())).matrix ();
+
+    // New transformed point cloud
+    // Done on final cloud to prevent wasted computation
+    transformPointCloud (*input_, trans_cloud, final_transformation_);
+
+    // Updates score, gradient. Values stored to prevent wasted computation.
+    score = computeDerivatives (score_gradient, hessian, trans_cloud, x_t, false);
+
+    // Calculate phi(alpha_t+)
+    phi_t = -score;
+    // Calculate phi'(alpha_t+)
+    d_phi_t = -(score_gradient.dot (step_dir));
+
+    // Calculate psi(alpha_t+)
+    psi_t = auxiliaryFunction_PsiMT (a_t, phi_t, phi_0, d_phi_0, mu);
+    // Calculate psi'(alpha_t+)
+    d_psi_t = auxiliaryFunction_dPsiMT (d_phi_t, d_phi_0, mu);
+
+    // Check if I is now a closed interval
+    if (open_interval && (psi_t <= 0 && d_psi_t >= 0))
+    {
+      open_interval = false;
+
+      // Converts f_l and g_l from psi to phi
+      f_l = f_l + phi_0 - mu * d_phi_0 * a_l;
+      g_l = g_l + mu * d_phi_0;
+
+      // Converts f_u and g_u from psi to phi
+      f_u = f_u + phi_0 - mu * d_phi_0 * a_u;
+      g_u = g_u + mu * d_phi_0;
+    }
+
+    if (open_interval)
+    {
+      // Update interval end points using Updating Algorithm [More, Thuente 1994]
+      interval_converged = updateIntervalMT (a_l, f_l, g_l,
+                                             a_u, f_u, g_u,
+                                             a_t, psi_t, d_psi_t);
+    }
+    else
+    {
+      // Update interval end points using Modified Updating Algorithm [More, Thuente 1994]
+      interval_converged = updateIntervalMT (a_l, f_l, g_l,
+                                             a_u, f_u, g_u,
+                                             a_t, phi_t, d_phi_t);
+    }
+
+    step_iterations++;
+  }
+
+  // If inner loop was run then hessian needs to be calculated.
+  // Hessian is unnecessary for step length determination but gradients are required
+  // so derivative and transform data is stored for the next iteration.
+  if (step_iterations)
+    computeHessian (hessian, trans_cloud, x_t);
+
+  return (a_t);
+}
+
+
+template<typename PointSource, typename PointTarget>
+double pclomp::NormalDistributionsTransform<PointSource, PointTarget>::calculateScore(const PointCloudSource & trans_cloud) const
+{
+	double score = 0;
+
+	for (std::size_t idx = 0; idx < trans_cloud.points.size(); idx++)
+	{
+		PointSource x_trans_pt = trans_cloud.points[idx];
+
+		// Find neighbors (Radius search has been experimentally faster than direct neighbor checking.
+		std::vector<TargetGridLeafConstPtr> neighborhood;
+		std::vector<float> distances;
+		switch (search_method) {
+		case KDTREE:
+			target_cells_.radiusSearch(x_trans_pt, resolution_, neighborhood, distances);
+			break;
+		case DIRECT26:
+			target_cells_.getNeighborhoodAtPoint(x_trans_pt, neighborhood);
+			break;
+		default:
+		case DIRECT7:
+			target_cells_.getNeighborhoodAtPoint7(x_trans_pt, neighborhood);
+			break;
+		case DIRECT1:
+			target_cells_.getNeighborhoodAtPoint1(x_trans_pt, neighborhood);
+			break;
+		}
+
+		for (typename std::vector<TargetGridLeafConstPtr>::iterator neighborhood_it = neighborhood.begin(); neighborhood_it != neighborhood.end(); neighborhood_it++)
+		{
+			TargetGridLeafConstPtr cell = *neighborhood_it;
+
+			Eigen::Vector3d x_trans = Eigen::Vector3d(x_trans_pt.x, x_trans_pt.y, x_trans_pt.z);
+
+			// Denorm point, x_k' in Equations 6.12 and 6.13 [Magnusson 2009]
+			x_trans -= cell->getMean();
+			// Uses precomputed covariance for speed.
+			Eigen::Matrix3d c_inv = cell->getInverseCov();
+
+			// e^(-d_2/2 * (x_k - mu_k)^T Sigma_k^-1 (x_k - mu_k)) Equation 6.9 [Magnusson 2009]
+			double e_x_cov_x = exp(-gauss_d2_ * x_trans.dot(c_inv * x_trans) / 2);
+			// Calculate probability of transformed points existence, Equation 6.9 [Magnusson 2009]
+			double score_inc = -gauss_d1_ * e_x_cov_x - gauss_d3_;
+
+			score += score_inc / neighborhood.size();
+		}
+	}
+	return (score) / static_cast<double> (trans_cloud.size());
+}
+
+#endif // PCL_REGISTRATION_NDT_IMPL_H_

+ 506 - 0
src/detection/detection_ndt_pclomp/pclomp/pclomp/ndt_omp.h

@@ -0,0 +1,506 @@
+/*
+ * Software License Agreement (BSD License)
+ *
+ *  Point Cloud Library (PCL) - www.pointclouds.org
+ *  Copyright (c) 2010-2012, Willow Garage, Inc.
+ *  Copyright (c) 2012-, Open Perception, Inc.
+ *
+ *  All rights reserved.
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions
+ *  are met:
+ *
+ *   * Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above
+ *     copyright notice, this list of conditions and the following
+ *     disclaimer in the documentation and/or other materials provided
+ *     with the distribution.
+ *   * Neither the name of the copyright holder(s) nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ *  FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ *  COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ *  BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ *  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ *  CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ *  LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ *  ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ *  POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $Id$
+ *
+ */
+
+#ifndef PCL_REGISTRATION_NDT_OMP_H_
+#define PCL_REGISTRATION_NDT_OMP_H_
+
+#include <pcl/registration/registration.h>
+#include <pcl/search/impl/search.hpp>
+#include <pclomp/voxel_grid_covariance_omp.h>
+
+#include <unsupported/Eigen/NonLinearOptimization>
+
+namespace pclomp
+{
+	enum NeighborSearchMethod {
+		KDTREE,
+		DIRECT26,
+		DIRECT7,
+		DIRECT1
+	};
+
+	/** \brief A 3D Normal Distribution Transform registration implementation for point cloud data.
+	  * \note For more information please see
+	  * <b>Magnusson, M. (2009). The Three-Dimensional Normal-Distributions Transform —
+	  * an Efficient Representation for Registration, Surface Analysis, and Loop Detection.
+	  * PhD thesis, Orebro University. Orebro Studies in Technology 36.</b>,
+	  * <b>More, J., and Thuente, D. (1994). Line Search Algorithm with Guaranteed Sufficient Decrease
+	  * In ACM Transactions on Mathematical Software.</b> and
+	  * Sun, W. and Yuan, Y, (2006) Optimization Theory and Methods: Nonlinear Programming. 89-100
+	  * \note Math refactored by Todor Stoyanov.
+	  * \author Brian Okorn (Space and Naval Warfare Systems Center Pacific)
+	  */
+	template<typename PointSource, typename PointTarget>
+	class NormalDistributionsTransform : public pcl::Registration<PointSource, PointTarget>
+	{
+	protected:
+
+		typedef typename pcl::Registration<PointSource, PointTarget>::PointCloudSource PointCloudSource;
+		typedef typename PointCloudSource::Ptr PointCloudSourcePtr;
+		typedef typename PointCloudSource::ConstPtr PointCloudSourceConstPtr;
+
+		typedef typename pcl::Registration<PointSource, PointTarget>::PointCloudTarget PointCloudTarget;
+		typedef typename PointCloudTarget::Ptr PointCloudTargetPtr;
+		typedef typename PointCloudTarget::ConstPtr PointCloudTargetConstPtr;
+
+		typedef pcl::PointIndices::Ptr PointIndicesPtr;
+		typedef pcl::PointIndices::ConstPtr PointIndicesConstPtr;
+
+		/** \brief Typename of searchable voxel grid containing mean and covariance. */
+		typedef pclomp::VoxelGridCovariance<PointTarget> TargetGrid;
+		/** \brief Typename of pointer to searchable voxel grid. */
+		typedef TargetGrid* TargetGridPtr;
+		/** \brief Typename of const pointer to searchable voxel grid. */
+		typedef const TargetGrid* TargetGridConstPtr;
+		/** \brief Typename of const pointer to searchable voxel grid leaf. */
+		typedef typename TargetGrid::LeafConstPtr TargetGridLeafConstPtr;
+
+
+	public:
+
+#if PCL_VERSION >= PCL_VERSION_CALC(1, 10, 0)
+		typedef pcl::shared_ptr< NormalDistributionsTransform<PointSource, PointTarget> > Ptr;
+		typedef pcl::shared_ptr< const NormalDistributionsTransform<PointSource, PointTarget> > ConstPtr;
+#else
+		typedef boost::shared_ptr< NormalDistributionsTransform<PointSource, PointTarget> > Ptr;
+		typedef boost::shared_ptr< const NormalDistributionsTransform<PointSource, PointTarget> > ConstPtr;
+#endif
+
+
+		/** \brief Constructor.
+		  * Sets \ref outlier_ratio_ to 0.35, \ref step_size_ to 0.05 and \ref resolution_ to 1.0
+		  */
+		NormalDistributionsTransform();
+
+		/** \brief Empty destructor */
+		virtual ~NormalDistributionsTransform() {}
+
+    void setNumThreads(int n) {
+      num_threads_ = n;
+    }
+
+		/** \brief Provide a pointer to the input target (e.g., the point cloud that we want to align the input source to).
+		  * \param[in] cloud the input point cloud target
+		  */
+		inline void
+			setInputTarget(const PointCloudTargetConstPtr &cloud)
+		{
+			pcl::Registration<PointSource, PointTarget>::setInputTarget(cloud);
+			init();
+		}
+
+		/** \brief Set/change the voxel grid resolution.
+		  * \param[in] resolution side length of voxels
+		  */
+		inline void
+			setResolution(float resolution)
+		{
+			// Prevents unnecessary voxel initiations
+			if (resolution_ != resolution)
+			{
+				resolution_ = resolution;
+				if (input_)
+					init();
+			}
+		}
+
+		/** \brief Get voxel grid resolution.
+		  * \return side length of voxels
+		  */
+		inline float
+			getResolution() const
+		{
+			return (resolution_);
+		}
+
+		/** \brief Get the newton line search maximum step length.
+		  * \return maximum step length
+		  */
+		inline double
+			getStepSize() const
+		{
+			return (step_size_);
+		}
+
+		/** \brief Set/change the newton line search maximum step length.
+		  * \param[in] step_size maximum step length
+		  */
+		inline void
+			setStepSize(double step_size)
+		{
+			step_size_ = step_size;
+		}
+
+		/** \brief Get the point cloud outlier ratio.
+		  * \return outlier ratio
+		  */
+		inline double
+			getOutlierRatio() const
+		{
+			return (outlier_ratio_);
+		}
+
+		/** \brief Set/change the point cloud outlier ratio.
+		  * \param[in] outlier_ratio outlier ratio
+		  */
+		inline void
+			setOutlierRatio(double outlier_ratio)
+		{
+			outlier_ratio_ = outlier_ratio;
+		}
+
+		inline void setNeighborhoodSearchMethod(NeighborSearchMethod method) {
+			search_method = method;
+		}
+
+		/** \brief Get the registration alignment probability.
+		  * \return transformation probability
+		  */
+		inline double
+			getTransformationProbability() const
+		{
+			return (trans_probability_);
+		}
+
+		/** \brief Get the number of iterations required to calculate alignment.
+		  * \return final number of iterations
+		  */
+		inline int
+			getFinalNumIteration() const
+		{
+			return (nr_iterations_);
+		}
+
+		/** \brief Convert 6 element transformation vector to affine transformation.
+		  * \param[in] x transformation vector of the form [x, y, z, roll, pitch, yaw]
+		  * \param[out] trans affine transform corresponding to given transformation vector
+		  */
+		static void
+			convertTransform(const Eigen::Matrix<double, 6, 1> &x, Eigen::Affine3f &trans)
+		{
+			trans = Eigen::Translation<float, 3>(float(x(0)), float(x(1)), float(x(2))) *
+				Eigen::AngleAxis<float>(float(x(3)), Eigen::Vector3f::UnitX()) *
+				Eigen::AngleAxis<float>(float(x(4)), Eigen::Vector3f::UnitY()) *
+				Eigen::AngleAxis<float>(float(x(5)), Eigen::Vector3f::UnitZ());
+		}
+
+		/** \brief Convert 6 element transformation vector to transformation matrix.
+		  * \param[in] x transformation vector of the form [x, y, z, roll, pitch, yaw]
+		  * \param[out] trans 4x4 transformation matrix corresponding to given transformation vector
+		  */
+		static void
+			convertTransform(const Eigen::Matrix<double, 6, 1> &x, Eigen::Matrix4f &trans)
+		{
+			Eigen::Affine3f _affine;
+			convertTransform(x, _affine);
+			trans = _affine.matrix();
+		}
+
+		// negative log likelihood function
+		// lower is better
+		double calculateScore(const PointCloudSource& cloud) const;
+
+	protected:
+
+		using pcl::Registration<PointSource, PointTarget>::reg_name_;
+		using pcl::Registration<PointSource, PointTarget>::getClassName;
+		using pcl::Registration<PointSource, PointTarget>::input_;
+		using pcl::Registration<PointSource, PointTarget>::indices_;
+		using pcl::Registration<PointSource, PointTarget>::target_;
+		using pcl::Registration<PointSource, PointTarget>::nr_iterations_;
+		using pcl::Registration<PointSource, PointTarget>::max_iterations_;
+		using pcl::Registration<PointSource, PointTarget>::previous_transformation_;
+		using pcl::Registration<PointSource, PointTarget>::final_transformation_;
+		using pcl::Registration<PointSource, PointTarget>::transformation_;
+		using pcl::Registration<PointSource, PointTarget>::transformation_epsilon_;
+		using pcl::Registration<PointSource, PointTarget>::converged_;
+		using pcl::Registration<PointSource, PointTarget>::corr_dist_threshold_;
+		using pcl::Registration<PointSource, PointTarget>::inlier_threshold_;
+
+		using pcl::Registration<PointSource, PointTarget>::update_visualizer_;
+
+		/** \brief Estimate the transformation and returns the transformed source (input) as output.
+		  * \param[out] output the resultant input transformed point cloud dataset
+		  */
+		virtual void
+			computeTransformation(PointCloudSource &output)
+		{
+			computeTransformation(output, Eigen::Matrix4f::Identity());
+		}
+
+		/** \brief Estimate the transformation and returns the transformed source (input) as output.
+		  * \param[out] output the resultant input transformed point cloud dataset
+		  * \param[in] guess the initial gross estimation of the transformation
+		  */
+		virtual void
+			computeTransformation(PointCloudSource &output, const Eigen::Matrix4f &guess);
+
+		/** \brief Initiate covariance voxel structure. */
+		void inline
+			init()
+		{
+			target_cells_.setLeafSize(resolution_, resolution_, resolution_);
+			target_cells_.setInputCloud(target_);
+			// Initiate voxel structure.
+			target_cells_.filter(true);
+		}
+
+		/** \brief Compute derivatives of probability function w.r.t. the transformation vector.
+		  * \note Equation 6.10, 6.12 and 6.13 [Magnusson 2009].
+		  * \param[out] score_gradient the gradient vector of the probability function w.r.t. the transformation vector
+		  * \param[out] hessian the hessian matrix of the probability function w.r.t. the transformation vector
+		  * \param[in] trans_cloud transformed point cloud
+		  * \param[in] p the current transform vector
+		  * \param[in] compute_hessian flag to calculate hessian, unnecessary for step calculation.
+		  */
+		double
+			computeDerivatives(Eigen::Matrix<double, 6, 1> &score_gradient,
+				Eigen::Matrix<double, 6, 6> &hessian,
+				PointCloudSource &trans_cloud,
+				Eigen::Matrix<double, 6, 1> &p,
+				bool compute_hessian = true);
+
+		/** \brief Compute individual point contributions to derivatives of probability function w.r.t. the transformation vector.
+		  * \note Equation 6.10, 6.12 and 6.13 [Magnusson 2009].
+		  * \param[in,out] score_gradient the gradient vector of the probability function w.r.t. the transformation vector
+		  * \param[in,out] hessian the hessian matrix of the probability function w.r.t. the transformation vector
+		  * \param[in] x_trans transformed point minus mean of occupied covariance voxel
+		  * \param[in] c_inv covariance of occupied covariance voxel
+		  * \param[in] compute_hessian flag to calculate hessian, unnecessary for step calculation.
+		  */
+		double
+			updateDerivatives(Eigen::Matrix<double, 6, 1> &score_gradient,
+				Eigen::Matrix<double, 6, 6> &hessian,
+				const Eigen::Matrix<float, 4, 6> &point_gradient_,
+				const Eigen::Matrix<float, 24, 6> &point_hessian_,
+				const Eigen::Vector3d &x_trans, const Eigen::Matrix3d &c_inv,
+				bool compute_hessian = true) const;
+
+		/** \brief Precompute angular components of derivatives.
+		  * \note Equation 6.19 and 6.21 [Magnusson 2009].
+		  * \param[in] p the current transform vector
+		  * \param[in] compute_hessian flag to calculate hessian, unnecessary for step calculation.
+		  */
+		void
+			computeAngleDerivatives(Eigen::Matrix<double, 6, 1> &p, bool compute_hessian = true);
+
+		/** \brief Compute point derivatives.
+		  * \note Equation 6.18-21 [Magnusson 2009].
+		  * \param[in] x point from the input cloud
+		  * \param[in] compute_hessian flag to calculate hessian, unnecessary for step calculation.
+		  */
+		void
+			computePointDerivatives(Eigen::Vector3d &x, Eigen::Matrix<double, 3, 6>& point_gradient_, Eigen::Matrix<double, 18, 6>& point_hessian_, bool compute_hessian = true) const;
+
+		void
+			computePointDerivatives(Eigen::Vector3d &x, Eigen::Matrix<float, 4, 6>& point_gradient_, Eigen::Matrix<float, 24, 6>& point_hessian_, bool compute_hessian = true) const;
+
+		/** \brief Compute hessian of probability function w.r.t. the transformation vector.
+		  * \note Equation 6.13 [Magnusson 2009].
+		  * \param[out] hessian the hessian matrix of the probability function w.r.t. the transformation vector
+		  * \param[in] trans_cloud transformed point cloud
+		  * \param[in] p the current transform vector
+		  */
+		void
+			computeHessian(Eigen::Matrix<double, 6, 6> &hessian,
+				PointCloudSource &trans_cloud,
+				Eigen::Matrix<double, 6, 1> &p);
+
+		/** \brief Compute individual point contributions to hessian of probability function w.r.t. the transformation vector.
+		  * \note Equation 6.13 [Magnusson 2009].
+		  * \param[in,out] hessian the hessian matrix of the probability function w.r.t. the transformation vector
+		  * \param[in] x_trans transformed point minus mean of occupied covariance voxel
+		  * \param[in] c_inv covariance of occupied covariance voxel
+		  */
+		void
+			updateHessian(Eigen::Matrix<double, 6, 6> &hessian,
+				const Eigen::Matrix<double, 3, 6> &point_gradient_,
+				const Eigen::Matrix<double, 18, 6> &point_hessian_,
+				const Eigen::Vector3d &x_trans, const Eigen::Matrix3d &c_inv) const;
+
+		/** \brief Compute line search step length and update transform and probability derivatives using More-Thuente method.
+		  * \note Search Algorithm [More, Thuente 1994]
+		  * \param[in] x initial transformation vector, \f$ x \f$ in Equation 1.3 (Moore, Thuente 1994) and \f$ \vec{p} \f$ in Algorithm 2 [Magnusson 2009]
+		  * \param[in] step_dir descent direction, \f$ p \f$ in Equation 1.3 (Moore, Thuente 1994) and \f$ \delta \vec{p} \f$ normalized in Algorithm 2 [Magnusson 2009]
+		  * \param[in] step_init initial step length estimate, \f$ \alpha_0 \f$ in Moore-Thuente (1994) and the normal of \f$ \delta \vec{p} \f$ in Algorithm 2 [Magnusson 2009]
+		  * \param[in] step_max maximum step length, \f$ \alpha_max \f$ in Moore-Thuente (1994)
+		  * \param[in] step_min minimum step length, \f$ \alpha_min \f$ in Moore-Thuente (1994)
+		  * \param[out] score final score function value, \f$ f(x + \alpha p) \f$ in Equation 1.3 (Moore, Thuente 1994) and \f$ score \f$ in Algorithm 2 [Magnusson 2009]
+		  * \param[in,out] score_gradient gradient of score function w.r.t. transformation vector, \f$ f'(x + \alpha p) \f$ in Moore-Thuente (1994) and \f$ \vec{g} \f$ in Algorithm 2 [Magnusson 2009]
+		  * \param[out] hessian hessian of score function w.r.t. transformation vector, \f$ f''(x + \alpha p) \f$ in Moore-Thuente (1994) and \f$ H \f$ in Algorithm 2 [Magnusson 2009]
+		  * \param[in,out] trans_cloud transformed point cloud, \f$ X \f$ transformed by \f$ T(\vec{p},\vec{x}) \f$ in Algorithm 2 [Magnusson 2009]
+		  * \return final step length
+		  */
+		double
+			computeStepLengthMT(const Eigen::Matrix<double, 6, 1> &x,
+				Eigen::Matrix<double, 6, 1> &step_dir,
+				double step_init,
+				double step_max, double step_min,
+				double &score,
+				Eigen::Matrix<double, 6, 1> &score_gradient,
+				Eigen::Matrix<double, 6, 6> &hessian,
+				PointCloudSource &trans_cloud);
+
+		/** \brief Update interval of possible step lengths for More-Thuente method, \f$ I \f$ in More-Thuente (1994)
+		  * \note Updating Algorithm until some value satisfies \f$ \psi(\alpha_k) \leq 0 \f$ and \f$ \phi'(\alpha_k) \geq 0 \f$
+		  * and Modified Updating Algorithm from then on [More, Thuente 1994].
+		  * \param[in,out] a_l first endpoint of interval \f$ I \f$, \f$ \alpha_l \f$ in Moore-Thuente (1994)
+		  * \param[in,out] f_l value at first endpoint, \f$ f_l \f$ in Moore-Thuente (1994), \f$ \psi(\alpha_l) \f$ for Update Algorithm and \f$ \phi(\alpha_l) \f$ for Modified Update Algorithm
+		  * \param[in,out] g_l derivative at first endpoint, \f$ g_l \f$ in Moore-Thuente (1994), \f$ \psi'(\alpha_l) \f$ for Update Algorithm and \f$ \phi'(\alpha_l) \f$ for Modified Update Algorithm
+		  * \param[in,out] a_u second endpoint of interval \f$ I \f$, \f$ \alpha_u \f$ in Moore-Thuente (1994)
+		  * \param[in,out] f_u value at second endpoint, \f$ f_u \f$ in Moore-Thuente (1994), \f$ \psi(\alpha_u) \f$ for Update Algorithm and \f$ \phi(\alpha_u) \f$ for Modified Update Algorithm
+		  * \param[in,out] g_u derivative at second endpoint, \f$ g_u \f$ in Moore-Thuente (1994), \f$ \psi'(\alpha_u) \f$ for Update Algorithm and \f$ \phi'(\alpha_u) \f$ for Modified Update Algorithm
+		  * \param[in] a_t trial value, \f$ \alpha_t \f$ in Moore-Thuente (1994)
+		  * \param[in] f_t value at trial value, \f$ f_t \f$ in Moore-Thuente (1994), \f$ \psi(\alpha_t) \f$ for Update Algorithm and \f$ \phi(\alpha_t) \f$ for Modified Update Algorithm
+		  * \param[in] g_t derivative at trial value, \f$ g_t \f$ in Moore-Thuente (1994), \f$ \psi'(\alpha_t) \f$ for Update Algorithm and \f$ \phi'(\alpha_t) \f$ for Modified Update Algorithm
+		  * \return if interval converges
+		  */
+		bool
+			updateIntervalMT(double &a_l, double &f_l, double &g_l,
+				double &a_u, double &f_u, double &g_u,
+				double a_t, double f_t, double g_t);
+
+		/** \brief Select new trial value for More-Thuente method.
+		  * \note Trial Value Selection [More, Thuente 1994], \f$ \psi(\alpha_k) \f$ is used for \f$ f_k \f$ and \f$ g_k \f$
+		  * until some value satisfies the test \f$ \psi(\alpha_k) \leq 0 \f$ and \f$ \phi'(\alpha_k) \geq 0 \f$
+		  * then \f$ \phi(\alpha_k) \f$ is used from then on.
+		  * \note Interpolation Minimizer equations from Optimization Theory and Methods: Nonlinear Programming By Wenyu Sun, Ya-xiang Yuan (89-100).
+		  * \param[in] a_l first endpoint of interval \f$ I \f$, \f$ \alpha_l \f$ in Moore-Thuente (1994)
+		  * \param[in] f_l value at first endpoint, \f$ f_l \f$ in Moore-Thuente (1994)
+		  * \param[in] g_l derivative at first endpoint, \f$ g_l \f$ in Moore-Thuente (1994)
+		  * \param[in] a_u second endpoint of interval \f$ I \f$, \f$ \alpha_u \f$ in Moore-Thuente (1994)
+		  * \param[in] f_u value at second endpoint, \f$ f_u \f$ in Moore-Thuente (1994)
+		  * \param[in] g_u derivative at second endpoint, \f$ g_u \f$ in Moore-Thuente (1994)
+		  * \param[in] a_t previous trial value, \f$ \alpha_t \f$ in Moore-Thuente (1994)
+		  * \param[in] f_t value at previous trial value, \f$ f_t \f$ in Moore-Thuente (1994)
+		  * \param[in] g_t derivative at previous trial value, \f$ g_t \f$ in Moore-Thuente (1994)
+		  * \return new trial value
+		  */
+		double
+			trialValueSelectionMT(double a_l, double f_l, double g_l,
+				double a_u, double f_u, double g_u,
+				double a_t, double f_t, double g_t);
+
+		/** \brief Auxiliary function used to determine endpoints of More-Thuente interval.
+		  * \note \f$ \psi(\alpha) \f$ in Equation 1.6 (Moore, Thuente 1994)
+		  * \param[in] a the step length, \f$ \alpha \f$ in More-Thuente (1994)
+		  * \param[in] f_a function value at step length a, \f$ \phi(\alpha) \f$ in More-Thuente (1994)
+		  * \param[in] f_0 initial function value, \f$ \phi(0) \f$ in Moore-Thuente (1994)
+		  * \param[in] g_0 initial function gradiant, \f$ \phi'(0) \f$ in More-Thuente (1994)
+		  * \param[in] mu the step length, constant \f$ \mu \f$ in Equation 1.1 [More, Thuente 1994]
+		  * \return sufficient decrease value
+		  */
+		inline double
+			auxiliaryFunction_PsiMT(double a, double f_a, double f_0, double g_0, double mu = 1.e-4)
+		{
+			return (f_a - f_0 - mu * g_0 * a);
+		}
+
+		/** \brief Auxiliary function derivative used to determine endpoints of More-Thuente interval.
+		  * \note \f$ \psi'(\alpha) \f$, derivative of Equation 1.6 (Moore, Thuente 1994)
+		  * \param[in] g_a function gradient at step length a, \f$ \phi'(\alpha) \f$ in More-Thuente (1994)
+		  * \param[in] g_0 initial function gradiant, \f$ \phi'(0) \f$ in More-Thuente (1994)
+		  * \param[in] mu the step length, constant \f$ \mu \f$ in Equation 1.1 [More, Thuente 1994]
+		  * \return sufficient decrease derivative
+		  */
+		inline double
+			auxiliaryFunction_dPsiMT(double g_a, double g_0, double mu = 1.e-4)
+		{
+			return (g_a - mu * g_0);
+		}
+
+		/** \brief The voxel grid generated from target cloud containing point means and covariances. */
+		TargetGrid target_cells_;
+
+		//double fitness_epsilon_;
+
+		/** \brief The side length of voxels. */
+		float resolution_;
+
+		/** \brief The maximum step length. */
+		double step_size_;
+
+		/** \brief The ratio of outliers of points w.r.t. a normal distribution, Equation 6.7 [Magnusson 2009]. */
+		double outlier_ratio_;
+
+		/** \brief The normalization constants used fit the point distribution to a normal distribution, Equation 6.8 [Magnusson 2009]. */
+		double gauss_d1_, gauss_d2_, gauss_d3_;
+
+		/** \brief The probability score of the transform applied to the input cloud, Equation 6.9 and 6.10 [Magnusson 2009]. */
+		double trans_probability_;
+
+		/** \brief Precomputed Angular Gradient
+		  *
+		  * The precomputed angular derivatives for the jacobian of a transformation vector, Equation 6.19 [Magnusson 2009].
+		  */
+		Eigen::Vector3d j_ang_a_, j_ang_b_, j_ang_c_, j_ang_d_, j_ang_e_, j_ang_f_, j_ang_g_, j_ang_h_;
+
+		Eigen::Matrix<float, 8, 4> j_ang;
+
+		/** \brief Precomputed Angular Hessian
+		  *
+		  * The precomputed angular derivatives for the hessian of a transformation vector, Equation 6.19 [Magnusson 2009].
+		  */
+		Eigen::Vector3d h_ang_a2_, h_ang_a3_,
+			h_ang_b2_, h_ang_b3_,
+			h_ang_c2_, h_ang_c3_,
+			h_ang_d1_, h_ang_d2_, h_ang_d3_,
+			h_ang_e1_, h_ang_e2_, h_ang_e3_,
+			h_ang_f1_, h_ang_f2_, h_ang_f3_;
+
+		Eigen::Matrix<float, 16, 4> h_ang;
+
+		/** \brief The first order derivative of the transformation of a point w.r.t. the transform vector, \f$ J_E \f$ in Equation 6.18 [Magnusson 2009]. */
+  //      Eigen::Matrix<double, 3, 6> point_gradient_;
+
+		/** \brief The second order derivative of the transformation of a point w.r.t. the transform vector, \f$ H_E \f$ in Equation 6.20 [Magnusson 2009]. */
+  //      Eigen::Matrix<double, 18, 6> point_hessian_;
+
+    int num_threads_;
+
+	public:
+		NeighborSearchMethod search_method;
+
+		EIGEN_MAKE_ALIGNED_OPERATOR_NEW
+	};
+
+}
+
+#endif // PCL_REGISTRATION_NDT_H_

+ 557 - 0
src/detection/detection_ndt_pclomp/pclomp/pclomp/voxel_grid_covariance_omp.h

@@ -0,0 +1,557 @@
+/*
+ * Software License Agreement (BSD License)
+ *
+ *  Point Cloud Library (PCL) - www.pointclouds.org
+ *  Copyright (c) 2010-2011, Willow Garage, Inc.
+ *
+ *  All rights reserved.
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions
+ *  are met:
+ *
+ *   * Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above
+ *     copyright notice, this list of conditions and the following
+ *     disclaimer in the documentation and/or other materials provided
+ *     with the distribution.
+ *   * Neither the name of the copyright holder(s) nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ *  FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ *  COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ *  BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ *  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ *  CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ *  LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ *  ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ *  POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef PCL_VOXEL_GRID_COVARIANCE_OMP_H_
+#define PCL_VOXEL_GRID_COVARIANCE_OMP_H_
+
+#include <pcl/pcl_macros.h>
+#include <pcl/filters/boost.h>
+#include <pcl/filters/voxel_grid.h>
+#include <map>
+#include <unordered_map>
+#include <pcl/point_types.h>
+#include <pcl/kdtree/kdtree_flann.h>
+
+namespace pclomp
+{
+  /** \brief A searchable voxel structure containing the mean and covariance of the data.
+    * \note For more information please see
+    * <b>Magnusson, M. (2009). The Three-Dimensional Normal-Distributions Transform —
+    * an Efficient Representation for Registration, Surface Analysis, and Loop Detection.
+    * PhD thesis, Orebro University. Orebro Studies in Technology 36</b>
+    * \author Brian Okorn (Space and Naval Warfare Systems Center Pacific)
+    */
+  template<typename PointT>
+  class VoxelGridCovariance : public pcl::VoxelGrid<PointT>
+  {
+    protected:
+      using pcl::VoxelGrid<PointT>::filter_name_;
+      using pcl::VoxelGrid<PointT>::getClassName;
+      using pcl::VoxelGrid<PointT>::input_;
+      using pcl::VoxelGrid<PointT>::indices_;
+      using pcl::VoxelGrid<PointT>::filter_limit_negative_;
+      using pcl::VoxelGrid<PointT>::filter_limit_min_;
+      using pcl::VoxelGrid<PointT>::filter_limit_max_;
+      using pcl::VoxelGrid<PointT>::filter_field_name_;
+
+      using pcl::VoxelGrid<PointT>::downsample_all_data_;
+      using pcl::VoxelGrid<PointT>::leaf_layout_;
+      using pcl::VoxelGrid<PointT>::save_leaf_layout_;
+      using pcl::VoxelGrid<PointT>::leaf_size_;
+      using pcl::VoxelGrid<PointT>::min_b_;
+      using pcl::VoxelGrid<PointT>::max_b_;
+      using pcl::VoxelGrid<PointT>::inverse_leaf_size_;
+      using pcl::VoxelGrid<PointT>::div_b_;
+      using pcl::VoxelGrid<PointT>::divb_mul_;
+
+      typedef typename pcl::traits::fieldList<PointT>::type FieldList;
+      typedef typename pcl::Filter<PointT>::PointCloud PointCloud;
+      typedef typename PointCloud::Ptr PointCloudPtr;
+      typedef typename PointCloud::ConstPtr PointCloudConstPtr;
+
+    public:
+
+#if PCL_VERSION >= PCL_VERSION_CALC(1, 10, 0)
+      typedef pcl::shared_ptr< pcl::VoxelGrid<PointT> > Ptr;
+      typedef pcl::shared_ptr< const pcl::VoxelGrid<PointT> > ConstPtr;
+#else
+      typedef boost::shared_ptr< pcl::VoxelGrid<PointT> > Ptr;
+      typedef boost::shared_ptr< const pcl::VoxelGrid<PointT> > ConstPtr;
+#endif
+
+      /** \brief Simple structure to hold a centroid, covariance and the number of points in a leaf.
+        * Inverse covariance, eigen vectors and eigen values are precomputed. */
+      struct Leaf
+      {
+        /** \brief Constructor.
+         * Sets \ref nr_points, \ref icov_, \ref mean_ and \ref evals_ to 0 and \ref cov_ and \ref evecs_ to the identity matrix
+         */
+        Leaf () :
+          nr_points (0),
+          mean_ (Eigen::Vector3d::Zero ()),
+          centroid (),
+          cov_ (Eigen::Matrix3d::Identity ()),
+          icov_ (Eigen::Matrix3d::Zero ()),
+          evecs_ (Eigen::Matrix3d::Identity ()),
+          evals_ (Eigen::Vector3d::Zero ())
+        {
+        }
+
+        /** \brief Get the voxel covariance.
+          * \return covariance matrix
+          */
+        Eigen::Matrix3d
+        getCov () const
+        {
+          return (cov_);
+        }
+
+        /** \brief Get the inverse of the voxel covariance.
+          * \return inverse covariance matrix
+          */
+        Eigen::Matrix3d
+        getInverseCov () const
+        {
+          return (icov_);
+        }
+
+        /** \brief Get the voxel centroid.
+          * \return centroid
+          */
+        Eigen::Vector3d
+        getMean () const
+        {
+          return (mean_);
+        }
+
+        /** \brief Get the eigen vectors of the voxel covariance.
+          * \note Order corresponds with \ref getEvals
+          * \return matrix whose columns contain eigen vectors
+          */
+        Eigen::Matrix3d
+        getEvecs () const
+        {
+          return (evecs_);
+        }
+
+        /** \brief Get the eigen values of the voxel covariance.
+          * \note Order corresponds with \ref getEvecs
+          * \return vector of eigen values
+          */
+        Eigen::Vector3d
+        getEvals () const
+        {
+          return (evals_);
+        }
+
+        /** \brief Get the number of points contained by this voxel.
+          * \return number of points
+          */
+        int
+        getPointCount () const
+        {
+          return (nr_points);
+        }
+
+        /** \brief Number of points contained by voxel */
+        int nr_points;
+
+        /** \brief 3D voxel centroid */
+        Eigen::Vector3d mean_;
+
+        /** \brief Nd voxel centroid
+         * \note Differs from \ref mean_ when color data is used
+         */
+        Eigen::VectorXf centroid;
+
+        /** \brief Voxel covariance matrix */
+        Eigen::Matrix3d cov_;
+
+        /** \brief Inverse of voxel covariance matrix */
+        Eigen::Matrix3d icov_;
+
+        /** \brief Eigen vectors of voxel covariance matrix */
+        Eigen::Matrix3d evecs_;
+
+        /** \brief Eigen values of voxel covariance matrix */
+        Eigen::Vector3d evals_;
+
+      };
+
+      /** \brief Pointer to VoxelGridCovariance leaf structure */
+      typedef Leaf* LeafPtr;
+
+      /** \brief Const pointer to VoxelGridCovariance leaf structure */
+      typedef const Leaf* LeafConstPtr;
+
+    typedef std::map<size_t, Leaf> Map;
+
+    public:
+
+      /** \brief Constructor.
+       * Sets \ref leaf_size_ to 0 and \ref searchable_ to false.
+       */
+      VoxelGridCovariance () :
+        searchable_ (true),
+        min_points_per_voxel_ (6),
+        min_covar_eigvalue_mult_ (0.01),
+        leaves_ (),
+        voxel_centroids_ (),
+        voxel_centroids_leaf_indices_ (),
+        kdtree_ ()
+      {
+        downsample_all_data_ = false;
+        save_leaf_layout_ = false;
+        leaf_size_.setZero ();
+        min_b_.setZero ();
+        max_b_.setZero ();
+        filter_name_ = "VoxelGridCovariance";
+      }
+
+      /** \brief Set the minimum number of points required for a cell to be used (must be 3 or greater for covariance calculation).
+        * \param[in] min_points_per_voxel the minimum number of points for required for a voxel to be used
+        */
+      inline void
+      setMinPointPerVoxel (int min_points_per_voxel)
+      {
+        if(min_points_per_voxel > 2)
+        {
+          min_points_per_voxel_ = min_points_per_voxel;
+        }
+        else
+        {
+          PCL_WARN ("%s: Covariance calculation requires at least 3 points, setting Min Point per Voxel to 3 ", this->getClassName ().c_str ());
+          min_points_per_voxel_ = 3;
+        }
+      }
+
+      /** \brief Get the minimum number of points required for a cell to be used.
+        * \return the minimum number of points for required for a voxel to be used
+        */
+      inline int
+      getMinPointPerVoxel ()
+      {
+        return min_points_per_voxel_;
+      }
+
+      /** \brief Set the minimum allowable ratio between eigenvalues to prevent singular covariance matrices.
+        * \param[in] min_covar_eigvalue_mult the minimum allowable ratio between eigenvalues
+        */
+      inline void
+      setCovEigValueInflationRatio (double min_covar_eigvalue_mult)
+      {
+        min_covar_eigvalue_mult_ = min_covar_eigvalue_mult;
+      }
+
+      /** \brief Get the minimum allowable ratio between eigenvalues to prevent singular covariance matrices.
+        * \return the minimum allowable ratio between eigenvalues
+        */
+      inline double
+      getCovEigValueInflationRatio ()
+      {
+        return min_covar_eigvalue_mult_;
+      }
+
+      /** \brief Filter cloud and initializes voxel structure.
+       * \param[out] output cloud containing centroids of voxels containing a sufficient number of points
+       * \param[in] searchable flag if voxel structure is searchable, if true then kdtree is built
+       */
+      inline void
+      filter (PointCloud &output, bool searchable = false)
+      {
+        searchable_ = searchable;
+        applyFilter (output);
+
+        voxel_centroids_ = PointCloudPtr (new PointCloud (output));
+
+        if (searchable_ && voxel_centroids_->size() > 0)
+        {
+          // Initiates kdtree of the centroids of voxels containing a sufficient number of points
+          kdtree_.setInputCloud (voxel_centroids_);
+        }
+      }
+
+      /** \brief Initializes voxel structure.
+       * \param[in] searchable flag if voxel structure is searchable, if true then kdtree is built
+       */
+      inline void
+      filter (bool searchable = false)
+      {
+        searchable_ = searchable;
+        voxel_centroids_ = PointCloudPtr (new PointCloud);
+        applyFilter (*voxel_centroids_);
+
+        if (searchable_ && voxel_centroids_->size() > 0)
+        {
+          // Initiates kdtree of the centroids of voxels containing a sufficient number of points
+          kdtree_.setInputCloud (voxel_centroids_);
+        }
+      }
+
+      /** \brief Get the voxel containing point p.
+       * \param[in] index the index of the leaf structure node
+       * \return const pointer to leaf structure
+       */
+      inline LeafConstPtr
+      getLeaf (int index)
+      {
+        auto leaf_iter = leaves_.find (index);
+        if (leaf_iter != leaves_.end ())
+        {
+          LeafConstPtr ret (&(leaf_iter->second));
+          return ret;
+        }
+        else
+          return NULL;
+      }
+
+      /** \brief Get the voxel containing point p.
+       * \param[in] p the point to get the leaf structure at
+       * \return const pointer to leaf structure
+       */
+      inline LeafConstPtr
+      getLeaf (PointT &p)
+      {
+        // Generate index associated with p
+        int ijk0 = static_cast<int> (floor (p.x * inverse_leaf_size_[0]) - min_b_[0]);
+        int ijk1 = static_cast<int> (floor (p.y * inverse_leaf_size_[1]) - min_b_[1]);
+        int ijk2 = static_cast<int> (floor (p.z * inverse_leaf_size_[2]) - min_b_[2]);
+
+        // Compute the centroid leaf index
+        int idx = ijk0 * divb_mul_[0] + ijk1 * divb_mul_[1] + ijk2 * divb_mul_[2];
+
+        // Find leaf associated with index
+        auto leaf_iter = leaves_.find (idx);
+        if (leaf_iter != leaves_.end ())
+        {
+          // If such a leaf exists return the pointer to the leaf structure
+          LeafConstPtr ret (&(leaf_iter->second));
+          return ret;
+        }
+        else
+          return NULL;
+      }
+
+      /** \brief Get the voxel containing point p.
+       * \param[in] p the point to get the leaf structure at
+       * \return const pointer to leaf structure
+       */
+      inline LeafConstPtr
+      getLeaf (Eigen::Vector3f &p)
+      {
+        // Generate index associated with p
+        int ijk0 = static_cast<int> (floor (p[0] * inverse_leaf_size_[0]) - min_b_[0]);
+        int ijk1 = static_cast<int> (floor (p[1] * inverse_leaf_size_[1]) - min_b_[1]);
+        int ijk2 = static_cast<int> (floor (p[2] * inverse_leaf_size_[2]) - min_b_[2]);
+
+        // Compute the centroid leaf index
+        int idx = ijk0 * divb_mul_[0] + ijk1 * divb_mul_[1] + ijk2 * divb_mul_[2];
+
+        // Find leaf associated with index
+        auto leaf_iter = leaves_.find (idx);
+        if (leaf_iter != leaves_.end ())
+        {
+          // If such a leaf exists return the pointer to the leaf structure
+          LeafConstPtr ret (&(leaf_iter->second));
+          return ret;
+        }
+        else
+          return NULL;
+
+      }
+
+      /** \brief Get the voxels surrounding point p, not including the voxel containing point p.
+       * \note Only voxels containing a sufficient number of points are used (slower than radius search in practice).
+       * \param[in] reference_point the point to get the leaf structure at
+       * \param[out] neighbors
+       * \return number of neighbors found
+       */
+	  int getNeighborhoodAtPoint(const Eigen::MatrixXi&, const PointT& reference_point, std::vector<LeafConstPtr> &neighbors) const ;
+	  int getNeighborhoodAtPoint(const PointT& reference_point, std::vector<LeafConstPtr> &neighbors) const ;
+	  int getNeighborhoodAtPoint7(const PointT& reference_point, std::vector<LeafConstPtr> &neighbors) const ;
+	  int getNeighborhoodAtPoint1(const PointT& reference_point, std::vector<LeafConstPtr> &neighbors) const ;
+
+      /** \brief Get the leaf structure map
+       * \return a map containing all leaves
+       */
+      inline const Map&
+      getLeaves ()
+      {
+        return leaves_;
+      }
+
+      /** \brief Get a pointcloud containing the voxel centroids
+       * \note Only voxels containing a sufficient number of points are used.
+       * \return a map containing all leaves
+       */
+      inline PointCloudPtr
+      getCentroids ()
+      {
+        return voxel_centroids_;
+      }
+
+
+      /** \brief Get a cloud to visualize each voxels normal distribution.
+       * \param[out] cell_cloud a cloud created by sampling the normal distributions of each voxel
+       */
+      void
+      getDisplayCloud (pcl::PointCloud<pcl::PointXYZ>& cell_cloud);
+
+      /** \brief Search for the k-nearest occupied voxels for the given query point.
+       * \note Only voxels containing a sufficient number of points are used.
+       * \param[in] point the given query point
+       * \param[in] k the number of neighbors to search for
+       * \param[out] k_leaves the resultant leaves of the neighboring points
+       * \param[out] k_sqr_distances the resultant squared distances to the neighboring points
+       * \return number of neighbors found
+       */
+      int
+      nearestKSearch (const PointT &point, int k,
+                      std::vector<LeafConstPtr> &k_leaves, std::vector<float> &k_sqr_distances)
+      {
+        k_leaves.clear ();
+
+        // Check if kdtree has been built
+        if (!searchable_)
+        {
+          PCL_WARN ("%s: Not Searchable", this->getClassName ().c_str ());
+          return 0;
+        }
+
+        // Find k-nearest neighbors in the occupied voxel centroid cloud
+        std::vector<int> k_indices;
+        k = kdtree_.nearestKSearch (point, k, k_indices, k_sqr_distances);
+
+        // Find leaves corresponding to neighbors
+        k_leaves.reserve (k);
+        for (std::vector<int>::iterator iter = k_indices.begin (); iter != k_indices.end (); iter++)
+        {
+          k_leaves.push_back (&leaves_[voxel_centroids_leaf_indices_[*iter]]);
+        }
+        return k;
+      }
+
+      /** \brief Search for the k-nearest occupied voxels for the given query point.
+       * \note Only voxels containing a sufficient number of points are used.
+       * \param[in] cloud the given query point
+       * \param[in] index the index
+       * \param[in] k the number of neighbors to search for
+       * \param[out] k_leaves the resultant leaves of the neighboring points
+       * \param[out] k_sqr_distances the resultant squared distances to the neighboring points
+       * \return number of neighbors found
+       */
+      inline int
+      nearestKSearch (const PointCloud &cloud, int index, int k,
+                      std::vector<LeafConstPtr> &k_leaves, std::vector<float> &k_sqr_distances)
+      {
+        if (index >= static_cast<int> (cloud.points.size ()) || index < 0)
+          return (0);
+        return (nearestKSearch (cloud.points[index], k, k_leaves, k_sqr_distances));
+      }
+
+
+      /** \brief Search for all the nearest occupied voxels of the query point in a given radius.
+       * \note Only voxels containing a sufficient number of points are used.
+       * \param[in] point the given query point
+       * \param[in] radius the radius of the sphere bounding all of p_q's neighbors
+       * \param[out] k_leaves the resultant leaves of the neighboring points
+       * \param[out] k_sqr_distances the resultant squared distances to the neighboring points
+       * \param[in] max_nn
+       * \return number of neighbors found
+       */
+      int
+      radiusSearch (const PointT &point, double radius, std::vector<LeafConstPtr> &k_leaves,
+                    std::vector<float> &k_sqr_distances, unsigned int max_nn = 0) const
+      {
+        k_leaves.clear ();
+
+        // Check if kdtree has been built
+        if (!searchable_)
+        {
+          PCL_WARN ("%s: Not Searchable", this->getClassName ().c_str ());
+          return 0;
+        }
+
+        // Find neighbors within radius in the occupied voxel centroid cloud
+        std::vector<int> k_indices;
+        int k = kdtree_.radiusSearch (point, radius, k_indices, k_sqr_distances, max_nn);
+
+        // Find leaves corresponding to neighbors
+        k_leaves.reserve (k);
+        for (std::vector<int>::iterator iter = k_indices.begin (); iter != k_indices.end (); iter++)
+        {
+		  auto leaf = leaves_.find(voxel_centroids_leaf_indices_[*iter]);
+		  if (leaf == leaves_.end()) {
+			  std::cerr << "error : could not find the leaf corresponding to the voxel" << std::endl;
+			  std::cin.ignore(1);
+		  }
+          k_leaves.push_back (&(leaf->second));
+        }
+        return k;
+      }
+
+      /** \brief Search for all the nearest occupied voxels of the query point in a given radius.
+       * \note Only voxels containing a sufficient number of points are used.
+       * \param[in] cloud the given query point
+       * \param[in] index a valid index in cloud representing a valid (i.e., finite) query point
+       * \param[in] radius the radius of the sphere bounding all of p_q's neighbors
+       * \param[out] k_leaves the resultant leaves of the neighboring points
+       * \param[out] k_sqr_distances the resultant squared distances to the neighboring points
+       * \param[in] max_nn
+       * \return number of neighbors found
+       */
+      inline int
+      radiusSearch (const PointCloud &cloud, int index, double radius,
+                    std::vector<LeafConstPtr> &k_leaves, std::vector<float> &k_sqr_distances,
+                    unsigned int max_nn = 0) const
+      {
+        if (index >= static_cast<int> (cloud.points.size ()) || index < 0)
+          return (0);
+        return (radiusSearch (cloud.points[index], radius, k_leaves, k_sqr_distances, max_nn));
+      }
+
+    protected:
+
+      /** \brief Filter cloud and initializes voxel structure.
+       * \param[out] output cloud containing centroids of voxels containing a sufficient number of points
+       */
+      void applyFilter (PointCloud &output);
+
+      /** \brief Flag to determine if voxel structure is searchable. */
+      bool searchable_;
+
+      /** \brief Minimum points contained with in a voxel to allow it to be usable. */
+      int min_points_per_voxel_;
+
+      /** \brief Minimum allowable ratio between eigenvalues to prevent singular covariance matrices. */
+      double min_covar_eigvalue_mult_;
+
+      /** \brief Voxel structure containing all leaf nodes (includes voxels with less than a sufficient number of points). */
+	  Map leaves_;
+
+      /** \brief Point cloud containing centroids of voxels containing at least minimum number of points. */
+      PointCloudPtr voxel_centroids_;
+
+      /** \brief Indices of leaf structures associated with each point in \ref voxel_centroids_ (used for searching). */
+      std::vector<int> voxel_centroids_leaf_indices_;
+
+      /** \brief KdTree generated using \ref voxel_centroids_ (used for searching). */
+      pcl::KdTreeFLANN<PointT> kdtree_;
+  };
+}
+
+#endif  //#ifndef PCL_VOXEL_GRID_COVARIANCE_H_

+ 5 - 0
src/detection/detection_ndt_pclomp/pclomp/voxel_grid_covariance_omp.cpp

@@ -0,0 +1,5 @@
+#include <pclomp/voxel_grid_covariance_omp.h>
+#include <voxel_grid_covariance_omp_impl.hpp>
+
+template class pclomp::VoxelGridCovariance<pcl::PointXYZ>;
+template class pclomp::VoxelGridCovariance<pcl::PointXYZI>;

+ 487 - 0
src/detection/detection_ndt_pclomp/pclomp/voxel_grid_covariance_omp_impl.hpp

@@ -0,0 +1,487 @@
+/*
+ * Software License Agreement (BSD License)
+ *
+ *  Point Cloud Library (PCL) - www.pointclouds.org
+ *  Copyright (c) 2010-2011, Willow Garage, Inc.
+ *
+ *  All rights reserved.
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions
+ *  are met:
+ *
+ *   * Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above
+ *     copyright notice, this list of conditions and the following
+ *     disclaimer in the documentation and/or other materials provided
+ *     with the distribution.
+ *   * Neither the name of the copyright holder(s) nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ *  FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ *  COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ *  BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ *  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ *  CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ *  LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ *  ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ *  POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef PCL_VOXEL_GRID_COVARIANCE_IMPL_OMP_H_
+#define PCL_VOXEL_GRID_COVARIANCE_IMPL_OMP_H_
+
+#include <pcl/common/common.h>
+#include <pcl/filters/boost.h>
+#include <pclomp/voxel_grid_covariance_omp.h>
+#include <Eigen/Dense>
+#include <Eigen/Cholesky>
+
+//////////////////////////////////////////////////////////////////////////////////////////
+template<typename PointT> void
+pclomp::VoxelGridCovariance<PointT>::applyFilter (PointCloud &output)
+{
+  voxel_centroids_leaf_indices_.clear ();
+
+  // Has the input dataset been set already?
+  if (!input_)
+  {
+    PCL_WARN ("[pcl::%s::applyFilter] No input dataset given!\n", getClassName ().c_str ());
+    output.width = output.height = 0;
+    output.points.clear ();
+    return;
+  }
+
+  // Copy the header (and thus the frame_id) + allocate enough space for points
+  output.height = 1;                          // downsampling breaks the organized structure
+  output.is_dense = true;                     // we filter out invalid points
+  output.points.clear ();
+
+  Eigen::Vector4f min_p, max_p;
+  // Get the minimum and maximum dimensions
+  if (!filter_field_name_.empty ()) // If we don't want to process the entire cloud...
+      pcl::getMinMax3D<PointT> (input_, filter_field_name_, static_cast<float> (filter_limit_min_), static_cast<float> (filter_limit_max_), min_p, max_p, filter_limit_negative_);
+  else
+	  pcl::getMinMax3D<PointT> (*input_, min_p, max_p);
+
+  // Check that the leaf size is not too small, given the size of the data
+  int64_t dx = static_cast<int64_t>((max_p[0] - min_p[0]) * inverse_leaf_size_[0])+1;
+  int64_t dy = static_cast<int64_t>((max_p[1] - min_p[1]) * inverse_leaf_size_[1])+1;
+  int64_t dz = static_cast<int64_t>((max_p[2] - min_p[2]) * inverse_leaf_size_[2])+1;
+
+  if((dx*dy*dz) > std::numeric_limits<int32_t>::max())
+  {
+    PCL_WARN("[pcl::%s::applyFilter] Leaf size is too small for the input dataset. Integer indices would overflow.", getClassName().c_str());
+    output.clear();
+    return;
+  }
+
+  // Compute the minimum and maximum bounding box values
+  min_b_[0] = static_cast<int> (floor (min_p[0] * inverse_leaf_size_[0]));
+  max_b_[0] = static_cast<int> (floor (max_p[0] * inverse_leaf_size_[0]));
+  min_b_[1] = static_cast<int> (floor (min_p[1] * inverse_leaf_size_[1]));
+  max_b_[1] = static_cast<int> (floor (max_p[1] * inverse_leaf_size_[1]));
+  min_b_[2] = static_cast<int> (floor (min_p[2] * inverse_leaf_size_[2]));
+  max_b_[2] = static_cast<int> (floor (max_p[2] * inverse_leaf_size_[2]));
+
+  // Compute the number of divisions needed along all axis
+  div_b_ = max_b_ - min_b_ + Eigen::Vector4i::Ones ();
+  div_b_[3] = 0;
+
+  // Clear the leaves
+  leaves_.clear ();
+//  leaves_.reserve(8192);
+
+  // Set up the division multiplier
+  divb_mul_ = Eigen::Vector4i (1, div_b_[0], div_b_[0] * div_b_[1], 0);
+
+  int centroid_size = 4;
+
+  if (downsample_all_data_)
+    centroid_size = boost::mpl::size<FieldList>::value;
+
+  // ---[ RGB special case
+  std::vector<pcl::PCLPointField> fields;
+  int rgba_index = -1;
+  rgba_index = pcl::getFieldIndex<PointT> ("rgb", fields);
+  if (rgba_index == -1)
+    rgba_index = pcl::getFieldIndex<PointT> ("rgba", fields);
+  if (rgba_index >= 0)
+  {
+    rgba_index = fields[rgba_index].offset;
+    centroid_size += 3;
+  }
+
+  // If we don't want to process the entire cloud, but rather filter points far away from the viewpoint first...
+  if (!filter_field_name_.empty ())
+  {
+    // Get the distance field index
+    std::vector<pcl::PCLPointField> fields;
+    int distance_idx = pcl::getFieldIndex<PointT> (filter_field_name_, fields);
+    if (distance_idx == -1)
+      PCL_WARN ("[pcl::%s::applyFilter] Invalid filter field name. Index is %d.\n", getClassName ().c_str (), distance_idx);
+
+    // First pass: go over all points and insert them into the right leaf
+    for (size_t cp = 0; cp < input_->points.size (); ++cp)
+    {
+      if (!input_->is_dense)
+        // Check if the point is invalid
+        if (!std::isfinite (input_->points[cp].x) ||
+            !std::isfinite (input_->points[cp].y) ||
+            !std::isfinite (input_->points[cp].z))
+          continue;
+
+      // Get the distance value
+      const uint8_t* pt_data = reinterpret_cast<const uint8_t*> (&input_->points[cp]);
+      float distance_value = 0;
+      memcpy (&distance_value, pt_data + fields[distance_idx].offset, sizeof (float));
+
+      if (filter_limit_negative_)
+      {
+        // Use a threshold for cutting out points which inside the interval
+        if ((distance_value < filter_limit_max_) && (distance_value > filter_limit_min_))
+          continue;
+      }
+      else
+      {
+        // Use a threshold for cutting out points which are too close/far away
+        if ((distance_value > filter_limit_max_) || (distance_value < filter_limit_min_))
+          continue;
+      }
+
+      int ijk0 = static_cast<int> (floor (input_->points[cp].x * inverse_leaf_size_[0]) - static_cast<float> (min_b_[0]));
+      int ijk1 = static_cast<int> (floor (input_->points[cp].y * inverse_leaf_size_[1]) - static_cast<float> (min_b_[1]));
+      int ijk2 = static_cast<int> (floor (input_->points[cp].z * inverse_leaf_size_[2]) - static_cast<float> (min_b_[2]));
+
+      // Compute the centroid leaf index
+      int idx = ijk0 * divb_mul_[0] + ijk1 * divb_mul_[1] + ijk2 * divb_mul_[2];
+
+      Leaf& leaf = leaves_[idx];
+      if (leaf.nr_points == 0)
+      {
+        leaf.centroid.resize (centroid_size);
+        leaf.centroid.setZero ();
+      }
+
+      Eigen::Vector3d pt3d (input_->points[cp].x, input_->points[cp].y, input_->points[cp].z);
+      // Accumulate point sum for centroid calculation
+      leaf.mean_ += pt3d;
+      // Accumulate x*xT for single pass covariance calculation
+      leaf.cov_ += pt3d * pt3d.transpose ();
+
+      // Do we need to process all the fields?
+      if (!downsample_all_data_)
+      {
+        Eigen::Vector4f pt (input_->points[cp].x, input_->points[cp].y, input_->points[cp].z, 0);
+        leaf.centroid.template head<4> () += pt;
+      }
+      else
+      {
+        // Copy all the fields
+        Eigen::VectorXf centroid = Eigen::VectorXf::Zero (centroid_size);
+        // ---[ RGB special case
+        if (rgba_index >= 0)
+        {
+          // fill r/g/b data
+          int rgb;
+          memcpy (&rgb, reinterpret_cast<const char*> (&input_->points[cp]) + rgba_index, sizeof (int));
+          centroid[centroid_size - 3] = static_cast<float> ((rgb >> 16) & 0x0000ff);
+          centroid[centroid_size - 2] = static_cast<float> ((rgb >> 8) & 0x0000ff);
+          centroid[centroid_size - 1] = static_cast<float> ((rgb) & 0x0000ff);
+        }
+        pcl::for_each_type<FieldList> (pcl::NdCopyPointEigenFunctor<PointT> (input_->points[cp], centroid));
+        leaf.centroid += centroid;
+      }
+      ++leaf.nr_points;
+    }
+  }
+  // No distance filtering, process all data
+  else
+  {
+    // First pass: go over all points and insert them into the right leaf
+    for (size_t cp = 0; cp < input_->points.size (); ++cp)
+    {
+      if (!input_->is_dense)
+        // Check if the point is invalid
+        if (!std::isfinite (input_->points[cp].x) ||
+            !std::isfinite (input_->points[cp].y) ||
+            !std::isfinite (input_->points[cp].z))
+          continue;
+
+      int ijk0 = static_cast<int> (floor (input_->points[cp].x * inverse_leaf_size_[0]) - static_cast<float> (min_b_[0]));
+      int ijk1 = static_cast<int> (floor (input_->points[cp].y * inverse_leaf_size_[1]) - static_cast<float> (min_b_[1]));
+      int ijk2 = static_cast<int> (floor (input_->points[cp].z * inverse_leaf_size_[2]) - static_cast<float> (min_b_[2]));
+
+      // Compute the centroid leaf index
+      int idx = ijk0 * divb_mul_[0] + ijk1 * divb_mul_[1] + ijk2 * divb_mul_[2];
+
+      //int idx = (((input_->points[cp].getArray4fmap () * inverse_leaf_size_).template cast<int> ()).matrix () - min_b_).dot (divb_mul_);
+      Leaf& leaf = leaves_[idx];
+      if (leaf.nr_points == 0)
+      {
+        leaf.centroid.resize (centroid_size);
+        leaf.centroid.setZero ();
+      }
+
+      Eigen::Vector3d pt3d (input_->points[cp].x, input_->points[cp].y, input_->points[cp].z);
+      // Accumulate point sum for centroid calculation
+      leaf.mean_ += pt3d;
+      // Accumulate x*xT for single pass covariance calculation
+      leaf.cov_ += pt3d * pt3d.transpose ();
+
+      // Do we need to process all the fields?
+      if (!downsample_all_data_)
+      {
+        Eigen::Vector4f pt (input_->points[cp].x, input_->points[cp].y, input_->points[cp].z, 0);
+        leaf.centroid.template head<4> () += pt;
+      }
+      else
+      {
+        // Copy all the fields
+        Eigen::VectorXf centroid = Eigen::VectorXf::Zero (centroid_size);
+        // ---[ RGB special case
+        if (rgba_index >= 0)
+        {
+          // Fill r/g/b data, assuming that the order is BGRA
+          int rgb;
+          memcpy (&rgb, reinterpret_cast<const char*> (&input_->points[cp]) + rgba_index, sizeof (int));
+          centroid[centroid_size - 3] = static_cast<float> ((rgb >> 16) & 0x0000ff);
+          centroid[centroid_size - 2] = static_cast<float> ((rgb >> 8) & 0x0000ff);
+          centroid[centroid_size - 1] = static_cast<float> ((rgb) & 0x0000ff);
+        }
+        pcl::for_each_type<FieldList> (pcl::NdCopyPointEigenFunctor<PointT> (input_->points[cp], centroid));
+        leaf.centroid += centroid;
+      }
+      ++leaf.nr_points;
+    }
+  }
+
+  // Second pass: go over all leaves and compute centroids and covariance matrices
+  output.points.reserve (leaves_.size ());
+  if (searchable_)
+    voxel_centroids_leaf_indices_.reserve (leaves_.size ());
+  int cp = 0;
+  if (save_leaf_layout_)
+    leaf_layout_.resize (div_b_[0] * div_b_[1] * div_b_[2], -1);
+
+  // Eigen values and vectors calculated to prevent near singular matrices
+  Eigen::SelfAdjointEigenSolver<Eigen::Matrix3d> eigensolver;
+  Eigen::Matrix3d eigen_val;
+  Eigen::Vector3d pt_sum;
+
+  // Eigen values less than a threshold of max eigen value are inflated to a set fraction of the max eigen value.
+  double min_covar_eigvalue;
+
+  for (auto it = leaves_.begin (); it != leaves_.end (); ++it)
+  {
+
+    // Normalize the centroid
+    Leaf& leaf = it->second;
+
+    // Normalize the centroid
+    leaf.centroid /= static_cast<float> (leaf.nr_points);
+    // Point sum used for single pass covariance calculation
+    pt_sum = leaf.mean_;
+    // Normalize mean
+    leaf.mean_ /= leaf.nr_points;
+
+    // If the voxel contains sufficient points, its covariance is calculated and is added to the voxel centroids and output clouds.
+    // Points with less than the minimum points will have a can not be accurately approximated using a normal distribution.
+    if (leaf.nr_points >= min_points_per_voxel_)
+    {
+      if (save_leaf_layout_)
+        leaf_layout_[it->first] = cp++;
+
+      output.push_back (PointT ());
+
+      // Do we need to process all the fields?
+      if (!downsample_all_data_)
+      {
+        output.points.back ().x = leaf.centroid[0];
+        output.points.back ().y = leaf.centroid[1];
+        output.points.back ().z = leaf.centroid[2];
+      }
+      else
+      {
+        pcl::for_each_type<FieldList> (pcl::NdCopyEigenPointFunctor<PointT> (leaf.centroid, output.back ()));
+        // ---[ RGB special case
+        if (rgba_index >= 0)
+        {
+          // pack r/g/b into rgb
+          float r = leaf.centroid[centroid_size - 3], g = leaf.centroid[centroid_size - 2], b = leaf.centroid[centroid_size - 1];
+          int rgb = (static_cast<int> (r)) << 16 | (static_cast<int> (g)) << 8 | (static_cast<int> (b));
+          memcpy (reinterpret_cast<char*> (&output.points.back ()) + rgba_index, &rgb, sizeof (float));
+        }
+      }
+
+      // Stores the voxel indices for fast access searching
+      if (searchable_)
+        voxel_centroids_leaf_indices_.push_back (static_cast<int> (it->first));
+
+      // Single pass covariance calculation
+      leaf.cov_ = (leaf.cov_ - 2 * (pt_sum * leaf.mean_.transpose ())) / leaf.nr_points + leaf.mean_ * leaf.mean_.transpose ();
+      leaf.cov_ *= (leaf.nr_points - 1.0) / leaf.nr_points;
+
+      //Normalize Eigen Val such that max no more than 100x min.
+      eigensolver.compute (leaf.cov_);
+      eigen_val = eigensolver.eigenvalues ().asDiagonal ();
+      leaf.evecs_ = eigensolver.eigenvectors ();
+
+      if (eigen_val (0, 0) < 0 || eigen_val (1, 1) < 0 || eigen_val (2, 2) <= 0)
+      {
+        leaf.nr_points = -1;
+        continue;
+      }
+
+      // Avoids matrices near singularities (eq 6.11)[Magnusson 2009]
+
+      min_covar_eigvalue = min_covar_eigvalue_mult_ * eigen_val (2, 2);
+      if (eigen_val (0, 0) < min_covar_eigvalue)
+      {
+        eigen_val (0, 0) = min_covar_eigvalue;
+
+        if (eigen_val (1, 1) < min_covar_eigvalue)
+        {
+          eigen_val (1, 1) = min_covar_eigvalue;
+        }
+
+        leaf.cov_ = leaf.evecs_ * eigen_val * leaf.evecs_.inverse ();
+      }
+      leaf.evals_ = eigen_val.diagonal ();
+
+      leaf.icov_ = leaf.cov_.inverse ();
+      if (leaf.icov_.maxCoeff () == std::numeric_limits<float>::infinity ( )
+          || leaf.icov_.minCoeff () == -std::numeric_limits<float>::infinity ( ) )
+      {
+        leaf.nr_points = -1;
+      }
+
+    }
+  }
+
+  output.width = static_cast<uint32_t> (output.points.size ());
+}
+
+//////////////////////////////////////////////////////////////////////////////////////////
+template<typename PointT> int
+pclomp::VoxelGridCovariance<PointT>::getNeighborhoodAtPoint(const Eigen::MatrixXi& relative_coordinates, const PointT& reference_point, std::vector<LeafConstPtr> &neighbors) const
+{
+	neighbors.clear();
+
+	// Find displacement coordinates
+	Eigen::Vector4i ijk(static_cast<int> (floor(reference_point.x / leaf_size_[0])),
+		static_cast<int> (floor(reference_point.y / leaf_size_[1])),
+		static_cast<int> (floor(reference_point.z / leaf_size_[2])), 0);
+	Eigen::Array4i diff2min = min_b_ - ijk;
+	Eigen::Array4i diff2max = max_b_ - ijk;
+	neighbors.reserve(relative_coordinates.cols());
+
+	// Check each neighbor to see if it is occupied and contains sufficient points
+	// Slower than radius search because needs to check 26 indices
+	for (int ni = 0; ni < relative_coordinates.cols(); ni++)
+	{
+		Eigen::Vector4i displacement = (Eigen::Vector4i() << relative_coordinates.col(ni), 0).finished();
+		// Checking if the specified cell is in the grid
+		if ((diff2min <= displacement.array()).all() && (diff2max >= displacement.array()).all())
+		{
+			auto leaf_iter = leaves_.find(((ijk + displacement - min_b_).dot(divb_mul_)));
+			if (leaf_iter != leaves_.end() && leaf_iter->second.nr_points >= min_points_per_voxel_)
+			{
+				LeafConstPtr leaf = &(leaf_iter->second);
+				neighbors.push_back(leaf);
+			}
+		}
+	}
+
+	return (static_cast<int> (neighbors.size()));
+}
+
+//////////////////////////////////////////////////////////////////////////////////////////
+template<typename PointT> int
+pclomp::VoxelGridCovariance<PointT>::getNeighborhoodAtPoint(const PointT& reference_point, std::vector<LeafConstPtr> &neighbors) const
+{
+	neighbors.clear();
+
+	// Find displacement coordinates
+	Eigen::MatrixXi relative_coordinates = pcl::getAllNeighborCellIndices();
+	return getNeighborhoodAtPoint(relative_coordinates, reference_point, neighbors);
+}
+
+//////////////////////////////////////////////////////////////////////////////////////////
+template<typename PointT> int
+pclomp::VoxelGridCovariance<PointT>::getNeighborhoodAtPoint7(const PointT& reference_point, std::vector<LeafConstPtr> &neighbors) const
+{
+	neighbors.clear();
+
+	Eigen::MatrixXi relative_coordinates(3, 7);
+	relative_coordinates.setZero();
+	relative_coordinates(0, 1) = 1;
+	relative_coordinates(0, 2) = -1;
+	relative_coordinates(1, 3) = 1;
+	relative_coordinates(1, 4) = -1;
+	relative_coordinates(2, 5) = 1;
+	relative_coordinates(2, 6) = -1;
+
+	return getNeighborhoodAtPoint(relative_coordinates, reference_point, neighbors);
+}
+
+
+//////////////////////////////////////////////////////////////////////////////////////////
+template<typename PointT> int
+pclomp::VoxelGridCovariance<PointT>::getNeighborhoodAtPoint1(const PointT& reference_point, std::vector<LeafConstPtr> &neighbors) const
+{
+	neighbors.clear();
+	return getNeighborhoodAtPoint(Eigen::MatrixXi::Zero(3,1), reference_point, neighbors);
+}
+
+
+
+//////////////////////////////////////////////////////////////////////////////////////////
+template<typename PointT> void
+pclomp::VoxelGridCovariance<PointT>::getDisplayCloud (pcl::PointCloud<pcl::PointXYZ>& cell_cloud)
+{
+  cell_cloud.clear ();
+
+  int pnt_per_cell = 1000;
+  boost::mt19937 rng;
+  boost::normal_distribution<> nd (0.0, leaf_size_.head (3).norm ());
+  boost::variate_generator<boost::mt19937&, boost::normal_distribution<> > var_nor (rng, nd);
+
+  Eigen::LLT<Eigen::Matrix3d> llt_of_cov;
+  Eigen::Matrix3d cholesky_decomp;
+  Eigen::Vector3d cell_mean;
+  Eigen::Vector3d rand_point;
+  Eigen::Vector3d dist_point;
+
+  // Generate points for each occupied voxel with sufficient points.
+  for (auto it = leaves_.begin (); it != leaves_.end (); ++it)
+  {
+    Leaf& leaf = it->second;
+
+    if (leaf.nr_points >= min_points_per_voxel_)
+    {
+      cell_mean = leaf.mean_;
+      llt_of_cov.compute (leaf.cov_);
+      cholesky_decomp = llt_of_cov.matrixL ();
+
+      // Random points generated by sampling the normal distribution given by voxel mean and covariance matrix
+      for (int i = 0; i < pnt_per_cell; i++)
+      {
+        rand_point = Eigen::Vector3d (var_nor (), var_nor (), var_nor ());
+        dist_point = cell_mean + cholesky_decomp * rand_point;
+        cell_cloud.push_back (pcl::PointXYZ (static_cast<float> (dist_point (0)), static_cast<float> (dist_point (1)), static_cast<float> (dist_point (2))));
+      }
+    }
+  }
+}
+
+#define PCL_INSTANTIATE_VoxelGridCovariance(T) template class PCL_EXPORTS pcl::VoxelGridCovariance<T>;
+
+#endif    // PCL_VOXEL_GRID_COVARIANCE_IMPL_H_