Browse Source

add detection_localization_global_hdl. not test ok.

yuchuli 3 years ago
parent
commit
7c414fac9b
52 changed files with 7793 additions and 12 deletions
  1. 12 12
      src/detection/detection_localization_LeGO_LOAM/imageProjection.cpp
  2. 73 0
      src/detection/detection_localization_global_hdl/.gitignore
  3. 79 0
      src/detection/detection_localization_global_hdl/detection_localization_global_hdl.pro
  4. 176 0
      src/detection/detection_localization_global_hdl/hdl_global_localization/bbs/bbs_localization.cpp
  5. 125 0
      src/detection/detection_localization_global_hdl/hdl_global_localization/engines/global_localization_bbs.cpp
  6. 59 0
      src/detection/detection_localization_global_hdl/hdl_global_localization/engines/global_localization_fpfh_ransac.cpp
  7. 90 0
      src/detection/detection_localization_global_hdl/hdl_global_localization/engines/global_localization_fpfh_teaser.cpp
  8. 191 0
      src/detection/detection_localization_global_hdl/hdl_global_localization/ransac/ransac_pose_estimation.cpp
  9. 72 0
      src/detection/detection_localization_global_hdl/hdl_global_localization/ransac/voxelset.cpp
  10. 92 0
      src/detection/detection_localization_global_hdl/include/hdl_global_localization/bbs/bbs_localization.hpp
  11. 118 0
      src/detection/detection_localization_global_hdl/include/hdl_global_localization/bbs/occupancy_gridmap.hpp
  12. 40 0
      src/detection/detection_localization_global_hdl/include/hdl_global_localization/engines/global_localization_bbs.hpp
  13. 22 0
      src/detection/detection_localization_global_hdl/include/hdl_global_localization/engines/global_localization_engine.hpp
  14. 34 0
      src/detection/detection_localization_global_hdl/include/hdl_global_localization/engines/global_localization_fpfh_ransac.hpp
  15. 31 0
      src/detection/detection_localization_global_hdl/include/hdl_global_localization/engines/global_localization_fpfh_teaser.hpp
  16. 44 0
      src/detection/detection_localization_global_hdl/include/hdl_global_localization/global_localization_results.hpp
  17. 20 0
      src/detection/detection_localization_global_hdl/include/hdl_global_localization/ransac/matching_cost_evaluater.hpp
  18. 54 0
      src/detection/detection_localization_global_hdl/include/hdl_global_localization/ransac/matching_cost_evaluater_flann.hpp
  19. 43 0
      src/detection/detection_localization_global_hdl/include/hdl_global_localization/ransac/matching_cost_evaluater_voxels.hpp
  20. 89 0
      src/detection/detection_localization_global_hdl/include/hdl_global_localization/ransac/ransac_pose_estimation.hpp
  21. 36 0
      src/detection/detection_localization_global_hdl/include/hdl_global_localization/ransac/voxelset.hpp
  22. 32 0
      src/detection/detection_localization_global_hdl/include/pmc/pmc.h
  23. 152 0
      src/detection/detection_localization_global_hdl/include/pmc/pmc_graph.h
  24. 41 0
      src/detection/detection_localization_global_hdl/include/pmc/pmc_headers.h
  25. 86 0
      src/detection/detection_localization_global_hdl/include/pmc/pmc_heu.h
  26. 193 0
      src/detection/detection_localization_global_hdl/include/pmc/pmc_input.h
  27. 122 0
      src/detection/detection_localization_global_hdl/include/pmc/pmc_maxclique.h
  28. 142 0
      src/detection/detection_localization_global_hdl/include/pmc/pmc_neigh_coloring.h
  29. 252 0
      src/detection/detection_localization_global_hdl/include/pmc/pmc_neigh_cores.h
  30. 53 0
      src/detection/detection_localization_global_hdl/include/pmc/pmc_utils.h
  31. 52 0
      src/detection/detection_localization_global_hdl/include/pmc/pmc_vertex.h
  32. 125 0
      src/detection/detection_localization_global_hdl/include/pmc/pmcx_maxclique.h
  33. 130 0
      src/detection/detection_localization_global_hdl/include/pmc/pmcx_maxclique_basic.h
  34. 240 0
      src/detection/detection_localization_global_hdl/include/teaser/certification.h
  35. 80 0
      src/detection/detection_localization_global_hdl/include/teaser/fpfh.h
  36. 73 0
      src/detection/detection_localization_global_hdl/include/teaser/geometry.h
  37. 276 0
      src/detection/detection_localization_global_hdl/include/teaser/graph.h
  38. 101 0
      src/detection/detection_localization_global_hdl/include/teaser/linalg.h
  39. 69 0
      src/detection/detection_localization_global_hdl/include/teaser/macros.h
  40. 65 0
      src/detection/detection_localization_global_hdl/include/teaser/matcher.h
  41. 52 0
      src/detection/detection_localization_global_hdl/include/teaser/ply_io.h
  42. 825 0
      src/detection/detection_localization_global_hdl/include/teaser/registration.h
  43. 174 0
      src/detection/detection_localization_global_hdl/include/teaser/utils.h
  44. 902 0
      src/detection/detection_localization_global_hdl/include/tinyply.h
  45. 49 0
      src/detection/detection_localization_global_hdl/main.cpp
  46. 671 0
      src/detection/detection_localization_global_hdl/teaser/certification.cc
  47. 61 0
      src/detection/detection_localization_global_hdl/teaser/fpfh.cc
  48. 125 0
      src/detection/detection_localization_global_hdl/teaser/graph.cc
  49. 336 0
      src/detection/detection_localization_global_hdl/teaser/matcher.cc
  50. 110 0
      src/detection/detection_localization_global_hdl/teaser/ply_io.cc
  51. 699 0
      src/detection/detection_localization_global_hdl/teaser/registration.cc
  52. 5 0
      src/detection/detection_localization_global_hdl/tinyply/tinyply.cpp

+ 12 - 12
src/detection/detection_localization_LeGO_LOAM/imageProjection.cpp

@@ -204,7 +204,7 @@ public:
         // 1. Convert ros message to pcl point cloud
 //        copyPointCloud(laserCloudMsg);
         // 2. Start and end angle of a scan
- //       findStartEndAngle();
+        findStartEndAngle();
         // 3. Range image projection
         projectPointCloud();
         // 4. Mark ground points
@@ -217,17 +217,17 @@ public:
         resetParameters();
     }
 
-//    void findStartEndAngle(){
-//        // start and end orientation of this cloud
-//        segMsg.startOrientation = -atan2(laserCloudIn->points[0].y, laserCloudIn->points[0].x);
-//        segMsg.endOrientation   = -atan2(laserCloudIn->points[laserCloudIn->points.size() - 1].y,
-//                                                     laserCloudIn->points[laserCloudIn->points.size() - 1].x) + 2 * M_PI;
-//        if (segMsg.endOrientation - segMsg.startOrientation > 3 * M_PI) {
-//            segMsg.endOrientation -= 2 * M_PI;
-//        } else if (segMsg.endOrientation - segMsg.startOrientation < M_PI)
-//            segMsg.endOrientation += 2 * M_PI;
-//        segMsg.orientationDiff = segMsg.endOrientation - segMsg.startOrientation;
-//    }
+    void findStartEndAngle(){
+        // start and end orientation of this cloud
+        segMsg.startOrientation = -atan2(laserCloudIn->points[0].y, laserCloudIn->points[0].x);
+        segMsg.endOrientation   = -atan2(laserCloudIn->points[laserCloudIn->points.size() - 1].y,
+                                                     laserCloudIn->points[laserCloudIn->points.size() - 1].x) + 2 * M_PI;
+        if (segMsg.endOrientation - segMsg.startOrientation > 3 * M_PI) {
+            segMsg.endOrientation -= 2 * M_PI;
+        } else if (segMsg.endOrientation - segMsg.startOrientation < M_PI)
+            segMsg.endOrientation += 2 * M_PI;
+        segMsg.orientationDiff = segMsg.endOrientation - segMsg.startOrientation;
+    }
 
     void projectPointCloud(){
         // range image projection

+ 73 - 0
src/detection/detection_localization_global_hdl/.gitignore

@@ -0,0 +1,73 @@
+# This file is used to ignore files which are generated
+# ----------------------------------------------------------------------------
+
+*~
+*.autosave
+*.a
+*.core
+*.moc
+*.o
+*.obj
+*.orig
+*.rej
+*.so
+*.so.*
+*_pch.h.cpp
+*_resource.rc
+*.qm
+.#*
+*.*#
+core
+!core/
+tags
+.DS_Store
+.directory
+*.debug
+Makefile*
+*.prl
+*.app
+moc_*.cpp
+ui_*.h
+qrc_*.cpp
+Thumbs.db
+*.res
+*.rc
+/.qmake.cache
+/.qmake.stash
+
+# qtcreator generated files
+*.pro.user*
+
+# xemacs temporary files
+*.flc
+
+# Vim temporary files
+.*.swp
+
+# Visual Studio generated files
+*.ib_pdb_index
+*.idb
+*.ilk
+*.pdb
+*.sln
+*.suo
+*.vcproj
+*vcproj.*.*.user
+*.ncb
+*.sdf
+*.opensdf
+*.vcxproj
+*vcxproj.*
+
+# MinGW generated files
+*.Debug
+*.Release
+
+# Python byte code
+*.pyc
+
+# Binaries
+# --------
+*.dll
+*.exe
+

+ 79 - 0
src/detection/detection_localization_global_hdl/detection_localization_global_hdl.pro

@@ -0,0 +1,79 @@
+QT -= gui
+
+CONFIG += c++14 console
+CONFIG -= app_bundle
+
+# The following define makes your compiler emit warnings if you use
+# any feature of Qt which as been marked deprecated (the exact warnings
+# depend on your compiler). Please consult the documentation of the
+# deprecated API in order to know how to port your code away from it.
+DEFINES += QT_DEPRECATED_WARNINGS
+
+# You can also make your code fail to compile if you use deprecated APIs.
+# In order to do so, uncomment the following line.
+# You can also select to disable deprecated APIs only up to a certain version of Qt.
+#DEFINES += QT_DISABLE_DEPRECATED_BEFORE=0x060000    # disables all the APIs deprecated before Qt 6.0.0
+
+SOURCES += main.cpp \
+    hdl_global_localization/engines/global_localization_bbs.cpp \
+    hdl_global_localization/engines/global_localization_fpfh_ransac.cpp \
+    hdl_global_localization/engines/global_localization_fpfh_teaser.cpp \
+    hdl_global_localization/bbs/bbs_localization.cpp \
+    hdl_global_localization/ransac/ransac_pose_estimation.cpp \
+    hdl_global_localization/ransac/voxelset.cpp \
+    teaser/certification.cc \
+    teaser/fpfh.cc \
+    teaser/graph.cc \
+    teaser/matcher.cc \
+    teaser/ply_io.cc \
+    teaser/registration.cc \
+    tinyply/tinyply.cpp
+
+
+INCLUDEPATH += $$PWD/include
+
+!include(../../../include/common.pri ) {
+    error( "Couldn't find the common.pri file!" )
+}
+
+
+!include(../../../include/ivpcl.pri ) {
+    error( "Couldn't find the ivpcl.pri file!" )
+}
+
+!include(../../../include/ivopencv.pri ) {
+    error( "Couldn't find the ivopencv.pri file!" )
+}
+
+HEADERS += \
+    include/hdl_global_localization/global_localization_results.hpp \
+    include/hdl_global_localization/bbs/bbs_localization.hpp \
+    include/hdl_global_localization/bbs/occupancy_gridmap.hpp \
+    include/hdl_global_localization/engines/global_localization_bbs.hpp \
+    include/hdl_global_localization/engines/global_localization_engine.hpp \
+    include/hdl_global_localization/engines/global_localization_fpfh_ransac.hpp \
+    include/hdl_global_localization/engines/global_localization_fpfh_teaser.hpp \
+    include/hdl_global_localization/ransac/matching_cost_evaluater.hpp \
+    include/hdl_global_localization/ransac/matching_cost_evaluater_flann.hpp \
+    include/hdl_global_localization/ransac/matching_cost_evaluater_voxels.hpp \
+    include/hdl_global_localization/ransac/ransac_pose_estimation.hpp \
+    include/hdl_global_localization/ransac/voxelset.hpp \
+    include/teaser/certification.h \
+    include/teaser/fpfh.h \
+    include/teaser/geometry.h \
+    include/teaser/graph.h \
+    include/teaser/linalg.h \
+    include/teaser/macros.h \
+    include/teaser/matcher.h \
+    include/teaser/ply_io.h \
+    include/teaser/registration.h \
+    include/teaser/utils.h \
+    include/tinyply.h
+
+
+LIBS += -lpcl_search -lpcl_io -lpcl_kdtree -lpcl_features -lpcl_common
+
+LIBS += -lboost_system -lboost_thread -llz4 -lomp
+
+
+LIBS +=  -L$$PWD -lpmc

+ 176 - 0
src/detection/detection_localization_global_hdl/hdl_global_localization/bbs/bbs_localization.cpp

@@ -0,0 +1,176 @@
+#include <hdl_global_localization/bbs/bbs_localization.hpp>
+#include <hdl_global_localization/bbs/occupancy_gridmap.hpp>
+
+#include <queue>
+#include <Eigen/Core>
+#include <Eigen/Geometry>
+
+//#include <ros/ros.h>
+#include <iostream>
+
+namespace hdl_global_localization {
+
+DiscreteTransformation::DiscreteTransformation() {
+  this->level = -1;
+}
+
+DiscreteTransformation::DiscreteTransformation(int level, int x, int y, int theta) {
+  this->level = level;
+  this->x = x;
+  this->y = y;
+  this->theta = theta;
+  this->score = 0.0;
+}
+
+DiscreteTransformation::~DiscreteTransformation() {}
+
+bool DiscreteTransformation::operator<(const DiscreteTransformation& rhs) const {
+  return score < rhs.score;
+}
+
+bool DiscreteTransformation::is_leaf() const {
+  return level == 0;
+}
+
+Eigen::Isometry2f DiscreteTransformation::transformation(double theta_resolution, const std::vector<std::shared_ptr<OccupancyGridMap>>& gridmap_pyramid) {
+  double trans_resolution = gridmap_pyramid[level]->grid_resolution();
+
+  Eigen::Isometry2f trans = Eigen::Isometry2f::Identity();
+  trans.linear() = Eigen::Rotation2Df(theta_resolution * theta).toRotationMatrix();
+  trans.translation() = Eigen::Vector2f(trans_resolution * x, trans_resolution * y);
+
+  return trans;
+}
+
+DiscreteTransformation::Points DiscreteTransformation::transform(const Points& points, double trans_resolution, double theta_resolution) {
+  Points transformed(points.size());
+
+  Eigen::Map<const Eigen::Matrix<float, 2, -1>> src(points[0].data(), 2, points.size());
+  Eigen::Map<Eigen::Matrix<float, 2, -1>> dst(transformed[0].data(), 2, points.size());
+
+  Eigen::Matrix2f rot = Eigen::Rotation2Df(theta_resolution * theta).toRotationMatrix();
+  Eigen::Vector2f trans(trans_resolution * x, trans_resolution * y);
+
+  dst = (rot * src).colwise() + trans;
+
+  return transformed;
+}
+
+double DiscreteTransformation::calc_score(const Points& points, double theta_resolution, const std::vector<std::shared_ptr<OccupancyGridMap>>& gridmap_pyramid) {
+  const auto& gridmap = gridmap_pyramid[level];
+  auto transformed = transform(points, gridmap->grid_resolution(), theta_resolution);
+  score = gridmap->calc_score(transformed);
+  return score;
+}
+
+std::vector<DiscreteTransformation> DiscreteTransformation::branch() {
+  std::vector<DiscreteTransformation> b;
+  b.reserve(4);
+  b.emplace_back(DiscreteTransformation(level - 1, x * 2, y * 2, theta));
+  b.emplace_back(DiscreteTransformation(level - 1, x * 2 + 1, y * 2, theta));
+  b.emplace_back(DiscreteTransformation(level - 1, x * 2, y * 2 + 1, theta));
+  b.emplace_back(DiscreteTransformation(level - 1, x * 2 + 1, y * 2 + 1, theta));
+  return b;
+}
+
+BBSLocalization::BBSLocalization(const BBSParams& params) : params(params) {}
+
+BBSLocalization::~BBSLocalization() {}
+
+void BBSLocalization::set_map(const BBSLocalization::Points& map_points, double resolution, int width, int height, int pyramid_levels, int max_points_per_cell) {
+  gridmap_pyramid.resize(pyramid_levels);
+  gridmap_pyramid[0].reset(new OccupancyGridMap(resolution, width, height));
+  gridmap_pyramid[0]->insert_points(map_points, max_points_per_cell);
+
+  for (int i = 1; i < pyramid_levels; i++) {
+    gridmap_pyramid[i] = gridmap_pyramid[i - 1]->pyramid_up();
+  }
+}
+
+boost::optional<Eigen::Isometry2f> BBSLocalization::localize(const BBSLocalization::Points& scan_points, double min_score, double* best_score) {
+  theta_resolution = std::acos(1 - std::pow(gridmap_pyramid[0]->grid_resolution(), 2) / (2 * std::pow(params.max_range, 2)));
+
+  double best_score_storage;
+  best_score = best_score ? best_score : &best_score_storage;
+
+  *best_score = min_score;
+  boost::optional<DiscreteTransformation> best_trans;
+
+  auto trans_queue = create_init_transset(scan_points);
+
+  std::cout<<"Branch-and-Bound"<<std::endl;
+//  ROS_INFO_STREAM("Branch-and-Bound");
+  while (!trans_queue.empty()) {
+    // std::cout << trans_queue.size() << std::endl;
+
+    auto trans = trans_queue.top();
+    trans_queue.pop();
+
+    if (trans.score < *best_score) {
+      continue;
+    }
+
+    if (trans.is_leaf()) {
+      best_trans = trans;
+      *best_score = trans.score;
+    } else {
+      auto children = trans.branch();
+      for (auto& child : children) {
+        child.calc_score(scan_points, theta_resolution, gridmap_pyramid);
+        trans_queue.push(child);
+      }
+    }
+  }
+
+  if (best_trans == boost::none) {
+    return boost::none;
+  }
+
+  return best_trans->transformation(theta_resolution, gridmap_pyramid);
+}
+
+std::shared_ptr<const OccupancyGridMap> BBSLocalization::gridmap() const {
+  return gridmap_pyramid[0];
+}
+
+std::priority_queue<DiscreteTransformation> BBSLocalization::create_init_transset(const Points& scan_points) const {
+  double trans_res = gridmap_pyramid.back()->grid_resolution();
+  std::pair<int, int> tx_range(std::floor(params.min_tx / trans_res), std::ceil(params.max_tx / trans_res));
+  std::pair<int, int> ty_range(std::floor(params.min_ty / trans_res), std::ceil(params.max_ty / trans_res));
+  std::pair<int, int> theta_range(std::floor(params.min_theta / theta_resolution), std::ceil(params.max_theta / theta_resolution));
+
+//  ROS_INFO_STREAM("Resolution trans:" << trans_res << " theta:" << theta_resolution);
+//  ROS_INFO_STREAM("TransX range:" << tx_range.first << " " << tx_range.second);
+//  ROS_INFO_STREAM("TransY range:" << ty_range.first << " " << ty_range.second);
+//  ROS_INFO_STREAM("Theta  range:" << theta_range.first << " " << theta_range.second);
+
+  std::cout<<"Resolution trans:" << trans_res << " theta:" << theta_resolution<<std::endl;
+  std::cout<<"TransX range:" << tx_range.first << " " << tx_range.second<<std::endl;
+  std::cout<<"TransY range:" << ty_range.first << " " << ty_range.second<<std::endl;
+  std::cout<<"Theta  range:" << theta_range.first << " " << theta_range.second<<std::endl;
+
+  std::vector<DiscreteTransformation> transset;
+  transset.reserve((tx_range.second - tx_range.first) * (ty_range.second - ty_range.first) * (theta_range.second - theta_range.first));
+  for (int tx = tx_range.first; tx <= tx_range.second; tx++) {
+    for (int ty = ty_range.first; ty <= ty_range.second; ty++) {
+      for (int theta = theta_range.first; theta <= theta_range.second; theta++) {
+        int level = gridmap_pyramid.size() - 1;
+        transset.emplace_back(DiscreteTransformation(level, tx, ty, theta));
+      }
+    }
+  }
+
+//  ROS_INFO_STREAM("Initial transformation set size:" << transset.size());
+  std::cout<<"Initial transformation set size:" << transset.size()<<std::endl;
+
+#pragma omp parallel for
+  for (int i = 0; i < transset.size(); i++) {
+    auto& trans = transset[i];
+    const auto& gridmap = gridmap_pyramid[trans.level];
+    trans.calc_score(scan_points, theta_resolution, gridmap_pyramid);
+  }
+
+  return std::priority_queue<DiscreteTransformation>(transset.begin(), transset.end());
+}
+
+}  // namespace  hdl_global_localization

+ 125 - 0
src/detection/detection_localization_global_hdl/hdl_global_localization/engines/global_localization_bbs.cpp

@@ -0,0 +1,125 @@
+#include <hdl_global_localization/engines/global_localization_bbs.hpp>
+
+//#include <pcl_ros/point_cloud.h>
+//#include <sensor_msgs/PointCloud2.h>
+
+#include <hdl_global_localization/bbs/bbs_localization.hpp>
+#include <hdl_global_localization/bbs/occupancy_gridmap.hpp>
+
+namespace hdl_global_localization {
+
+//GlobalLocalizationBBS::GlobalLocalizationBBS(ros::NodeHandle& private_nh) : private_nh(private_nh) {
+GlobalLocalizationBBS::GlobalLocalizationBBS()  {
+//  gridmap_pub = private_nh.advertise<nav_msgs::OccupancyGrid>("bbs/gridmap", 1, true);
+//  map_slice_pub = private_nh.advertise<sensor_msgs::PointCloud2>("bbs/map_slice", 1, true);
+//  scan_slice_pub = private_nh.advertise<sensor_msgs::PointCloud2>("bbs/scan_slice", 1, false);
+}
+
+GlobalLocalizationBBS ::~GlobalLocalizationBBS() {}
+
+void GlobalLocalizationBBS::set_global_map(pcl::PointCloud<pcl::PointXYZ>::ConstPtr cloud) {
+  BBSParams params;
+//  params.max_range = private_nh.param<double>("bbs/max_range", 15.0);
+//  params.min_tx = private_nh.param<double>("bbs/min_tx", -10.0);
+//  params.max_tx = private_nh.param<double>("bbs/max_tx", 10.0);
+//  params.min_ty = private_nh.param<double>("bbs/min_ty", -10.0);
+//  params.max_ty = private_nh.param<double>("bbs/max_ty", 10.0);
+//  params.min_theta = private_nh.param<double>("bbs/min_theta", -3.15);
+//  params.max_theta = private_nh.param<double>("bbs/max_theta", 3.15);
+
+  params.max_range = 15.0;
+  params.min_tx = -10.0;
+  params.max_tx =  10.0;
+  params.min_ty =  -10.0;
+  params.max_ty =  10.0;
+  params.min_theta =  -3.15;
+  params.max_theta =  3.15;
+  bbs.reset(new BBSLocalization(params));
+
+  double map_min_z = 2.0;//private_nh.param<double>("bbs/map_min_z", 2.0);
+  double map_max_z = 2.4;//private_nh.param<double>("bbs/map_max_z", 2.4);
+  auto map_2d = slice(*cloud, map_min_z, map_max_z);
+//  ROS_INFO_STREAM("Set Map " << map_2d.size() << " points");
+  std::cout<<"Set Map " << map_2d.size() << " points"<<std::endl;
+
+  if (map_2d.size() < 128) {
+//    ROS_WARN_STREAM("Num points in the sliced map is too small!!");
+//    ROS_WARN_STREAM("Change the slice range parameters!!");
+    std::cout<<"Num points in the sliced map is too small!!"<<std::endl;
+    std::cout<<"Change the slice range parameters!!"<<std::endl;
+  }
+
+  int map_width = 1024;//private_nh.param<int>("bbs/map_width", 1024);
+  int map_height = 1024;//private_nh.param<int>("bbs/map_height", 1024);
+  double map_resolution = 0.5;//private_nh.param<double>("bbs/map_resolution", 0.5);
+  int map_pyramid_level = 6;//private_nh.param<int>("bbs/map_pyramid_level", 6);
+  int max_points_per_cell = 5;//private_nh.param<int>("bbs/max_points_per_cell", 5);
+  bbs->set_map(map_2d, map_resolution, map_width, map_height, map_pyramid_level, max_points_per_cell);
+
+  auto map_3d = unslice(map_2d);
+  map_3d->header.frame_id = "map";
+//  map_slice_pub.publish(map_3d);
+//  gridmap_pub.publish(bbs->gridmap()->to_rosmsg());
+}
+
+GlobalLocalizationResults GlobalLocalizationBBS::query(pcl::PointCloud<pcl::PointXYZ>::ConstPtr cloud, int max_num_candidates) {
+  double scan_min_z = -0.2;// private_nh.param<double>("bbs/scan_min_z", -0.2);
+  double scan_max_z = 0.2;//private_nh.param<double>("bbs/scan_max_z", 0.2);
+  auto scan_2d = slice(*cloud, scan_min_z, scan_max_z);
+
+  std::vector<GlobalLocalizationResult::Ptr> results;
+
+  std::cout<<"Query " << scan_2d.size() << " points"<<std::endl;
+//  ROS_INFO_STREAM("Query " << scan_2d.size() << " points");
+  if (scan_2d.size() < 32) {
+//    ROS_WARN_STREAM("Num points in the sliced scan is too small!!");
+//    ROS_WARN_STREAM("Change the slice range parameters!!");
+    std::cout<<"Num points in the sliced scan is too small!!"<<std::endl;
+    std::cout<<"Change the slice range parameters!!"<<std::endl;
+    return GlobalLocalizationResults(results);
+  }
+
+  double best_score = 0.0;
+  auto trans_2d = bbs->localize(scan_2d, 0.0, &best_score);
+  if (trans_2d == boost::none) {
+    return GlobalLocalizationResults(results);
+  }
+
+//  if (scan_slice_pub.getNumSubscribers()) {
+//    auto scan_3d = unslice(scan_2d);
+//    scan_3d->header = cloud->header;
+//    scan_slice_pub.publish(scan_3d);
+//  }
+
+  Eigen::Isometry3f trans_3d = Eigen::Isometry3f::Identity();
+  trans_3d.linear().block<2, 2>(0, 0) = trans_2d->linear();
+  trans_3d.translation().head<2>() = trans_2d->translation();
+
+  results.resize(1);
+  results[0].reset(new GlobalLocalizationResult(best_score, best_score, trans_3d));
+
+  return GlobalLocalizationResults(results);
+}
+
+GlobalLocalizationBBS::Points2D GlobalLocalizationBBS::slice(const pcl::PointCloud<pcl::PointXYZ>& cloud, double min_z, double max_z) const {
+  Points2D points_2d;
+  points_2d.reserve(cloud.size());
+  for (int i = 0; i < cloud.size(); i++) {
+    if (min_z < cloud.at(i).z && cloud.at(i).z < max_z) {
+      points_2d.push_back(cloud.at(i).getVector3fMap().head<2>());
+    }
+  }
+  return points_2d;
+}
+
+pcl::PointCloud<pcl::PointXYZ>::Ptr GlobalLocalizationBBS::unslice(const Points2D& points) {
+  pcl::PointCloud<pcl::PointXYZ>::Ptr cloud(new pcl::PointCloud<pcl::PointXYZ>);
+  cloud->resize(points.size());
+  for (int i = 0; i < points.size(); i++) {
+    cloud->at(i).getVector3fMap().head<2>() = points[i];
+    cloud->at(i).z = 0.0f;
+  }
+
+  return cloud;
+}
+}  // namespace hdl_global_localization

+ 59 - 0
src/detection/detection_localization_global_hdl/hdl_global_localization/engines/global_localization_fpfh_ransac.cpp

@@ -0,0 +1,59 @@
+#include <hdl_global_localization/engines/global_localization_fpfh_ransac.hpp>
+
+//#include <ros/ros.h>
+#include <pcl/features/fpfh_omp.h>
+#include <pcl/features/normal_3d_omp.h>
+#include <pcl/search/impl/kdtree.hpp>
+
+#include <hdl_global_localization/ransac/ransac_pose_estimation.hpp>
+
+namespace hdl_global_localization {
+
+GlobalLocalizationEngineFPFH_RANSAC::GlobalLocalizationEngineFPFH_RANSAC()  {}
+//GlobalLocalizationEngineFPFH_RANSAC::GlobalLocalizationEngineFPFH_RANSAC(ros::NodeHandle& private_nh) : private_nh(private_nh) {}
+
+GlobalLocalizationEngineFPFH_RANSAC::~GlobalLocalizationEngineFPFH_RANSAC() {}
+
+pcl::PointCloud<pcl::FPFHSignature33>::ConstPtr GlobalLocalizationEngineFPFH_RANSAC::extract_fpfh(pcl::PointCloud<pcl::PointXYZ>::ConstPtr cloud) {
+  double normal_estimation_radius = 2.0;//private_nh.param<double>("fpfh/normal_estimation_radius", 2.0);
+  double search_radius = 8.0;//private_nh.param<double>("fpfh/search_radius", 8.0);
+
+  std::cout<<"Normal Estimation: Radius(" << normal_estimation_radius << ")"<<std::endl;
+//  ROS_INFO_STREAM("Normal Estimation: Radius(" << normal_estimation_radius << ")");
+  pcl::PointCloud<pcl::Normal>::Ptr normals(new pcl::PointCloud<pcl::Normal>);
+  pcl::NormalEstimationOMP<pcl::PointXYZ, pcl::Normal> nest;
+  nest.setRadiusSearch(normal_estimation_radius);
+  nest.setInputCloud(cloud);
+  nest.compute(*normals);
+
+  std::cout<<"FPFH Extraction: Search Radius(" << search_radius << ")"<<std::endl;
+//  ROS_INFO_STREAM("FPFH Extraction: Search Radius(" << search_radius << ")");
+  pcl::PointCloud<pcl::FPFHSignature33>::Ptr features(new pcl::PointCloud<pcl::FPFHSignature33>);
+  pcl::FPFHEstimationOMP<pcl::PointXYZ, pcl::Normal, pcl::FPFHSignature33> fest;
+  fest.setRadiusSearch(search_radius);
+  fest.setInputCloud(cloud);
+  fest.setInputNormals(normals);
+  fest.compute(*features);
+
+  return features;
+}
+
+void GlobalLocalizationEngineFPFH_RANSAC::set_global_map(pcl::PointCloud<pcl::PointXYZ>::ConstPtr cloud) {
+  global_map = cloud;
+  global_map_features = extract_fpfh(cloud);
+
+  ransac.reset(new RansacPoseEstimation<pcl::FPFHSignature33>());
+//  ransac.reset(new RansacPoseEstimation<pcl::FPFHSignature33>(private_nh));
+  ransac->set_target(global_map, global_map_features);
+}
+
+GlobalLocalizationResults GlobalLocalizationEngineFPFH_RANSAC::query(pcl::PointCloud<pcl::PointXYZ>::ConstPtr cloud, int max_num_candidates) {
+  pcl::PointCloud<pcl::FPFHSignature33>::ConstPtr cloud_features = extract_fpfh(cloud);
+
+  ransac->set_source(cloud, cloud_features);
+  auto results = ransac->estimate();
+
+  return results.sort(max_num_candidates);
+}
+
+}  // namespace hdl_global_localization

+ 90 - 0
src/detection/detection_localization_global_hdl/hdl_global_localization/engines/global_localization_fpfh_teaser.cpp

@@ -0,0 +1,90 @@
+#include <hdl_global_localization/engines/global_localization_fpfh_teaser.hpp>
+
+//#include <ros/ros.h>
+#include <teaser/matcher.h>
+#include <teaser/registration.h>
+
+#include <pcl/common/transforms.h>
+#include <hdl_global_localization/ransac/matching_cost_evaluater_flann.hpp>
+
+namespace hdl_global_localization {
+
+//GlobalLocalizationEngineFPFH_Teaser::GlobalLocalizationEngineFPFH_Teaser(ros::NodeHandle& private_nh) : GlobalLocalizationEngineFPFH_RANSAC(private_nh) {}
+
+GlobalLocalizationEngineFPFH_Teaser::GlobalLocalizationEngineFPFH_Teaser() : GlobalLocalizationEngineFPFH_RANSAC() {}
+
+GlobalLocalizationEngineFPFH_Teaser::~GlobalLocalizationEngineFPFH_Teaser() {}
+
+void GlobalLocalizationEngineFPFH_Teaser::set_global_map(pcl::PointCloud<pcl::PointXYZ>::ConstPtr cloud) {
+  this->global_map = cloud;
+  this->global_map_features = extract_fpfh(cloud);
+
+  evaluater.reset(new MatchingCostEvaluaterFlann());
+  evaluater->set_target(this->global_map,1.0);// private_nh.param<double>("ransac/max_correspondence_distance", 1.0));
+}
+
+GlobalLocalizationResults GlobalLocalizationEngineFPFH_Teaser::query(pcl::PointCloud<pcl::PointXYZ>::ConstPtr cloud, int max_num_candidates) {
+  auto cloud_features = extract_fpfh(cloud);
+
+  teaser::PointCloud target_cloud, source_cloud;
+  teaser::FPFHCloud target_features, source_features;
+  target_cloud.reserve(global_map->size());
+  target_features.reserve(global_map->size());
+  for (int i = 0; i < global_map->size(); i++) {
+    target_cloud.push_back({global_map->at(i).x, global_map->at(i).y, global_map->at(i).z});
+    target_features.push_back(global_map_features->at(i));
+  }
+
+  source_cloud.reserve(cloud->size());
+  source_features.reserve(cloud->size());
+  for (int i = 0; i < cloud->size(); i++) {
+    source_cloud.push_back({cloud->at(i).x, cloud->at(i).y, cloud->at(i).z});
+    source_features.push_back(cloud_features->at(i));
+  }
+
+  std::cout<<"Find Correspondences"<<std::endl;
+//  ROS_INFO_STREAM("Find Correspondences");
+  teaser::Matcher matcher;
+  auto correspondences = matcher.calculateCorrespondences(
+    source_cloud,
+    target_cloud,
+    source_features,
+    target_features,
+    false,
+    false,
+    false,
+    0.95);
+//    private_nh.param<bool>("teaser/corss_check", false),
+//    private_nh.param<bool>("teaser/tuple_test", false),
+//    private_nh.param<double>("teaser/tuple_scale", 0.95));
+
+  std::cout<<"Run Teaser"<<std::endl;
+//  ROS_INFO_STREAM("Run Teaser");
+  teaser::RobustRegistrationSolver::Params params;
+  params.noise_bound = 0.5;// private_nh.param<double>("teaser/noise_bound", 0.5);
+  params.cbar2 = 1.0;// private_nh.param<double>("teaser/cbar2", 1.0);
+  params.estimate_scaling = false;
+  params.rotation_max_iterations = 100;//private_nh.param<int>("teaser/rotation_max_iterations", 100);
+  params.rotation_gnc_factor = 1.4; //private_nh.param<double>("teaser/rotation_gnc_factor", 1.4);
+  params.rotation_estimation_algorithm = teaser::RobustRegistrationSolver::ROTATION_ESTIMATION_ALGORITHM::GNC_TLS;
+  params.rotation_cost_threshold = 0.005;//private_nh.param<double>("teaser/rotation_cost_threshold", 0.005);
+
+  teaser::RobustRegistrationSolver solver(params);
+  solver.solve(source_cloud, target_cloud, correspondences);
+
+  auto solution = solver.getSolution();
+
+  Eigen::Isometry3f transformation = Eigen::Isometry3f::Identity();
+  transformation.linear() = solution.rotation.cast<float>();
+  transformation.translation() = solution.translation.cast<float>();
+
+  double inlier_fraction = 0.0;
+  double error = evaluater->calc_matching_error(*cloud, transformation.matrix(), &inlier_fraction);
+
+  std::vector<GlobalLocalizationResult::Ptr> results(1);
+  results[0].reset(new GlobalLocalizationResult(error, inlier_fraction, transformation));
+
+  return GlobalLocalizationResults(results);
+}
+
+}  // namespace hdl_global_localization

+ 191 - 0
src/detection/detection_localization_global_hdl/hdl_global_localization/ransac/ransac_pose_estimation.cpp

@@ -0,0 +1,191 @@
+/*
+ * This RansacPoseEstimation implementation was written based on pcl::SampleConsensusPrerejective
+ *
+ * Software License Agreement (BSD License)
+ *
+ *  Point Cloud Library (PCL) - www.pointclouds.org
+ *  Copyright (c) 2010-2012, Willow Garage, Inc.
+ *  Copyright (c) 2012-, Open Perception, Inc.
+ *
+ *  All rights reserved.
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions
+ *  are met:
+ *
+ *   * Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above
+ *     copyright notice, this list of conditions and the following
+ *     disclaimer in the documentation and/or other materials provided
+ *     with the distribution.
+ *   * Neither the name of the copyright holder(s) nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ *  FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ *  COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ *  BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ *  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ *  CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ *  LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ *  ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ *  POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $Id$
+ *
+ */
+
+#include <atomic>
+#include <vector>
+#include <random>
+#include <pcl/point_types.h>
+#include <pcl/point_cloud.h>
+#include <pcl/search/kdtree.h>
+#include <pcl/registration/transformation_estimation_svd.h>
+#include <pcl/registration/correspondence_rejection_poly.h>
+
+//#include <ros/ros.h>
+
+#include <hdl_global_localization/ransac/ransac_pose_estimation.hpp>
+#include <hdl_global_localization/ransac/matching_cost_evaluater_flann.hpp>
+#include <hdl_global_localization/ransac/matching_cost_evaluater_voxels.hpp>
+
+#include <omp.h>
+
+namespace hdl_global_localization {
+
+template <typename FeatureT>
+RansacPoseEstimation<FeatureT>::RansacPoseEstimation() {}
+//RansacPoseEstimation<FeatureT>::RansacPoseEstimation(ros::NodeHandle& private_nh) : private_nh(private_nh) {}
+
+template <typename FeatureT>
+void RansacPoseEstimation<FeatureT>::set_target(pcl::PointCloud<pcl::PointXYZ>::ConstPtr target, typename pcl::PointCloud<FeatureT>::ConstPtr target_features) {
+  this->target = target;
+  this->target_features = target_features;
+  feature_tree.reset(new pcl::KdTreeFLANN<FeatureT>);
+  feature_tree->setInputCloud(target_features);
+
+  bool bvoxel_based = true;
+//  if (private_nh.param<bool>("ransac/voxel_based", true)) {
+  if(bvoxel_based){
+    evaluater.reset(new MatchingCostEvaluaterVoxels());
+  } else {
+    evaluater.reset(new MatchingCostEvaluaterFlann());
+  }
+  evaluater->set_target(target,1.0);// private_nh.param<double>("ransac/max_correspondence_distance", 1.0));
+}
+
+template <typename FeatureT>
+void RansacPoseEstimation<FeatureT>::set_source(pcl::PointCloud<pcl::PointXYZ>::ConstPtr source, typename pcl::PointCloud<FeatureT>::ConstPtr source_features) {
+  this->source = source;
+  this->source_features = source_features;
+}
+
+template <typename FeatureT>
+GlobalLocalizationResults RansacPoseEstimation<FeatureT>::estimate() {
+  pcl::registration::TransformationEstimationSVD<pcl::PointXYZ, pcl::PointXYZ> transformation_estimation;
+
+  pcl::registration::CorrespondenceRejectorPoly<pcl::PointXYZ, pcl::PointXYZ> correspondence_rejection;
+  correspondence_rejection.setInputTarget(target);
+  correspondence_rejection.setInputSource(source);
+  correspondence_rejection.setCardinality(3);
+  correspondence_rejection.setSimilarityThreshold(0.5);//(private_nh.param<double>("ransac/similarity_threshold", 0.5));
+
+  std::cout<<"RANSAC : Precompute Nearest Features"<<std::endl;
+//  ROS_INFO_STREAM("RANSAC : Precompute Nearest Features");
+  std::vector<std::vector<int>> similar_features(source->size());
+#pragma omp parallel for
+  for (int i = 0; i < source->size(); i++) {
+    std::vector<float> sq_dists;
+//    feature_tree->nearestKSearch(source_features->at(i), private_nh.param<int>("ransac/correspondence_randomness", 2), similar_features[i], sq_dists);
+    feature_tree->nearestKSearch(source_features->at(i), 2, similar_features[i], sq_dists);
+  }
+
+  std::vector<std::mt19937> mts(omp_get_max_threads());
+  for (int i = 0; i < mts.size(); i++) {
+    mts[i] = std::mt19937(i * 8191 + i + target->size() + source->size());
+  }
+
+  std::cout<<"RANSAC : Main Loop"<<std::endl;
+//  ROS_INFO_STREAM("RANSAC : Main Loop");
+  std::atomic_int matching_count(0);
+  std::atomic_int iterations(0);
+//  std::vector<GlobalLocalizationResult::Ptr> results(private_nh.param<int>("ransac/max_iterations", 100000));
+  std::vector<GlobalLocalizationResult::Ptr> results(100000);
+  int matching_budget =10000;// private_nh.param<int>("ransac/matching_budget", 10000);
+  double min_inlier_fraction = 0.25;// private_nh.param<double>("ransac/inlier_fraction", 0.25);
+
+#pragma omp parallel for
+  for (int i = 0; i < results.size(); i++) {
+    if (matching_count > matching_budget) {
+      continue;
+    }
+    iterations++;
+
+    auto& mt = mts[omp_get_thread_num()];
+    std::vector<int> samples;
+    std::vector<int> correspondences;
+    select_samples(mt, similar_features, samples, correspondences);
+
+    if (!correspondence_rejection.thresholdPolygon(samples, correspondences)) {
+      continue;
+    }
+
+    Eigen::Matrix4f transformation;
+    transformation_estimation.estimateRigidTransformation(*source, samples, *target, correspondences, transformation);
+
+    // if (!params.is_valid(Eigen::Isometry3f(transformation))) {
+    //   continue;
+    // }
+
+    matching_count++;
+    double inlier_fraction = 0.0;
+    double matching_error = evaluater->calc_matching_error(*source, transformation, &inlier_fraction);
+    std::cout<<"RANSAC : iteration:" << iterations << " matching_count:" << matching_count << " error:" << matching_error << " inlier:" << inlier_fraction<<std::endl;
+ //   ROS_INFO_STREAM("RANSAC : iteration:" << iterations << " matching_count:" << matching_count << " error:" << matching_error << " inlier:" << inlier_fraction);
+
+    if (inlier_fraction > min_inlier_fraction) {
+      results[i].reset(new GlobalLocalizationResult(matching_error, inlier_fraction, Eigen::Isometry3f(transformation)));
+    }
+  }
+
+  return GlobalLocalizationResults(results);
+}
+
+template <typename FeatureT>
+void RansacPoseEstimation<FeatureT>::select_samples(
+  std::mt19937& mt,
+  const std::vector<std::vector<int>>& similar_features,
+  std::vector<int>& samples,
+  std::vector<int>& correspondences) const {
+  samples.resize(3);
+  for (int i = 0; i < samples.size(); i++) {
+    samples[i] = std::uniform_int_distribution<>(0, similar_features.size() - 1)(mt);
+
+    for (int j = 0; j < i; j++) {
+      if (samples[j] == samples[i]) {
+        i--;
+      }
+    }
+  }
+
+  correspondences.resize(3);
+  for (int i = 0; i < samples.size(); i++) {
+    if (similar_features[samples[i]].size() == 1) {
+      correspondences[i] = similar_features[samples[i]][0];
+    } else {
+      int idx = std::uniform_int_distribution<>(0, similar_features[samples[i]].size() - 1)(mt);
+      correspondences[i] = similar_features[samples[i]][idx];
+    }
+  }
+}
+
+// explicit instantiation
+template class RansacPoseEstimation<pcl::FPFHSignature33>;
+
+}  // namespace hdl_global_localization

+ 72 - 0
src/detection/detection_localization_global_hdl/hdl_global_localization/ransac/voxelset.cpp

@@ -0,0 +1,72 @@
+#include <hdl_global_localization/ransac/voxelset.hpp>
+
+#include <unordered_set>
+#include <boost/functional/hash.hpp>
+
+#include <pcl/point_types.h>
+#include <pcl/point_cloud.h>
+
+namespace hdl_global_localization {
+
+size_t Vector3iHash::operator()(const Eigen::Vector3i& x) const {
+  size_t seed = 0;
+  boost::hash_combine(seed, x[0]);
+  boost::hash_combine(seed, x[1]);
+  boost::hash_combine(seed, x[2]);
+  return seed;
+}
+
+VoxelSet::VoxelSet(double max_correspondence_distance) : resolution(max_correspondence_distance) {}
+
+void VoxelSet::set_cloud(pcl::PointCloud<pcl::PointXYZ>::ConstPtr cloud) {
+  voxels.clear();
+
+  std::vector<Eigen::Vector3i, Eigen::aligned_allocator<Eigen::Vector3i>> offsets;
+  offsets.reserve(9);
+  for (int i = -1; i <= 1; i++) {
+    for (int j = -1; j <= 1; j++) {
+      for (int k = -1; k <= 1; k++) {
+        offsets.push_back(Eigen::Vector3i(i, j, k));
+      }
+    }
+  }
+
+  for (const auto& pt : *cloud) {
+    Eigen::Vector3i coord = voxel_coord(pt.getVector4fMap());
+    for (const auto& offset : offsets) {
+      Eigen::Vector4f center = voxel_center(coord + offset);
+      if ((center - pt.getVector4fMap()).squaredNorm() < resolution * resolution) {
+        voxels.insert(coord + offset);
+      }
+    }
+  }
+}
+
+double VoxelSet::matching_error(const pcl::PointCloud<pcl::PointXYZ>& cloud, double* inlier_fraction) const {
+  int num_inliers = 0;
+  double errors = 0.0;
+
+  for (const auto& pt : cloud) {
+    Eigen::Vector3i coord = voxel_coord(pt.getVector4fMap());
+    if (voxels.find(coord) != voxels.end()) {
+      Eigen::Vector4f center = voxel_center(coord);
+      num_inliers++;
+      errors += (pt.getVector4fMap() - center).squaredNorm();
+    }
+  }
+
+  *inlier_fraction = static_cast<double>(num_inliers) / cloud.size();
+  return num_inliers ? errors / num_inliers : std::numeric_limits<double>::max();
+}
+
+Eigen::Vector3i VoxelSet::voxel_coord(const Eigen::Vector4f& x) const {
+  return (x.array() / resolution - 0.5).floor().cast<int>().head<3>();
+}
+
+Eigen::Vector4f VoxelSet::voxel_center(const Eigen::Vector3i& coord) const {
+  Eigen::Vector3f origin = (coord.template cast<float>().array() + 0.5) * resolution;
+  Eigen::Vector3f offset = Eigen::Vector3f::Ones() * resolution / 2.0f;
+  return (Eigen::Vector4f() << origin + offset, 1.0).finished();
+}
+
+}  // namespace hdl_global_localization

+ 92 - 0
src/detection/detection_localization_global_hdl/include/hdl_global_localization/bbs/bbs_localization.hpp

@@ -0,0 +1,92 @@
+/**
+ * @brief Branch-and-Bound Search-based 2D Global Localization
+ * @ref Hess et al., "Real-Time Loop Closure in 2D LIDAR SLAM", ICRA2016
+ * @ref https://static.googleusercontent.com/media/research.google.com/en//pubs/archive/45466.pdf
+ */
+#ifndef HDL_GLOBAL_LOCALIZATION_BBS_LOCALIZATION_HPP
+#define HDL_GLOBAL_LOCALIZATION_BBS_LOCALIZATION_HPP
+
+#include <memory>
+#include <queue>
+#include <vector>
+#include <boost/optional.hpp>
+
+#include <Eigen/Core>
+#include <Eigen/Geometry>
+
+namespace hdl_global_localization {
+
+class OccupancyGridMap;
+
+struct DiscreteTransformation {
+public:
+  using Points = std::vector<Eigen::Vector2f, Eigen::aligned_allocator<Eigen::Vector2f>>;
+
+  DiscreteTransformation();
+  DiscreteTransformation(int level, int x, int y, int theta);
+  ~DiscreteTransformation();
+
+  bool operator<(const DiscreteTransformation& rhs) const;
+
+  bool is_leaf() const;
+
+  Eigen::Isometry2f transformation(double theta_resolution, const std::vector<std::shared_ptr<OccupancyGridMap>>& gridmap_pyramid);
+
+  Points transform(const Points& points, double trans_resolution, double theta_resolution);
+
+  double calc_score(const Points& points, double theta_resolution, const std::vector<std::shared_ptr<OccupancyGridMap>>& gridmap_pyramid);
+
+  std::vector<DiscreteTransformation> branch();
+
+public:
+  double score;
+  int level;
+  int x;
+  int y;
+  int theta;
+};
+
+struct BBSParams {
+  BBSParams() {
+    max_range = 15.0;
+    min_tx = min_ty = -10.0;
+    max_tx = max_ty = 10.0;
+    min_theta = -M_PI;
+    max_theta = M_PI;
+  }
+
+  double max_range;
+  double min_tx;
+  double max_tx;
+  double min_ty;
+  double max_ty;
+  double min_theta;
+  double max_theta;
+};
+
+class BBSLocalization {
+public:
+  using Points = std::vector<Eigen::Vector2f, Eigen::aligned_allocator<Eigen::Vector2f>>;
+
+  BBSLocalization(const BBSParams& params = BBSParams());
+  ~BBSLocalization();
+
+  void set_map(const Points& map_points, double resolution, int width, int height, int pyramid_levels, int max_points_per_cell);
+
+  boost::optional<Eigen::Isometry2f> localize(const Points& scan_points, double min_score, double* best_score = nullptr);
+
+  std::shared_ptr<const OccupancyGridMap> gridmap() const;
+
+private:
+  std::priority_queue<DiscreteTransformation> create_init_transset(const Points& scan_points) const;
+
+private:
+  BBSParams params;
+
+  double theta_resolution;
+  std::vector<std::shared_ptr<OccupancyGridMap>> gridmap_pyramid;
+};
+
+}  // namespace hdl_global_localization
+
+#endif

+ 118 - 0
src/detection/detection_localization_global_hdl/include/hdl_global_localization/bbs/occupancy_gridmap.hpp

@@ -0,0 +1,118 @@
+#ifndef HDL_GLOBAL_LOCALIZATION_OCCUPANCY_GRID_HPP
+#define HDL_GLOBAL_LOCALIZATION_OCCUPANCY_GRID_HPP
+
+#include <vector>
+#include <Eigen/Core>
+//#include <nav_msgs/OccupancyGrid.h>
+
+#include <opencv2/opencv.hpp>
+
+namespace hdl_global_localization {
+
+class OccupancyGridMap {
+public:
+  using Ptr = std::shared_ptr<OccupancyGridMap>;
+
+  OccupancyGridMap(double resolution, const cv::Mat& values) {
+    this->resolution = resolution;
+    this->values = values;
+  }
+
+  OccupancyGridMap(double resolution, int width, int height) {
+    this->resolution = resolution;
+    values = cv::Mat1f(height, width, 0.0f);
+  }
+
+  double grid_resolution() const { return resolution; }
+  int width() const { return values.cols; }
+  int height() const { return values.rows; }
+  const float* data() const { return reinterpret_cast<const float*>(values.data); }
+
+  void insert_points(const std::vector<Eigen::Vector2f, Eigen::aligned_allocator<Eigen::Vector2f>>& points, int min_points_per_grid) {
+    for (const auto& pt : points) {
+      auto loc = grid_loc(pt);
+      if (in_map(loc)) {
+        value(loc) += 1;
+      }
+    }
+
+    values /= min_points_per_grid;
+    for (auto& x : values) {
+      x = std::min(x, 1.0f);
+    }
+  }
+
+  double calc_score(const std::vector<Eigen::Vector2f, Eigen::aligned_allocator<Eigen::Vector2f>>& points) const {
+    double sum_dists = 0.0;
+    for (const auto& pt : points) {
+      auto loc = grid_loc(pt);
+      loc.x() = std::max<int>(0, std::min<int>(values.cols - 1, loc.x()));
+      loc.y() = std::max<int>(0, std::min<int>(values.rows - 1, loc.y()));
+      sum_dists += value(loc);
+    }
+
+    return sum_dists;
+  }
+
+  OccupancyGridMap::Ptr pyramid_up() const {
+    cv::Mat1f small_map(values.rows / 2, values.cols / 2);
+    for (int i = 0; i < values.rows / 2; i++) {
+      for (int j = 0; j < values.cols / 2; j++) {
+        float x = values.at<float>(i * 2, j * 2);
+        x = std::max(x, values.at<float>(i * 2 + 1, j * 2));
+        x = std::max(x, values.at<float>(i * 2, j * 2 + 1));
+        x = std::max(x, values.at<float>(i * 2 + 1, j * 2 + 1));
+        small_map.at<float>(i, j) = x;
+      }
+    }
+
+    return std::make_shared<OccupancyGridMap>(resolution * 2.0, small_map);
+  }
+
+//  nav_msgs::OccupancyGridConstPtr to_rosmsg() const {
+//    nav_msgs::OccupancyGridPtr msg(new nav_msgs::OccupancyGrid);
+//    msg->header.frame_id = "map";
+//    msg->header.stamp = ros::Time(0);
+
+//    msg->data.resize(values.rows * values.cols);
+//    std::transform(values.begin(), values.end(), msg->data.begin(), [=](auto x) {
+//      double x_ = x * 100.0;
+//      return std::max(0.0, std::min(100.0, x_));
+//    });
+
+//    msg->info.map_load_time = ros::Time::now();
+//    msg->info.width = values.cols;
+//    msg->info.height = values.rows;
+//    msg->info.resolution = resolution;
+
+//    msg->info.origin.position.x = -resolution * values.cols / 2;
+//    msg->info.origin.position.y = -resolution * values.rows / 2;
+//    msg->info.origin.position.z = 0.0;
+
+//    return msg;
+//  }
+
+private:
+  bool in_map(const Eigen::Vector2i& pix) const {
+    bool left_bound = (pix.array() >= Eigen::Array2i::Zero()).all();
+    bool right_bound = (pix.array() < Eigen::Array2i(values.cols, values.rows)).all();
+    return left_bound && right_bound;
+  }
+
+  float value(const Eigen::Vector2i& loc) const { return values.at<float>(loc.y(), loc.x()); }
+  float& value(const Eigen::Vector2i& loc) { return values.at<float>(loc.y(), loc.x()); }
+
+  Eigen::Vector2i grid_loc(const Eigen::Vector2f& pt) const {
+    Eigen::Vector2i loc = (pt / resolution).array().floor().cast<int>();
+    Eigen::Vector2i offset = Eigen::Vector2i(values.cols / 2, values.rows / 2);
+    return loc + offset;
+  }
+
+private:
+  double resolution;
+  cv::Mat1f values;
+};
+
+}  // namespace hdl_global_localization
+
+#endif

+ 40 - 0
src/detection/detection_localization_global_hdl/include/hdl_global_localization/engines/global_localization_bbs.hpp

@@ -0,0 +1,40 @@
+#ifndef HDL_GLOBAL_LOCALIZATION_BBS_HPP
+#define HDL_GLOBAL_LOCALIZATION_BBS_HPP
+
+//#include <ros/ros.h>
+
+#include <hdl_global_localization/engines/global_localization_engine.hpp>
+
+namespace hdl_global_localization {
+
+class BBSLocalization;
+
+class GlobalLocalizationBBS : public GlobalLocalizationEngine {
+public:
+//  GlobalLocalizationBBS(ros::NodeHandle& private_nh);
+  GlobalLocalizationBBS();
+  virtual ~GlobalLocalizationBBS() override;
+
+  virtual void set_global_map(pcl::PointCloud<pcl::PointXYZ>::ConstPtr cloud) override;
+
+  virtual GlobalLocalizationResults query(pcl::PointCloud<pcl::PointXYZ>::ConstPtr cloud, int max_num_candidates) override;
+
+private:
+  using Points2D = std::vector<Eigen::Vector2f, Eigen::aligned_allocator<Eigen::Vector2f>>;
+  Points2D slice(const pcl::PointCloud<pcl::PointXYZ>& cloud, double min_z, double max_z) const;
+
+  pcl::PointCloud<pcl::PointXYZ>::Ptr unslice(const Points2D& points);
+
+protected:
+//  ros::NodeHandle& private_nh;
+
+//  ros::Publisher gridmap_pub;
+//  ros::Publisher map_slice_pub;
+//  ros::Publisher scan_slice_pub;
+
+  std::unique_ptr<BBSLocalization> bbs;
+};
+
+}  // namespace hdl_global_localization
+
+#endif

+ 22 - 0
src/detection/detection_localization_global_hdl/include/hdl_global_localization/engines/global_localization_engine.hpp

@@ -0,0 +1,22 @@
+#ifndef HDL_GLOBAL_LOCALIZATION_ENGINE_HPP
+#define HDL_GLOBAL_LOCALIZATION_ENGINE_HPP
+
+#include <pcl/point_types.h>
+#include <pcl/point_cloud.h>
+
+#include <hdl_global_localization/global_localization_results.hpp>
+
+namespace hdl_global_localization {
+
+class GlobalLocalizationEngine {
+public:
+  GlobalLocalizationEngine() {}
+  virtual ~GlobalLocalizationEngine() {}
+
+  virtual void set_global_map(pcl::PointCloud<pcl::PointXYZ>::ConstPtr cloud) = 0;
+  virtual GlobalLocalizationResults query(pcl::PointCloud<pcl::PointXYZ>::ConstPtr cloud, int max_num_candidates) = 0;
+};
+
+}
+
+#endif

+ 34 - 0
src/detection/detection_localization_global_hdl/include/hdl_global_localization/engines/global_localization_fpfh_ransac.hpp

@@ -0,0 +1,34 @@
+#ifndef HDL_GLOBAL_LOCALIZATION_FPFH_RANSAC_HPP
+#define HDL_GLOBAL_LOCALIZATION_FPFH_RANSAC_HPP
+
+//#include <ros/ros.h>
+
+#include <hdl_global_localization/engines/global_localization_engine.hpp>
+#include <hdl_global_localization/ransac/ransac_pose_estimation.hpp>
+
+namespace hdl_global_localization {
+
+class GlobalLocalizationEngineFPFH_RANSAC : public GlobalLocalizationEngine {
+public:
+//  GlobalLocalizationEngineFPFH_RANSAC(ros::NodeHandle& private_nh);
+  GlobalLocalizationEngineFPFH_RANSAC();
+  virtual ~GlobalLocalizationEngineFPFH_RANSAC() override;
+
+  virtual void set_global_map(pcl::PointCloud<pcl::PointXYZ>::ConstPtr cloud) override;
+  virtual GlobalLocalizationResults query(pcl::PointCloud<pcl::PointXYZ>::ConstPtr cloud, int max_num_candidates) override;
+
+protected:
+  pcl::PointCloud<pcl::FPFHSignature33>::ConstPtr extract_fpfh(pcl::PointCloud<pcl::PointXYZ>::ConstPtr cloud);
+
+protected:
+//  ros::NodeHandle& private_nh;
+
+  std::unique_ptr<RansacPoseEstimation<pcl::FPFHSignature33>> ransac;
+
+  pcl::PointCloud<pcl::PointXYZ>::ConstPtr global_map;
+  pcl::PointCloud<pcl::FPFHSignature33>::ConstPtr global_map_features;
+};
+
+}  // namespace hdl_global_localization
+
+#endif

+ 31 - 0
src/detection/detection_localization_global_hdl/include/hdl_global_localization/engines/global_localization_fpfh_teaser.hpp

@@ -0,0 +1,31 @@
+/**
+ * @brief Global pose estimation with FPFH features and Teaser++
+ * @ref Yang et al., "TEASER: Fast and Certifiable Point Cloud Registration", T-RO, 2020
+ * @ref https://github.com/MIT-SPARK/TEASER-plusplus
+ */
+#ifndef HDL_GLOBAL_LOCALIZATION_FPFH_TEASER_HPP
+#define HDL_GLOBAL_LOCALIZATION_FPFH_TEASER_HPP
+
+#include <hdl_global_localization/ransac/matching_cost_evaluater.hpp>
+#include <hdl_global_localization/engines/global_localization_fpfh_ransac.hpp>
+
+namespace hdl_global_localization {
+
+class GlobalLocalizationEngineFPFH_Teaser : public GlobalLocalizationEngineFPFH_RANSAC {
+public:
+//  GlobalLocalizationEngineFPFH_Teaser(ros::NodeHandle& private_nh);
+  GlobalLocalizationEngineFPFH_Teaser();
+  virtual ~GlobalLocalizationEngineFPFH_Teaser() override;
+
+  virtual void set_global_map(pcl::PointCloud<pcl::PointXYZ>::ConstPtr cloud) override;
+  virtual GlobalLocalizationResults query(pcl::PointCloud<pcl::PointXYZ>::ConstPtr cloud, int max_num_candidates) override;
+
+private:
+//  using GlobalLocalizationEngineFPFH_RANSAC::private_nh;
+
+  std::unique_ptr<MatchingCostEvaluater> evaluater;
+};
+
+}  // namespace hdl_global_localization
+
+#endif

+ 44 - 0
src/detection/detection_localization_global_hdl/include/hdl_global_localization/global_localization_results.hpp

@@ -0,0 +1,44 @@
+#ifndef HDL_GLOBAL_LOCALIZATION_RESULTS_HPP
+#define HDL_GLOBAL_LOCALIZATION_RESULTS_HPP
+
+#include <vector>
+#include <memory>
+#include <Eigen/Core>
+#include <Eigen/Geometry>
+
+namespace hdl_global_localization {
+
+struct GlobalLocalizationResult {
+  EIGEN_MAKE_ALIGNED_OPERATOR_NEW
+  using Ptr = std::shared_ptr<GlobalLocalizationResult>;
+
+  GlobalLocalizationResult(double error, double inlier_fraction, const Eigen::Isometry3f& pose) : error(error), inlier_fraction(inlier_fraction), pose(pose) {}
+
+  double error;
+  double inlier_fraction;
+  Eigen::Isometry3f pose;
+};
+
+struct GlobalLocalizationResults {
+  GlobalLocalizationResults(const std::vector<GlobalLocalizationResult::Ptr>& results) : results(results) {}
+
+  GlobalLocalizationResults& sort(int max_num_candidates) {
+    auto remove_loc = std::remove_if(results.begin(), results.end(), [](const auto& result) { return result == nullptr; });
+    results.erase(remove_loc, results.end());
+
+    std::cout << "valid solutions:" << results.size() << std::endl;
+
+    // std::sort(results.begin(), results.end(), [](const auto& lhs, const auto& rhs) { return lhs->error < rhs->error; });
+    std::sort(results.begin(), results.end(), [](const auto& lhs, const auto& rhs) { return lhs->inlier_fraction > rhs->inlier_fraction; });
+    if (results.size() > max_num_candidates) {
+      results.erase(results.begin() + max_num_candidates, results.end());
+    }
+
+    return *this;
+  }
+
+  std::vector<GlobalLocalizationResult::Ptr> results;
+};
+}  // namespace hdl_global_localization
+
+#endif

+ 20 - 0
src/detection/detection_localization_global_hdl/include/hdl_global_localization/ransac/matching_cost_evaluater.hpp

@@ -0,0 +1,20 @@
+#ifndef HDL_GLOBAL_LOCALIZATION_MATCHING_COST_EVALUATER_HPP
+#define HDL_GLOBAL_LOCALIZATION_MATCHING_COST_EVALUATER_HPP
+
+#include <pcl/point_cloud.h>
+#include <pcl/point_types.h>
+
+namespace hdl_global_localization {
+
+class MatchingCostEvaluater {
+public:
+  MatchingCostEvaluater() {}
+  virtual ~MatchingCostEvaluater() {}
+
+  virtual void set_target(pcl::PointCloud<pcl::PointXYZ>::ConstPtr target, double max_correspondence_distance) = 0;
+  virtual double calc_matching_error(const pcl::PointCloud<pcl::PointXYZ>& cloud, const Eigen::Matrix4f& transformation, double* inlier_fraction) = 0;
+};
+
+}  // namespace hdl_global_localization
+
+#endif

+ 54 - 0
src/detection/detection_localization_global_hdl/include/hdl_global_localization/ransac/matching_cost_evaluater_flann.hpp

@@ -0,0 +1,54 @@
+#ifndef HDL_GLOBAL_LOCALIZATION_MATCHING_COST_EVALUATER_FLANN_HPP
+#define HDL_GLOBAL_LOCALIZATION_MATCHING_COST_EVALUATER_FLANN_HPP
+
+#include <pcl/point_cloud.h>
+#include <pcl/point_types.h>
+#include <pcl/search/kdtree.h>
+
+#include <hdl_global_localization/ransac/matching_cost_evaluater.hpp>
+
+namespace hdl_global_localization {
+
+class MatchingCostEvaluaterFlann : public MatchingCostEvaluater {
+public:
+  MatchingCostEvaluaterFlann() {}
+  virtual ~MatchingCostEvaluaterFlann() override {}
+
+  virtual void set_target(pcl::PointCloud<pcl::PointXYZ>::ConstPtr cloud, double max_correspondence_distance) override {
+    max_correspondence_distance_sq = max_correspondence_distance * max_correspondence_distance;
+
+    tree.reset(new pcl::KdTreeFLANN<pcl::PointXYZ>);
+    tree->setInputCloud(cloud);
+  }
+
+  virtual double calc_matching_error(const pcl::PointCloud<pcl::PointXYZ>& cloud, const Eigen::Matrix4f& transformation, double* inlier_fraction) override {
+    int num_inliers = 0;
+    double matching_error = 0.0;
+
+    pcl::PointCloud<pcl::PointXYZ> transformed;
+    pcl::transformPointCloud(cloud, transformed, transformation);
+
+    std::vector<int> indices;
+    std::vector<float> sq_dists;
+    for (int i = 0; i < transformed.size(); i++) {
+      tree->nearestKSearch(transformed[i], 1, indices, sq_dists);
+      if (sq_dists[0] < max_correspondence_distance_sq) {
+        num_inliers++;
+        matching_error += sq_dists[0];
+      }
+    }
+
+    if (inlier_fraction) {
+      *inlier_fraction = static_cast<double>(num_inliers) / cloud.size();
+    }
+    return num_inliers ? matching_error / num_inliers : std::numeric_limits<double>::max();
+  }
+
+private:
+  double max_correspondence_distance_sq;
+  pcl::KdTreeFLANN<pcl::PointXYZ>::Ptr tree;
+};
+
+}  // namespace hdl_global_localization
+
+#endif

+ 43 - 0
src/detection/detection_localization_global_hdl/include/hdl_global_localization/ransac/matching_cost_evaluater_voxels.hpp

@@ -0,0 +1,43 @@
+#ifndef HDL_GLOBAL_LOCALIZATION_MATCHING_COST_EVALUATER_VOXELS_HPP
+#define HDL_GLOBAL_LOCALIZATION_MATCHING_COST_EVALUATER_VOXELS_HPP
+
+#include <pcl/point_cloud.h>
+#include <pcl/point_types.h>
+#include <pcl/search/kdtree.h>
+#include <pcl/common/transforms.h>
+
+#include <hdl_global_localization/ransac/voxelset.hpp>
+#include <hdl_global_localization/ransac/matching_cost_evaluater.hpp>
+
+namespace hdl_global_localization {
+
+class MatchingCostEvaluaterVoxels : public MatchingCostEvaluater {
+public:
+  MatchingCostEvaluaterVoxels() {}
+  virtual ~MatchingCostEvaluaterVoxels() override {}
+
+  virtual void set_target(pcl::PointCloud<pcl::PointXYZ>::ConstPtr cloud, double max_correspondence_distance) override {
+    max_correspondence_distance_sq = max_correspondence_distance * max_correspondence_distance;
+
+    voxels.reset(new VoxelSet(max_correspondence_distance));
+    voxels->set_cloud(cloud);
+  }
+
+  virtual double calc_matching_error(const pcl::PointCloud<pcl::PointXYZ>& cloud, const Eigen::Matrix4f& transformation, double* inlier_fraction) override {
+    int num_inliers = 0;
+    double matching_error = 0.0;
+
+    pcl::PointCloud<pcl::PointXYZ> transformed;
+    pcl::transformPointCloud(cloud, transformed, transformation);
+
+    return voxels->matching_error(transformed, inlier_fraction);
+  }
+
+private:
+  double max_correspondence_distance_sq;
+  std::unique_ptr<VoxelSet> voxels;
+};
+
+}  // namespace hdl_global_localization
+
+#endif

+ 89 - 0
src/detection/detection_localization_global_hdl/include/hdl_global_localization/ransac/ransac_pose_estimation.hpp

@@ -0,0 +1,89 @@
+/*
+ * This RansacPoseEstimation implementation was written based on pcl::SampleConsensusPrerejective
+ *
+ * Software License Agreement (BSD License)
+ *
+ *  Point Cloud Library (PCL) - www.pointclouds.org
+ *  Copyright (c) 2010-2012, Willow Garage, Inc.
+ *  Copyright (c) 2012-, Open Perception, Inc.
+ *
+ *  All rights reserved.
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions
+ *  are met:
+ *
+ *   * Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above
+ *     copyright notice, this list of conditions and the following
+ *     disclaimer in the documentation and/or other materials provided
+ *     with the distribution.
+ *   * Neither the name of the copyright holder(s) nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ *  FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ *  COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ *  BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ *  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ *  CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ *  LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ *  ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ *  POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $Id$
+ *
+ */
+
+#ifndef HDL_GLOBAL_LOCALIZATION_RANSAC_POSE_ESTIMATION_HPP
+#define HDL_GLOBAL_LOCALIZATION_RANSAC_POSE_ESTIMATION_HPP
+
+#include <atomic>
+#include <vector>
+#include <random>
+//#include <ros/ros.h>
+#include <pcl/point_types.h>
+#include <pcl/point_cloud.h>
+#include <pcl/search/kdtree.h>
+
+#include <hdl_global_localization/global_localization_results.hpp>
+#include <hdl_global_localization/ransac/voxelset.hpp>
+#include <hdl_global_localization/ransac/matching_cost_evaluater.hpp>
+
+namespace hdl_global_localization {
+
+template <typename FeatureT>
+class RansacPoseEstimation {
+public:
+//  RansacPoseEstimation(ros::NodeHandle& private_nh);
+  RansacPoseEstimation();
+
+  void set_target(pcl::PointCloud<pcl::PointXYZ>::ConstPtr target, typename pcl::PointCloud<FeatureT>::ConstPtr target_features);
+  void set_source(pcl::PointCloud<pcl::PointXYZ>::ConstPtr source, typename pcl::PointCloud<FeatureT>::ConstPtr source_features);
+
+  GlobalLocalizationResults estimate();
+
+private:
+  void select_samples(std::mt19937& mt, const std::vector<std::vector<int>>& similar_features, std::vector<int>& samples, std::vector<int>& correspondences) const;
+
+private:
+//  ros::NodeHandle& private_nh;
+
+  pcl::PointCloud<pcl::PointXYZ>::ConstPtr target;
+  typename pcl::PointCloud<FeatureT>::ConstPtr target_features;
+
+  pcl::PointCloud<pcl::PointXYZ>::ConstPtr source;
+  typename pcl::PointCloud<FeatureT>::ConstPtr source_features;
+
+  typename pcl::KdTreeFLANN<FeatureT>::Ptr feature_tree;
+  std::unique_ptr<MatchingCostEvaluater> evaluater;
+};
+
+}  // namespace hdl_global_localization
+
+#endif

+ 36 - 0
src/detection/detection_localization_global_hdl/include/hdl_global_localization/ransac/voxelset.hpp

@@ -0,0 +1,36 @@
+#ifndef HDL_GLOBAL_LOCALIZATION_VOXELSET_HPP
+#define HDL_GLOBAL_LOCALIZATION_VOXELSET_HPP
+
+#include <unordered_set>
+
+#include <pcl/point_types.h>
+#include <pcl/point_cloud.h>
+
+namespace hdl_global_localization {
+
+class Vector3iHash {
+public:
+  size_t operator()(const Eigen::Vector3i& x) const;
+};
+
+class VoxelSet {
+public:
+  VoxelSet(double max_correspondence_distance);
+
+  void set_cloud(pcl::PointCloud<pcl::PointXYZ>::ConstPtr cloud);
+  double matching_error(const pcl::PointCloud<pcl::PointXYZ>& cloud, double* inlier_fraction) const;
+
+private:
+  Eigen::Vector3i voxel_coord(const Eigen::Vector4f& x) const;
+  Eigen::Vector4f voxel_center(const Eigen::Vector3i& coord) const;
+
+private:
+  double resolution;
+
+  using Voxels = std::unordered_set<Eigen::Vector3i, Vector3iHash, std::equal_to<Eigen::Vector3i>, Eigen::aligned_allocator<Eigen::Vector3i>>;
+  Voxels voxels;
+};  // namespace hdl_global_localization
+
+}  // namespace hdl_global_localization
+
+#endif

+ 32 - 0
src/detection/detection_localization_global_hdl/include/pmc/pmc.h

@@ -0,0 +1,32 @@
+/**
+ ============================================================================
+ Name        : Parallel Maximum Clique (PMC) Library
+ Author      : Ryan A. Rossi   (rrossi@purdue.edu)
+ Description : A general high-performance parallel framework for computing
+               maximum cliques. The library is designed to be fast for large
+               sparse graphs.
+
+ Copyright (C) 2012-2013, Ryan A. Rossi, All rights reserved.
+
+ Please cite the following paper if used:
+   Ryan A. Rossi, David F. Gleich, Assefaw H. Gebremedhin, Md. Mostofa
+   Patwary, A Fast Parallel Maximum Clique Algorithm for Large Sparse Graphs
+   and Temporal Strong Components, arXiv preprint 1302.6256, 2013.
+
+ See http://ryanrossi.com/pmc for more information.
+ ============================================================================
+ */
+
+#ifndef __PMC_H__
+#define __PMC_H__
+
+#include "pmc_headers.h"
+#include "pmc_input.h"
+#include "pmc_utils.h"
+
+#include "pmc_heu.h"
+#include "pmc_maxclique.h"
+#include "pmcx_maxclique.h"
+#include "pmcx_maxclique_basic.h"
+
+#endif

+ 152 - 0
src/detection/detection_localization_global_hdl/include/pmc/pmc_graph.h

@@ -0,0 +1,152 @@
+/**
+ ============================================================================
+ Name        : Parallel Maximum Clique (PMC) Library
+ Author      : Ryan A. Rossi   (rrossi@purdue.edu)
+ Description : A general high-performance parallel framework for computing
+               maximum cliques. The library is designed to be fast for large
+               sparse graphs.
+
+ Copyright (C) 2012-2013, Ryan A. Rossi, All rights reserved.
+
+ Please cite the following paper if used:
+   Ryan A. Rossi, David F. Gleich, Assefaw H. Gebremedhin, Md. Mostofa
+   Patwary, A Fast Parallel Maximum Clique Algorithm for Large Sparse Graphs
+   and Temporal Strong Components, arXiv preprint 1302.6256, 2013.
+
+ See http://ryanrossi.com/pmc for more information.
+ ============================================================================
+ */
+
+#ifndef PMC_GRAPH_H_
+#define PMC_GRAPH_H_
+
+#include <float.h>
+#include <cstddef>
+#include <sys/time.h>
+#include <unistd.h>
+#include <iostream>
+#include <limits>
+#include "math.h"
+#include "pmc_headers.h"
+#include "pmc_utils.h"
+#include "pmc_vertex.h"
+
+
+namespace pmc {
+    class pmc_graph {
+        private:
+            // helper functions
+            void read_mtx(const string& filename);
+            void read_edges(const string& filename);
+            void read_metis(const string& filename);
+
+        public:
+            vector<int> edges;
+            vector<long long> vertices;
+            vector<int> degree;
+            int min_degree;
+            int max_degree;
+            double avg_degree;
+            bool is_gstats;
+            string fn;
+            vector<vector<bool>> adj;
+
+            // constructor
+            pmc_graph(const string& filename);
+            pmc_graph(bool graph_stats, const string& filename);
+            pmc_graph(const string& filename, bool make_adj);
+            pmc_graph(vector<long long> vs, vector<int> es) {
+                edges = std::move(es);
+                vertices = std::move(vs);
+                vertex_degrees();
+            }
+            pmc_graph(long long nedges, const int *ei, const int *ej, int offset);
+            pmc_graph(map<int,vector<int> > v_map);
+                
+            // destructor
+            ~pmc_graph();
+
+            void read_graph(const string& filename);
+            void create_adj();
+            void reduce_graph(int* &pruned);
+            void reduce_graph(
+                    vector<long long>& vs,
+                    vector<int>& es,
+                    int* &pruned,
+                    int id,
+                    int& mc);
+
+            int num_vertices() { return vertices.size() - 1; }
+            int num_edges() { return edges.size()/2; }
+            vector <long long>* get_vertices(){ return &vertices; }
+            vector<int>* get_edges(){ return &edges; }
+            vector<int>* get_degree(){ return &degree; }
+            vector<int> get_edges_array() { return edges; }
+            vector<long long> get_vertices_array() { return vertices; };
+            vector<long long> e_v, e_u, eid;
+
+            int vertex_degree(int v) { return vertices[v] - vertices[v+1]; }
+            long long first_neigh(int v) { return vertices[v]; }
+            long long last_neigh(int v) { return vertices[v+1]; }
+
+            void sum_vertex_degrees();
+            void vertex_degrees();
+            void update_degrees();
+            void update_degrees(bool flag);
+            void update_degrees(int* &pruned, int& mc);
+            double density() { return (double)num_edges() / (num_vertices() * (num_vertices() - 1.0) / 2.0); }
+            int get_max_degree() { return max_degree; }
+            int get_min_degree() { return min_degree; }
+            double get_avg_degree() { return avg_degree; }
+
+            void initialize();
+            string get_file_extension(const string& filename);
+            void basic_stats(double sec);
+            void bound_stats(int alg, int lb, pmc_graph& G);
+
+            // vertex sorter
+            void compute_ordering(vector<int>& bound, vector<int>& order);
+            void compute_ordering(string degree, vector<int>& order);
+            // edge sorters
+            void degree_bucket_sort();
+            void degree_bucket_sort(bool desc);
+
+            int max_core;
+            vector<int> kcore;
+            vector<int> kcore_order;
+            vector<int>* get_kcores() { return &kcore; }
+            vector<int>* get_kcore_ordering() { return &kcore_order; }
+            int get_max_core() { return max_core; }
+            void update_kcores(int* &pruned);
+
+            void compute_cores();
+            void induced_cores_ordering(
+                    vector<long long>& V,
+                    vector<int>& E,
+                    int* &pruned);
+
+            // clique utils
+            int initial_pruning(pmc_graph& G, int* &pruned, int lb);
+            int initial_pruning(pmc_graph& G, int* &pruned, int lb, vector<vector<bool>> &adj);
+            void order_vertices(vector<Vertex> &V, pmc_graph &G,
+                    int &lb_idx, int &lb, string vertex_ordering, bool decr_order);
+
+            void print_info(vector<int> &C_max, double &sec);
+            void print_break();
+            bool time_left(vector<int> &C_max, double sec,
+                    double time_limit, bool &time_expired_msg);
+            void graph_stats(pmc_graph& G, int& mc, int id, double &sec);
+
+            void reduce_graph(
+                    vector<long long>& vs,
+                    vector<int>& es,
+                    int* &pruned,
+                    pmc_graph& G,
+                    int id,
+                    int& mc);
+
+            bool clique_test(pmc_graph& G, vector<int> C);
+    };
+
+}
+#endif

+ 41 - 0
src/detection/detection_localization_global_hdl/include/pmc/pmc_headers.h

@@ -0,0 +1,41 @@
+/**
+ ============================================================================
+ Name        : Parallel Maximum Clique (PMC) Library
+ Author      : Ryan A. Rossi   (rrossi@purdue.edu)
+ Description : A general high-performance parallel framework for computing
+               maximum cliques. The library is designed to be fast for large
+               sparse graphs.
+
+ Copyright (C) 2012-2013, Ryan A. Rossi, All rights reserved.
+
+ Please cite the following paper if used:
+   Ryan A. Rossi, David F. Gleich, Assefaw H. Gebremedhin, Md. Mostofa
+   Patwary, A Fast Parallel Maximum Clique Algorithm for Large Sparse Graphs
+   and Temporal Strong Components, arXiv preprint 1302.6256, 2013.
+
+ See http://ryanrossi.com/pmc for more information.
+ ============================================================================
+ */
+
+#ifndef PMC_HEADERS_H_
+#define PMC_HEADERS_H_
+
+#include <iostream>
+#include <stdlib.h>
+#include <vector>
+#include <time.h>
+#include <string.h>
+#include <stdio.h>
+#include <map>
+#include <fstream>
+#include <sstream>
+#include <stdint.h>
+#include <omp.h>
+using namespace std;
+
+#ifndef LINE_LENGTH
+#define LINE_LENGTH 256
+#endif
+#define NANOSECOND 1000000000
+
+#endif

+ 86 - 0
src/detection/detection_localization_global_hdl/include/pmc/pmc_heu.h

@@ -0,0 +1,86 @@
+/**
+ ============================================================================
+ Name        : Parallel Maximum Clique (PMC) Library
+ Author      : Ryan A. Rossi   (rrossi@purdue.edu)
+ Description : A general high-performance parallel framework for computing
+               maximum cliques. The library is designed to be fast for large
+               sparse graphs.
+
+ Copyright (C) 2012-2013, Ryan A. Rossi, All rights reserved.
+
+ Please cite the following paper if used:
+   Ryan A. Rossi, David F. Gleich, Assefaw H. Gebremedhin, Md. Mostofa
+   Patwary, A Fast Parallel Maximum Clique Algorithm for Large Sparse Graphs
+   and Temporal Strong Components, arXiv preprint 1302.6256, 2013.
+
+ See http://ryanrossi.com/pmc for more information.
+ ============================================================================
+ */
+
+#ifndef PMC_HEU_H_
+#define PMC_HEU_H_
+
+#include "pmc_headers.h"
+#include "pmc_graph.h"
+#include "pmc_utils.h"
+#include "pmc_input.h"
+#include "pmc_vertex.h"
+#include <algorithm>
+
+namespace pmc {
+
+    class pmc_heu {
+        public:
+            vector<int>* E;
+            vector<long long>* V;
+            vector<int>* K;
+            vector<int>* order;
+            vector<int>* degree;
+            double sec;
+            int ub;
+            string strat;
+
+            pmc_heu(pmc_graph& G, input& params) {
+                K = G.get_kcores();
+                order = G.get_kcore_ordering();
+                ub = params.ub;
+                strat = params.heu_strat;
+                initialize();
+            }
+
+            pmc_heu(pmc_graph& G, int tmp_ub) {
+                K = G.get_kcores();
+                order = G.get_kcore_ordering();
+                ub = tmp_ub;
+                strat = "kcore";
+                initialize();
+            }
+
+            inline void initialize() {
+                sec = get_time();
+                srand (time(NULL));
+            };
+
+            int strategy(vector<int>& P);
+            void set_strategy(string s) { strat = s; }
+            int compute_heuristic(int v);
+
+            static bool desc_heur(Vertex v,  Vertex u) {
+                return (v.get_bound() > u.get_bound());
+            }
+
+            static bool incr_heur(Vertex v,  Vertex u) {
+                return (v.get_bound() < u.get_bound());
+            }
+
+            int search(pmc_graph& graph, vector<int>& C_max);
+            int search_cores(pmc_graph& graph, vector<int>& C_max, int lb);
+            int search_bounds(pmc_graph& graph, vector<int>& C_max);
+
+            inline void branch(vector<Vertex>& P, int sz,
+                    int& mc, vector<int>& C, vector<short>& ind);
+
+            inline void print_info(vector<int> C_max);
+    };
+};
+#endif

+ 193 - 0
src/detection/detection_localization_global_hdl/include/pmc/pmc_input.h

@@ -0,0 +1,193 @@
+/**
+ ============================================================================
+ Name        : Parallel Maximum Clique (PMC) Library
+ Author      : Ryan A. Rossi   (rrossi@purdue.edu)
+ Description : A general high-performance parallel framework for computing
+               maximum cliques. The library is designed to be fast for large
+               sparse graphs.
+
+ Copyright (C) 2012-2013, Ryan A. Rossi, All rights reserved.
+
+ Please cite the following paper if used:
+   Ryan A. Rossi, David F. Gleich, Assefaw H. Gebremedhin, Md. Mostofa
+   Patwary, A Fast Parallel Maximum Clique Algorithm for Large Sparse Graphs
+   and Temporal Strong Components, arXiv preprint 1302.6256, 2013.
+
+ See http://ryanrossi.com/pmc for more information.
+ ============================================================================
+ */
+
+#ifndef PMC_INPUT_H_
+#define PMC_INPUT_H_
+
+#include "pmc_headers.h"
+#include "pmc_utils.h"
+
+using namespace std;
+
+namespace pmc {
+class input {
+    public:
+        // instance variables
+        int algorithm;
+        int threads;
+        int experiment;
+        int lb;
+        int ub;
+        int param_ub;
+        int adj_limit;
+        double time_limit;
+        double remove_time;
+        bool graph_stats;
+        bool verbose;
+        bool help;
+        bool MCE;
+        bool decreasing_order;
+        string heu_strat;
+        string format;
+        string graph;
+        string output;
+        string edge_sorter;
+        string vertex_search_order;
+
+        input() {
+            // default values
+            algorithm = 0;
+            threads = omp_get_max_threads();
+            experiment = 0;
+            lb = 0;
+            ub = 0;
+            param_ub = 0;
+            adj_limit = 20000;
+            time_limit = 60 * 60; 			// max time to search
+            remove_time = 4.0; 				// time to wait before reducing graph
+            verbose = false;
+            graph_stats = false;
+            help = false;
+            MCE = false;
+            decreasing_order = false;
+            heu_strat = "kcore";
+            vertex_search_order = "deg";
+            format = "mtx";
+            graph = "data/sample.mtx";
+            output = "";
+            string edge_sorter = "";
+
+            // both off, use default alg
+            if (heu_strat == "0" && algorithm == -1)
+                algorithm = 0;
+
+            if (threads <= 0) threads = 1;
+        }
+
+        input(int argc, char *argv[]) {
+            // default values
+            algorithm = 0;
+            threads = omp_get_max_threads();
+            experiment = 0;
+            lb = 0;
+            ub = 0;
+            param_ub = 0;
+            adj_limit = 20000;
+            time_limit = 60 * 60; 			// max time to search
+            remove_time = 4.0; 				// time to wait before reducing graph
+            verbose = false;
+            graph_stats = false;
+            help = false;
+            MCE = false;
+            decreasing_order = false;
+            heu_strat = "kcore";
+            vertex_search_order = "deg";
+            format = "mtx";
+            graph = "data/sample.mtx";
+            output = "";
+            string edge_sorter = "";
+
+            int opt;
+            while ((opt=getopt(argc,argv,"i:t:f:u:l:o:e:a:r:w:h:k:dgsv")) != EOF) {
+                switch (opt) {
+                    case 'a':
+                        algorithm = atoi(optarg);
+                        if (algorithm > 9) MCE = true;
+                        break;
+                    case 't':
+                        threads = atoi(optarg);
+                        break;
+                    case 'f':
+                        graph = optarg;
+                        break;
+                    case 's':
+                        graph_stats = true;
+                        break;
+                    case 'u':
+                        param_ub = atoi(optarg); // find k-clique fast
+                        lb = 2;                  // skip heuristic
+                        break;
+                    case 'k':
+                        param_ub = atoi(optarg);
+                        lb = param_ub-1;
+                        break;
+                    case 'l':
+                        lb = atoi(optarg);
+                        break;
+                    case 'h':
+                        heu_strat = optarg;
+                        break;
+                    case 'v':
+                        verbose = 1;
+                        break;
+                    case 'w':
+                        time_limit = atof(optarg) * 60;  // convert minutes to seconds
+                        break;
+                    case 'r':
+                        remove_time = atof(optarg);
+                        break;
+                    case 'e':
+                        edge_sorter = optarg;
+                        break;
+                    case 'o':
+                        vertex_search_order = optarg;
+                        break;
+                    case 'd':
+                        // direction of which vertices are ordered
+                        decreasing_order = true;
+                        break;
+                    case '?':
+                        usage(argv[0]);
+                        break;
+                    default:
+                        usage(argv[0]);
+                        break;
+                }
+            }
+
+            // both off, use default alg
+            if (heu_strat == "0" && algorithm == -1)
+                algorithm = 0;
+
+            if (threads <= 0) threads = 1;
+
+            if (!fexists(graph.c_str())) {
+                usage(argv[0]);
+                exit(-1);
+            }
+
+            FILE* fin = fopen(graph.c_str(), "r+t");
+            if (fin == NULL) {
+                usage(argv[0]);
+                exit(-1);
+            }
+            fclose(fin);
+
+            cout << "\n\nFile Name ------------------------ " << graph.c_str() << endl;
+            if (!fexists(graph.c_str()) ) {
+                cout << "File not found!" << endl;
+                return;
+            }
+            cout << "workers: " << threads <<endl;
+            omp_set_num_threads(threads);
+        }
+
+};
+} // namespace pmc
+#endif

+ 122 - 0
src/detection/detection_localization_global_hdl/include/pmc/pmc_maxclique.h

@@ -0,0 +1,122 @@
+/**
+ ============================================================================
+ Name        : Parallel Maximum Clique (PMC) Library
+ Author      : Ryan A. Rossi   (rrossi@purdue.edu)
+ Description : A general high-performance parallel framework for computing
+               maximum cliques. The library is designed to be fast for large
+               sparse graphs.
+
+ Copyright (C) 2012-2013, Ryan A. Rossi, All rights reserved.
+
+ Please cite the following paper if used:
+   Ryan A. Rossi, David F. Gleich, Assefaw H. Gebremedhin, Md. Mostofa
+   Patwary, A Fast Parallel Maximum Clique Algorithm for Large Sparse Graphs
+   and Temporal Strong Components, arXiv preprint 1302.6256, 2013.
+
+ See http://ryanrossi.com/pmc for more information.
+ ============================================================================
+ */
+
+#ifndef PMC_MAXCLIQUE_H_
+#define PMC_MAXCLIQUE_H_
+
+#include <cstddef>
+#include <sys/time.h>
+#include <unistd.h>
+#include <iostream>
+#include <algorithm>
+#include "pmc_headers.h"
+#include "pmc_utils.h"
+#include "pmc_graph.h"
+#include "pmc_input.h"
+#include "pmc_vertex.h"
+
+using namespace std;
+
+namespace pmc {
+
+    class pmc_maxclique {
+        public:
+            vector<int>* edges;
+            vector<long long>* vertices;
+            vector<int>* bound;
+            vector<int>* order;
+            vector<int>* degree;
+            int param_ub;
+            int ub;
+            int lb;
+            double time_limit;
+            double sec;
+            double wait_time;
+            bool not_reached_ub;
+            bool time_expired_msg;
+            bool decr_order;
+
+            string vertex_ordering;
+            int edge_ordering;
+            int style_bounds;
+            int style_dynamic_bounds;
+
+            int num_threads;
+
+            void initialize() {
+                vertex_ordering = "kcore";
+                edge_ordering = 0;
+                style_bounds = 0;
+                style_dynamic_bounds = 0;
+                not_reached_ub = true;
+                time_expired_msg = true;
+                decr_order = false;
+            }
+
+            void setup_bounds(input& params) {
+                lb = params.lb;
+                ub = params.ub;
+                param_ub = params.param_ub;
+                if (param_ub == 0)
+                    param_ub = ub;
+                time_limit = params.time_limit;
+                wait_time = params.remove_time;
+                sec = get_time();
+
+                num_threads = params.threads;
+            }
+
+
+            pmc_maxclique(pmc_graph& G, input& params) {
+                bound = G.get_kcores();
+                order = G.get_kcore_ordering();
+                setup_bounds(params);
+                initialize();
+                vertex_ordering = params.vertex_search_order;
+                decr_order = params.decreasing_order;
+            }
+
+            ~pmc_maxclique() {};
+
+            int search(pmc_graph& G, vector<int>& sol);
+
+            void branch(
+                    vector<Vertex> &P,
+                    vector<short>& ind,
+                    vector<int>& C,
+                    vector<int>& C_max,
+                    int* &pruned,
+                    int& mc);
+
+
+            int search_dense(pmc_graph& G, vector<int>& sol);
+
+            void branch_dense(
+                    vector<Vertex> &P,
+                    vector<short>& ind,
+                    vector<int>& C,
+                    vector<int>& C_max,
+                    int* &pruned,
+                    int& mc,
+                    vector<vector<bool>> &adj);
+
+    };
+};
+
+#endif

+ 142 - 0
src/detection/detection_localization_global_hdl/include/pmc/pmc_neigh_coloring.h

@@ -0,0 +1,142 @@
+/**
+ ============================================================================
+ Name        : Parallel Maximum Clique (PMC) Library
+ Author      : Ryan A. Rossi   (rrossi@purdue.edu)
+ Description : A general high-performance parallel framework for computing
+               maximum cliques. The library is designed to be fast for large
+               sparse graphs.
+
+ Copyright (C) 2012-2013, Ryan A. Rossi, All rights reserved.
+
+ Please cite the following paper if used:
+   Ryan A. Rossi, David F. Gleich, Assefaw H. Gebremedhin, Md. Mostofa
+   Patwary, A Fast Parallel Maximum Clique Algorithm for Large Sparse Graphs
+   and Temporal Strong Components, arXiv preprint 1302.6256, 2013.
+
+ See http://ryanrossi.com/pmc for more information.
+ ============================================================================
+ */
+
+#ifndef PMC_NEIGH_COLORING_H_
+#define PMC_NEIGH_COLORING_H_
+
+#include "pmc_vertex.h"
+
+using namespace std;
+
+namespace pmc {
+
+    // sequential dynamic greedy coloring and sort
+    static void neigh_coloring_bound(
+            vector<long long>& vs,
+            vector<int>& es,
+            vector<Vertex> &P,
+            vector<short>& ind,
+            vector<int>& C,
+            vector<int>& C_max,
+            vector< vector<int> >& colors,
+            int* pruned,
+            int& mc) {
+
+        int j = 0, u = 0, k = 1, k_prev = 0;
+        int max_k = 1;
+        int min_k = mc - C.size() + 1;
+        colors[1].clear();   colors[2].clear();
+
+        for (int w=0; w < P.size(); w++) {
+            u = P[w].get_id();
+            k = 1, k_prev = 0;
+
+            for (long long h = vs[u]; h < vs[u + 1]; h++)  ind[es[h]] = 1;
+
+            while (k > k_prev) {
+                k_prev = k;
+                for (int i = 0; i < colors[k].size(); i++) {
+                    if (ind[colors[k][i]]) {
+                        k++;
+                        break;
+                    }
+                }
+            }
+
+            for (long long h = vs[u]; h < vs[u + 1]; h++)  ind[es[h]] = 0;
+
+            if (k > max_k) {
+                max_k = k;
+                colors[max_k+1].clear();
+            }
+
+            colors[k].push_back(u);
+            if (k < min_k) {
+                P[j].set_id(u);
+                j++;
+            }
+        }
+
+        if (j > 0)  P[j-1].set_bound(0);
+        if (min_k <= 0)  min_k = 1;
+
+        for (k = min_k; k <= max_k; k++)
+            for (int w = 0; w < colors[k].size(); w++) {
+                P[j].set_id(colors[k][w]);
+                P[j].set_bound(k);
+                j++;
+            }
+    }
+
+    // sequential dynamic greedy coloring and sort
+    static void neigh_coloring_dense(
+            vector<long long>& vs,
+            vector<int>& es,
+            vector<Vertex> &P,
+            vector<short>& ind,
+            vector<int>& C,
+            vector<int>& C_max,
+            vector< vector<int> >& colors,
+            int& mc,
+            vector<vector<bool>> &adj) {
+
+        int j = 0, u = 0, k = 1, k_prev = 0;
+        int max_k = 1;
+        int min_k = mc - C.size() + 1;
+
+        colors[1].clear();   colors[2].clear();
+
+        for (int w=0; w < P.size(); w++) {
+            u = P[w].get_id();
+            k = 1, k_prev = 0;
+
+            while (k > k_prev) {
+                k_prev = k;
+                for (int i = 0; i < colors[k].size(); i++) { //use directly, sort makes it fast!
+                    if (adj[u][colors[k][i]]) {
+                        k++;
+                        break;
+                    }
+                }
+            }
+
+            if (k > max_k) {
+                max_k = k;
+                colors[max_k+1].clear();
+            }
+
+            colors[k].push_back(u);
+            if (k < min_k) {
+                P[j].set_id(u);
+                j++;
+            }
+        }
+
+        if (j > 0)  P[j-1].set_bound(0);
+        if (min_k <= 0)  min_k = 1;
+
+        for (k = min_k; k <= max_k; k++)
+            for (int w = 0; w < colors[k].size(); w++) {
+                P[j].set_id(colors[k][w]);
+                P[j].set_bound(k);
+                j++;
+            }
+    }
+}
+#endif

+ 252 - 0
src/detection/detection_localization_global_hdl/include/pmc/pmc_neigh_cores.h

@@ -0,0 +1,252 @@
+/**
+ ============================================================================
+ Name        : Parallel Maximum Clique (PMC) Library
+ Author      : Ryan A. Rossi   (rrossi@purdue.edu)
+ Description : A general high-performance parallel framework for computing
+               maximum cliques. The library is designed to be fast for large
+               sparse graphs.
+
+ Copyright (C) 2012-2013, Ryan A. Rossi, All rights reserved.
+
+ Please cite the following paper if used:
+   Ryan A. Rossi, David F. Gleich, Assefaw H. Gebremedhin, Md. Mostofa
+   Patwary, A Fast Parallel Maximum Clique Algorithm for Large Sparse Graphs
+   and Temporal Strong Components, arXiv preprint 1302.6256, 2013.
+
+ See http://ryanrossi.com/pmc for more information.
+ ============================================================================
+ */
+
+#ifndef PMC_NEIGH_CORES_H_
+#define PMC_NEIGH_CORES_H_
+
+#include "pmc_vertex.h"
+
+using namespace std;
+
+namespace pmc {
+
+    static void neigh_cores_bound(
+            vector<long long>& vs,
+            vector<int>& es,
+            vector<Vertex> &P,
+            vector<short>& ind,
+            int& mc) {
+
+        int n = P.size() + 1;
+
+        // lookup table
+        vector<int> newids_to_actual(n, 0);
+        vector<int> vert_order(n,0);
+        vector<int> deg(n,0);
+        vector<int> pos(n,0);
+
+        // lookup table for neighbors
+        for (int v = 1; v < n; v++) ind[P[v-1].get_id()] = 1;
+
+        // compute degrees of induced neighborhood
+        int md = 0, x, u;
+        for (int v = 1; v < n; v++) { 	// for each v in P
+            u = P[v-1].get_id();
+            x = 0;
+            for (long long j=vs[u]; j<vs[u+1]; j++) { //induced degree
+                if (ind[es[j]]) x++;
+            }
+            deg[v] = x;
+            if (deg[v] > md)  md = deg[v];
+        }
+
+        int md_end = md+1;
+        vector<int> bin(md_end,0);
+        for (int v = 1; v < n; v++) bin[deg[v]]++;
+
+        int start = 1, num = 0;
+        for (int d=0; d < md_end; d++) { //for each deg, set bin to be the pos of the first vertex of that degree
+            num = bin[d];
+            bin[d] = start;
+            start = start + num;
+        }
+
+
+        for (int v=1; v<n; v++) {
+            pos[v] = bin[deg[v]];
+
+            //view this step as relabeling the vertices
+            vert_order[pos[v]] = v;
+            ind[P[v-1].get_id()] = v; 		   		// set bit for actual vertex id
+            newids_to_actual[v] = P[v-1].get_id();
+            bin[deg[v]]++;
+        }
+
+        for (int d=md; d > 1; d--)  bin[d] = bin[d-1];
+        bin[0] = 1;
+
+
+        int v_newid, v_actual, u_newid, du, pu, pw, w;
+        long long j = 0;
+        for (int i = 1; i < n; i++) {  							// neighborhood K-cores
+            v_newid = vert_order[i]; 							//relabeled id
+            v_actual = newids_to_actual[v_newid]; 				// real id
+            for (j = vs[v_actual]; j<vs[v_actual+1]; j++) {
+                if (ind[es[j]] > 0) { 							// find common induced neighbors of k
+
+                    u_newid = ind[es[j]];
+                    if (deg[u_newid] > deg[v_newid]) {
+                        du = deg[u_newid];
+                        pu = pos[u_newid];
+                        pw = bin[du];
+                        w = vert_order[pw];
+                        if (u_newid != w) {
+                            pos[u_newid] = pw;
+                            vert_order[pu] = w;
+                            pos[w] = pu;
+                            vert_order[pw] = u_newid;
+                        }
+                        bin[du]++;   deg[u_newid]--;
+                    }
+                }
+            }
+        }
+
+        // reset index
+        for (int v=1; v<n; v++) ind[P[v-1].get_id()] = 0;
+
+
+        // neighborhood core pruning and sorting
+        int id = 0, prune_vert = 0;
+        for (int i = n-1; i > 0; --i) {
+            u = vert_order[i];
+            if (deg[u]+1 > mc) {
+                P[id].set_bound(deg[u]);
+                P[id].set_id(newids_to_actual[u]);
+                id++;
+            }
+            else prune_vert++;
+        }
+
+        // remove pruned verts from P
+        for (int i = 0; i < prune_vert; i++)
+            P.pop_back();
+    }
+
+
+    static void neigh_cores_tight(
+            vector<long long>& vs,
+            vector<int>& es,
+            vector<Vertex> &P,
+            vector<short>& ind,
+            int& mc) {
+
+        int n = P.size() + 1;
+
+        // lookup table
+        vector<int> newids_to_actual(n, 0);
+        vector<int> vert_order(n,0);
+        vector<int> deg(n,0);
+        vector<int> pos(n,0);
+
+
+        // lookup table for neighbors
+        for (int v = 1; v < n; v++) ind[P[v-1].get_id()] = 1;
+
+        // compute degrees of induced neighborhood
+        int md = 0, x, u;
+        for (int v = n-1; v >= 1; v--) {
+            u = P[v-1].get_id();
+            x = 0;
+            for (long long j=vs[u]; j<vs[u+1]; j++) { //induced degree
+                if (ind[es[j]]) x++;
+            }
+            if (x >= mc) {
+                deg[v] = x;
+                if (deg[v] > md)  md = deg[v];
+            }
+            else {
+                deg[v] = 0;
+                ind[P[v-1].get_id()] = 0;
+            }
+        }
+
+        int md_end = md+1;
+        vector<int> bin(md_end,0);
+        for (int v = 1; v < n; v++) bin[deg[v]]++;
+
+        int start = 1, num = 0;
+        for (int d=0; d < md_end; d++) { //for each deg, set bin to be the pos of the first vertex of that degree
+            num = bin[d];
+            bin[d] = start;
+            start = start + num;
+        }
+
+
+        for (int v=1; v<n; v++) {
+            if (deg[v] > 0) {
+                pos[v] = bin[deg[v]];
+
+                //view this step as relabeling the vertices
+                vert_order[pos[v]] = v;
+                ind[P[v-1].get_id()] = v; 		   		// set bit for actual vertex id
+                newids_to_actual[v] = P[v-1].get_id();
+                bin[deg[v]]++;
+            }
+            else
+                ind[P[v-1].get_id()] = 0;
+        }
+
+        for (int d=md; d > 1; d--)  bin[d] = bin[d-1];
+        bin[0] = 1;
+
+
+        int v_newid, v_actual, u_newid, du, pu, pw, w;
+        long long j = 0;
+        for (int i = 1; i < n; i++) {  							// neighborhood K-cores
+            v_newid = vert_order[i];
+            if (deg[v_newid] > 0) {
+                //relabeled id
+                v_actual = newids_to_actual[v_newid]; 				// real id
+                for (j = vs[v_actual]; j<vs[v_actual+1]; j++) {
+                    if (ind[es[j]] > 0) { 							// find common induced neighbors of k
+
+                        u_newid = ind[es[j]];
+                        if (deg[u_newid] > deg[v_newid]) {
+                            du = deg[u_newid];
+                            pu = pos[u_newid];
+                            pw = bin[du];
+                            w = vert_order[pw];
+                            if (u_newid != w) {
+                                pos[u_newid] = pw;
+                                vert_order[pu] = w;
+                                pos[w] = pu;
+                                vert_order[pw] = u_newid;
+                            }
+                            bin[du]++;   deg[u_newid]--;
+                        }
+                    }
+                }
+            }
+        }
+
+        // reset index
+        for (int v=1; v<n; v++) ind[P[v-1].get_id()] = 0;
+
+
+        // neighborhood core pruning and sorting
+        int id = 0, prune_vert = 0;
+        for (int i = n-1; i > 0; --i) {
+            u = vert_order[i];
+            if (deg[u]+1 > mc) {
+                P[id].set_bound(deg[u]);
+                P[id].set_id(newids_to_actual[u]);
+                id++;
+            }
+            else prune_vert++;
+        }
+
+        // remove pruned verts from P
+        for (int i = 0; i < prune_vert; i++)
+            P.pop_back();
+    }
+}
+
+
+#endif

+ 53 - 0
src/detection/detection_localization_global_hdl/include/pmc/pmc_utils.h

@@ -0,0 +1,53 @@
+/**
+ ============================================================================
+ Name        : Parallel Maximum Clique (PMC) Library
+ Author      : Ryan A. Rossi   (rrossi@purdue.edu)
+ Description : A general high-performance parallel framework for computing
+               maximum cliques. The library is designed to be fast for large
+               sparse graphs.
+
+ Copyright (C) 2012-2013, Ryan A. Rossi, All rights reserved.
+
+ Please cite the following paper if used:
+   Ryan A. Rossi, David F. Gleich, Assefaw H. Gebremedhin, Md. Mostofa
+   Patwary, A Fast Parallel Maximum Clique Algorithm for Large Sparse Graphs
+   and Temporal Strong Components, arXiv preprint 1302.6256, 2013.
+
+ See http://ryanrossi.com/pmc for more information.
+ ============================================================================
+ */
+
+#ifndef PMC_UTILS_H_
+#define PMC_UTILS_H_
+
+#include <cstddef>
+#include <sys/time.h>
+#include <unistd.h>
+#include <iostream>
+#include "assert.h"
+#include <sys/types.h>
+#include <dirent.h>
+#include <errno.h>
+#include <string>
+#include <set>
+#include "pmc_headers.h"
+
+
+using namespace std;
+
+bool fexists(const char *filename);
+void usage(char *argv0);
+
+double get_time();
+string memory_usage();
+
+void validate(bool condition, const string& msg);
+
+void indent(int level);
+void indent(int level, string str);
+void print_max_clique(vector<int>& max_clique_data);
+void print_n_maxcliques(set< vector<int> > C, int n);
+
+int getdir (string dir, vector<string> &files);
+
+#endif

+ 52 - 0
src/detection/detection_localization_global_hdl/include/pmc/pmc_vertex.h

@@ -0,0 +1,52 @@
+/**
+ ============================================================================
+ Name        : Parallel Maximum Clique (PMC) Library
+ Author      : Ryan A. Rossi   (rrossi@purdue.edu)
+ Description : A general high-performance parallel framework for computing
+               maximum cliques. The library is designed to be fast for large
+               sparse graphs.
+
+ Copyright (C) 2012-2013, Ryan A. Rossi, All rights reserved.
+
+ Please cite the following paper if used:
+   Ryan A. Rossi, David F. Gleich, Assefaw H. Gebremedhin, Md. Mostofa
+   Patwary, A Fast Parallel Maximum Clique Algorithm for Large Sparse Graphs
+   and Temporal Strong Components, arXiv preprint 1302.6256, 2013.
+
+ See http://ryanrossi.com/pmc for more information.
+ ============================================================================
+ */
+
+#ifndef PMC_VERTEX_H_
+#define PMC_VERTEX_H_
+
+using namespace std;
+
+namespace pmc {
+    class Vertex {
+        private:
+            int id, b;
+        public:
+            Vertex(int vertex_id, int bound): id(vertex_id), b(bound) {};
+
+            void set_id(int vid)        { id = vid; }
+            int get_id()                { return id; }
+
+            void set_bound(int value)   { b = value; }
+            int get_bound()             { return b; }
+    };
+
+    static bool decr_bound(Vertex v,  Vertex u) {
+        return (v.get_bound() > u.get_bound());
+    }
+    static bool incr_bound(Vertex v,  Vertex u) {
+        return (v.get_bound() < u.get_bound());
+    };
+
+    inline static void print_mc_info(vector<int> &C_max, double &sec) {
+        cout << "*** [pmc: thread " << omp_get_thread_num() + 1;
+        cout << "]   current max clique = " << C_max.size();
+        cout << ",  time = " << get_time() - sec << " sec" <<endl;
+    };
+};
+#endif

+ 125 - 0
src/detection/detection_localization_global_hdl/include/pmc/pmcx_maxclique.h

@@ -0,0 +1,125 @@
+/**
+ ============================================================================
+ Name        : Parallel Maximum Clique (PMC) Library
+ Author      : Ryan A. Rossi   (rrossi@purdue.edu)
+ Description : A general high-performance parallel framework for computing
+               maximum cliques. The library is designed to be fast for large
+               sparse graphs.
+
+ Copyright (C) 2012-2013, Ryan A. Rossi, All rights reserved.
+
+ Please cite the following paper if used:
+   Ryan A. Rossi, David F. Gleich, Assefaw H. Gebremedhin, Md. Mostofa
+   Patwary, A Fast Parallel Maximum Clique Algorithm for Large Sparse Graphs
+   and Temporal Strong Components, arXiv preprint 1302.6256, 2013.
+
+ See http://ryanrossi.com/pmc for more information.
+ ============================================================================
+ */
+
+#ifndef PMCX_MAXCLIQUE_H_
+#define PMCX_MAXCLIQUE_H_
+
+#include <cstddef>
+#include <sys/time.h>
+#include <unistd.h>
+#include <iostream>
+#include "pmc_headers.h"
+#include "pmc_utils.h"
+#include "pmc_graph.h"
+#include "pmc_input.h"
+#include "pmc_vertex.h"
+#include "pmc_neigh_cores.h"
+#include "pmc_neigh_coloring.h"
+#include <algorithm>
+
+using namespace std;
+
+namespace pmc {
+
+    class pmcx_maxclique {
+        public:
+            vector<int>* edges;
+            vector<long long>* vertices;
+            vector<int>* bound;
+            vector<int>* order;
+            int param_ub;
+            int ub;
+            int lb;
+            double time_limit;
+            double sec;
+            double wait_time;
+            bool not_reached_ub;
+            bool time_expired_msg;
+            bool decr_order;
+
+            string vertex_ordering;
+            int edge_ordering;
+            int style_bounds;
+            int style_dynamic_bounds;
+
+            int num_threads;
+
+            void initialize() {
+                vertex_ordering = "deg";
+                edge_ordering = 0;
+                style_bounds = 0;
+                style_dynamic_bounds = 0;
+                not_reached_ub = true;
+                time_expired_msg = true;
+                decr_order = false;
+            }
+
+            void setup_bounds(input& params) {
+                lb = params.lb;
+                ub = params.ub;
+                param_ub = params.param_ub;
+                if (param_ub == 0)
+                    param_ub = ub;
+                time_limit = params.time_limit;
+                wait_time = params.remove_time;
+                sec = get_time();
+
+                num_threads = params.threads;
+            }
+
+            pmcx_maxclique(pmc_graph& G, input& params) {
+                bound = G.get_kcores();
+                order = G.get_kcore_ordering();
+                setup_bounds(params);
+                initialize();
+                vertex_ordering = params.vertex_search_order;
+                decr_order = params.decreasing_order;
+            }
+
+            ~pmcx_maxclique() {};
+
+            int search(pmc_graph& G, vector<int>& sol);
+            inline void branch(
+                    vector<long long>& vs,
+                    vector<int>& es,
+                    vector<Vertex> &P,
+                    vector<short>& ind,
+                    vector<int>& C,
+                    vector<int>& C_max,
+                    vector< vector<int> >& colors,
+                    int* &pruned,
+                    int& mc);
+
+            int search_dense(pmc_graph& G, vector<int>& sol);
+            inline void branch_dense(
+                    vector<long long>& vs,
+                    vector<int>& es,
+                    vector<Vertex> &P,
+                    vector<short>& ind,
+                    vector<int>& C,
+                    vector<int>& C_max,
+                    vector< vector<int> >& colors,
+                    int* &pruned,
+                    int& mc,
+                    vector<vector<bool>> &adj);
+
+    };
+};
+
+#endif

+ 130 - 0
src/detection/detection_localization_global_hdl/include/pmc/pmcx_maxclique_basic.h

@@ -0,0 +1,130 @@
+/**
+ ============================================================================
+ Name        : Parallel Maximum Clique (PMC) Library
+ Author      : Ryan A. Rossi   (rrossi@purdue.edu)
+ Description : A general high-performance parallel framework for computing
+               maximum cliques. The library is designed to be fast for large
+               sparse graphs.
+
+ Copyright (C) 2012-2013, Ryan A. Rossi, All rights reserved.
+
+ Please cite the following paper if used:
+   Ryan A. Rossi, David F. Gleich, Assefaw H. Gebremedhin, Md. Mostofa
+   Patwary, A Fast Parallel Maximum Clique Algorithm for Large Sparse Graphs
+   and Temporal Strong Components, arXiv preprint 1302.6256, 2013.
+
+ See http://ryanrossi.com/pmc for more information.
+ ============================================================================
+ */
+
+#ifndef PMCX_MAXCLIQUE_BASIC_H_
+#define PMCX_MAXCLIQUE_BASIC_H_
+
+#include <cstddef>
+#include <sys/time.h>
+#include <unistd.h>
+#include <iostream>
+#include <algorithm>
+#include "pmc_headers.h"
+#include "pmc_utils.h"
+#include "pmc_graph.h"
+#include "pmc_input.h"
+#include "pmc_vertex.h"
+#include "pmc_neigh_cores.h"
+#include "pmc_neigh_coloring.h"
+
+using namespace std;
+
+namespace pmc {
+
+    class pmcx_maxclique_basic {
+        public:
+            vector<int>* edges;
+            vector<long long>* vertices;
+            vector<int>* bound;
+            vector<int>* order;
+            vector<int>* degree;
+            int param_ub;
+            int ub;
+            int lb;
+            double time_limit;
+            double sec;
+            double wait_time;
+            bool not_reached_ub;
+            bool time_expired_msg;
+            bool decr_order;
+
+            string vertex_ordering;
+            int edge_ordering;
+            int style_bounds;
+            int style_dynamic_bounds;
+
+            int num_threads;
+
+            void initialize() {
+                vertex_ordering = "deg";
+                edge_ordering = 0;
+                style_bounds = 0;
+                style_dynamic_bounds = 0;
+                not_reached_ub = true;
+                time_expired_msg = true;
+                decr_order = false;
+            }
+
+            void setup_bounds(input& params) {
+                lb = params.lb;
+                ub = params.ub;
+                param_ub = params.param_ub;
+                if (param_ub == 0)
+                    param_ub = ub;
+                time_limit = params.time_limit;
+                wait_time = params.remove_time;
+                sec = get_time();
+
+                num_threads = params.threads;
+            }
+
+
+            pmcx_maxclique_basic(pmc_graph& G, input& params) {
+                bound = G.get_kcores();
+                order = G.get_kcore_ordering();
+                setup_bounds(params);
+                initialize();
+                vertex_ordering = params.vertex_search_order;
+                decr_order = params.decreasing_order;
+            }
+
+            ~pmcx_maxclique_basic() {};
+
+            int search(pmc_graph& G, vector<int>& sol);
+
+            void branch(
+                    vector<long long>& vs,
+                    vector<int>& es,
+                    vector<Vertex> &P,
+                    vector<short>& ind,
+                    vector<int>& C,
+                    vector<int>& C_max,
+                    vector< vector<int> >& colors,
+                    int* &pruned,
+                    int& mc);
+
+
+            int search_dense(pmc_graph& G, vector<int>& sol);
+
+            void branch_dense(
+                    vector<long long>& vs,
+                    vector<int>& es,
+                    vector<Vertex> &P,
+                    vector<short>& ind,
+                    vector<int>& C,
+                    vector<int>& C_max,
+                    vector< vector<int> >& colors,
+                    int* &pruned,
+                    int& mc,
+                    vector<vector<bool>> &adj);
+
+    };
+};
+
+#endif

+ 240 - 0
src/detection/detection_localization_global_hdl/include/teaser/certification.h

@@ -0,0 +1,240 @@
+/**
+ * Copyright (c) 2020, Massachusetts Institute of Technology,
+ * Cambridge, MA 02139
+ * All Rights Reserved
+ * Authors: Jingnan Shi, et al. (see THANKS for the full author list)
+ * See LICENSE for the license information
+ */
+
+#pragma once
+
+#include <Eigen/Core>
+#include <Eigen/Geometry>
+#include <Eigen/SparseCore>
+
+namespace teaser {
+
+using SparseMatrix = Eigen::SparseMatrix<double, Eigen::ColMajor, int64_t>;
+
+struct CertificationResult {
+  bool is_optimal = false;
+  double best_suboptimality = -1;
+  std::vector<double> suboptimality_traj;
+};
+
+/**
+ * Abstract virtual class representing certification of registration results
+ */
+class AbstractRotationCertifier {
+public:
+  virtual ~AbstractRotationCertifier() {}
+
+  /**
+   * Abstract function for certifying rotation estimation results
+   * @param rotation_solution [in] a solution to the rotatoin registration problem
+   * @param src [in] Assume dst = R * src
+   * @param dst [in] Assume dst = R * src
+   * @param theta [in] a binary vector indicating inliers vs. outliers
+   * @return  relative sub-optimality gap
+   */
+  virtual CertificationResult certify(const Eigen::Matrix3d& rotation_solution,
+                                      const Eigen::Matrix<double, 3, Eigen::Dynamic>& src,
+                                      const Eigen::Matrix<double, 3, Eigen::Dynamic>& dst,
+                                      const Eigen::Matrix<bool, 1, Eigen::Dynamic>& theta) = 0;
+};
+
+/**
+ * A rotation registration certifier using Douglas–Rachford Splitting (DRS)
+ *
+ * Please refer to:
+ * [1] H. Yang, J. Shi, and L. Carlone, “TEASER: Fast and Certifiable Point Cloud Registration,”
+ * arXiv:2001.07715 [cs, math], Jan. 2020.
+ */
+class DRSCertifier : public AbstractRotationCertifier {
+public:
+
+  /**
+   * Solver for eigendecomposition solver / spectral decomposition.
+   *
+   * @brief For most cases, the default solvers in Eigen should be used.
+   * For extremely large matrices, it may make sense to use Spectra instead.
+   */
+  enum class EIG_SOLVER_TYPE {
+    EIGEN = 0, ///< Use solvers in the Eigen library
+    SPECTRA = 1, ///< Use solvers in the Spectra library
+  };
+
+  /**
+   * Parameter struct for DRSCertifier
+   */
+  struct Params {
+    /**
+     * Noise bound for the vectors used for certification
+     */
+    double noise_bound = 0.01;
+
+    /**
+     * Square of the ratio between acceptable noise and noise bound. Usually set to 1.
+     */
+    double cbar2 = 1;
+
+    /**
+     * Suboptimality gap
+     *
+     * This is not a percentage. Multiply by 100 to get a percentage.
+     */
+    double sub_optimality = 1e-3;
+
+    /**
+     * Maximum iterations allowed
+     */
+    double max_iterations = 2e2;
+
+    /**
+     * Gamma value (refer to [1] for details)
+     */
+    double gamma_tau = 1.999999;
+
+    /**
+     * Solver for eigendecomposition / spectral decomposition
+     */
+    EIG_SOLVER_TYPE eig_decomposition_solver = EIG_SOLVER_TYPE::EIGEN;
+  };
+
+  DRSCertifier() = delete;
+
+  /**
+   * Constructor for DRSCertifier that takes in a parameter struct
+   * @param params [in] struct holding all parameters
+   */
+  DRSCertifier(const Params& params) : params_(params) {};
+
+  /**
+   * Constructor for DRSCertifier
+   * @param noise_bound [in] bound on the noise
+   * @param cbar2 [in] maximal allowed residual^2 to noise bound^2 ratio, usually set to 1
+   */
+  DRSCertifier(double noise_bound, double cbar2) {
+    params_.noise_bound = noise_bound;
+    params_.cbar2 = cbar2;
+  };
+
+  /**
+   * Main certification function
+   *
+   * @param R_solution [in] a feasible rotation solution
+   * @param src [in] vectors under rotation
+   * @param dst [in] vectors after rotation
+   * @param theta [in] binary (1 vs. 0) vector indicating inliers vs. outliers
+   * @return  relative sub-optimality gap
+   */
+  CertificationResult certify(const Eigen::Matrix3d& R_solution,
+                              const Eigen::Matrix<double, 3, Eigen::Dynamic>& src,
+                              const Eigen::Matrix<double, 3, Eigen::Dynamic>& dst,
+                              const Eigen::Matrix<bool, 1, Eigen::Dynamic>& theta) override;
+
+  /**
+   * Main certification function
+   *
+   * @param R_solution [in] a feasible rotation solution
+   * @param src [in] vectors under rotation
+   * @param dst [in] vectors after rotation
+   * @param theta [in] binary (1 vs. -1) vector indicating inliers vs. outliers
+   * @return  relative sub-optimality gap
+   */
+  CertificationResult certify(const Eigen::Matrix3d& R_solution,
+                              const Eigen::Matrix<double, 3, Eigen::Dynamic>& src,
+                              const Eigen::Matrix<double, 3, Eigen::Dynamic>& dst,
+                              const Eigen::Matrix<double, 1, Eigen::Dynamic>& theta);
+
+  /**
+   * Compute sub-optimality gap
+   * @param M
+   * @param mu
+   * @param N
+   * @return
+   */
+  double computeSubOptimalityGap(const Eigen::MatrixXd& M, double mu, int N);
+
+  /**
+   * Get the Omega_1 matrix given a quaternion
+   * @param q an Eigen quaternion
+   * @param omega1 4-by-4 omega_1 matrix
+   */
+  Eigen::Matrix4d getOmega1(const Eigen::Quaterniond& q);
+
+  /**
+   * Get a 4-by-4 block diagonal matrix with each block represents omega_1
+   * @param q
+   * @param theta
+   * @param D_omega
+   */
+  void getBlockDiagOmega(int Npm, const Eigen::Quaterniond& q,
+                         Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic>* D_omega);
+
+  /**
+   * Get Q cost matrix (see Proposition 10 in [1])
+   * @param v1 vectors under rotation
+   * @param v2 vectors after rotation
+   */
+  void getQCost(const Eigen::Matrix<double, 3, Eigen::Dynamic>& v1,
+                const Eigen::Matrix<double, 3, Eigen::Dynamic>& v2,
+                Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic>* Q);
+
+  /**
+   * Given an arbitrary matrix W, project W to the correct dual structure
+    (1) off-diagonal blocks must be skew-symmetric
+    (2) diagonal blocks must satisfy W_00 = - sum(W_ii)
+    (3) W_dual must also satisfy complementary slackness (because M_init satisfies complementary
+   slackness) This projection is optimal in the sense of minimum Frobenious norm
+   */
+  void getOptimalDualProjection(const Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic>& W,
+                                const Eigen::Matrix<double, 1, Eigen::Dynamic>& theta_prepended,
+                                const SparseMatrix& A_inv, Eigen::MatrixXd* W_dual);
+
+  /**
+   * Generate an initial guess (see Appendix U of [1]).
+   *
+   * The initial guess satisfies:
+   * 1. KKT complementary slackness
+   * 2. diagonal blocks of (Q - Lambda_guess) is PSD (except the first diagonal block)
+   *
+   * @param R [in] rotation matrix
+   * @param theta [in] a binary (1 & -1) vector indicating inliers vs. outliers
+   * @param src [in]
+   * @param dst [in]
+   * @param lambda_guess [out]
+   */
+  void getLambdaGuess(const Eigen::Matrix<double, 3, 3>& R,
+                      const Eigen::Matrix<double, 1, Eigen::Dynamic>& theta,
+                      const Eigen::Matrix<double, 3, Eigen::Dynamic>& src,
+                      const Eigen::Matrix<double, 3, Eigen::Dynamic>& dst,
+                      SparseMatrix* lambda_guess);
+
+  /**
+   * Calculate the inverse of the linear projection matrix A mentioned in Theorem 35 of our TEASER
+   * paper [1].
+   *
+   * @param theta_prepended [in] a binary (1 & -1) vector indicating inliers vs. outliers, with 1
+   * prepended
+   * @param A_inv [out] inverse of A
+   */
+  void getLinearProjection(const Eigen::Matrix<double, 1, Eigen::Dynamic>& theta_prepended,
+                           SparseMatrix* A_inv);
+
+private:
+  /**
+   * Calculate the sum of a block (4-by-N) along the columns
+   * @param A [in]
+   * @param row [in]
+   * @param theta [in]
+   * @param output [out]
+   */
+  void getBlockRowSum(const Eigen::MatrixXd& A, const int& row,
+                      const Eigen::Matrix<double, 1, Eigen::Dynamic>& theta,
+                      Eigen::Vector4d* output);
+
+  Params params_;
+};
+
+} // namespace teaser

+ 80 - 0
src/detection/detection_localization_global_hdl/include/teaser/fpfh.h

@@ -0,0 +1,80 @@
+/**
+ * Copyright 2020, Massachusetts Institute of Technology,
+ * Cambridge, MA 02139
+ * All Rights Reserved
+ * Authors: Jingnan Shi, et al. (see THANKS for the full author list)
+ * See LICENSE for the license information
+ */
+
+#pragma once
+
+#include <boost/smart_ptr/shared_ptr.hpp>
+#include <pcl/features/fpfh.h>
+
+#include "teaser/geometry.h"
+
+namespace teaser {
+
+using FPFHCloud = pcl::PointCloud<pcl::FPFHSignature33>;
+using FPFHCloudPtr = pcl::PointCloud<pcl::FPFHSignature33>::Ptr;
+
+class FPFHEstimation {
+public:
+  FPFHEstimation()
+      : fpfh_estimation_(
+            new pcl::FPFHEstimation<pcl::PointXYZ, pcl::Normal, pcl::FPFHSignature33>){};
+
+  /**
+   * Compute FPFH features.
+   *
+   * @return A shared pointer to the FPFH feature point cloud
+   * @param input_cloud
+   * @param normal_search_radius Radius for estimating normals
+   * @param fpfh_search_radius Radius for calculating FPFH (needs to be at least normalSearchRadius)
+   */
+  FPFHCloudPtr computeFPFHFeatures(const PointCloud& input_cloud, double normal_search_radius = 0.03,
+                                        double fpfh_search_radius = 0.05);
+
+  /**
+   * Return the pointer to the underlying pcl::FPFHEstimation object
+   * @return
+   */
+  inline pcl::FPFHEstimation<pcl::PointXYZ, pcl::Normal, pcl::FPFHSignature33>::Ptr
+  getImplPointer() const {
+    return fpfh_estimation_;
+  }
+
+private:
+  pcl::FPFHEstimation<pcl::PointXYZ, pcl::Normal, pcl::FPFHSignature33>::Ptr fpfh_estimation_;
+
+  /**
+   * Wrapper function for the corresponding PCL function.
+   * @param output_cloud
+   */
+  void compute(pcl::PointCloud<pcl::FPFHSignature33>& output_cloud);
+
+  /**
+   * Wrapper function for the corresponding PCL function.
+   * @param input_cloud
+   */
+  void setInputCloud(pcl::PointCloud<pcl::PointXYZ>::Ptr input_cloud);
+
+  /**
+   * Wrapper function for the corresponding PCL function.
+   * @param input_normals
+   */
+  void setInputNormals(pcl::PointCloud<pcl::Normal>::Ptr input_normals);
+
+  /**
+   * Wrapper function for the corresponding PCL function.
+   * @param search_method
+   */
+  void setSearchMethod(pcl::search::KdTree<pcl::PointXYZ>::Ptr search_method);
+
+  /**
+   * Wrapper function for the corresponding PCL function.
+   */
+  void setRadiusSearch(double);
+};
+
+} // namespace teaser

+ 73 - 0
src/detection/detection_localization_global_hdl/include/teaser/geometry.h

@@ -0,0 +1,73 @@
+/**
+ * Copyright 2020, Massachusetts Institute of Technology,
+ * Cambridge, MA 02139
+ * All Rights Reserved
+ * Authors: Jingnan Shi, et al. (see THANKS for the full author list)
+ * See LICENSE for the license information
+ */
+
+#pragma once
+
+#include <vector>
+
+namespace teaser {
+
+struct PointXYZ {
+  float x;
+  float y;
+  float z;
+
+  friend inline bool operator==(const PointXYZ& lhs, const PointXYZ& rhs) {
+    return (lhs.x == rhs.x) && (lhs.y == rhs.y) && (lhs.z == rhs.z);
+  }
+  friend inline bool operator!=(const PointXYZ& lhs, const PointXYZ& rhs) { return !(lhs == rhs); }
+};
+
+class PointCloud {
+public:
+  /**
+   * @brief Default constructor for PointCloud
+   */
+  PointCloud() = default;
+
+  // c++ container named requirements
+  using value_type = PointXYZ;
+  using reference = PointXYZ&;
+  using const_reference = const PointXYZ&;
+  using difference_type = std::vector<PointXYZ>::difference_type;
+  using size_type = std::vector<PointXYZ>::size_type;
+
+  // iterators
+  using iterator = std::vector<PointXYZ>::iterator;
+  using const_iterator = std::vector<PointXYZ>::const_iterator;
+  inline iterator begin() { return points_.begin(); }
+  inline iterator end() { return points_.end(); }
+  inline const_iterator begin() const { return points_.begin(); }
+  inline const_iterator end() const { return points_.end(); }
+
+  // capacity
+  inline size_t size() const { return points_.size(); }
+  inline void reserve(size_t n) { points_.reserve(n); }
+  inline bool empty() { return points_.empty(); }
+
+  // element access
+  inline PointXYZ& operator[](size_t i) { return points_[i]; }
+  inline const PointXYZ& operator[](size_t i) const { return points_[i]; }
+  inline PointXYZ& at(size_t n) { return points_.at(n); }
+  inline const PointXYZ& at(size_t n) const { return points_.at(n); }
+  inline PointXYZ& front() { return points_.front(); }
+  inline const PointXYZ& front() const { return points_.front(); }
+  inline PointXYZ& back() { return points_.back(); }
+  inline const PointXYZ& back() const { return points_.back(); }
+
+  inline void push_back(const PointXYZ& pt) { points_.push_back(pt); }
+  inline void push_back(PointXYZ& pt) { points_.push_back(pt); }
+
+  inline void clear() { points_.clear(); }
+
+private:
+  std::vector<PointXYZ> points_;
+};
+
+
+} // namespace teaser

+ 276 - 0
src/detection/detection_localization_global_hdl/include/teaser/graph.h

@@ -0,0 +1,276 @@
+/**
+ * Copyright 2020, Massachusetts Institute of Technology,
+ * Cambridge, MA 02139
+ * All Rights Reserved
+ * Authors: Jingnan Shi, et al. (see THANKS for the full author list)
+ * See LICENSE for the license information
+ */
+
+#pragma once
+
+#include <unordered_set>
+#include <map>
+#include <vector>
+
+#include <Eigen/Core>
+
+#include "teaser/macros.h"
+
+namespace teaser {
+
+/**
+ * A simple undirected graph class
+ *
+ * This graph assumes that vertices are numbered. In addition, the vertices numbers have to be
+ * consecutive starting from 0.
+ *
+ * For example, if the graph have 3 vertices, they have to be named 0, 1, and 2.
+ */
+class Graph {
+public:
+  Graph() : num_edges_(0){};
+
+  /**
+   * Constructor that takes in an adjacency list. Notice that for an edge connecting two arbitrary
+   * vertices v1 & v2, we assume that v2 exists in v1's list, and v1 also exists in v2's list. This
+   * condition is not enforced. If violated, removeEdge() function might exhibit undefined
+   * behaviors.
+   * @param [in] adj_list an map representing an adjacency list
+   */
+  explicit Graph(const std::map<int, std::vector<int>>& adj_list) {
+    adj_list_.resize(adj_list.size());
+    num_edges_ = 0;
+    for (const auto& e_list : adj_list) {
+      const auto& v = e_list.first;
+      adj_list_[e_list.first] = e_list.second;
+      num_edges_ += e_list.second.size();
+    }
+    num_edges_ /= 2;
+  };
+
+  /**
+   * Add a vertex with no edges.
+   * @param [in] id the id of vertex to be added
+   */
+  void addVertex(const int& id) {
+    if (id < adj_list_.size()) {
+      TEASER_DEBUG_ERROR_MSG("Vertex already exists.");
+    } else {
+      adj_list_.resize(id + 1);
+    }
+  }
+
+  /**
+   * Populate the graph with the provided number of vertices without any edges.
+   * @param num_vertices
+   */
+  void populateVertices(const int& num_vertices) { adj_list_.resize(num_vertices); }
+
+  /**
+   * Return true if said edge exists
+   * @param [in] vertex_1
+   * @param [in] vertex_2
+   */
+  bool hasEdge(const int& vertex_1, const int& vertex_2) {
+    if (vertex_1 >= adj_list_.size() || vertex_2 >= adj_list_.size()) {
+      return false;
+    }
+    auto& connected_vs = adj_list_[vertex_1];
+    bool exists =
+        std::find(connected_vs.begin(), connected_vs.end(), vertex_2) != connected_vs.end();
+    return exists;
+  }
+
+  /**
+   * Return true if the vertex exists.
+   * @param vertex
+   * @return
+   */
+  bool hasVertex(const int& vertex) { return vertex < adj_list_.size(); }
+
+  /**
+   * Add an edge between two vertices
+   * @param [in] vertex_1 one vertex of the edge
+   * @param [in] vertex_2 another vertex of the edge
+   */
+  void addEdge(const int& vertex_1, const int& vertex_2) {
+    if (hasEdge(vertex_1, vertex_2)) {
+      TEASER_DEBUG_ERROR_MSG("Edge exists.");
+      return;
+    }
+    adj_list_[vertex_1].push_back(vertex_2);
+    adj_list_[vertex_2].push_back(vertex_1);
+    num_edges_++;
+  }
+
+  /**
+   * Remove the edge between two vertices.
+   * @param [in] vertex_1 one vertex of the edge
+   * @param [in] vertex_2 another vertex of the edge
+   */
+  void removeEdge(const int& vertex_1, const int& vertex_2) {
+    if (vertex_1 >= adj_list_.size() || vertex_2 >= adj_list_.size()) {
+      TEASER_DEBUG_ERROR_MSG("Trying to remove non-existent edge.");
+      return;
+    }
+
+    adj_list_[vertex_1].erase(
+        std::remove(adj_list_[vertex_1].begin(), adj_list_[vertex_1].end(), vertex_2),
+        adj_list_[vertex_1].end());
+    adj_list_[vertex_2].erase(
+        std::remove(adj_list_[vertex_2].begin(), adj_list_[vertex_2].end(), vertex_1),
+        adj_list_[vertex_2].end());
+
+    num_edges_--;
+  }
+
+  /**
+   * Get the number of vertices
+   * @return total number of vertices
+   */
+  [[nodiscard]] int numVertices() const { return adj_list_.size(); }
+
+  /**
+   * Get the number of edges
+   * @return total number of edges
+   */
+  [[nodiscard]] int numEdges() const { return num_edges_; }
+
+  /**
+   * Get edges originated from a specific vertex
+   * @param [in] id
+   * @return an unordered set of edges
+   */
+  [[nodiscard]] const std::vector<int>& getEdges(int id) const { return adj_list_[id]; }
+
+  /**
+   * Get all vertices
+   * @return a vector of all vertices
+   */
+  [[nodiscard]] std::vector<int> getVertices() const {
+    std::vector<int> v;
+    for (int i = 0; i < adj_list_.size(); ++i) {
+      v.push_back(i);
+    }
+    return v;
+  }
+
+  [[nodiscard]] Eigen::MatrixXi getAdjMatrix() const {
+    const int num_v = numVertices();
+    Eigen::MatrixXi adj_matrix(num_v, num_v);
+    for (size_t i = 0; i < num_v; ++i) {
+      const auto& c_edges = getEdges(i);
+      for (size_t j = 0; j < num_v; ++j) {
+        if (std::find(c_edges.begin(), c_edges.end(), j) != c_edges.end()) {
+          adj_matrix(i, j) = 1;
+        } else {
+          adj_matrix(i, j) = 0;
+        }
+      }
+    }
+    return adj_matrix;
+  }
+
+  [[nodiscard]] std::vector<std::vector<int>> getAdjList() const { return adj_list_; }
+
+  /**
+   * Preallocate spaces for vertices
+   * @param num_vertices
+   */
+  void reserve(const int& num_vertices) { adj_list_.reserve(num_vertices); }
+
+  /**
+   * Clear the contents of the graph
+   */
+  void clear() {
+    adj_list_.clear();
+    num_edges_ = 0;
+  }
+
+  /**
+   * Reserve space for complete graph. A complete undirected graph should have N*(N-1)/2 edges
+   * @param num_vertices
+   */
+  void reserveForCompleteGraph(const int& num_vertices) {
+    adj_list_.reserve(num_vertices);
+    for (int i = 0; i < num_vertices - 1; ++i) {
+      std::vector<int> c_edges;
+      c_edges.reserve(num_vertices - 1);
+      adj_list_.push_back(c_edges);
+    }
+    adj_list_.emplace_back(std::initializer_list<int>{});
+  }
+
+private:
+  std::vector<std::vector<int>> adj_list_;
+  size_t num_edges_;
+};
+
+/**
+ * A facade to the Parallel Maximum Clique (PMC) library.
+ *
+ * For details about PMC, please refer to:
+ * https://github.com/ryanrossi/pmc
+ * and
+ * Ryan A. Rossi, David F. Gleich, Assefaw H. Gebremedhin, Md. Mostofa Patwary, A Fast Parallel
+ * Maximum Clique Algorithm for Large Sparse Graphs and Temporal Strong Components, arXiv preprint
+ * 1302.6256, 2013.
+ */
+class MaxCliqueSolver {
+public:
+  /**
+   * Enum representing the solver algorithm to use
+   */
+  enum class CLIQUE_SOLVER_MODE {
+    PMC_EXACT = 0,
+    PMC_HEU = 1,
+    KCORE_HEU = 2,
+  };
+
+  /**
+   * Parameter struct for MaxCliqueSolver
+   */
+  struct Params {
+
+    /**
+     * Algorithm used for finding max clique.
+     */
+    CLIQUE_SOLVER_MODE solver_mode = CLIQUE_SOLVER_MODE::PMC_EXACT;
+
+    /**
+     * \deprecated Use solver_mode instead
+     * Set this to false to enable heuristic-only max clique finding.
+     */
+    bool solve_exactly = true;
+
+    /**
+     * The threshold ratio for determining whether to skip max clique and go straightly to
+     * GNC rotation estimation. Set this to 1 to always use exact max clique selection, 0 to always
+     * skip exact max clique selection.
+     */
+    double kcore_heuristic_threshold = 1;
+
+    /**
+     * Time limit on running the solver.
+     */
+    double time_limit = 3600;
+  };
+
+  MaxCliqueSolver() = default;
+
+  MaxCliqueSolver(Params params) : params_(params){};
+
+  /**
+   * Find the maximum clique within the graph provided. By maximum clique, it means the clique of
+   * the largest size in an undirected graph.
+   * @param graph
+   * @return a vector of indices of cliques
+   */
+  std::vector<int> findMaxClique(Graph graph);
+
+private:
+  Graph graph_;
+  Params params_;
+};
+
+} // namespace teaser

+ 101 - 0
src/detection/detection_localization_global_hdl/include/teaser/linalg.h

@@ -0,0 +1,101 @@
+/**
+ * Copyright (c) 2020, Massachusetts Institute of Technology,
+ * Cambridge, MA 02139
+ * All Rights Reserved
+ * Authors: Jingnan Shi, et al. (see THANKS for the full author list)
+ * See LICENSE for the license information
+ */
+
+#pragma once
+
+#include <iostream>
+
+#include <Eigen/Core>
+#include <Eigen/SparseCore>
+#include <Eigen/Eigenvalues>
+
+namespace teaser {
+
+/**
+ * Return the hat map of the provided vector (a skew symmetric matrix).
+ * @param u 3-by-1 vector
+ * @param x 3-by-3 skew symmetric matrix
+ */
+Eigen::Matrix<double, 3, 3> hatmap(const Eigen::Matrix<double, 3, 1>& u) {
+  Eigen::Matrix<double, 3, 3> x;
+  // clang-format off
+  x << 0,           -u(2),  u(1),
+      u(2),   0,          -u(0),
+      -u(1),  u(0),  0;
+  // clang-format on
+  return x;
+}
+
+/**
+ * Vector-vector kronecker product function with fixed-size output
+ * @tparam NumT
+ * @tparam N size of the first vector
+ * @tparam M size of the second vector
+ * @param v1 [in] first vector
+ * @param v2 [in] second vector
+ * @param output [out] output vector
+ */
+template <typename NumT, int N, int M>
+void vectorKron(const Eigen::Matrix<NumT, N, 1>& v1, const Eigen::Matrix<NumT, M, 1>& v2,
+                Eigen::Matrix<NumT, N * M, 1>* output) {
+#pragma omp parallel for collapse(2) shared(v1, v2, output) default(none)
+  for (size_t i = 0; i < N; ++i) {
+    for (size_t j = 0; j < M; ++j) {
+      (*output)[i * M + j] = v1[i] * v2[j];
+    }
+  }
+}
+
+/**
+ * Vector-vector kronecker product function with dynamic-size output
+ * @tparam NumT numerical type for Eigen matrices (double, float, etc.)
+ * @param v1 [in] first vector
+ * @param v2 [in] second vector
+ * @return Result of kronecker product
+ */
+template <typename NumT, int N, int M>
+Eigen::Matrix<NumT, Eigen::Dynamic, 1> vectorKron(const Eigen::Matrix<NumT, N, 1>& v1,
+                                                  const Eigen::Matrix<NumT, M, 1>& v2) {
+  Eigen::Matrix<double, Eigen::Dynamic, 1> output(v1.rows() * v2.rows(), 1);
+#pragma omp parallel for collapse(2) shared(v1, v2, output) default(none)
+  for (size_t i = 0; i < v1.rows(); ++i) {
+    for (size_t j = 0; j < v2.rows(); ++j) {
+      output[i * v2.rows() + j] = v1[i] * v2[j];
+    }
+  }
+  return output;
+}
+
+/**
+ * Find the nearest (in Frobenius norm) Symmetric Positive Definite matrix to A
+ *
+ * See: https://www.sciencedirect.com/science/article/pii/0024379588902236
+ *
+ * @tparam NumT numerical type for Eigen matrices (double, float, etc.)
+ * @param A [in] input matrix
+ * @param nearestPSD [out] output neaest positive semi-definite matrix
+ * @param eig_threshold [in] optional threshold of determining the smallest eigen values
+ */
+template <typename NumT>
+void getNearestPSD(const Eigen::Matrix<NumT, Eigen::Dynamic, Eigen::Dynamic>& A,
+                   Eigen::Matrix<NumT, Eigen::Dynamic, Eigen::Dynamic>* nearestPSD) {
+  assert(A.rows() == A.cols());
+  nearestPSD->resize(A.rows(), A.cols());
+
+  // symmetrize A into B
+  Eigen::MatrixXd B = (A + A.transpose()) / 2;
+
+  // eigendecomposition of B
+  Eigen::SelfAdjointEigenSolver<Eigen::MatrixXd> eig_B(B);
+  Eigen::VectorXd De = eig_B.eigenvalues();
+  Eigen::MatrixXd De_positive = (De.array() < 0).select(0, De).asDiagonal();
+  Eigen::MatrixXd Ve = eig_B.eigenvectors();
+  *nearestPSD = Ve * De_positive * Ve.transpose();
+}
+
+} // namespace teaser

+ 69 - 0
src/detection/detection_localization_global_hdl/include/teaser/macros.h

@@ -0,0 +1,69 @@
+/**
+ * Copyright 2020, Massachusetts Institute of Technology,
+ * Cambridge, MA 02139
+ * All Rights Reserved
+ * Authors: Jingnan Shi, et al. (see THANKS for the full author list)
+ * See LICENSE for the license information
+ */
+
+#pragma once
+
+#include <iostream>
+
+#define TEASER_DEBUG_ERROR_VAR(x)                                                                  \
+  do {                                                                                             \
+    std::cerr << #x << " = " << x << std::endl;                                                    \
+  } while (0)
+
+#define TEASER_INFO_MSG(x)                                                                         \
+  do {                                                                                             \
+    std::cout << x;                                                                                \
+  } while (0)
+
+#define TEASER_INFO_MSG_THROTTLE(x, i, N)                                                          \
+  do {                                                                                             \
+    if (i % N == 0) {                                                                              \
+      std::cout << x;                                                                              \
+    }                                                                                              \
+  } while (0)
+
+#if defined(NDEBUG) && !defined(TEASER_DIAG_PRINT)
+// Use NOOP to turn off the defined debug macros
+#define TEASER_DEBUG_ERROR_MSG(x)                                                                  \
+  do {                                                                                             \
+  } while (0)
+#define TEASER_DEBUG_INFO_MSG(x)                                                                   \
+  do {                                                                                             \
+  } while (0)
+// Timing macros
+#define TEASER_DEBUG_DECLARE_TIMING(s)                                                             \
+  do {                                                                                             \
+  } while (0)
+#define TEASER_DEBUG_START_TIMING(s)                                                               \
+  do {                                                                                             \
+  } while (0)
+#define TEASER_DEBUG_STOP_TIMING(s)                                                                \
+  do {                                                                                             \
+  } while (0)
+#define TEASER_DEBUG_GET_TIMING(s)                                                                 \
+  do {                                                                                             \
+  } while (0)
+#else
+// Debug messages
+#define TEASER_DEBUG_ERROR_MSG(x)                                                                  \
+  do {                                                                                             \
+    std::cerr << x << std::endl;                                                                   \
+  } while (0)
+#define TEASER_DEBUG_INFO_MSG(x)                                                                   \
+  do {                                                                                             \
+    std::cout << x << std::endl;                                                                   \
+  } while (0)
+// Timing macros
+#define TEASER_DEBUG_DECLARE_TIMING(s) std::chrono::steady_clock clock_##s;
+#define TEASER_DEBUG_START_TIMING(s) auto t_start_##s = clock_##s.now();
+#define TEASER_DEBUG_STOP_TIMING(s)                                                                \
+  auto t_end_##s = clock_##s.now();                                                                \
+  std::chrono::duration<double, std::milli> diff_dur_##s = t_end_##s - t_start_##s;                \
+  double diff_##s = diff_dur_##s.count();
+#define TEASER_DEBUG_GET_TIMING(s)(double)(diff_##s / 1000.0)
+#endif

+ 65 - 0
src/detection/detection_localization_global_hdl/include/teaser/matcher.h

@@ -0,0 +1,65 @@
+/**
+ * Copyright 2020, Massachusetts Institute of Technology,
+ * Cambridge, MA 02139
+ * All Rights Reserved
+ * Authors: Jingnan Shi, et al. (see THANKS for the full author list)
+ * See LICENSE for the license information
+ */
+
+#pragma once
+
+#include <flann/flann.hpp>
+#include <pcl/point_cloud.h>
+#include <pcl/point_types.h>
+
+#include "teaser/geometry.h"
+#include "fpfh.h"
+
+namespace teaser {
+
+class Matcher {
+public:
+  typedef std::vector<Eigen::VectorXf> Feature;
+  typedef flann::Index<flann::L2<float>> KDTree;
+
+  // New methods
+  // Public methods:
+  // 1. calculateCorrespondences
+  //    input: source point cloud, target point cloud
+  //    output: correspondences
+  Matcher() = default;
+
+  /**
+   * Calculate correspondences based on given features and point clouds.
+   * @param source_points
+   * @param target_points
+   * @param use_absolute_scale
+   * @param use_crosscheck
+   * @param use_tuple_test
+   * @return
+   */
+  std::vector<std::pair<int, int>>
+  calculateCorrespondences(teaser::PointCloud& source_points, teaser::PointCloud& target_points,
+                           teaser::FPFHCloud& source_features, teaser::FPFHCloud& target_features,
+                           bool use_absolute_scale = true, bool use_crosscheck = true,
+                           bool use_tuple_test = true, float tuple_scale = 0);
+
+private:
+  template <typename T> void buildKDTree(const std::vector<T>& data, KDTree* tree);
+
+  template <typename T>
+  void searchKDTree(KDTree* tree, const T& input, std::vector<int>& indices,
+                    std::vector<float>& dists, int nn);
+
+  void advancedMatching(bool use_crosscheck, bool use_tuple_test, float tuple_scale);
+
+  void normalizePoints(bool use_absolute_scale);
+
+  std::vector<std::pair<int, int>> corres_;
+  std::vector<teaser::PointCloud> pointcloud_;
+  std::vector<Feature> features_;
+  std::vector<Eigen::Vector3f, Eigen::aligned_allocator<Eigen::Vector3f> > means_; // for normalization
+  float global_scale_;
+};
+
+} // namespace teaser

+ 52 - 0
src/detection/detection_localization_global_hdl/include/teaser/ply_io.h

@@ -0,0 +1,52 @@
+/**
+ * Copyright 2020, Massachusetts Institute of Technology,
+ * Cambridge, MA 02139
+ * All Rights Reserved
+ * Authors: Jingnan Shi, et al. (see THANKS for the full author list)
+ * See LICENSE for the license information
+ */
+
+#pragma once
+
+#include "teaser/geometry.h"
+
+namespace teaser {
+
+/**
+ * @brief A class for reading PLY files.
+ *
+ */
+class PLYReader {
+public:
+  /**
+   * @brief Default constructor
+   */
+  PLYReader() {}
+
+  /**
+   * @brief A wrapper function for reading ply files into PointCloud
+   */
+  int read(const std::string& file_name, PointCloud& cloud);
+};
+
+/**
+ * @brief A class for writing PLY files.
+ */
+class PLYWriter {
+public:
+  /**
+   * @brief Default constructor
+   */
+  PLYWriter() {}
+
+  /**
+   * @brief Write point cloud to ply files
+   * @param file_name
+   * @param cloud
+   * @param binary_mode Set to true to write in binary mode
+   * @return A status code
+   */
+  int write(const std::string& file_name, const PointCloud& cloud, bool binary_mode = false);
+};
+
+} // namespace teaser

+ 825 - 0
src/detection/detection_localization_global_hdl/include/teaser/registration.h

@@ -0,0 +1,825 @@
+/**
+ * Copyright 2020, Massachusetts Institute of Technology,
+ * Cambridge, MA 02139
+ * All Rights Reserved
+ * Authors: Jingnan Shi, et al. (see THANKS for the full author list)
+ * See LICENSE for the license information
+ */
+
+#pragma once
+
+#include <memory>
+#include <vector>
+#include <tuple>
+
+#include <Eigen/Core>
+#include <Eigen/SVD>
+#include <Eigen/Geometry>
+
+#include "teaser/graph.h"
+#include "teaser/geometry.h"
+
+// TODO: might be a good idea to template Eigen::Vector3f and Eigen::VectorXf such that later on we
+// can decide to use doulbe if we want. Double vs float might give nontrivial differences..
+
+namespace teaser {
+
+/**
+ * Struct to hold solution to a registration problem
+ */
+struct RegistrationSolution {
+  bool valid = true;
+  double scale;
+  Eigen::Vector3d translation;
+  Eigen::Matrix3d rotation;
+
+  EIGEN_MAKE_ALIGNED_OPERATOR_NEW
+};
+
+/**
+ * Abstract virtual class for decoupling specific scale estimation methods with interfaces.
+ */
+class AbstractScaleSolver {
+public:
+  virtual ~AbstractScaleSolver() {}
+
+  /**
+   * Pure virtual method for solving scale. Different implementations may have different assumptions
+   * about input data.
+   * @param src
+   * @param dst
+   * @return estimated scale (s)
+   */
+  virtual void solveForScale(const Eigen::Matrix<double, 3, Eigen::Dynamic>& src,
+                             const Eigen::Matrix<double, 3, Eigen::Dynamic>& dst, double* scale,
+                             Eigen::Matrix<bool, 1, Eigen::Dynamic>* inliers) = 0;
+};
+
+/**
+ * Abstract virtual class for decoupling specific rotation estimation method implementations with
+ * interfaces.
+ */
+class AbstractRotationSolver {
+public:
+  virtual ~AbstractRotationSolver() {}
+
+  /**
+   * Pure virtual method for solving rotation. Different implementations may have different
+   * assumptions about input data.
+   * @param src
+   * @param dst
+   * @return estimated rotation matrix (R)
+   */
+  virtual void solveForRotation(const Eigen::Matrix<double, 3, Eigen::Dynamic>& src,
+                                const Eigen::Matrix<double, 3, Eigen::Dynamic>& dst,
+                                Eigen::Matrix3d* rotation,
+                                Eigen::Matrix<bool, 1, Eigen::Dynamic>* inliers) = 0;
+};
+
+/**
+ * Abstract virtual class for decoupling specific translation estimation method implementations with
+ * interfaces.
+ */
+class AbstractTranslationSolver {
+public:
+  virtual ~AbstractTranslationSolver() {}
+
+  /**
+   * Pure virtual method for solving translation. Different implementations may have different
+   * assumptions about input data.
+   * @param src
+   * @param dst
+   * @return estimated translation vector
+   */
+  virtual void solveForTranslation(const Eigen::Matrix<double, 3, Eigen::Dynamic>& src,
+                                   const Eigen::Matrix<double, 3, Eigen::Dynamic>& dst,
+                                   Eigen::Vector3d* translation,
+                                   Eigen::Matrix<bool, 1, Eigen::Dynamic>* inliers) = 0;
+};
+
+/**
+ * Performs scalar truncated least squares estimation
+ */
+class ScalarTLSEstimator {
+public:
+  ScalarTLSEstimator() = default;
+  /**
+   * Use truncated least squares method to estimate true x given measurements X
+   * TODO: call measurements Z or Y to avoid confusion with x
+   * TODO: specify which type/size is x and X in the comments
+   * @param X Available measurements
+   * @param ranges Maximum admissible errors for measurements X
+   * @param estimate (output) pointer to a double holding the estimate
+   * @param inliers (output) pointer to a Eigen row vector of inliers
+   */
+  void estimate(const Eigen::RowVectorXd& X, const Eigen::RowVectorXd& ranges, double* estimate,
+                Eigen::Matrix<bool, 1, Eigen::Dynamic>* inliers);
+
+  /**
+   * A slightly different implementation of TLS estimate. Use loop tiling to achieve potentially
+   * faster performance.
+   * @param X Available measurements
+   * @param ranges Maximum admissible errors for measurements X
+   * @param s scale for tiling
+   * @param estimate (output) pointer to a double holding the estimate
+   * @param inliers (output) pointer to a Eigen row vector of inliers
+   */
+  void estimate_tiled(const Eigen::RowVectorXd& X, const Eigen::RowVectorXd& ranges, const int& s,
+                      double* estimate, Eigen::Matrix<bool, 1, Eigen::Dynamic>* inliers);
+};
+
+/**
+ * Perform scale estimation using truncated least-squares (TLS)
+ */
+class TLSScaleSolver : public AbstractScaleSolver {
+public:
+  TLSScaleSolver() = delete;
+
+  explicit TLSScaleSolver(double noise_bound, double cbar2)
+      : noise_bound_(noise_bound), cbar2_(cbar2) {
+    assert(noise_bound > 0);
+    assert(cbar2 > 0);
+  };
+
+  /**
+   * Use TLS (adaptive voting) to solve for scale. Assume dst = s * R * src
+   * @param src
+   * @param dst
+   * @return a double indicating the estimated scale
+   */
+  void solveForScale(const Eigen::Matrix<double, 3, Eigen::Dynamic>& src,
+                     const Eigen::Matrix<double, 3, Eigen::Dynamic>& dst, double* scale,
+                     Eigen::Matrix<bool, 1, Eigen::Dynamic>* inliers) override;
+
+private:
+  double noise_bound_;
+  double cbar2_; // maximal allowed residual^2 to noise bound^2 ratio
+  ScalarTLSEstimator tls_estimator_;
+};
+
+/**
+ * Perform outlier pruning / inlier selection based on scale. This class does not perform scale
+ * estimation. Rather, it estimates outliers based on the assumption that there is no scale
+ * difference between the two provided vector of points.
+ */
+class ScaleInliersSelector : public AbstractScaleSolver {
+public:
+  ScaleInliersSelector() = delete;
+
+  explicit ScaleInliersSelector(double noise_bound, double cbar2)
+      : noise_bound_(noise_bound), cbar2_(cbar2){};
+  /**
+   * Assume dst = src + noise. The scale output will always be set to 1.
+   * @param src [in] a vector of points
+   * @param dst [in] a vector of points
+   * @param scale [out] a constant of 1
+   * @param inliers [out] a row vector of booleans indicating whether a measurement is an inlier
+   */
+  void solveForScale(const Eigen::Matrix<double, 3, Eigen::Dynamic>& src,
+                     const Eigen::Matrix<double, 3, Eigen::Dynamic>& dst, double* scale,
+                     Eigen::Matrix<bool, 1, Eigen::Dynamic>* inliers) override;
+
+private:
+  double noise_bound_;
+  double cbar2_; // maximal allowed residual^2 to noise bound^2 ratio
+};
+
+/**
+ * Perform translation estimation using truncated least-squares (TLS)
+ */
+class TLSTranslationSolver : public AbstractTranslationSolver {
+public:
+  TLSTranslationSolver() = delete;
+
+  explicit TLSTranslationSolver(double noise_bound, double cbar2)
+      : noise_bound_(noise_bound), cbar2_(cbar2){};
+
+  /**
+   * Estimate translation between src and dst points. Assume dst = src + t.
+   * @param src
+   * @param dst
+   * @param translation output parameter for the translation vector
+   * @param inliers output parameter for detected outliers
+   */
+  void solveForTranslation(const Eigen::Matrix<double, 3, Eigen::Dynamic>& src,
+                           const Eigen::Matrix<double, 3, Eigen::Dynamic>& dst,
+                           Eigen::Vector3d* translation,
+                           Eigen::Matrix<bool, 1, Eigen::Dynamic>* inliers) override;
+
+private:
+  double noise_bound_;
+  double cbar2_; // maximal allowed residual^2 to noise bound^2 ratio
+  ScalarTLSEstimator tls_estimator_;
+};
+
+/**
+ * Base class for GNC-based rotation solvers
+ */
+class GNCRotationSolver : public AbstractRotationSolver {
+
+public:
+  struct Params {
+    size_t max_iterations;
+    double cost_threshold;
+    double gnc_factor;
+    double noise_bound;
+  };
+
+  GNCRotationSolver(Params params) : params_(params) {}
+
+  Params getParams() { return params_; }
+
+  void setParams(Params params) { params_ = params; }
+
+  /**
+   * Return the cost of the GNC solver at termination. Details of the cost function is dependent on
+   * the specific solver implementation.
+   *
+   * @return cost at termination of the GNC solver. Undefined if run before running the solver.
+   */
+  double getCostAtTermination() { return cost_; }
+
+protected:
+  Params params_;
+  double cost_;
+};
+
+/**
+ * Use GNC-TLS to solve rotation estimation problems.
+ *
+ * For more information, please refer to:
+ * H. Yang, P. Antonante, V. Tzoumas, and L. Carlone, “Graduated Non-Convexity for Robust Spatial
+ * Perception: From Non-Minimal Solvers to Global Outlier Rejection,” arXiv:1909.08605 [cs, math],
+ * Sep. 2019.
+ */
+class GNCTLSRotationSolver : public GNCRotationSolver {
+public:
+  GNCTLSRotationSolver() = delete;
+
+  /**
+   * Parametrized constructor
+   * @param params
+   */
+  explicit GNCTLSRotationSolver(Params params) : GNCRotationSolver(params){};
+
+  /**
+   * Estimate rotation between src & dst using GNC-TLS method
+   * @param src
+   * @param dst
+   * @param rotation
+   * @param inliers
+   */
+  void solveForRotation(const Eigen::Matrix<double, 3, Eigen::Dynamic>& src,
+                        const Eigen::Matrix<double, 3, Eigen::Dynamic>& dst,
+                        Eigen::Matrix3d* rotation,
+                        Eigen::Matrix<bool, 1, Eigen::Dynamic>* inliers) override;
+};
+
+/**
+ * Use Fast Global Registration to solve for pairwise registration problems
+ *
+ * For more information, please see the original paper on FGR:
+ * Q.-Y. Zhou, J. Park, and V. Koltun, “Fast Global Registration,” in Computer Vision – ECCV 2016,
+ * Cham, 2016, vol. 9906, pp. 766–782.
+ * Notice that our implementation differ from the paper on the estimation of T matrix. We
+ * only estimate rotation, instead of rotation and translation.
+ *
+ */
+class FastGlobalRegistrationSolver : public GNCRotationSolver {
+public:
+  /**
+   * Remove default constructor
+   */
+  FastGlobalRegistrationSolver() = delete;
+
+  /**
+   * Parametrized constructor
+   * @param params
+   * @param rotation_only
+   */
+  explicit FastGlobalRegistrationSolver(Params params) : GNCRotationSolver(params){};
+
+  /**
+   * Solve a pairwise registration problem given two sets of points.
+   * Notice that we assume no scale difference between v1 & v2.
+   * @param src
+   * @param dst
+   * @return a RegistrationSolution struct.
+   */
+  void solveForRotation(const Eigen::Matrix<double, 3, Eigen::Dynamic>& src,
+                        const Eigen::Matrix<double, 3, Eigen::Dynamic>& dst,
+                        Eigen::Matrix3d* rotation,
+                        Eigen::Matrix<bool, 1, Eigen::Dynamic>* inliers) override;
+};
+
+/**
+ * Solve registration problems robustly.
+ *
+ * For more information, please refer to:
+ * H. Yang, J. Shi, and L. Carlone, “TEASER: Fast and Certifiable Point Cloud Registration,”
+ * arXiv:2001.07715 [cs, math], Jan. 2020.
+ */
+class RobustRegistrationSolver {
+public:
+  EIGEN_MAKE_ALIGNED_OPERATOR_NEW
+
+  /**
+   * An enum class representing the available GNC rotation estimation algorithms.
+   *
+   * GNC_TLS: see H. Yang, P. Antonante, V. Tzoumas, and L. Carlone, “Graduated Non-Convexity for
+   * Robust Spatial Perception: From Non-Minimal Solvers to Global Outlier Rejection,”
+   * arXiv:1909.08605 [cs, math], Sep. 2019.
+   *
+   * FGR: see Q.-Y. Zhou, J. Park, and V. Koltun, “Fast Global Registration,” in Computer Vision –
+   * ECCV 2016, Cham, 2016, vol. 9906, pp. 766–782. and H. Yang, P. Antonante, V. Tzoumas, and L.
+   * Carlone, “Graduated Non-Convexity for Robust Spatial Perception: From Non-Minimal Solvers to
+   * Global Outlier Rejection,” arXiv:1909.08605 [cs, math], Sep. 2019.
+   */
+  enum class ROTATION_ESTIMATION_ALGORITHM {
+    GNC_TLS = 0,
+    FGR = 1,
+  };
+
+  /**
+   * Enum representing the type of graph-based inlier selection algorithm to use
+   *
+   * PMC_EXACT: Use PMC to find exact clique from the inlier graph
+   * PMC_HEU: Use PMC's heuristic finder to find approximate max clique
+   * KCORE_HEU: Use k-core heuristic to select inliers
+   * NONE: No inlier selection
+   */
+  enum class INLIER_SELECTION_MODE {
+    PMC_EXACT = 0,
+    PMC_HEU = 1,
+    KCORE_HEU = 2,
+    NONE = 3,
+  };
+
+  /**
+   * Enum representing the formulation of the TIM graph after maximum clique filtering.
+   *
+   * CHAIN: formulate TIMs by only calculating the TIMs for consecutive measurements
+   * COMPLETE: formulate a fully connected TIM graph
+   */
+  enum class INLIER_GRAPH_FORMULATION {
+    CHAIN = 0,
+    COMPLETE = 1,
+  };
+
+  /**
+   * A struct representing params for initializing the RobustRegistrationSolver
+   *
+   * Note: the default values needed to be changed accordingly for best performance.
+   */
+  struct Params {
+
+    /**
+     * A bound on the noise of each provided measurement.
+     */
+    double noise_bound = 0.01;
+
+    /**
+     * Square of the ratio between acceptable noise and noise bound. Usually set to 1.
+     */
+    double cbar2 = 1;
+
+    /**
+     * Whether the scale is known. If set to False, the solver assumes no scale differences
+     * between the src and dst points. If set to True, the solver will first solve for scale.
+     *
+     * When the solver does not estimate scale, it solves the registration problem much faster.
+     */
+    bool estimate_scaling = true;
+
+    /**
+     * Which algorithm to use to estimate rotations.
+     */
+    ROTATION_ESTIMATION_ALGORITHM rotation_estimation_algorithm =
+        ROTATION_ESTIMATION_ALGORITHM::GNC_TLS;
+
+    /**
+     * Factor to multiple/divide the control parameter in the GNC algorithm.
+     *
+     * For FGR: the algorithm divides the control parameter by the factor every iteration.
+     * For GNC-TLS: the algorithm multiples the control parameter by the factor every iteration.
+     */
+    double rotation_gnc_factor = 1.4;
+
+    /**
+     * Maximum iterations allowed for the GNC rotation estimators.
+     */
+    size_t rotation_max_iterations = 100;
+
+    /**
+     * Cost threshold for the GNC rotation estimators.
+     *
+     * For FGR / GNC-TLS algorithm, the cost thresholds represent different meanings.
+     * For FGR: the cost threshold compares with the computed cost at each iteration
+     * For GNC-TLS: the cost threshold compares with the difference between costs of consecutive
+     * iterations.
+     */
+    double rotation_cost_threshold = 1e-6;
+
+    /**
+     * Type of TIM graph given to GNC rotation solver
+     */
+    INLIER_GRAPH_FORMULATION rotation_tim_graph = INLIER_GRAPH_FORMULATION::CHAIN;
+
+    /**
+     * \brief Type of the inlier selection
+     */
+    INLIER_SELECTION_MODE inlier_selection_mode = INLIER_SELECTION_MODE::PMC_EXACT;
+
+    /**
+     * \brief The threshold ratio for determining whether to skip max clique and go straightly to
+     * GNC rotation estimation. Set this to 1 to always use exact max clique selection, 0 to always
+     * skip exact max clique selection.
+     *
+     * \attention Note that the use_max_clique option takes precedence. In other words, if
+     * use_max_clique is set to false, then kcore_heuristic_threshold will be ignored. If
+     * use_max_clique is set to true, then the following will happen: if the max core number of the
+     * inlier graph is lower than the kcore_heuristic_threshold as a percentage of the total nodes
+     * in the inlier graph, then the code will preceed to call the max clique finder. Otherwise, the
+     * graph will be directly fed to the GNC rotation solver.
+     *
+     */
+    double kcore_heuristic_threshold = 0.5;
+
+    /**
+     * \deprecated Use inlier_selection_mode instead
+     * Set this to true to enable max clique inlier selection, false to skip it.
+     */
+    bool use_max_clique = true;
+
+    /**
+     * \deprecated Use inlier_selection_mode instead
+     * Set this to false to enable heuristic only max clique finding.
+     */
+    bool max_clique_exact_solution = true;
+
+    /**
+     * Time limit on running the max clique algorithm (in seconds).
+     */
+    double max_clique_time_limit = 3600;
+  };
+
+  RobustRegistrationSolver() = default;
+
+  /**
+   * A constructor that takes in parameters and initialize the estimators accordingly.
+   *
+   * This is the preferred way of initializing the different estimators, instead of setting
+   * each estimator one by one.
+   * @param params
+   */
+  RobustRegistrationSolver(const Params& params);
+
+  /**
+   * Given a 3-by-N matrix representing points, return Translation Invariant Measurements (TIMs)
+   * @param v a 3-by-N matrix
+   * @return a 3-by-(N-1)*N matrix representing TIMs
+   */
+  Eigen::Matrix<double, 3, Eigen::Dynamic>
+  computeTIMs(const Eigen::Matrix<double, 3, Eigen::Dynamic>& v,
+              Eigen::Matrix<int, 2, Eigen::Dynamic>* map);
+
+  /**
+   * Solve for scale, translation and rotation.
+   *
+   * @param src_cloud source point cloud (to be transformed)
+   * @param dst_cloud target point cloud (after transformation)
+   * @param correspondences A vector of tuples representing the correspondences between pairs of
+   * points in the two clouds
+   */
+  RegistrationSolution solve(const teaser::PointCloud& src_cloud,
+                             const teaser::PointCloud& dst_cloud,
+                             const std::vector<std::pair<int, int>> correspondences);
+
+  /**
+   * Solve for scale, translation and rotation. Assumes v2 is v1 after transformation.
+   * @param v1
+   * @param v2
+   */
+  RegistrationSolution solve(const Eigen::Matrix<double, 3, Eigen::Dynamic>& src,
+                             const Eigen::Matrix<double, 3, Eigen::Dynamic>& dst);
+
+  /**
+   * Solve for scale. Assume v2 = s * R * v1, this function estimates s.
+   * @param v1
+   * @param v2
+   */
+  double solveForScale(const Eigen::Matrix<double, 3, Eigen::Dynamic>& v1,
+                       const Eigen::Matrix<double, 3, Eigen::Dynamic>& v2);
+
+  /**
+   * Solve for translation.
+   * @param v1
+   * @param v2
+   */
+  Eigen::Vector3d solveForTranslation(const Eigen::Matrix<double, 3, Eigen::Dynamic>& v1,
+                                      const Eigen::Matrix<double, 3, Eigen::Dynamic>& v2);
+
+  /**
+   * Solve for rotation. Assume v2 = R * v1, this function estimates find R.
+   * @param v1
+   * @param v2
+   */
+  Eigen::Matrix3d solveForRotation(const Eigen::Matrix<double, 3, Eigen::Dynamic>& v1,
+                                   const Eigen::Matrix<double, 3, Eigen::Dynamic>& v2);
+
+  /**
+   * Return the cost at termination of the GNC rotation solver. Can be used to
+   * assess the quality of the solution.
+   *
+   * @return cost at termination of the GNC solver. Undefined if run before running the solver.
+   */
+  inline double getGNCRotationCostAtTermination() {
+    return rotation_solver_->getCostAtTermination();
+  }
+
+  /**
+   * Return the solution to the registration problem.
+   * @return
+   */
+  inline RegistrationSolution getSolution() { return solution_; };
+
+  /**
+   * Set the scale estimator used
+   * @param estimator
+   */
+  inline void setScaleEstimator(std::unique_ptr<AbstractScaleSolver> estimator) {
+    scale_solver_ = std::move(estimator);
+  }
+
+  /**
+   * Set the rotation estimator used.
+   *
+   * Note: due to the fact that rotation estimator takes in a noise bound that is dependent on the
+   * estimated scale, make sure to set the correct params before calling solve.
+   * @param estimator
+   */
+  inline void setRotationEstimator(std::unique_ptr<GNCRotationSolver> estimator) {
+    rotation_solver_ = std::move(estimator);
+  }
+
+  /**
+   * Set the translation estimator used.
+   * @param estimator
+   */
+  inline void setTranslationEstimator(std::unique_ptr<AbstractTranslationSolver> estimator) {
+    translation_solver_ = std::move(estimator);
+  }
+
+  /**
+   * Return a boolean Eigen row vector indicating whether specific measurements are inliers
+   * according to scales.
+   *
+   * @return a 1-by-(number of TIMs) boolean Eigen matrix
+   */
+  inline Eigen::Matrix<bool, 1, Eigen::Dynamic> getScaleInliersMask() {
+    return scale_inliers_mask_;
+  }
+
+  /**
+   * Return the index map for scale inliers (equivalent to the index map for TIMs).
+   *
+   * @return a 2-by-(number of TIMs) Eigen matrix. Entries in one column represent the indices of
+   * the two measurements used to calculate the corresponding TIM.
+   */
+  inline Eigen::Matrix<int, 2, Eigen::Dynamic> getScaleInliersMap() { return src_tims_map_; }
+
+  /**
+   * Return inlier TIMs from scale estimation
+   *
+   * @return a vector of tuples. Entries in each tuple represent the indices of
+   * the two measurements used to calculate the corresponding TIM: measurement at indice 0 minus
+   * measurement at indice 1.
+   */
+  inline std::vector<std::tuple<int, int>> getScaleInliers() {
+    std::vector<std::tuple<int, int>> result;
+    for (size_t i = 0; i < scale_inliers_mask_.cols(); ++i) {
+      if (scale_inliers_mask_(i)) {
+        result.emplace_back(src_tims_map_(0, i), src_tims_map_(1, i));
+      }
+    }
+    return result;
+  }
+
+  /**
+   * Return a boolean Eigen row vector indicating whether specific measurements are inliers
+   * according to the rotation solver.
+   *
+   * @return a 1-by-(size of TIMs used in rotation estimation) boolean Eigen matrix. It is
+   * equivalent to a binary mask on the TIMs used in rotation estimation, with true representing
+   * that the measurement is an inlier after rotation estimation.
+   */
+  inline Eigen::Matrix<bool, 1, Eigen::Dynamic> getRotationInliersMask() {
+    return rotation_inliers_mask_;
+  }
+
+  /**
+   * Return the index map for translation inliers (equivalent to max clique).
+   * /TODO: This is obsolete now. Remove or update
+   *
+   * @return a 1-by-(size of max clique) Eigen matrix. Entries represent the indices of the original
+   * measurements.
+   */
+  inline Eigen::Matrix<int, 1, Eigen::Dynamic> getRotationInliersMap() {
+    Eigen::Matrix<int, 1, Eigen::Dynamic> map = Eigen::Map<Eigen::Matrix<int, 1, Eigen::Dynamic>>(
+        max_clique_.data(), 1, max_clique_.size());
+    return map;
+  }
+
+  /**
+   * Return inliers from rotation estimation
+   *
+   * @return a vector of indices of TIMs passed to rotation estimator deemed as inliers by rotation
+   * estimation. Note that depending on the rotation_tim_graph parameter, number of TIMs passed to
+   * rotation estimation will be different.
+   */
+  inline std::vector<int> getRotationInliers() { return rotation_inliers_; }
+
+  /**
+   * Return a boolean Eigen row vector indicating whether specific measurements are inliers
+   * according to translation measurements.
+   *
+   * @return a 1-by-(size of max clique) boolean Eigen matrix. It is equivalent to a binary mask on
+   * the inlier max clique, with true representing that the measurement is an inlier after
+   * translation estimation.
+   */
+  inline Eigen::Matrix<bool, 1, Eigen::Dynamic> getTranslationInliersMask() {
+    return translation_inliers_mask_;
+  }
+
+  /**
+   * Return the index map for translation inliers (equivalent to max clique).
+   *
+   * @return a 1-by-(size of max clique) Eigen matrix. Entries represent the indices of the original
+   * measurements.
+   */
+  inline Eigen::Matrix<int, 1, Eigen::Dynamic> getTranslationInliersMap() {
+    Eigen::Matrix<int, 1, Eigen::Dynamic> map = Eigen::Map<Eigen::Matrix<int, 1, Eigen::Dynamic>>(
+        max_clique_.data(), 1, max_clique_.size());
+    return map;
+  }
+
+  /**
+   * Return inliers from rotation estimation
+   *
+   * @return a vector of indices of measurements deemed as inliers by rotation estimation
+   */
+  inline std::vector<int> getTranslationInliers() { return translation_inliers_; }
+
+  /**
+   * Return a boolean Eigen row vector indicating whether specific measurements are inliers
+   * according to translation measurements.
+   * @return
+   */
+  inline std::vector<int> getInlierMaxClique() { return max_clique_; }
+
+  inline std::vector<std::vector<int>> getInlierGraph() { return inlier_graph_.getAdjList(); }
+
+  /**
+   * Get TIMs built from source point cloud.
+   * @return
+   */
+  inline Eigen::Matrix<double, 3, Eigen::Dynamic> getSrcTIMs() { return src_tims_; }
+
+  /**
+   * Get TIMs built from target point cloud.
+   * @return
+   */
+  inline Eigen::Matrix<double, 3, Eigen::Dynamic> getDstTIMs() { return dst_tims_; }
+
+  /**
+   * Get src TIMs built after max clique pruning.
+   * @return
+   */
+  inline Eigen::Matrix<double, 3, Eigen::Dynamic> getMaxCliqueSrcTIMs() { return pruned_src_tims_; }
+
+  /**
+   * Get dst TIMs built after max clique pruning.
+   * @return
+   */
+  inline Eigen::Matrix<double, 3, Eigen::Dynamic> getMaxCliqueDstTIMs() { return pruned_dst_tims_; }
+
+  /**
+   * Get the index map of the TIMs built from source point cloud.
+   * @return
+   */
+  inline Eigen::Matrix<int, 2, Eigen::Dynamic> getSrcTIMsMap() { return src_tims_map_; }
+
+  /**
+   * Get the index map of the TIMs built from target point cloud.
+   * @return
+   */
+  inline Eigen::Matrix<int, 2, Eigen::Dynamic> getDstTIMsMap() { return dst_tims_map_; }
+
+  /**
+   * Get the index map of the TIMs used in rotation estimation.
+   * @return
+   */
+  inline Eigen::Matrix<int, 2, Eigen::Dynamic> getSrcTIMsMapForRotation() {
+    return src_tims_map_rotation_;
+  }
+
+  /**
+   * Get the index map of the TIMs used in rotation estimation.
+   * @return
+   */
+  inline Eigen::Matrix<int, 2, Eigen::Dynamic> getDstTIMsMapForRotation() {
+    return dst_tims_map_rotation_;
+  }
+
+  /**
+   * Reset the solver using the provided params
+   * @param params a Params struct
+   */
+  void reset(const Params& params) {
+    params_ = params;
+
+    // Initialize the scale estimator
+    if (params_.estimate_scaling) {
+      setScaleEstimator(
+          std::make_unique<teaser::TLSScaleSolver>(params_.noise_bound, params_.cbar2));
+    } else {
+      setScaleEstimator(
+          std::make_unique<teaser::ScaleInliersSelector>(params_.noise_bound, params_.cbar2));
+    }
+
+    // Initialize the rotation estimator
+    teaser::GNCRotationSolver::Params rotation_params{
+        params_.rotation_max_iterations, params_.rotation_cost_threshold,
+        params_.rotation_gnc_factor, params_.noise_bound};
+    switch (params_.rotation_estimation_algorithm) {
+    case ROTATION_ESTIMATION_ALGORITHM::GNC_TLS: { // GNC-TLS method
+      setRotationEstimator(std::make_unique<teaser::GNCTLSRotationSolver>(rotation_params));
+      break;
+    }
+    case ROTATION_ESTIMATION_ALGORITHM::FGR: { // FGR method
+      setRotationEstimator(std::make_unique<teaser::FastGlobalRegistrationSolver>(rotation_params));
+      break;
+    }
+    }
+
+    // Initialize the translation estimator
+    setTranslationEstimator(
+        std::make_unique<teaser::TLSTranslationSolver>(params_.noise_bound, params_.cbar2));
+
+    // Clear member variables
+    max_clique_.clear();
+    rotation_inliers_.clear();
+    translation_inliers_.clear();
+    inlier_graph_.clear();
+  }
+
+  /**
+   * Return the params
+   * @return a Params struct
+   */
+  Params getParams() { return params_; }
+
+private:
+  Params params_;
+  RegistrationSolution solution_;
+
+  // Inlier Binary Vectors
+  Eigen::Matrix<bool, 1, Eigen::Dynamic> scale_inliers_mask_;
+  Eigen::Matrix<bool, 1, Eigen::Dynamic> rotation_inliers_mask_;
+  Eigen::Matrix<bool, 1, Eigen::Dynamic> translation_inliers_mask_;
+
+  // TIMs
+  // TIMs used for scale estimation/pruning
+  Eigen::Matrix<double, 3, Eigen::Dynamic> src_tims_;
+  Eigen::Matrix<double, 3, Eigen::Dynamic> dst_tims_;
+  // TIMs used for rotation estimation
+  Eigen::Matrix<double, 3, Eigen::Dynamic> pruned_src_tims_;
+  Eigen::Matrix<double, 3, Eigen::Dynamic> pruned_dst_tims_;
+
+  // TIM maps
+  // for scale estimation
+  Eigen::Matrix<int, 2, Eigen::Dynamic> src_tims_map_;
+  Eigen::Matrix<int, 2, Eigen::Dynamic> dst_tims_map_;
+  // for rotation estimation
+  Eigen::Matrix<int, 2, Eigen::Dynamic> src_tims_map_rotation_;
+  Eigen::Matrix<int, 2, Eigen::Dynamic> dst_tims_map_rotation_;
+
+  // Max clique vector
+  std::vector<int> max_clique_;
+
+  // Inliers after rotation estimation
+  std::vector<int> rotation_inliers_;
+
+  // Inliers after translation estimation (final inliers)
+  std::vector<int> translation_inliers_;
+
+  // Inlier graph
+  teaser::Graph inlier_graph_;
+
+  // Ptrs to Solvers
+  std::unique_ptr<AbstractScaleSolver> scale_solver_;
+  std::unique_ptr<GNCRotationSolver> rotation_solver_;
+  std::unique_ptr<AbstractTranslationSolver> translation_solver_;
+};
+
+} // namespace teaser

+ 174 - 0
src/detection/detection_localization_global_hdl/include/teaser/utils.h

@@ -0,0 +1,174 @@
+/**
+ * Copyright 2020, Massachusetts Institute of Technology,
+ * Cambridge, MA 02139
+ * All Rights Reserved
+ * Authors: Jingnan Shi, et al. (see THANKS for the full author list)
+ * See LICENSE for the license information
+ */
+
+#pragma once
+
+#include <unordered_set>
+#include <vector>
+
+#include <Eigen/Core>
+#include <Eigen/SVD>
+
+namespace teaser {
+namespace utils {
+
+/**
+ * A templated random sample function (w/o replacement). Based on MATLAB implementation of
+ * randsample()
+ * @tparam T A number type
+ * @tparam URBG A UniformRandomBitGenerator type
+ * @param input A input vector containing the whole population
+ * @param num_samples Number of samples we want
+ * @param g
+ * @return
+ */
+template <class T, class URBG>
+std::vector<T> randomSample(std::vector<T> input, size_t num_samples, URBG&& g) {
+
+  std::vector<T> output;
+  output.reserve(num_samples);
+  if (4 * num_samples > input.size()) {
+    // if the sample is a sizeable fraction of the whole population,
+    // just randomly shuffle the entire population and return the
+    // first num_samples
+    std::shuffle(input.begin(), input.end(), g);
+    for (size_t i = 0; i < num_samples; ++i) {
+      output.push_back(input[i]);
+    }
+  } else {
+    // if the sample is small, repeatedly sample with replacement until num_samples
+    // unique values
+    std::unordered_set<size_t> sample_indices;
+    std::uniform_int_distribution<> dis(0, input.size());
+    while (sample_indices.size() < num_samples) {
+      sample_indices.insert(dis(std::forward<URBG>(g)));
+    }
+    for (auto&& i : sample_indices) {
+      output.push_back(input[i]);
+    }
+  }
+  return output;
+}
+
+/**
+ * Remove one row from matrix.
+ * Credit to: https://stackoverflow.com/questions/13290395
+ * @param matrix an Eigen::Matrix.
+ * @param rowToRemove index of row to remove. If >= matrix.rows(), no operation will be taken
+ */
+template <class T, int R, int C>
+void removeRow(Eigen::Matrix<T, R, C>& matrix, unsigned int rowToRemove) {
+  if (rowToRemove >= matrix.rows()) {
+    return;
+  }
+  unsigned int numRows = matrix.rows() - 1;
+  unsigned int numCols = matrix.cols();
+
+  if (rowToRemove < numRows) {
+    matrix.block(rowToRemove, 0, numRows - rowToRemove, numCols) =
+        matrix.bottomRows(numRows - rowToRemove);
+  }
+
+  matrix.conservativeResize(numRows, numCols);
+}
+
+/**
+ * Remove one column from matrix.
+ * Credit to: https://stackoverflow.com/questions/13290395
+ * @param matrix
+ * @param colToRemove index of col to remove. If >= matrix.cols(), no operation will be taken
+ */
+template <class T, int R, int C>
+void removeColumn(Eigen::Matrix<T, R, C>& matrix, unsigned int colToRemove) {
+  if (colToRemove >= matrix.cols()) {
+    return;
+  }
+  unsigned int numRows = matrix.rows();
+  unsigned int numCols = matrix.cols() - 1;
+
+  if (colToRemove < numCols) {
+    matrix.block(0, colToRemove, numRows, numCols - colToRemove) =
+        matrix.rightCols(numCols - colToRemove);
+  }
+
+  matrix.conservativeResize(numRows, numCols);
+}
+
+/**
+ * Helper function to calculate the diameter of a row vector of points
+ * @param X
+ * @return the diameter of the set of points given
+ */
+template <class T, int D> float calculateDiameter(const Eigen::Matrix<T, D, Eigen::Dynamic>& X) {
+  Eigen::Matrix<T, D, 1> cog = X.rowwise().sum() / X.cols();
+  Eigen::Matrix<T, D, Eigen::Dynamic> P = X.colwise() - cog;
+  Eigen::Matrix<T, 1, Eigen::Dynamic> temp = P.array().square().colwise().sum();
+  return 2 * std::sqrt(temp.maxCoeff());
+}
+
+/**
+ * Helper function to use svd to estimate rotation.
+ * Method described here: http://igl.ethz.ch/projects/ARAP/svd_rot.pdf
+ * @param X
+ * @param Y
+ * @return a rotation matrix R
+ */
+inline Eigen::Matrix3d svdRot(const Eigen::Matrix<double, 3, Eigen::Dynamic>& X,
+                              const Eigen::Matrix<double, 3, Eigen::Dynamic>& Y,
+                              const Eigen::Matrix<double, 1, Eigen::Dynamic>& W) {
+  // Assemble the correlation matrix H = X * Y'
+  Eigen::Matrix3d H = X * W.asDiagonal() * Y.transpose();
+
+  Eigen::JacobiSVD<Eigen::Matrix3d> svd(H, Eigen::ComputeFullU | Eigen::ComputeFullV);
+  Eigen::Matrix3d U = svd.matrixU();
+  Eigen::Matrix3d V = svd.matrixV();
+
+  if (U.determinant() * V.determinant() < 0) {
+    V.col(2) *= -1;
+  }
+
+  return V * U.transpose();
+}
+
+/**
+ * Use an boolean Eigen matrix to mask a vector
+ * @param mask a 1-by-N boolean Eigen matrix
+ * @param elements vector to be masked
+ * @return
+ */
+template <class T>
+inline std::vector<T> maskVector(Eigen::Matrix<bool, 1, Eigen::Dynamic> mask,
+                                 const std::vector<T>& elements) {
+  assert(mask.cols() == elements.size());
+  std::vector<T> result;
+  for (size_t i = 0; i < mask.cols(); ++i) {
+    if (mask(i)) {
+      result.emplace_back(elements[i]);
+    }
+  }
+  return result;
+}
+
+/**
+ * Get indices of non-zero elements in an Eigen row vector
+ * @param mask a 1-by-N boolean Eigen matrix
+ * @return A vector containing indices of the true elements in the row vector
+ */
+template <class T>
+inline std::vector<int> findNonzero(Eigen::Matrix<T, 1, Eigen::Dynamic> mask) {
+  std::vector<int> result;
+  for (size_t i = 0; i < mask.cols(); ++i) {
+    if (mask(i)) {
+      result.emplace_back(i);
+    }
+  }
+  return result;
+}
+
+} // namespace utils
+} // namespace teaser

+ 902 - 0
src/detection/detection_localization_global_hdl/include/tinyply.h

@@ -0,0 +1,902 @@
+/*
+ * tinyply 2.2 (https://github.com/ddiakopoulos/tinyply)
+ *
+ * A single-header, zero-dependency (except the C++ STL) public domain implementation 
+ * of the PLY mesh file format. Requires C++11; errors are handled through exceptions.
+ *
+ * This software is in the public domain. Where that dedication is not
+ * recognized, you are granted a perpetual, irrevocable license to copy,
+ * distribute, and modify this file as you see fit.
+ *
+ * Authored by Dimitri Diakopoulos (http://www.dimitridiakopoulos.com)
+ *
+ * tinyply.h may be included in many files, however in a single compiled file,
+ * the implementation must be created with the following defined
+ * before including the header.
+ * #define TINYPLY_IMPLEMENTATION
+ */
+
+////////////////////////
+//   tinyply header   //
+////////////////////////
+
+#ifndef tinyply_h
+#define tinyply_h
+
+#include <vector>
+#include <string>
+#include <stdint.h>
+#include <sstream>
+#include <memory>
+#include <unordered_map>
+#include <map>
+
+namespace tinyply
+{
+
+    enum class Type : uint8_t
+    {
+        INVALID,
+        INT8,
+        UINT8,
+        INT16,
+        UINT16,
+        INT32,
+        UINT32,
+        FLOAT32,
+        FLOAT64
+    };
+
+    struct PropertyInfo
+    {
+        PropertyInfo() {};
+        PropertyInfo(int stride, std::string str)
+            : stride(stride), str(str) {}
+        int stride {0};
+        std::string str;
+    };
+
+    static std::map<Type, PropertyInfo> PropertyTable
+    {
+        { Type::INT8,    PropertyInfo(1, std::string("char")) },
+        { Type::UINT8,   PropertyInfo(1, std::string("uchar")) },
+        { Type::INT16,   PropertyInfo(2, std::string("short")) },
+        { Type::UINT16,  PropertyInfo(2, std::string("ushort")) },
+        { Type::INT32,   PropertyInfo(4, std::string("int")) },
+        { Type::UINT32,  PropertyInfo(4, std::string("uint")) },
+        { Type::FLOAT32, PropertyInfo(4, std::string("float")) },
+        { Type::FLOAT64, PropertyInfo(8, std::string("double")) },
+        { Type::INVALID, PropertyInfo(0, std::string("INVALID"))}
+    };
+
+    class Buffer
+    {
+        uint8_t * alias{ nullptr };
+        struct delete_array { void operator()(uint8_t * p) { delete[] p; } };
+        std::unique_ptr<uint8_t, decltype(Buffer::delete_array())> data;
+        size_t size {0};
+    public:
+        Buffer() {};
+        Buffer(const size_t size) : data(new uint8_t[size], delete_array()), size(size) { alias = data.get(); } // allocating
+        Buffer(uint8_t * ptr): alias(ptr) { } // non-allocating, todo: set size?
+        uint8_t * get() { return alias; }
+        size_t size_bytes() const { return size; }
+    };
+
+    struct PlyData
+    {
+        Type t;
+        Buffer buffer;
+        size_t count {0};
+        bool isList {false};
+    };
+
+    struct PlyProperty
+    {
+        PlyProperty(std::istream & is);
+        PlyProperty(Type type, std::string & _name) : name(_name), propertyType(type) {}
+        PlyProperty(Type list_type, Type prop_type, std::string & _name, size_t list_count) 
+            : name(_name), propertyType(prop_type), isList(true), listType(list_type), listCount(list_count) {}
+        std::string name;
+        Type propertyType;
+        bool isList{ false };
+        Type listType{ Type::INVALID };
+        size_t listCount{ 0 };
+    };
+
+    struct PlyElement
+    {
+        PlyElement(std::istream & istream);
+        PlyElement(const std::string & _name, size_t count) : name(_name), size(count) {}
+        std::string name;
+        size_t size {0};
+        std::vector<PlyProperty> properties;
+    };
+
+    struct PlyFile
+    {
+        struct PlyFileImpl;
+        std::unique_ptr<PlyFileImpl> impl;
+
+        PlyFile();
+        ~PlyFile();
+
+        /*
+         * The ply format requires an ascii header. This can be used to determine at 
+         * runtime which properties or elements exist in the file. Limited validation of the 
+         * header is performed; it is assumed the header correctly reflects the contents of the 
+         * payload. This function may throw. Returns true on success, false on failure. 
+         */ 
+        bool parse_header(std::istream & is);
+
+        /* 
+         * Execute a read operation. Data must be requested via `request_properties_from_element(...)` 
+         * prior to calling this function.
+         */
+        void read(std::istream & is);
+
+        /* 
+         * `write` performs no validation and assumes that the data passed into 
+         * `add_properties_to_element` is well-formed. 
+         */
+        void write(std::ostream & os, bool isBinary);
+
+        /* 
+         * These functions are valid after a call to `parse_header(...)`. In the case of
+         * writing, get_comments() may also be used to add new comments to the ply header.
+         */
+        std::vector<PlyElement> get_elements() const;
+        std::vector<std::string> get_info() const;
+        std::vector<std::string> & get_comments();
+
+        /*
+         * In the general case where |list_size_hint| is zero, `read` performs a two-pass
+         * parse to support variable length lists. The most general use of the
+         * ply format is storing triangle meshes. When this fact is known a-priori, we can pass
+         * an expected list length that will apply to this element. Doing so results in an up-front
+         * memory allocation and a single-pass import, a 2x performance optimization.
+         */
+        std::shared_ptr<PlyData> request_properties_from_element(const std::string & elementKey, 
+            const std::initializer_list<std::string> propertyKeys, const uint32_t list_size_hint = 0);
+
+        void add_properties_to_element(const std::string & elementKey, 
+            const std::initializer_list<std::string> propertyKeys, 
+            const Type type, 
+            const size_t count, 
+            uint8_t * data, 
+            const Type listType, 
+            const size_t listCount);
+    };
+
+} // end namespace tinyply
+
+#endif // end tinyply_h
+
+////////////////////////////////
+//   tinyply implementation   //
+////////////////////////////////
+
+#ifdef TINYPLY_IMPLEMENTATION
+
+#include <algorithm>
+#include <functional>
+#include <type_traits>
+#include <iostream>
+#include <cstring>
+
+using namespace tinyply;
+using namespace std;
+
+template<typename T, typename T2> inline T2 endian_swap(const T & v) { return v; }
+template<> inline uint16_t endian_swap<uint16_t, uint16_t>(const uint16_t & v) { return (v << 8) | (v >> 8); }
+template<> inline uint32_t endian_swap<uint32_t, uint32_t>(const uint32_t & v) { return (v << 24) | ((v << 8) & 0x00ff0000) | ((v >> 8) & 0x0000ff00) | (v >> 24); }
+template<> inline uint64_t endian_swap<uint64_t, uint64_t>(const uint64_t & v)
+{
+    return (((v & 0x00000000000000ffLL) << 56) |
+            ((v & 0x000000000000ff00LL) << 40) |
+            ((v & 0x0000000000ff0000LL) << 24) |
+            ((v & 0x00000000ff000000LL) << 8)  |
+            ((v & 0x000000ff00000000LL) >> 8)  |
+            ((v & 0x0000ff0000000000LL) >> 24) |
+            ((v & 0x00ff000000000000LL) >> 40) |
+            ((v & 0xff00000000000000LL) >> 56));
+}
+template<> inline int16_t endian_swap<int16_t, int16_t>(const int16_t & v) { uint16_t r = endian_swap<uint16_t, uint16_t>(*(uint16_t*)&v); return *(int16_t*)&r; }
+template<> inline int32_t endian_swap<int32_t, int32_t>(const int32_t & v) { uint32_t r = endian_swap<uint32_t, uint32_t>(*(uint32_t*)&v); return *(int32_t*)&r; }
+template<> inline int64_t endian_swap<int64_t, int64_t>(const int64_t & v) { uint64_t r = endian_swap<uint64_t, uint64_t>(*(uint64_t*)&v); return *(int64_t*)&r; }
+template<> inline float endian_swap<uint32_t, float>(const uint32_t & v) { union { float f; uint32_t i; }; i = endian_swap<uint32_t, uint32_t>(v); return f; }
+template<> inline double endian_swap<uint64_t, double>(const uint64_t & v) { union { double d; uint64_t i; }; i = endian_swap<uint64_t, uint64_t>(v); return d; }
+
+inline uint32_t hash_fnv1a(const std::string & str)
+{
+    static const uint32_t fnv1aBase32 = 0x811C9DC5u;
+    static const uint32_t fnv1aPrime32 = 0x01000193u;
+    uint32_t result = fnv1aBase32;
+    for (auto & c : str) { result ^= static_cast<uint32_t>(c); result *= fnv1aPrime32; }
+    return result;
+}
+
+inline Type property_type_from_string(const std::string & t)
+{
+    if (t == "int8" || t == "char")           return Type::INT8;
+    else if (t == "uint8" || t == "uchar")    return Type::UINT8;
+    else if (t == "int16" || t == "short")    return Type::INT16;
+    else if (t == "uint16" || t == "ushort")  return Type::UINT16;
+    else if (t == "int32" || t == "int")      return Type::INT32;
+    else if (t == "uint32" || t == "uint")    return Type::UINT32;
+    else if (t == "float32" || t == "float")  return Type::FLOAT32;
+    else if (t == "float64" || t == "double") return Type::FLOAT64;
+    return Type::INVALID;
+}
+
+typedef std::function<void(void * dest, const char * src, bool be)> cast_t;
+
+struct PlyFile::PlyFileImpl
+{
+    struct PlyDataCursor
+    {
+        size_t byteOffset{ 0 };
+        size_t totalSizeBytes{ 0 };
+    };
+
+    struct ParsingHelper
+    {
+        std::shared_ptr<PlyData> data;
+        std::shared_ptr<PlyDataCursor> cursor;
+        uint32_t list_size_hint;
+    };
+
+    struct PropertyLookup
+    {
+        ParsingHelper * helper{ nullptr };
+        bool skip{ false };
+        size_t prop_stride{ 0 }; // precomputed
+        size_t list_stride{ 0 }; // precomputed
+    };
+
+    std::unordered_map<uint32_t, ParsingHelper> userData;
+
+    bool isBinary = false;
+    bool isBigEndian = false;
+    std::vector<PlyElement> elements;
+    std::vector<std::string> comments;
+    std::vector<std::string> objInfo;
+    uint8_t scratch[64]; // large enough for max list size
+
+    void read(std::istream & is);
+    void write(std::ostream & os, bool isBinary);
+
+    std::shared_ptr<PlyData> request_properties_from_element(const std::string & elementKey,
+        const std::initializer_list<std::string> propertyKeys,
+        const uint32_t list_size_hint);
+
+    void add_properties_to_element(const std::string & elementKey,
+        const std::initializer_list<std::string> propertyKeys,
+        const Type type, const size_t count, uint8_t * data, const Type listType, const size_t listCount);
+
+    size_t read_property_binary(const Type & t, const size_t & stride, void * dest, size_t & destOffset, std::istream & is);
+    size_t read_property_ascii(const Type & t, const size_t & stride, void * dest, size_t & destOffset, std::istream & is);
+
+    std::vector<std::vector<PropertyLookup>> make_property_lookup_table()
+    {
+        std::vector<std::vector<PropertyLookup>> element_property_lookup;
+
+        for (auto & element : elements)
+        {
+            std::vector<PropertyLookup> lookups;
+
+            for (auto & property : element.properties)
+            {
+                PropertyLookup f;
+
+                auto cursorIt = userData.find(hash_fnv1a(element.name + property.name));
+                if (cursorIt != userData.end()) f.helper = &cursorIt->second;
+                else f.skip = true;
+
+                f.prop_stride = PropertyTable[property.propertyType].stride;
+                if (property.isList) f.list_stride = PropertyTable[property.listType].stride;
+
+                lookups.push_back(f);
+            }
+
+            element_property_lookup.push_back(lookups);
+        }
+
+        return element_property_lookup;
+    }
+
+    bool parse_header(std::istream & is);
+    void parse_data(std::istream & is, bool firstPass);
+    void read_header_format(std::istream & is);
+    void read_header_element(std::istream & is);
+    void read_header_property(std::istream & is);
+    void read_header_text(std::string line, std::istream & is, std::vector<std::string> & place, int erase = 0);
+
+    void write_header(std::ostream & os);
+    void write_ascii_internal(std::ostream & os);
+    void write_binary_internal(std::ostream & os);
+    void write_property_ascii(Type t, std::ostream & os, uint8_t * src, size_t & srcOffset);
+    void write_property_binary(Type t, std::ostream & os, uint8_t * src, size_t & srcOffset, const size_t & stride);
+};
+
+PlyProperty::PlyProperty(std::istream & is) : isList(false)
+{
+    std::string type;
+    is >> type;
+    if (type == "list")
+    {
+        std::string countType;
+        is >> countType >> type;
+        listType = property_type_from_string(countType);
+        isList = true;
+    }
+    propertyType = property_type_from_string(type);
+    is >> name;
+}
+
+PlyElement::PlyElement(std::istream & is)
+{
+    is >> name >> size;
+}
+
+template<typename T> inline T ply_read_ascii(std::istream & is)
+{
+    T data;
+    is >> data;
+    return data;
+}
+
+template<typename T, typename T2>
+inline void endian_swap_buffer(uint8_t * data_ptr, const size_t num_bytes, const size_t stride)
+{
+    for (size_t count = 0; count < num_bytes; count += stride)
+    {
+        *(reinterpret_cast<T2 *>(data_ptr)) = endian_swap<T, T2>(*(reinterpret_cast<const T *>(data_ptr)));
+        data_ptr += stride;
+    }
+}
+
+template<typename T> void ply_cast_ascii(void * dest, std::istream & is)
+{
+    *(static_cast<T *>(dest)) = ply_read_ascii<T>(is);
+}
+
+int64_t find_element(const std::string & key, const std::vector<PlyElement> & list)
+{
+    for (size_t i = 0; i < list.size(); i++) if (list[i].name == key) return i;
+    return -1;
+}
+
+int64_t find_property(const std::string & key, const std::vector<PlyProperty> & list)
+{
+    for (size_t i = 0; i < list.size(); ++i) if (list[i].name == key) return i;
+    return -1;
+}
+
+bool PlyFile::PlyFileImpl::parse_header(std::istream & is)
+{
+    std::string line;
+    while (std::getline(is, line))
+    {
+        std::istringstream ls(line);
+        std::string token;
+        ls >> token;
+        if (token == "ply" || token == "PLY" || token == "") continue;
+        else if (token == "comment")    read_header_text(line, ls, comments, 8);
+        else if (token == "format")     read_header_format(ls);
+        else if (token == "element")    read_header_element(ls);
+        else if (token == "property")   read_header_property(ls);
+        else if (token == "obj_info")   read_header_text(line, ls, objInfo, 9);
+        else if (token == "end_header") break;
+        else return false; // unexpected header field
+    }
+    return true;
+}
+
+void PlyFile::PlyFileImpl::read_header_text(std::string line, std::istream & is, std::vector<std::string>& place, int erase)
+{
+    place.push_back((erase > 0) ? line.erase(0, erase) : line);
+}
+
+void PlyFile::PlyFileImpl::read_header_format(std::istream & is)
+{
+    std::string s;
+    (is >> s);
+    if (s == "binary_little_endian") isBinary = true;
+    else if (s == "binary_big_endian") isBinary = isBigEndian = true;
+}
+
+void PlyFile::PlyFileImpl::read_header_element(std::istream & is)
+{
+    elements.emplace_back(is);
+}
+
+void PlyFile::PlyFileImpl::read_header_property(std::istream & is)
+{
+    if (!elements.size()) throw std::runtime_error("no elements defined; file is malformed");
+    elements.back().properties.emplace_back(is);
+}
+
+size_t PlyFile::PlyFileImpl::read_property_binary(const Type & t, const size_t & stride, void * dest, size_t & destOffset, std::istream & is)
+{
+    destOffset += stride;
+    is.read((char*)dest, stride);
+    return stride;
+}
+
+size_t PlyFile::PlyFileImpl::read_property_ascii(const Type & t, const size_t & stride, void * dest, size_t & destOffset, std::istream & is)
+{
+    destOffset += stride;
+    switch (t)
+    {
+    case Type::INT8:       *((int8_t *)dest)  = ply_read_ascii<int32_t>(is);  break;
+    case Type::UINT8:      *((uint8_t *)dest) = ply_read_ascii<uint32_t>(is); break;
+    case Type::INT16:      ply_cast_ascii<int16_t>(dest, is);                 break;
+    case Type::UINT16:     ply_cast_ascii<uint16_t>(dest, is);                break;
+    case Type::INT32:      ply_cast_ascii<int32_t>(dest, is);                 break;
+    case Type::UINT32:     ply_cast_ascii<uint32_t>(dest, is);                break;
+    case Type::FLOAT32:    ply_cast_ascii<float>(dest, is);                   break;
+    case Type::FLOAT64:    ply_cast_ascii<double>(dest, is);                  break;
+    case Type::INVALID:    throw std::invalid_argument("invalid ply property"); 
+    }
+    return stride;
+}
+
+void PlyFile::PlyFileImpl::write_property_ascii(Type t, std::ostream & os, uint8_t * src, size_t & srcOffset)
+{
+    switch (t)
+    {
+    case Type::INT8:       os << static_cast<int32_t>(*reinterpret_cast<int8_t*>(src));   break;
+    case Type::UINT8:      os << static_cast<uint32_t>(*reinterpret_cast<uint8_t*>(src)); break;
+    case Type::INT16:      os << *reinterpret_cast<int16_t*>(src);  break;
+    case Type::UINT16:     os << *reinterpret_cast<uint16_t*>(src); break;
+    case Type::INT32:      os << *reinterpret_cast<int32_t*>(src);  break;
+    case Type::UINT32:     os << *reinterpret_cast<uint32_t*>(src); break;
+    case Type::FLOAT32:    os << *reinterpret_cast<float*>(src);    break;
+    case Type::FLOAT64:    os << *reinterpret_cast<double*>(src);   break;
+    case Type::INVALID:    throw std::invalid_argument("invalid ply property");
+    }
+    os << " ";
+    srcOffset += PropertyTable[t].stride;
+}
+
+void PlyFile::PlyFileImpl::write_property_binary(Type t, std::ostream & os, uint8_t * src, size_t & srcOffset, const size_t & stride)
+{
+    os.write((char *)src, stride);
+    srcOffset += stride;
+}
+
+void PlyFile::PlyFileImpl::read(std::istream & is)
+{
+    std::vector<std::shared_ptr<PlyData>> buffers;
+    for (auto & entry : userData) buffers.push_back(entry.second.data);
+
+    // Discover if we can allocate up front without parsing the file twice 
+    uint32_t list_hints = 0;
+    for (auto & b : buffers) for (auto & entry : userData) {list_hints += entry.second.list_size_hint;(void)b;}
+
+    // No list hints? Then we need to calculate how much memory to allocate
+    if (list_hints == 0) parse_data(is, true);
+
+    // Count the number of properties (required for allocation)
+    // e.g. if we have properties x y and z requested, we ensure
+    // that their buffer points to the same PlyData 
+    std::unordered_map<PlyData*, int32_t> unique_data_count;
+    for (auto & ptr : buffers) unique_data_count[ptr.get()] += 1;
+
+    // Since group-requested properties share the same cursor, 
+    // we need to find unique cursors so we only allocate once
+    std::sort(buffers.begin(), buffers.end());
+    buffers.erase(std::unique(buffers.begin(), buffers.end()), buffers.end());
+
+    // We sorted by ptrs on PlyData, need to remap back onto its cursor in the userData table
+    for (auto & b : buffers)
+    {
+        for (auto & entry : userData)
+        {
+            if (entry.second.data == b && b->buffer.get() == nullptr)
+            {
+                // If we didn't receive any list hints, it means we did two passes over the
+                // file to compute the total length of all (potentially) variable-length lists
+                if (list_hints == 0)
+                {
+                    b->buffer = Buffer(entry.second.cursor->totalSizeBytes);
+                }
+                else
+                {
+                    // otherwise, we can allocate up front, skipping the first pass.
+                    const size_t list_size_multiplier = (entry.second.data->isList ? entry.second.list_size_hint : 1);
+                    auto bytes_per_property = entry.second.data->count * PropertyTable[entry.second.data->t].stride * list_size_multiplier;
+                    bytes_per_property *= unique_data_count[b.get()];
+                    b->buffer = Buffer(bytes_per_property);
+                }
+
+            }
+        }
+    }
+
+    // Populate the data
+    parse_data(is, false);
+
+    if (isBigEndian)
+    {
+        for (auto & b : buffers)
+        {
+            uint8_t * data_ptr = b->buffer.get();
+            const size_t stride = PropertyTable[b->t].stride;
+            const size_t buffer_size_bytes = b->buffer.size_bytes();
+    
+            switch (b->t)
+            {
+            case Type::INT16:   endian_swap_buffer<int16_t, int16_t>(data_ptr, buffer_size_bytes, stride);   break;
+            case Type::UINT16:  endian_swap_buffer<uint16_t, uint16_t>(data_ptr, buffer_size_bytes, stride); break;
+            case Type::INT32:   endian_swap_buffer<int32_t, int32_t>(data_ptr, buffer_size_bytes, stride);   break;
+            case Type::UINT32:  endian_swap_buffer<uint32_t, uint32_t>(data_ptr, buffer_size_bytes, stride); break;
+            case Type::FLOAT32: endian_swap_buffer<uint32_t, float>(data_ptr, buffer_size_bytes, stride);    break;
+            case Type::FLOAT64: endian_swap_buffer<uint64_t, double>(data_ptr, buffer_size_bytes, stride);   break;
+            default: break;
+            }
+        }
+    }
+}
+
+void PlyFile::PlyFileImpl::write(std::ostream & os, bool _isBinary)
+{
+    // reset cursors
+    for (auto & d : userData) { d.second.cursor->byteOffset = 0; }
+    if (_isBinary) write_binary_internal(os);
+    else write_ascii_internal(os);
+}
+
+void PlyFile::PlyFileImpl::write_binary_internal(std::ostream & os)
+{
+    isBinary = true;
+    write_header(os);
+
+    uint8_t listSize[4] = { 0, 0, 0, 0 };
+    size_t dummyCount = 0;
+
+    auto element_property_lookup = make_property_lookup_table();
+
+    size_t element_idx = 0;
+    for (auto & e : elements)
+    {
+        for (size_t i = 0; i < e.size; ++i)
+        {
+            size_t property_index = 0;
+            for (auto & p : e.properties)
+            {
+                auto & f = element_property_lookup[element_idx][property_index];
+                auto * helper = f.helper;
+
+                if (p.isList)
+                {
+                    std::memcpy(listSize, &p.listCount, sizeof(uint32_t));
+                    write_property_binary(p.listType, os, listSize, dummyCount, f.list_stride);
+                    write_property_binary(p.propertyType, os, (helper->data->buffer.get() + helper->cursor->byteOffset), helper->cursor->byteOffset, f.prop_stride * p.listCount);
+                }
+                else
+                {
+                    write_property_binary(p.propertyType, os, (helper->data->buffer.get() + helper->cursor->byteOffset), helper->cursor->byteOffset, f.prop_stride);
+                }
+                property_index++;
+            }
+        }
+        element_idx++;
+    }
+}
+
+void PlyFile::PlyFileImpl::write_ascii_internal(std::ostream & os)
+{
+    write_header(os);
+
+    for (auto & e : elements)
+    {
+        for (size_t i = 0; i < e.size; ++i)
+        {
+            for (auto & p : e.properties)
+            {
+                auto & helper = userData[hash_fnv1a(e.name + p.name)];
+                if (p.isList)
+                {
+                    os << p.listCount << " ";
+                    for (size_t j = 0; j < p.listCount; ++j)
+                    {
+                        write_property_ascii(p.propertyType, os, (helper.data->buffer.get() + helper.cursor->byteOffset), helper.cursor->byteOffset);
+                    }
+                }
+                else
+                {
+                    write_property_ascii(p.propertyType, os, (helper.data->buffer.get() + helper.cursor->byteOffset), helper.cursor->byteOffset);
+                }
+            }
+            os << "\n";
+        }
+    }
+}
+
+void PlyFile::PlyFileImpl::write_header(std::ostream & os)
+{
+    const std::locale & fixLoc = std::locale("C");
+    os.imbue(fixLoc);
+
+    os << "ply\n";
+    if (isBinary) os << ((isBigEndian) ? "format binary_big_endian 1.0" : "format binary_little_endian 1.0") << "\n";
+    else os << "format ascii 1.0\n";
+
+    for (const auto & comment : comments) os << "comment " << comment << "\n";
+
+    for (auto & e : elements)
+    {
+        os << "element " << e.name << " " << e.size << "\n";
+        for (const auto & p : e.properties)
+        {
+            if (p.isList)
+            {
+                os << "property list " << PropertyTable[p.listType].str << " "
+                   << PropertyTable[p.propertyType].str << " " << p.name << "\n";
+            }
+            else
+            {
+                os << "property " << PropertyTable[p.propertyType].str << " " << p.name << "\n";
+            }
+        }
+    }
+    os << "end_header\n";
+}
+
+std::shared_ptr<PlyData> PlyFile::PlyFileImpl::request_properties_from_element(const std::string & elementKey,
+    const std::initializer_list<std::string> propertyKeys,
+    const uint32_t list_size_hint)
+{
+    // Each key in `propertyKey` gets an entry into the userData map (keyed by a hash of
+    // element name and property name), but groups of properties (requested from the
+    // public api through this function) all share the same `ParsingHelper`. When it comes 
+    // time to .read(), we check the number of unique PlyData shared pointers
+    // and allocate a single buffer that will be used by each individual property. 
+    ParsingHelper helper;
+    helper.data = std::make_shared<PlyData>();
+    helper.data->count = 0;
+    helper.data->isList = false;
+    helper.data->t = Type::INVALID;
+    helper.cursor = std::make_shared<PlyDataCursor>();
+    helper.list_size_hint = list_size_hint;
+
+    if (elements.empty()) throw std::runtime_error("header had no elements defined. malformed file?");
+    if (elementKey.empty()) throw std::invalid_argument("`elementKey` argument is empty");
+    if (!propertyKeys.size()) throw std::invalid_argument("`propertyKeys` argument is empty");
+
+    const int64_t elementIndex = find_element(elementKey, elements);
+
+    std::vector<std::string> keys_not_found;
+
+    // Sanity check if the user requested element is in the pre-parsed header
+    if (elementIndex >= 0)
+    {
+        // We found the element
+        const PlyElement & element = elements[elementIndex];
+
+        helper.data->count = element.size;
+
+        // Find each of the keys
+        for (auto key : propertyKeys)
+        {
+            const int64_t propertyIndex = find_property(key, element.properties);
+            // The key was not found
+            if (propertyIndex < 0)
+            {
+                keys_not_found.push_back(key);
+            }
+        }
+
+        if (keys_not_found.size())
+        {
+            std::stringstream ss;
+            for (auto & str : keys_not_found) ss << str << ", ";
+            throw std::invalid_argument("the following property keys were not found in the header: " + ss.str());
+        }
+
+        for (auto key : propertyKeys)
+        {
+            const int64_t propertyIndex = find_property(key, element.properties);
+            const PlyProperty & property = element.properties[propertyIndex];
+            helper.data->t = property.propertyType;
+            helper.data->isList = property.isList;
+            auto result = userData.insert(std::pair<uint32_t, ParsingHelper>(hash_fnv1a(element.name + property.name), helper));
+            if (result.second == false)
+            {
+                throw std::invalid_argument("element-property key has already been requested: " + element.name + " " + property.name);
+            }
+        }
+    }
+    else throw std::invalid_argument("the element key was not found in the header: " + elementKey);
+
+    if (keys_not_found.size())
+    {
+        std::stringstream ss;
+        for (auto & str : keys_not_found) ss << str << ", ";
+        throw std::invalid_argument("the following property keys were not found in the header: " + ss.str());
+    }
+    return helper.data;
+}
+
+void PlyFile::PlyFileImpl::add_properties_to_element(const std::string & elementKey, 
+    const std::initializer_list<std::string> propertyKeys, 
+    const Type type, const size_t count, uint8_t * data, const Type listType, const size_t listCount)
+{
+    ParsingHelper helper;
+    helper.data = std::make_shared<PlyData>();
+    helper.data->count = count;
+    helper.data->t = type;
+    helper.data->buffer = Buffer(data);
+    helper.cursor = std::make_shared<PlyDataCursor>();
+
+    auto create_property_on_element = [&](PlyElement & e)
+    {
+        for (auto key : propertyKeys)
+        {
+            PlyProperty newProp = (listType == Type::INVALID) ? PlyProperty(type, key) : PlyProperty(listType, type, key, listCount);
+            userData.insert(std::pair<uint32_t, ParsingHelper>(hash_fnv1a(elementKey + key), helper));
+            e.properties.push_back(newProp);
+        }
+    };
+
+    const int64_t idx = find_element(elementKey, elements);
+    if (idx >= 0)
+    {
+        PlyElement & e = elements[idx];
+        create_property_on_element(e);
+    }
+    else
+    {
+        PlyElement newElement = (listType == Type::INVALID) ? PlyElement(elementKey, count) : PlyElement(elementKey, count);
+        create_property_on_element(newElement);
+        elements.push_back(newElement);
+    }
+}
+
+void PlyFile::PlyFileImpl::parse_data(std::istream & is, bool firstPass)
+{
+    std::function<void(PropertyLookup & f, const PlyProperty & p, uint8_t * dest, size_t & destOffset, std::istream & is)> read;
+    std::function<size_t(PropertyLookup & f, const PlyProperty & p, std::istream & is)> skip;
+
+    const auto start = is.tellg();
+
+    size_t listSize = 0;
+    size_t dummyCount = 0;
+    std::string skip_ascii_buffer;
+
+    // Special case mirroring read_property_binary but for list types; this
+    // has an additional big endian check to flip the data in place immediately
+    // after reading. We do this as a performance optimization; endian flipping is
+    // done on regular properties as a post-process after reading (also for optimization)
+    // but we need the correct little-endian list count as we read the file. 
+    auto read_list_binary = [this](const Type & t, void * dst, size_t & destOffset, const size_t & stride, std::istream & _is)
+    {
+        destOffset += stride;
+        _is.read((char*)dst, stride);
+
+        if (isBigEndian)
+        {
+            switch (t)
+            {
+            case Type::INT16:  endian_swap<int16_t, int16_t>(*(int16_t*)dst);    break;
+            case Type::UINT16: endian_swap<uint16_t, uint16_t>(*(uint16_t*)dst); break;
+            case Type::INT32:  endian_swap<int32_t, int32_t>(*(int32_t*)dst);    break;
+            case Type::UINT32: endian_swap<uint32_t, uint32_t>(*(uint32_t*)dst); break;
+            default: break;
+            }
+        }
+
+        return stride;
+    };
+
+    if (isBinary)
+    {
+        read = [this, &listSize, &dummyCount, &read_list_binary](PropertyLookup & f, const PlyProperty & p, uint8_t * dest, size_t & destOffset, std::istream & _is)
+        {
+            if (!p.isList)
+            {
+                read_property_binary(p.propertyType, f.prop_stride, dest + destOffset, destOffset, _is);
+            }
+            else
+            {
+                read_list_binary(p.listType, &listSize, dummyCount, f.list_stride, _is); // the list size
+                read_property_binary(p.propertyType, f.prop_stride * listSize, dest + destOffset, destOffset, _is); // properties in list
+            }
+        };
+        skip = [this, &listSize, &dummyCount, &read_list_binary](PropertyLookup & f, const PlyProperty & p, std::istream & _is)
+        {
+            if (!p.isList)
+            {
+                _is.read((char*)scratch, f.prop_stride);
+                return f.prop_stride;
+            }
+            read_list_binary(p.listType, &listSize, dummyCount, f.list_stride, _is); // the list size (does not count for memory alloc)
+            return read_property_binary(p.propertyType, f.prop_stride * listSize, scratch, dummyCount, _is);
+        };
+    }
+    else
+    {
+        read = [this, &listSize, &dummyCount](PropertyLookup & f, const PlyProperty & p, uint8_t * dest, size_t & destOffset, std::istream & _is) 
+        { 
+            if (!p.isList)
+            {
+                read_property_ascii(p.propertyType, f.prop_stride, dest + destOffset, destOffset, _is); 
+            }
+            else
+            {
+                read_property_ascii(p.listType, f.list_stride, &listSize, dummyCount, _is); // the list size
+                for (size_t i = 0; i < listSize; ++i) 
+                {
+                    read_property_ascii(p.propertyType, f.prop_stride, dest + destOffset, destOffset, _is);
+                }
+            }
+        };
+        skip = [this, &listSize, &dummyCount, &skip_ascii_buffer](PropertyLookup & f, const PlyProperty & p, std::istream & _is)
+        { 
+            skip_ascii_buffer.clear();
+            if (p.isList)
+            {
+                read_property_ascii(p.listType, f.list_stride, &listSize, dummyCount, _is); // the list size
+                for (size_t i = 0; i < listSize; ++i) _is >> skip_ascii_buffer; // properties in list
+                return listSize * f.prop_stride;
+            }
+            _is >> skip_ascii_buffer;
+            return f.prop_stride;
+        };
+    }
+
+    auto element_property_lookup = make_property_lookup_table();
+
+    size_t element_idx = 0;
+    size_t property_index = 0;
+    for (auto & element : elements)
+    {
+        for (size_t count = 0; count < element.size; ++count)
+        {
+            property_index = 0;
+            for (auto & property : element.properties)
+            {
+                auto & f = element_property_lookup[element_idx][property_index];
+                if (!f.skip)
+                {
+                    auto * helper = f.helper;
+                    if (firstPass) helper->cursor->totalSizeBytes += skip(f, property, is);
+                    else read(f, property, helper->data->buffer.get(), helper->cursor->byteOffset, is);
+                }
+                else skip(f, property, is);
+                property_index++;
+            }
+        }
+        element_idx++;
+    }
+
+    // Reset istream reader to the beginning
+    if (firstPass) is.seekg(start, is.beg);
+}
+
+// Wrap the public interface:
+
+PlyFile::PlyFile() { impl.reset(new PlyFileImpl()); }
+PlyFile::~PlyFile() { }
+bool PlyFile::parse_header(std::istream & is) { return impl->parse_header(is); }
+void PlyFile::read(std::istream & is) { return impl->read(is); }
+void PlyFile::write(std::ostream & os, bool isBinary) { return impl->write(os, isBinary); }
+std::vector<PlyElement> PlyFile::get_elements() const { return impl->elements; }
+std::vector<std::string> & PlyFile::get_comments() { return impl->comments; }
+std::vector<std::string> PlyFile::get_info() const { return impl->objInfo; }
+std::shared_ptr<PlyData> PlyFile::request_properties_from_element(const std::string & elementKey,
+    const std::initializer_list<std::string> propertyKeys,
+    const uint32_t list_size_hint)
+{
+    return impl->request_properties_from_element(elementKey, propertyKeys, list_size_hint);
+}
+void PlyFile::add_properties_to_element(const std::string & elementKey,
+    const std::initializer_list<std::string> propertyKeys,
+    const Type type, const size_t count, uint8_t * data, const Type listType, const size_t listCount)
+{
+    return impl->add_properties_to_element(elementKey, propertyKeys, type, count, data, listType, listCount);
+}
+
+#endif // end TINYPLY_IMPLEMENTATION

+ 49 - 0
src/detection/detection_localization_global_hdl/main.cpp

@@ -0,0 +1,49 @@
+#include <QCoreApplication>
+
+#include <iostream>
+
+#include <pcl/io/pcd_io.h>
+
+#include "hdl_global_localization/engines/global_localization_bbs.hpp"
+#include "hdl_global_localization/engines/global_localization_fpfh_teaser.hpp"
+
+int main(int argc, char *argv[])
+{
+    QCoreApplication a(argc, argv);
+
+//    hdl_global_localization::GlobalLocalizationBBS gglobal;
+
+//    pcl::PointCloud<pcl::PointXYZ>::Ptr mapx = boost::shared_ptr<pcl::PointCloud<pcl::PointXYZ>>(new pcl::PointCloud<pcl::PointXYZ>);;
+//    if(0 == pcl::io::loadPCDFile("/home/nvidia/map/xingxialicangfang.pcd",*mapx))
+//    {
+//        std::cout<<" Load Map success."<<std::endl;
+//        gglobal.set_global_map(mapx);
+
+//        pcl::PointCloud<pcl::PointXYZ>::Ptr pc1 = boost::shared_ptr<pcl::PointCloud<pcl::PointXYZ>>(new pcl::PointCloud<pcl::PointXYZ>);
+//        if(0 == pcl::io::loadPCDFile("/home/nvidia/pcd/xiali2.pcd",*pc1))
+//        {
+//            std::cout<<"Load point cloud success."<<std::endl;
+//             gglobal.query(pc1,10);
+//        }
+//    }
+
+    hdl_global_localization::GlobalLocalizationEngineFPFH_RANSAC gglobal;
+
+    pcl::PointCloud<pcl::PointXYZ>::Ptr mapx = boost::shared_ptr<pcl::PointCloud<pcl::PointXYZ>>(new pcl::PointCloud<pcl::PointXYZ>);;
+    if(0 == pcl::io::loadPCDFile("/home/nvidia/map/xingxialicangfang.pcd",*mapx))
+    {
+        std::cout<<" Load Map success."<<std::endl;
+        gglobal.set_global_map(mapx);
+
+        pcl::PointCloud<pcl::PointXYZ>::Ptr pc1 = boost::shared_ptr<pcl::PointCloud<pcl::PointXYZ>>(new pcl::PointCloud<pcl::PointXYZ>);
+        if(0 == pcl::io::loadPCDFile("/home/nvidia/pcd/xiali2.pcd",*pc1))
+        {
+            std::cout<<"Load point cloud success."<<std::endl;
+             gglobal.query(pc1,10);
+        }
+    }
+
+    std::cout<<"hello."<<std::endl;
+
+    return a.exec();
+}

+ 671 - 0
src/detection/detection_localization_global_hdl/teaser/certification.cc

@@ -0,0 +1,671 @@
+/**
+ * Copyright (c) 2020, Massachusetts Institute of Technology,
+ * Cambridge, MA 02139
+ * All Rights Reserved
+ * Authors: Jingnan Shi, et al. (see THANKS for the full author list)
+ * See LICENSE for the license information
+ */
+
+#include <limits>
+#include <fstream>
+#include <chrono>
+#include <algorithm>
+
+#include <teaser/macros.h>
+#include <Spectra/SymEigsSolver.h>
+#include <Spectra/GenEigsSolver.h>
+#include <Spectra/SymEigsShiftSolver.h>
+
+#include "teaser/certification.h"
+#include "teaser/linalg.h"
+
+teaser::CertificationResult
+teaser::DRSCertifier::certify(const Eigen::Matrix3d& R_solution,
+                              const Eigen::Matrix<double, 3, Eigen::Dynamic>& src,
+                              const Eigen::Matrix<double, 3, Eigen::Dynamic>& dst,
+                              const Eigen::Matrix<bool, 1, Eigen::Dynamic>& theta) {
+  // convert theta to a double Eigen matrix
+  Eigen::Matrix<double, 1, Eigen::Dynamic> theta_double(1, theta.cols());
+  for (size_t i = 0; i < theta.cols(); ++i) {
+    if (theta(i)) {
+      theta_double(i) = 1;
+    } else {
+      theta_double(i) = -1;
+    }
+  }
+  return certify(R_solution, src, dst, theta_double);
+}
+
+teaser::CertificationResult
+teaser::DRSCertifier::certify(const Eigen::Matrix3d& R_solution,
+                              const Eigen::Matrix<double, 3, Eigen::Dynamic>& src,
+                              const Eigen::Matrix<double, 3, Eigen::Dynamic>& dst,
+                              const Eigen::Matrix<double, 1, Eigen::Dynamic>& theta) {
+  int N = src.cols();
+  int Npm = 4 + 4 * N;
+
+  // prepend theta with 1
+  Eigen::Matrix<double, 1, Eigen::Dynamic> theta_prepended(1, theta.cols() + 1);
+  theta_prepended << 1, theta;
+
+  // get the inverse map
+  TEASER_INFO_MSG("Starting linear inverse map calculation.\n");
+  TEASER_DEBUG_DECLARE_TIMING(LProj);
+  TEASER_DEBUG_START_TIMING(LProj);
+  SparseMatrix inverse_map;
+  getLinearProjection(theta_prepended, &inverse_map);
+  TEASER_DEBUG_STOP_TIMING(LProj);
+  TEASER_DEBUG_INFO_MSG("Obtained linear inverse map.");
+  TEASER_DEBUG_INFO_MSG("Linear projection time: " << TEASER_DEBUG_GET_TIMING(LProj));
+
+  // recall data matrix from QUASAR
+  Eigen::MatrixXd Q_cost(Npm, Npm);
+  getQCost(src, dst, &Q_cost);
+  TEASER_DEBUG_INFO_MSG("Obtained Q_cost matrix.");
+
+  // convert the estimated rotation to quaternion
+  Eigen::Quaterniond q_solution(R_solution);
+  q_solution.normalize();
+  Eigen::VectorXd q_solution_vec(4, 1);
+  q_solution_vec << q_solution.x(), q_solution.y(), q_solution.z(), q_solution.w();
+
+  // this would have been the rank-1 decomposition of Z if Z were the globally
+  // optimal solution of the QUASAR SDP
+  Eigen::VectorXd x =
+      teaser::vectorKron<double, Eigen::Dynamic, Eigen::Dynamic>(theta_prepended, q_solution_vec);
+
+  // build the "rotation matrix" D_omega
+  Eigen::MatrixXd D_omega;
+  getBlockDiagOmega(Npm, q_solution, &D_omega);
+  Eigen::MatrixXd Q_bar = D_omega.transpose() * (Q_cost * D_omega);
+  Eigen::VectorXd x_bar = D_omega.transpose() * x;
+  TEASER_DEBUG_INFO_MSG("Obtained D_omega matrix.");
+
+  // build J_bar matrix with a 4-by-4 identity at the top left corner
+  Eigen::SparseMatrix<double> J_bar(Npm, Npm);
+  for (size_t i = 0; i < 4; ++i) {
+    J_bar.insert(i, i) = 1;
+  }
+
+  // verify optimality in the "rotated" space using projection
+  // this is the cost of the primal, when strong duality holds, mu is also the cost of the dual
+  double mu = x.transpose().dot(Q_cost * x);
+
+  // get initial guess
+  SparseMatrix lambda_bar_init;
+  getLambdaGuess(R_solution, theta, src, dst, &lambda_bar_init);
+
+  // this initial guess lives in the affine subspace
+  // use 2 separate steps to limit slow evaluation on only the few non-zeros in the sparse matrix
+#if EIGEN_VERSION_AT_LEAST(3,3,0)
+  Eigen::SparseMatrix<double> M_init = Q_bar - mu * J_bar - lambda_bar_init;
+#else
+  // fix for this bug in Eigen 3.2: https://eigen.tuxfamily.org/bz/show_bug.cgi?id=632
+  Eigen::SparseMatrix<double> M_init = Q_bar.sparseView() - mu * J_bar - lambda_bar_init;
+#endif
+
+  // flag to indicate whether we exceeded iterations or reach the desired sub-optim gap
+  bool exceeded_maxiters = true;
+
+  // vector to store suboptim trajectory
+  std::vector<double> suboptim_traj;
+
+  // current suboptimality gap
+  double current_suboptim = std::numeric_limits<double>::infinity();
+  double best_suboptim = std::numeric_limits<double>::infinity();
+
+  // preallocate some matrices
+  Eigen::MatrixXd M_PSD;
+  // TODO: Make M a sparse matrix
+  Eigen::MatrixXd M = M_init.toDense();
+  Eigen::MatrixXd temp_W(M.rows(), M.cols());
+  Eigen::MatrixXd W_dual(Npm, Npm);
+  Eigen::MatrixXd M_affine(Npm, Npm);
+
+  TEASER_INFO_MSG("Starting Douglas-Rachford Splitting.\n");
+  for (size_t iter = 0; iter < params_.max_iterations; ++iter) {
+    // print out iteration every 10 iteration
+    TEASER_INFO_MSG_THROTTLE("Iteration: " << iter << "\n", iter, 10);
+
+    // to nearest PSD
+    TEASER_DEBUG_DECLARE_TIMING(PSD);
+    TEASER_DEBUG_START_TIMING(PSD);
+    teaser::getNearestPSD<double>(M, &M_PSD);
+    TEASER_DEBUG_STOP_TIMING(PSD);
+    TEASER_DEBUG_INFO_MSG("PSD time: " << TEASER_DEBUG_GET_TIMING(PSD));
+
+    // projection to affine space
+#if EIGEN_VERSION_AT_LEAST(3,3,0)
+    temp_W = 2 * M_PSD - M - M_init;
+#else
+    // fix for this bug in Eigen 3.2: https://eigen.tuxfamily.org/bz/show_bug.cgi?id=632
+    temp_W = 2 * M_PSD - M - M_init.toDense();
+#endif
+
+    TEASER_DEBUG_DECLARE_TIMING(DualProjection);
+    TEASER_DEBUG_START_TIMING(DualProjection);
+    getOptimalDualProjection(temp_W, theta_prepended, inverse_map, &W_dual);
+    TEASER_DEBUG_STOP_TIMING(DualProjection);
+    TEASER_DEBUG_INFO_MSG("Dual Projection time: " << TEASER_DEBUG_GET_TIMING(DualProjection));
+#if EIGEN_VERSION_AT_LEAST(3,3,0)
+    M_affine = M_init + W_dual;
+#else
+    // fix for this bug in Eigen 3.2: https://eigen.tuxfamily.org/bz/show_bug.cgi?id=632
+    M_affine = M_init.toDense() + W_dual;
+#endif
+
+    // compute suboptimality gap
+    TEASER_DEBUG_DECLARE_TIMING(Gap);
+    TEASER_DEBUG_START_TIMING(Gap);
+    current_suboptim = computeSubOptimalityGap(M_affine, mu, N);
+    TEASER_DEBUG_STOP_TIMING(Gap);
+    TEASER_DEBUG_INFO_MSG("Sub Optimality Gap time: " << TEASER_DEBUG_GET_TIMING(Gap));
+    TEASER_DEBUG_INFO_MSG("Current sub-optimality gap: " << current_suboptim);
+
+    // termination check and update trajectory
+    suboptim_traj.push_back(current_suboptim);
+
+    // update best optimality
+    if (current_suboptim < best_suboptim) {
+      best_suboptim = current_suboptim;
+    }
+
+    if (current_suboptim < params_.sub_optimality) {
+      TEASER_DEBUG_INFO_MSG("Suboptimality condition reached in " << iter + 1
+                                                                  << " iterations. Stopping DRS.");
+      exceeded_maxiters = false;
+      break;
+    }
+
+    // update M
+    M += params_.gamma_tau * (M_affine - M_PSD);
+  }
+
+  // prepare results
+  CertificationResult cert_result;
+  cert_result.is_optimal = best_suboptim < params_.sub_optimality;
+  cert_result.best_suboptimality = best_suboptim;
+  cert_result.suboptimality_traj = suboptim_traj;
+  return cert_result;
+}
+
+double teaser::DRSCertifier::computeSubOptimalityGap(const Eigen::MatrixXd& M, double mu, int N) {
+  Eigen::MatrixXd new_M = (M + M.transpose()) / 2;
+
+  bool successful = false;
+  Eigen::VectorXd eig_vals;
+  double min_eig;
+  if (params_.eig_decomposition_solver == EIG_SOLVER_TYPE::EIGEN) {
+    // Eigen
+    Eigen::SelfAdjointEigenSolver<Eigen::MatrixXd> eigensolver(new_M);
+    if (eigensolver.info() == Eigen::Success) {
+      eig_vals = eigensolver.eigenvalues();
+      min_eig = eig_vals.minCoeff();
+      successful = true;
+    }
+  } else {
+    // Spectra
+    Spectra::DenseSymMatProd<double> op(new_M);
+    Spectra::SymEigsSolver<double, Spectra::SMALLEST_ALGE, Spectra::DenseSymMatProd<double>> eigs(
+        &op, 1, 30);
+    eigs.init();
+    int nconv = eigs.compute();
+    if (eigs.info() == Spectra::SUCCESSFUL) {
+      eig_vals = eigs.eigenvalues();
+      min_eig = eig_vals(0);
+      successful = true;
+    }
+  }
+
+  if (!successful) {
+    TEASER_DEBUG_ERROR_MSG(
+        "Failed to find the minimal eigenvalue for suboptimality gap calculaiton.");
+    return std::numeric_limits<double>::infinity();
+  }
+
+  if (min_eig > 0) {
+    // already optimal
+    return 0;
+  }
+  return (-min_eig * (N + 1)) / mu;
+}
+
+void teaser::DRSCertifier::getQCost(const Eigen::Matrix<double, 3, Eigen::Dynamic>& v1,
+                                    const Eigen::Matrix<double, 3, Eigen::Dynamic>& v2,
+                                    Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic>* Q) {
+  int N = v1.cols();
+  int Npm = 4 + 4 * N;
+  double noise_bound_scaled = params_.cbar2 * std::pow(params_.noise_bound, 2);
+
+  // coefficient matrix that maps vec(qq\tran) to vec(R)
+  Eigen::Matrix<double, 9, 16> P(9, 16);
+  // clang-format off
+  P << 1,  0, 0, 0,  0, -1, 0, 0,  0, 0, -1, 0, 0,  0,  0, 1,
+       0,  1, 0, 0,  1,  0, 0, 0,  0, 0, 0,  1, 0,  0,  1, 0,
+       0,  0, 1, 0,  0,  0, 0, -1, 1, 0, 0,  0, 0,  -1, 0, 0,
+       0,  1, 0, 0,  1,  0, 0, 0,  0, 0, 0, -1, 0,  0, -1, 0,
+       -1, 0, 0, 0,  0,  1, 0, 0,  0, 0, -1, 0, 0,  0,  0, 1,
+       0,  0, 0, 1,  0,  0, 1, 0,  0, 1, 0,  0, 1,  0,  0, 0,
+       0,  0, 1, 0,  0,  0, 0, 1,  1, 0, 0,  0, 0,  1,  0, 0,
+       0,  0, 0, -1, 0,  0, 1, 0,  0, 1, 0,  0, -1, 0,  0, 0,
+       -1, 0, 0, 0,  0, -1, 0, 0,  0, 0, 1,  0, 0,  0,  0, 1;
+  // clang-format on
+
+  // Some temporary vectors to save intermediate matrices
+  Eigen::Matrix3d temp_A;
+  Eigen::Matrix<double, 16, 1> temp_B;
+  Eigen::Matrix<double, 9, 1> temp_map2vec;
+  Eigen::Matrix4d P_k;
+
+  // Q1 matrix
+  Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic> Q1(Npm, Npm);
+  Q1.setZero();
+  for (size_t k = 0; k < N; ++k) {
+    int start_idx = k * 4 + 4;
+
+    //  P_k = reshape(P'*reshape(v2(:,k)*v1(:,k)',[9,1]),[4,4]);
+    temp_A = v2.col(k) * (v1.col(k).transpose());
+    temp_map2vec = Eigen::Map<Eigen::Matrix<double, 9, 1>>(temp_A.data());
+    temp_B = P.transpose() * temp_map2vec;
+    P_k = Eigen::Map<Eigen::Matrix4d>(temp_B.data());
+
+    //  ck = 0.5 * ( v1(:,k)'*v1(:,k)+v2(:,k)'*v2(:,k) - barc2 );
+    double ck = 0.5 * (v1.col(k).squaredNorm() + v2.col(k).squaredNorm() - noise_bound_scaled);
+    Q1.block<4, 4>(0, start_idx) =
+        Q1.block<4, 4>(0, start_idx) - 0.5 * P_k + ck / 2 * Eigen::Matrix4d::Identity();
+    Q1.block<4, 4>(start_idx, 0) =
+        Q1.block<4, 4>(start_idx, 0) - 0.5 * P_k + ck / 2 * Eigen::Matrix4d::Identity();
+  }
+
+  // Q2 matrix
+  Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic> Q2(Npm, Npm);
+  Q2.setZero();
+  for (size_t k = 0; k < N; ++k) {
+    int start_idx = k * 4 + 4;
+
+    //  P_k = reshape(P'*reshape(v2(:,k)*v1(:,k)',[9,1]),[4,4]);
+    temp_A = v2.col(k) * (v1.col(k).transpose());
+    temp_map2vec = Eigen::Map<Eigen::Matrix<double, 9, 1>>(temp_A.data());
+    temp_B = P.transpose() * temp_map2vec;
+    P_k = Eigen::Map<Eigen::Matrix4d>(temp_B.data());
+
+    //  ck = 0.5 * ( v1(:,k)'*v1(:,k)+v2(:,k)'*v2(:,k) + barc2 );
+    double ck = 0.5 * (v1.col(k).squaredNorm() + v2.col(k).squaredNorm() + noise_bound_scaled);
+    Q2.block<4, 4>(start_idx, start_idx) =
+        Q2.block<4, 4>(start_idx, start_idx) - P_k + ck * Eigen::Matrix4d::Identity();
+  }
+
+  *Q = Q1 + Q2;
+}
+
+Eigen::Matrix4d teaser::DRSCertifier::getOmega1(const Eigen::Quaterniond& q) {
+  Eigen::Matrix4d omega1;
+  // clang-format off
+  omega1 << q.w(), -q.z(), q.y(), q.x(),
+            q.z(), q.w(), -q.x(), q.y(),
+            -q.y(), q.x(), q.w(), q.z(),
+            -q.x(), -q.y(), -q.z(), q.w();
+  // clang-format on
+  return omega1;
+}
+
+void teaser::DRSCertifier::getBlockDiagOmega(
+    int Npm, const Eigen::Quaterniond& q,
+    Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic>* D_omega) {
+  D_omega->resize(Npm, Npm);
+  D_omega->setZero();
+  for (size_t i = 0; i < Npm / 4; ++i) {
+    int start_idx = i * 4;
+    D_omega->block<4, 4>(start_idx, start_idx) = getOmega1(q);
+  }
+}
+
+void teaser::DRSCertifier::getOptimalDualProjection(
+    const Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic>& W,
+    const Eigen::Matrix<double, 1, Eigen::Dynamic>& theta_prepended, const SparseMatrix& A_inv,
+    Eigen::MatrixXd* W_dual) {
+  // prepare some variables
+  int Npm = W.rows();
+  int N = Npm / 4 - 1;
+  assert(theta_prepended.cols() == N + 1);
+
+  // first project the off-diagonal blocks
+  int nr_off_diag_blks = A_inv.rows();
+
+  // Compute b_W
+  Eigen::Matrix<double, Eigen::Dynamic, 3> b_W(nr_off_diag_blks, 3);
+  b_W.setZero();
+
+  int count = 0;
+  for (size_t i = 0; i < N; ++i) {
+    // prepare indices
+    int row_idx_start = i * 4;
+    int row_idx_end = i * 4 + 3;
+    for (size_t j = i + 1; j < N + 1; ++j) {
+      // prepare indices
+      int col_idx_start = j * 4;
+      int col_idx_end = j * 4 + 3;
+
+      // current theta value calculation
+      double theta_ij = theta_prepended.col(i) * theta_prepended.col(j);
+
+      // [-theta_ij 1]
+      Eigen::Matrix<double, 1, 2> temp_A;
+      temp_A << -theta_ij, 1;
+
+      // [-1 theta_ij]
+      Eigen::Matrix<double, 1, 2> temp_B;
+      temp_B << -1, theta_ij;
+
+      // W([row_idx(4) col_idx(4)],row_idx(1:3))
+      Eigen::Matrix<double, 1, 3> temp_C = W.block<1, 3>(row_idx_end, row_idx_start);
+      Eigen::Matrix<double, 1, 3> temp_D = W.block<1, 3>(col_idx_end, row_idx_start);
+      Eigen::Matrix<double, 2, 3> temp_CD;
+      temp_CD << temp_C, temp_D;
+
+      // W([row_idx(4) col_idx(4)], col_idx(1:3))
+      Eigen::Matrix<double, 1, 3> temp_E = W.block<1, 3>(row_idx_end, col_idx_start);
+      Eigen::Matrix<double, 1, 3> temp_F = W.block<1, 3>(col_idx_end, col_idx_start);
+      Eigen::Matrix<double, 2, 3> temp_EF;
+      temp_EF << temp_E, temp_F;
+
+      // calculate the current row for b_W with the temporary variables
+      Eigen::Matrix<double, 1, 3> y_b_Wt = temp_A * temp_CD + temp_B * temp_EF;
+
+      // update b_W
+      b_W.row(count) = y_b_Wt;
+      count += 1;
+    }
+  }
+  Eigen::Matrix<double, Eigen::Dynamic, 3> b_W_dual = A_inv * b_W;
+
+  // Compute W_dual
+  W_dual->resize(Npm, Npm);
+  W_dual->setZero();
+  count = 0;
+  // declare matrices to prevent reallocation
+  Eigen::Matrix4d W_ij = Eigen::Matrix4d::Zero();
+  Eigen::Matrix4d W_dual_ij = Eigen::Matrix4d::Zero();
+  Eigen::Matrix<double, 3, 1> y_dual_ij = Eigen::Matrix<double, 3, 1>::Zero();
+  Eigen::Matrix<double, 4, Eigen::Dynamic> W_i(4, W.cols());
+  Eigen::Matrix<double, 4, Eigen::Dynamic> W_dual_i(4, Npm);
+  W_i.setZero();
+  W_dual_i.setZero();
+  for (size_t i = 0; i < N; ++i) {
+    int row_idx_start = i * 4;
+    W_i = W.block(row_idx_start, 0, 4, W.cols());
+
+    for (size_t j = i + 1; j < N + 1; ++j) {
+      int col_idx_start = j * 4;
+
+      // take W_ij and break into top-left 3x3 and vectors
+      W_ij = W_i.block(0, col_idx_start, 4, 4);
+      y_dual_ij = (b_W_dual.row(count)).transpose();
+
+      // assemble W_dual_ij
+      W_dual_ij = (W_ij - W_ij.transpose()) / 2;
+      W_dual_ij.block<3, 1>(0, 3) = y_dual_ij;
+      W_dual_ij.block<1, 3>(3, 0) = -y_dual_ij.transpose();
+
+      // assign W_dual_ij to W_dual_i
+      W_dual_i.block<4, 4>(0, col_idx_start) = W_dual_ij;
+
+      count += 1;
+    }
+    W_dual->block(row_idx_start, 0, 4, Npm) = W_dual_i;
+
+    // clear out temporary variables
+    W_dual_i.setZero();
+    W_i.setZero();
+  }
+  Eigen::MatrixXd temp = W_dual->transpose();
+  *W_dual += temp;
+
+  // Project the diagonal blocks
+  Eigen::Matrix4d W_ii = Eigen::Matrix4d::Zero();
+  Eigen::Matrix4d W_diag_mean = Eigen::Matrix4d::Zero();
+  Eigen::Matrix3d W_diag_sum_33 = Eigen::Matrix3d::Zero();
+  for (size_t i = 0; i < N + 1; ++i) {
+    int idx_start = i * 4;
+    // Eigen::Vector4d W_dual_row_sum_last_column= W_dual->middleRows<4>(idx_start).rowwise().sum();
+    Eigen::Vector4d W_dual_row_sum_last_column;
+    // sum 4 rows
+    getBlockRowSum(*W_dual, idx_start, theta_prepended, &W_dual_row_sum_last_column);
+    W_ii = W.block<4, 4>(idx_start, idx_start);
+    // modify W_ii's last column/row to satisfy complementary slackness
+    W_ii.block<4, 1>(0, 3) = -theta_prepended(i) * W_dual_row_sum_last_column;
+    W_ii.block<1, 4>(3, 0) = -theta_prepended(i) * W_dual_row_sum_last_column.transpose();
+    (*W_dual).block<4, 4>(idx_start, idx_start) = W_ii;
+    W_diag_sum_33 += W_ii.topLeftCorner<3, 3>();
+  }
+  W_diag_mean.topLeftCorner<3, 3>() = W_diag_sum_33 / (N + 1);
+
+  // update diagonal blocks
+  Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic> temp_A((N + 1) * W_diag_mean.rows(),
+                                                               (N + 1) * W_diag_mean.cols());
+  temp_A.setZero();
+  for (int i = 0; i < N + 1; i++) {
+    temp_A.block(i * W_diag_mean.rows(), i * W_diag_mean.cols(), W_diag_mean.rows(),
+                 W_diag_mean.cols()) = W_diag_mean;
+  }
+  *W_dual -= temp_A;
+}
+
+void teaser::DRSCertifier::getLambdaGuess(const Eigen::Matrix<double, 3, 3>& R,
+                                          const Eigen::Matrix<double, 1, Eigen::Dynamic>& theta,
+                                          const Eigen::Matrix<double, 3, Eigen::Dynamic>& src,
+                                          const Eigen::Matrix<double, 3, Eigen::Dynamic>& dst,
+                                          SparseMatrix* lambda_guess) {
+  int K = theta.cols();
+  int Npm = 4 * K + 4;
+
+  double noise_bound_scaled = params_.cbar2 * std::pow(params_.noise_bound, 2);
+
+  // prepare the lambda sparse matrix output
+  lambda_guess->resize(Npm, Npm);
+  lambda_guess->reserve(Npm * (Npm - 1) * 2);
+  lambda_guess->setZero();
+
+  // 4-by-4 Eigen matrix to store the top left 4-by-4 block
+  Eigen::Matrix<double, 4, 4> topleft_block = Eigen::Matrix4d::Zero();
+
+  // 4-by-4 Eigen matrix to store the current 4-by-4 block
+  Eigen::Matrix<double, 4, 4> current_block = Eigen::Matrix4d::Zero();
+
+  for (size_t i = 0; i < K; ++i) {
+    // hat maps for later usage
+    Eigen::Matrix<double, 3, 3> src_i_hatmap = teaser::hatmap(src.col(i));
+    if (theta(0, i) > 0) {
+      // residual
+      Eigen::Matrix<double, 3, 1> xi = R.transpose() * (dst.col(i) - R * src.col(i));
+      Eigen::Matrix<double, 3, 3> xi_hatmap = teaser::hatmap(xi);
+
+      // compute the (4,4) entry of the current block, obtained from KKT complementary slackness
+      current_block(3, 3) = -0.75 * xi.squaredNorm() - 0.25 * noise_bound_scaled;
+
+      // compute the top-left 3-by-3 block
+      current_block.topLeftCorner<3, 3>() =
+          src_i_hatmap * src_i_hatmap - 0.5 * (src.col(i)).dot(xi) * Eigen::Matrix3d::Identity() +
+          0.5 * xi_hatmap * src_i_hatmap + 0.5 * xi * src.col(i).transpose() -
+          0.75 * xi.squaredNorm() * Eigen::Matrix3d::Identity() -
+          0.25 * noise_bound_scaled * Eigen::Matrix3d::Identity();
+
+      // compute the vector part
+      current_block.topRightCorner<3, 1>() = -1.5 * xi_hatmap * src.col(i);
+      current_block.bottomLeftCorner<1, 3>() = (current_block.topRightCorner<3, 1>()).transpose();
+    } else {
+      // residual
+      Eigen::Matrix<double, 3, 1> phi = R.transpose() * (dst.col(i) - R * src.col(i));
+      Eigen::Matrix<double, 3, 3> phi_hatmap = teaser::hatmap(phi);
+
+      // compute lambda_i, (4,4) entry
+      current_block(3, 3) = -0.25 * phi.squaredNorm() - 0.75 * noise_bound_scaled;
+
+      // compute E_ii, top-left 3-by-3 block
+      current_block.topLeftCorner<3, 3>() =
+          src_i_hatmap * src_i_hatmap - 0.5 * (src.col(i)).dot(phi) * Eigen::Matrix3d::Identity() +
+          0.5 * phi_hatmap * src_i_hatmap + 0.5 * phi * src.col(i).transpose() -
+          0.25 * phi.squaredNorm() * Eigen::Matrix3d::Identity() -
+          0.25 * noise_bound_scaled * Eigen::Matrix3d::Identity();
+
+      // compute x_i
+      current_block.topRightCorner<3, 1>() = -0.5 * phi_hatmap * src.col(i);
+      current_block.bottomLeftCorner<1, 3>() = (current_block.topRightCorner<3, 1>()).transpose();
+    }
+
+    // put the current block to the sparse triplets
+    // start idx: i * 4
+    // end idx: i * 4 + 3
+    // assume current block is column major
+    for (size_t col = 0; col < 4; ++col) {
+      for (size_t row = 0; row < 4; ++row) {
+        lambda_guess->insert((i + 1) * 4 + row, (i + 1) * 4 + col) = -current_block(row, col);
+      }
+    }
+
+    // update the first block
+    topleft_block += current_block;
+  }
+
+  // put the first block to the sparse matrix
+  for (size_t col = 0; col < 4; ++col) {
+    for (size_t row = 0; row < 4; ++row) {
+      lambda_guess->coeffRef(row, col) += topleft_block(row, col);
+    }
+  }
+}
+
+void teaser::DRSCertifier::getLinearProjection(
+    const Eigen::Matrix<double, 1, Eigen::Dynamic>& theta_prepended, SparseMatrix* A_inv) {
+  // number of off-diagonal entries in the inverse map
+  int N0 = theta_prepended.cols() - 1;
+
+  double y = 1.0 / (2 * static_cast<double>(N0) + 6);
+  // number of diagonal entries in the inverse map
+  double x = (static_cast<double>(N0) + 1.0) * y;
+
+  int N = N0 + 1;
+
+  // build the mapping from independent var idx to matrix index
+  int nr_vals = N * (N - 1) / 2;
+  Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic> mat2vec = Eigen::MatrixXd::Zero(N, N);
+  int count = 0;
+  for (size_t i = 0; i < N - 1; ++i) {
+    for (size_t j = i + 1; j < N; ++j) {
+      mat2vec(i, j) = count;
+      count += 1;
+    }
+  }
+
+  // creating the inverse map sparse matrix and reserve memory
+  int nrNZ_per_row_off_diag = 2 * (N0 - 1) + 1;
+  // int nrNZ_off_diag = nrNZ_per_row_off_diag * nr_vals;
+
+  // resize the inverse matrix to the appropriate size
+  // this won't reserve any memory for non-zero values
+  A_inv->resize(nr_vals, nr_vals);
+
+  // temporary vector storing column for holding the non zero entries
+  std::vector<Eigen::Triplet<double>> temp_column;
+  temp_column.reserve(nrNZ_per_row_off_diag);
+
+  // for creating columns in inv_A
+  for (size_t i = 0; i < N - 1; ++i) {
+    TEASER_INFO_MSG_THROTTLE("Linear proj at i=" << i << "\n", i, 10);
+    for (size_t j = i + 1; j < N; ++j) {
+      // get current column index
+      // var_j_idx is unique for all each loop, i.e., each var_j_idx only occurs once and the loops
+      // won't enter the same column twice
+      int var_j_idx = mat2vec(i, j);
+
+      // start a inner vector
+      // A_inv is column major so this will enable us to insert values to the end of column
+      // var_j_idx
+      A_inv->startVec(var_j_idx);
+
+      // flag to indicated whether a diagonal entry on this column has been inserted
+      // this is used to save computation time for adding x to all the diagonal entries
+      bool diag_inserted = false;
+      size_t diag_idx = 0;
+
+      for (size_t p = 0; p < N; ++p) {
+        if ((p != j) && (p != i)) {
+          int var_i_idx;
+          double entry_val;
+          if (p < i) {
+            // same row i, i,j upper triangular, i,p lower triangular
+            // flip to upper-triangular
+            var_i_idx = mat2vec(p, i);
+            entry_val = y * theta_prepended(j) * theta_prepended(p);
+          } else {
+            var_i_idx = mat2vec(i, p);
+            entry_val = -y * theta_prepended(j) * theta_prepended(p);
+          }
+          temp_column.emplace_back(var_i_idx, var_j_idx, entry_val);
+          if (var_i_idx == var_j_idx) {
+            diag_inserted = true;
+            diag_idx = temp_column.size()-1;
+          }
+        }
+      }
+      for (size_t p = 0; p < N; ++p) {
+        if ((p != i) && (p != j)) {
+          int var_i_idx;
+          double entry_val;
+          if (p < j) {
+            // flip to upper-triangular
+            var_i_idx = mat2vec(p, j);
+            entry_val = -y * theta_prepended(i) * theta_prepended(p);
+          } else {
+            var_i_idx = mat2vec(j, p);
+            entry_val = y * theta_prepended(i) * theta_prepended(p);
+          }
+          temp_column.emplace_back(var_i_idx, var_j_idx, entry_val);
+          if (var_i_idx == var_j_idx) {
+            diag_inserted = true;
+            diag_idx = temp_column.size()-1;
+          }
+        }
+      }
+
+      // insert diagonal entries if not already done so
+      if (!diag_inserted) {
+        temp_column.emplace_back(var_j_idx, var_j_idx, x);
+      } else {
+        double entry_val = temp_column[diag_idx].value() + x;
+        temp_column[diag_idx] = {var_j_idx, var_j_idx, entry_val};
+      }
+
+      // sort by row index (ascending)
+      std::sort(temp_column.begin(), temp_column.end(),
+                [](const Eigen::Triplet<double>& t1, const Eigen::Triplet<double>& t2) {
+                  return t1.row() < t2.row();
+                });
+
+      // populate A_inv with the temporary column
+      for (size_t tidx = 0; tidx < temp_column.size(); ++tidx) {
+        // take care of the diagonal entries
+        A_inv->insertBack(temp_column[tidx].row(), var_j_idx) = temp_column[tidx].value();
+      }
+      temp_column.clear();
+      temp_column.reserve(nrNZ_per_row_off_diag);
+    }
+  }
+  TEASER_DEBUG_INFO_MSG("Finalizing A_inv ...");
+  A_inv->finalize();
+  TEASER_DEBUG_INFO_MSG("A_inv finalized.");
+}
+
+void teaser::DRSCertifier::getBlockRowSum(const Eigen::MatrixXd& A, const int& row,
+                                          const Eigen::Matrix<double, 1, Eigen::Dynamic>& theta,
+                                          Eigen::Vector4d* output) {
+  // unit = sparse(4,1); unit(end) = 1;
+  // vector = kron(theta,unit); % vector of size 4N+4 by 1
+  // entireRow = A(blkIndices(row,4),:); % entireRow of size 4 by 4N+4
+  // row_sum_last_column = entireRow * vector; % last column sum has size 4 by 1;
+  Eigen::Matrix<double, 4, 1> unit = Eigen::Matrix<double, 4, 1>::Zero();
+  unit(3, 0) = 1;
+  Eigen::Matrix<double, Eigen::Dynamic, 1> vector =
+      vectorKron<double, Eigen::Dynamic, 4>(theta.transpose(), unit);
+  *output = A.middleRows<4>(row) * vector;
+}

+ 61 - 0
src/detection/detection_localization_global_hdl/teaser/fpfh.cc

@@ -0,0 +1,61 @@
+/**
+ * Copyright 2020, Massachusetts Institute of Technology,
+ * Cambridge, MA 02139
+ * All Rights Reserved
+ * Authors: Jingnan Shi, et al. (see THANKS for the full author list)
+ * See LICENSE for the license information
+ */
+
+#include "teaser/fpfh.h"
+
+#include <pcl/features/normal_3d.h>
+
+#include "teaser/utils.h"
+
+teaser::FPFHCloudPtr teaser::FPFHEstimation::computeFPFHFeatures(
+    const teaser::PointCloud& input_cloud, double normal_search_radius, double fpfh_search_radius) {
+
+  // Intermediate variables
+  pcl::PointCloud<pcl::Normal>::Ptr normals(new pcl::PointCloud<pcl::Normal>);
+  teaser::FPFHCloudPtr descriptors(new pcl::PointCloud<pcl::FPFHSignature33>());
+  pcl::PointCloud<pcl::PointXYZ>::Ptr pcl_input_cloud(new pcl::PointCloud<pcl::PointXYZ>);
+  for (auto& i : input_cloud) {
+    pcl::PointXYZ p(i.x, i.y, i.z);
+    pcl_input_cloud->push_back(p);
+  }
+
+  // Estimate normals
+  pcl::NormalEstimation<pcl::PointXYZ, pcl::Normal> normalEstimation;
+  normalEstimation.setInputCloud(pcl_input_cloud);
+  normalEstimation.setRadiusSearch(normal_search_radius);
+  pcl::search::KdTree<pcl::PointXYZ>::Ptr kdtree(new pcl::search::KdTree<pcl::PointXYZ>);
+  normalEstimation.setSearchMethod(kdtree);
+  normalEstimation.compute(*normals);
+
+  // Estimate FPFH
+  setInputCloud(pcl_input_cloud);
+  setInputNormals(normals);
+  setSearchMethod(kdtree);
+  setRadiusSearch(fpfh_search_radius);
+  compute(*descriptors);
+
+  return descriptors;
+}
+
+void teaser::FPFHEstimation::setInputCloud(pcl::PointCloud<pcl::PointXYZ>::Ptr input_cloud) {
+  fpfh_estimation_->setInputCloud(input_cloud);
+}
+
+void teaser::FPFHEstimation::setInputNormals(pcl::PointCloud<pcl::Normal>::Ptr input_normals) {
+  fpfh_estimation_->setInputNormals(input_normals);
+}
+
+void teaser::FPFHEstimation::setSearchMethod(
+    pcl::search::KdTree<pcl::PointXYZ>::Ptr search_method) {
+  fpfh_estimation_->setSearchMethod(search_method);
+}
+
+void teaser::FPFHEstimation::compute(pcl::PointCloud<pcl::FPFHSignature33>& output_cloud) {
+  fpfh_estimation_->compute(output_cloud);
+}
+void teaser::FPFHEstimation::setRadiusSearch(double r) { fpfh_estimation_->setRadiusSearch(r); }

+ 125 - 0
src/detection/detection_localization_global_hdl/teaser/graph.cc

@@ -0,0 +1,125 @@
+/**
+ * Copyright 2020, Massachusetts Institute of Technology,
+ * Cambridge, MA 02139
+ * All Rights Reserved
+ * Authors: Jingnan Shi, et al. (see THANKS for the full author list)
+ * See LICENSE for the license information
+ */
+
+#include "teaser/graph.h"
+#include "pmc/pmc.h"
+
+vector<int> teaser::MaxCliqueSolver::findMaxClique(teaser::Graph graph) {
+
+  // Handle deprecated field
+  if (!params_.solve_exactly) {
+    params_.solver_mode = CLIQUE_SOLVER_MODE::PMC_HEU;
+  }
+
+  // Create a PMC graph from the TEASER graph
+  vector<int> edges;
+  vector<long long> vertices;
+  vertices.push_back(edges.size());
+
+  const auto all_vertices = graph.getVertices();
+  for (const auto& i : all_vertices) {
+    const auto& c_edges = graph.getEdges(i);
+    edges.insert(edges.end(), c_edges.begin(), c_edges.end());
+    vertices.push_back(edges.size());
+  }
+
+  // Use PMC to calculate
+  pmc::pmc_graph G(vertices, edges);
+
+  // Prepare PMC input
+  // TODO: Incorporate this to the constructor
+  pmc::input in;
+  in.algorithm = 0;
+  in.threads = 12;
+  in.experiment = 0;
+  in.lb = 0;
+  in.ub = 0;
+  in.param_ub = 0;
+  in.adj_limit = 20000;
+  in.time_limit = params_.time_limit;
+  in.remove_time = 4;
+  in.graph_stats = false;
+  in.verbose = false;
+  in.help = false;
+  in.MCE = false;
+  in.decreasing_order = false;
+  in.heu_strat = "kcore";
+  in.vertex_search_order = "deg";
+
+  // vector to represent max clique
+  vector<int> C;
+
+  // upper-bound of max clique
+  G.compute_cores();
+  auto max_core = G.get_max_core();
+
+  TEASER_DEBUG_INFO_MSG("Max core number: " << max_core);
+  TEASER_DEBUG_INFO_MSG("Num vertices: " << vertices.size());
+
+  // check for k-core heuristic threshold
+  // check whether threshold equals 1 to short circuit the comparison
+  if (params_.solver_mode == CLIQUE_SOLVER_MODE::KCORE_HEU &&
+      params_.kcore_heuristic_threshold != 1 &&
+      max_core > static_cast<int>(params_.kcore_heuristic_threshold *
+                                  static_cast<double>(all_vertices.size()))) {
+    TEASER_DEBUG_INFO_MSG("Using K-core heuristic finder.");
+    // remove all nodes with core number less than max core number
+    // k_cores is a vector saving the core number of each vertex
+    auto k_cores = G.get_kcores();
+    for (int i = 1; i < k_cores->size(); ++i) {
+      // Note: k_core has size equals to num vertices + 1
+      if ((*k_cores)[i] >= max_core) {
+        C.push_back(i-1);
+      }
+    }
+    return C;
+  }
+
+  if (in.ub == 0) {
+    in.ub = max_core + 1;
+  }
+
+  // lower-bound of max clique
+  if (in.lb == 0 && in.heu_strat != "0") { // skip if given as input
+    pmc::pmc_heu maxclique(G, in);
+    in.lb = maxclique.search(G, C);
+  }
+
+  assert(in.lb != 0);
+  if (in.lb == 0) {
+    // This means that max clique has a size of one
+    TEASER_DEBUG_ERROR_MSG("Max clique lower bound equals to zero. Abort.");
+    return C;
+  }
+
+  if (in.lb == in.ub) {
+    return C;
+  }
+
+  // Optional exact max clique finding
+  if (params_.solver_mode == CLIQUE_SOLVER_MODE::PMC_EXACT) {
+    // The following methods are used:
+    // 1. k-core pruning
+    // 2. neigh-core pruning/ordering
+    // 3. dynamic coloring bounds/sort
+    // see the original PMC paper and implementation for details:
+    // R. A. Rossi, D. F. Gleich, and A. H. Gebremedhin, “Parallel Maximum Clique Algorithms with
+    // Applications to Network Analysis,” SIAM J. Sci. Comput., vol. 37, no. 5, pp. C589–C616, Jan.
+    // 2015.
+    if (G.num_vertices() < in.adj_limit) {
+      G.create_adj();
+      pmc::pmcx_maxclique finder(G, in);
+      finder.search_dense(G, C);
+    } else {
+      pmc::pmcx_maxclique finder(G, in);
+      finder.search(G, C);
+    }
+  }
+
+  return C;
+}

+ 336 - 0
src/detection/detection_localization_global_hdl/teaser/matcher.cc

@@ -0,0 +1,336 @@
+/**
+ * Copyright 2020, Massachusetts Institute of Technology,
+ * Cambridge, MA 02139
+ * All Rights Reserved
+ * Authors: Jingnan Shi, et al. (see THANKS for the full author list)
+ * See LICENSE for the license information
+ */
+
+#include "teaser/matcher.h"
+
+#include <Eigen/Core>
+#include <Eigen/Geometry>
+#include <flann/flann.hpp>
+
+#include "teaser/geometry.h"
+
+namespace teaser {
+
+std::vector<std::pair<int, int>> Matcher::calculateCorrespondences(
+    teaser::PointCloud& source_points, teaser::PointCloud& target_points,
+    teaser::FPFHCloud& source_features, teaser::FPFHCloud& target_features, bool use_absolute_scale,
+    bool use_crosscheck, bool use_tuple_test, float tuple_scale) {
+
+  Feature cloud_features;
+  pointcloud_.push_back(source_points);
+  pointcloud_.push_back(target_points);
+
+  // It compute the global_scale_ required to set correctly the search radius
+  normalizePoints(use_absolute_scale);
+
+  for (auto& f : source_features) {
+    Eigen::VectorXf fpfh(33);
+    for (int i = 0; i < 33; i++)
+      fpfh(i) = f.histogram[i];
+    cloud_features.push_back(fpfh);
+  }
+  features_.push_back(cloud_features);
+
+  cloud_features.clear();
+  for (auto& f : target_features) {
+    Eigen::VectorXf fpfh(33);
+    for (int i = 0; i < 33; i++)
+      fpfh(i) = f.histogram[i];
+    cloud_features.push_back(fpfh);
+  }
+  features_.push_back(cloud_features);
+
+  advancedMatching(use_crosscheck, use_tuple_test, tuple_scale);
+
+  return corres_;
+}
+
+void Matcher::normalizePoints(bool use_absolute_scale) {
+  int num = 2;
+  float scale = 0;
+
+  means_.clear();
+
+  for (int i = 0; i < num; ++i) {
+    float max_scale = 0;
+
+    // compute mean
+    Eigen::Vector3f mean;
+    mean.setZero();
+
+    int npti = pointcloud_[i].size();
+    for (int ii = 0; ii < npti; ++ii) {
+      Eigen::Vector3f p(pointcloud_[i][ii].x, pointcloud_[i][ii].y, pointcloud_[i][ii].z);
+      mean = mean + p;
+    }
+    mean = mean / npti;
+    means_.push_back(mean);
+
+    for (int ii = 0; ii < npti; ++ii) {
+      pointcloud_[i][ii].x -= mean(0);
+      pointcloud_[i][ii].y -= mean(1);
+      pointcloud_[i][ii].z -= mean(2);
+    }
+
+    // compute scale
+    for (int ii = 0; ii < npti; ++ii) {
+      Eigen::Vector3f p(pointcloud_[i][ii].x, pointcloud_[i][ii].y, pointcloud_[i][ii].z);
+      float temp = p.norm(); // because we extract mean in the previous stage.
+      if (temp > max_scale) {
+        max_scale = temp;
+      }
+    }
+
+    if (max_scale > scale) {
+      scale = max_scale;
+    }
+  }
+
+  // mean of the scale variation
+  if (use_absolute_scale) {
+    global_scale_ = 1.0f;
+  } else {
+    global_scale_ = scale; // second choice: we keep the maximum scale.
+  }
+
+  if (global_scale_ != 1.0f) {
+    for (int i = 0; i < num; ++i) {
+      int npti = pointcloud_[i].size();
+      for (int ii = 0; ii < npti; ++ii) {
+        pointcloud_[i][ii].x /= global_scale_;
+        pointcloud_[i][ii].y /= global_scale_;
+        pointcloud_[i][ii].z /= global_scale_;
+      }
+    }
+  }
+}
+void Matcher::advancedMatching(bool use_crosscheck, bool use_tuple_test, float tuple_scale) {
+
+  int fi = 0; // source idx
+  int fj = 1; // destination idx
+
+  bool swapped = false;
+
+  if (pointcloud_[fj].size() > pointcloud_[fi].size()) {
+    int temp = fi;
+    fi = fj;
+    fj = temp;
+    swapped = true;
+  }
+
+  int nPti = pointcloud_[fi].size();
+  int nPtj = pointcloud_[fj].size();
+
+  ///////////////////////////
+  /// Build FLANNTREE
+  ///////////////////////////
+  KDTree feature_tree_i(flann::KDTreeSingleIndexParams(15));
+  buildKDTree(features_[fi], &feature_tree_i);
+
+  KDTree feature_tree_j(flann::KDTreeSingleIndexParams(15));
+  buildKDTree(features_[fj], &feature_tree_j);
+
+  std::vector<int> corres_K, corres_K2;
+  std::vector<float> dis;
+  std::vector<int> ind;
+
+  std::vector<std::pair<int, int>> corres;
+  std::vector<std::pair<int, int>> corres_cross;
+  std::vector<std::pair<int, int>> corres_ij;
+  std::vector<std::pair<int, int>> corres_ji;
+
+  ///////////////////////////
+  /// INITIAL MATCHING
+  ///////////////////////////
+  std::vector<int> i_to_j(nPti, -1);
+  for (int j = 0; j < nPtj; j++) {
+    searchKDTree(&feature_tree_i, features_[fj][j], corres_K, dis, 1);
+    int i = corres_K[0];
+    if (i_to_j[i] == -1) {
+      searchKDTree(&feature_tree_j, features_[fi][i], corres_K, dis, 1);
+      int ij = corres_K[0];
+      i_to_j[i] = ij;
+    }
+    corres_ji.push_back(std::pair<int, int>(i, j));
+  }
+
+  for (int i = 0; i < nPti; i++) {
+    if (i_to_j[i] != -1)
+      corres_ij.push_back(std::pair<int, int>(i, i_to_j[i]));
+  }
+
+  int ncorres_ij = corres_ij.size();
+  int ncorres_ji = corres_ji.size();
+
+  // corres = corres_ij + corres_ji;
+  for (int i = 0; i < ncorres_ij; ++i)
+    corres.push_back(std::pair<int, int>(corres_ij[i].first, corres_ij[i].second));
+  for (int j = 0; j < ncorres_ji; ++j)
+    corres.push_back(std::pair<int, int>(corres_ji[j].first, corres_ji[j].second));
+
+  ///////////////////////////
+  /// CROSS CHECK
+  /// input : corres_ij, corres_ji
+  /// output : corres
+  ///////////////////////////
+  if (use_crosscheck) {
+    std::cout << "CROSS CHECK" << std::endl;
+    // build data structure for cross check
+    corres.clear();
+    corres_cross.clear();
+    std::vector<std::vector<int>> Mi(nPti);
+    std::vector<std::vector<int>> Mj(nPtj);
+
+    int ci, cj;
+    for (int i = 0; i < ncorres_ij; ++i) {
+      ci = corres_ij[i].first;
+      cj = corres_ij[i].second;
+      Mi[ci].push_back(cj);
+    }
+    for (int j = 0; j < ncorres_ji; ++j) {
+      ci = corres_ji[j].first;
+      cj = corres_ji[j].second;
+      Mj[cj].push_back(ci);
+    }
+
+    // cross check
+    for (int i = 0; i < nPti; ++i) {
+      for (int ii = 0; ii < Mi[i].size(); ++ii) {
+        int j = Mi[i][ii];
+        for (int jj = 0; jj < Mj[j].size(); ++jj) {
+          if (Mj[j][jj] == i) {
+            corres.push_back(std::pair<int, int>(i, j));
+            corres_cross.push_back(std::pair<int, int>(i, j));
+          }
+        }
+      }
+    }
+  } else {
+    std::cout << "Skipping Cross Check." << std::endl;
+  }
+
+  ///////////////////////////
+  /// TUPLE CONSTRAINT
+  /// input : corres
+  /// output : corres
+  ///////////////////////////
+  if (use_tuple_test && tuple_scale != 0) {
+    std::cout << "TUPLE CONSTRAINT" << std::endl;
+    srand(time(NULL));
+    int rand0, rand1, rand2;
+    int idi0, idi1, idi2;
+    int idj0, idj1, idj2;
+    float scale = tuple_scale;
+    int ncorr = corres.size();
+    int number_of_trial = ncorr * 100;
+    std::vector<std::pair<int, int>> corres_tuple;
+
+    for (int i = 0; i < number_of_trial; i++) {
+      rand0 = rand() % ncorr;
+      rand1 = rand() % ncorr;
+      rand2 = rand() % ncorr;
+
+      idi0 = corres[rand0].first;
+      idj0 = corres[rand0].second;
+      idi1 = corres[rand1].first;
+      idj1 = corres[rand1].second;
+      idi2 = corres[rand2].first;
+      idj2 = corres[rand2].second;
+
+      // collect 3 points from i-th fragment
+      Eigen::Vector3f pti0 = {pointcloud_[fi][idi0].x, pointcloud_[fi][idi0].y,
+                              pointcloud_[fi][idi0].z};
+      Eigen::Vector3f pti1 = {pointcloud_[fi][idi1].x, pointcloud_[fi][idi1].y,
+                              pointcloud_[fi][idi1].z};
+      Eigen::Vector3f pti2 = {pointcloud_[fi][idi2].x, pointcloud_[fi][idi2].y,
+                              pointcloud_[fi][idi2].z};
+
+      float li0 = (pti0 - pti1).norm();
+      float li1 = (pti1 - pti2).norm();
+      float li2 = (pti2 - pti0).norm();
+
+      // collect 3 points from j-th fragment
+      Eigen::Vector3f ptj0 = {pointcloud_[fj][idj0].x, pointcloud_[fj][idj0].y,
+                              pointcloud_[fj][idj0].z};
+      Eigen::Vector3f ptj1 = {pointcloud_[fj][idj1].x, pointcloud_[fj][idj1].y,
+                              pointcloud_[fj][idj1].z};
+      Eigen::Vector3f ptj2 = {pointcloud_[fj][idj2].x, pointcloud_[fj][idj2].y,
+                              pointcloud_[fj][idj2].z};
+
+      float lj0 = (ptj0 - ptj1).norm();
+      float lj1 = (ptj1 - ptj2).norm();
+      float lj2 = (ptj2 - ptj0).norm();
+
+      if ((li0 * scale < lj0) && (lj0 < li0 / scale) && (li1 * scale < lj1) &&
+          (lj1 < li1 / scale) && (li2 * scale < lj2) && (lj2 < li2 / scale)) {
+        corres_tuple.push_back(std::pair<int, int>(idi0, idj0));
+        corres_tuple.push_back(std::pair<int, int>(idi1, idj1));
+        corres_tuple.push_back(std::pair<int, int>(idi2, idj2));
+      }
+    }
+    corres.clear();
+
+    for (size_t i = 0; i < corres_tuple.size(); ++i)
+      corres.push_back(std::pair<int, int>(corres_tuple[i].first, corres_tuple[i].second));
+  } else {
+    std::cout << "Skipping Tuple Constraint." << std::endl;
+  }
+
+  if (swapped) {
+    std::vector<std::pair<int, int>> temp;
+    for (size_t i = 0; i < corres.size(); i++)
+      temp.push_back(std::pair<int, int>(corres[i].second, corres[i].first));
+    corres.clear();
+    corres = temp;
+  }
+  corres_ = corres;
+
+  ///////////////////////////
+  /// ERASE DUPLICATES
+  /// input : corres_
+  /// output : corres_
+  ///////////////////////////
+  std::sort(corres_.begin(), corres_.end());
+  corres_.erase(std::unique(corres_.begin(), corres_.end()), corres_.end());
+}
+
+template <typename T> void Matcher::buildKDTree(const std::vector<T>& data, Matcher::KDTree* tree) {
+  int rows, dim;
+  rows = (int)data.size();
+  dim = (int)data[0].size();
+  std::vector<float> dataset(rows * dim);
+  flann::Matrix<float> dataset_mat(&dataset[0], rows, dim);
+  for (int i = 0; i < rows; i++)
+    for (int j = 0; j < dim; j++)
+      dataset[i * dim + j] = data[i][j];
+  KDTree temp_tree(dataset_mat, flann::KDTreeSingleIndexParams(15));
+  temp_tree.buildIndex();
+  *tree = temp_tree;
+}
+
+template <typename T>
+void Matcher::searchKDTree(Matcher::KDTree* tree, const T& input, std::vector<int>& indices,
+                           std::vector<float>& dists, int nn) {
+  int rows_t = 1;
+  int dim = input.size();
+
+  std::vector<float> query;
+  query.resize(rows_t * dim);
+  for (int i = 0; i < dim; i++)
+    query[i] = input(i);
+  flann::Matrix<float> query_mat(&query[0], rows_t, dim);
+
+  indices.resize(rows_t * nn);
+  dists.resize(rows_t * nn);
+  flann::Matrix<int> indices_mat(&indices[0], rows_t, nn);
+  flann::Matrix<float> dists_mat(&dists[0], rows_t, nn);
+
+  tree->knnSearch(query_mat, indices_mat, dists_mat, nn, flann::SearchParams(128));
+}
+
+} // namespace teaser

+ 110 - 0
src/detection/detection_localization_global_hdl/teaser/ply_io.cc

@@ -0,0 +1,110 @@
+/**
+ * Copyright 2020, Massachusetts Institute of Technology,
+ * Cambridge, MA 02139
+ * All Rights Reserved
+ * Authors: Jingnan Shi, et al. (see THANKS for the full author list)
+ * See LICENSE for the license information
+ */
+
+#include <sstream>
+#include <fstream>
+#include <iostream>
+#include <memory>
+#include <cstring>
+
+#include "teaser/ply_io.h"
+#include "tinyply.h"
+
+// Internal datatypes for storing ply vertices
+struct float3 {
+  float x, y, z;
+};
+struct double3 {
+  double x, y, z;
+};
+
+int teaser::PLYReader::read(const std::string& file_name, teaser::PointCloud& cloud) {
+  std::unique_ptr<std::istream> file_stream;
+  std::vector<uint8_t> byte_buffer;
+
+  try {
+    file_stream.reset(new std::ifstream(file_name, std::ios::binary));
+
+    if (!file_stream || file_stream->fail()) {
+      std::cerr << "Failed to open " << file_name << std::endl;
+      return -1;
+    }
+
+    tinyply::PlyFile file;
+    file.parse_header(*file_stream);
+
+    std::shared_ptr<tinyply::PlyData> vertices;
+
+    try {
+      vertices = file.request_properties_from_element("vertex", {"x", "y", "z"});
+    } catch (const std::exception& e) {
+      std::cerr << "tinyply exception: " << e.what() << std::endl;
+      return -1;
+    }
+
+    file.read(*file_stream);
+
+    if (vertices) {
+      std::cout << "\tRead " << vertices->count << " total vertices " << std::endl;
+      if (vertices->t == tinyply::Type::FLOAT32) {
+        std::vector<float3> verts_floats(vertices->count);
+        const size_t numVerticesBytes = vertices->buffer.size_bytes();
+        std::memcpy(verts_floats.data(), vertices->buffer.get(), numVerticesBytes);
+        for (auto& i : verts_floats) {
+          cloud.push_back({i.x, i.y, i.z});
+        }
+      }
+      if (vertices->t == tinyply::Type::FLOAT64) {
+        std::vector<double3> verts_doubles(vertices->count);
+        const size_t numVerticesBytes = vertices->buffer.size_bytes();
+        std::memcpy(verts_doubles.data(), vertices->buffer.get(), numVerticesBytes);
+        for (auto& i : verts_doubles) {
+          cloud.push_back(
+              {static_cast<float>(i.x), static_cast<float>(i.y), static_cast<float>(i.z)});
+        }
+      }
+    }
+
+  } catch (const std::exception& e) {
+    std::cerr << "Caught tinyply exception: " << e.what() << std::endl;
+    return -1;
+  }
+
+  return 0;
+}
+
+int teaser::PLYWriter::write(const std::string& file_name, const teaser::PointCloud& cloud,
+                             bool binary_mode) {
+  // Open file buffer according to binary mode
+  std::filebuf fb;
+  if (binary_mode) {
+    fb.open(file_name, std::ios::out | std::ios::binary);
+  } else {
+    fb.open(file_name, std::ios::out);
+  }
+
+  // Open output stream
+  std::ostream outstream(&fb);
+  if (outstream.fail()) {
+    std::cerr << "Failed to open " << file_name << std::endl;
+    return -1;
+  }
+
+  // Use tinyply to write to ply file
+  tinyply::PlyFile ply_file;
+  std::vector<float3> temp_vertices;
+  for (auto& i : cloud) {
+    temp_vertices.push_back({i.x, i.y, i.z});
+  }
+  ply_file.add_properties_to_element(
+      "vertex", {"x", "y", "z"}, tinyply::Type::FLOAT32, temp_vertices.size(),
+      reinterpret_cast<uint8_t*>(temp_vertices.data()), tinyply::Type::INVALID, 0);
+  ply_file.write(outstream, binary_mode);
+
+  return 0;
+}

+ 699 - 0
src/detection/detection_localization_global_hdl/teaser/registration.cc

@@ -0,0 +1,699 @@
+/**
+ * Copyright 2020, Massachusetts Institute of Technology,
+ * Cambridge, MA 02139
+ * All Rights Reserved
+ * Authors: Jingnan Shi, et al. (see THANKS for the full author list)
+ * See LICENSE for the license information
+ */
+
+#include "teaser/registration.h"
+
+#include <cmath>
+#include <functional>
+#include <iostream>
+#include <limits>
+#include <iterator>
+
+#include "teaser/utils.h"
+#include "teaser/graph.h"
+#include "teaser/macros.h"
+
+void teaser::ScalarTLSEstimator::estimate(const Eigen::RowVectorXd& X,
+                                          const Eigen::RowVectorXd& ranges, double* estimate,
+                                          Eigen::Matrix<bool, 1, Eigen::Dynamic>* inliers) {
+  // check input parameters
+  bool dimension_inconsistent = (X.rows() != ranges.rows()) || (X.cols() != ranges.cols());
+  if (inliers) {
+    dimension_inconsistent |= ((inliers->rows() != 1) || (inliers->cols() != ranges.cols()));
+  }
+  bool only_one_element = (X.rows() == 1) && (X.cols() == 1);
+  assert(!dimension_inconsistent);
+  assert(!only_one_element); // TODO: admit a trivial solution
+
+  int N = X.cols();
+  std::vector<std::pair<double, int>> h;
+  for (size_t i= 0 ;i < N ;++i){
+    h.push_back(std::make_pair(X(i) - ranges(i), i+1));
+    h.push_back(std::make_pair(X(i) + ranges(i), -i-1));
+  }
+
+  // ascending order
+  std::sort(h.begin(), h.end(), [](std::pair<double, int> a, std::pair<double, int> b) { return a.first < b.first; });
+
+  // calculate weights
+  Eigen::RowVectorXd weights = ranges.array().square();
+  weights = weights.array().inverse();
+  int nr_centers = 2 * N;
+  Eigen::RowVectorXd x_hat = Eigen::MatrixXd::Zero(1, nr_centers);
+  Eigen::RowVectorXd x_cost = Eigen::MatrixXd::Zero(1, nr_centers);
+
+  double ranges_inverse_sum = ranges.sum();
+  double dot_X_weights = 0;
+  double dot_weights_consensus = 0;
+  int consensus_set_cardinal = 0;
+  double sum_xi = 0; 
+  double sum_xi_square = 0;
+
+  for (size_t i = 0 ; i < nr_centers ; ++i){
+
+    int idx = int(std::abs(h.at(i).second)) - 1; // Indices starting at 1
+    int epsilon = (h.at(i).second > 0) ? 1 : -1;
+
+    consensus_set_cardinal += epsilon;
+    dot_weights_consensus += epsilon * weights(idx);
+    dot_X_weights += epsilon * weights(idx) * X(idx);
+    ranges_inverse_sum -= epsilon * ranges(idx);
+    sum_xi += epsilon * X(idx);
+    sum_xi_square += epsilon * X(idx) * X(idx);
+
+    x_hat(i) = dot_X_weights / dot_weights_consensus;
+
+    double residual = consensus_set_cardinal * x_hat(i) * x_hat(i) + sum_xi_square  - 2 * sum_xi * x_hat(i);
+    x_cost(i) = residual + ranges_inverse_sum;
+      
+  }
+
+  size_t min_idx;
+  x_cost.minCoeff(&min_idx);
+  double estimate_temp = x_hat(min_idx);
+  if (estimate) {
+    // update estimate output if it's not nullptr
+    *estimate = estimate_temp;
+  }
+  if (inliers) {
+    // update inlier output if it's not nullptr
+    *inliers = (X.array() - estimate_temp).array().abs() <= ranges.array();
+  }
+}
+
+void teaser::ScalarTLSEstimator::estimate_tiled(const Eigen::RowVectorXd& X,
+                                                const Eigen::RowVectorXd& ranges, const int& s,
+                                                double* estimate,
+                                                Eigen::Matrix<bool, 1, Eigen::Dynamic>* inliers) {
+  // check input parameters
+  bool dimension_inconsistent = (X.rows() != ranges.rows()) || (X.cols() != ranges.cols());
+  if (inliers) {
+    dimension_inconsistent |= ((inliers->rows() != 1) || (inliers->cols() != ranges.cols()));
+  }
+  bool only_one_element = (X.rows() == 1) && (X.cols() == 1);
+  assert(!dimension_inconsistent);
+  assert(!only_one_element); // TODO: admit a trivial solution
+
+  // Prepare variables for calculations
+  int N = X.cols();
+  Eigen::RowVectorXd h(N * 2);
+  h << X - ranges, X + ranges;
+  // ascending order
+  std::sort(h.data(), h.data() + h.cols(), [](double a, double b) { return a < b; });
+  // calculate interval centers
+  Eigen::RowVectorXd h_centers = (h.head(h.cols() - 1) + h.tail(h.cols() - 1)) / 2;
+  auto nr_centers = h_centers.cols();
+
+  // calculate weights
+  Eigen::RowVectorXd weights = ranges.array().square();
+  weights = weights.array().inverse();
+
+  Eigen::RowVectorXd x_hat = Eigen::MatrixXd::Zero(1, nr_centers);
+  Eigen::RowVectorXd x_cost = Eigen::MatrixXd::Zero(1, nr_centers);
+
+  // loop tiling
+  size_t ih_bound = ((nr_centers) & ~((s)-1));
+  size_t jh_bound = ((N) & ~((s)-1));
+
+  std::vector<double> ranges_inverse_sum_vec(nr_centers, 0);
+  std::vector<double> dot_X_weights_vec(nr_centers, 0);
+  std::vector<double> dot_weights_consensus_vec(nr_centers, 0);
+  std::vector<std::vector<double>> X_consensus_table(nr_centers, std::vector<double>());
+
+  auto inner_loop_f = [&](const size_t& i, const size_t& jh, const size_t& jl_lower_bound,
+                          const size_t& jl_upper_bound) {
+    double& ranges_inverse_sum = ranges_inverse_sum_vec[i];
+    double& dot_X_weights = dot_X_weights_vec[i];
+    double& dot_weights_consensus = dot_weights_consensus_vec[i];
+    std::vector<double>& X_consensus_vec = X_consensus_table[i];
+
+    size_t j = 0;
+    for (size_t jl = jl_lower_bound; jl < jl_upper_bound; ++jl) {
+      j = jh + jl;
+      bool consensus = std::abs(X(j) - h_centers(i)) <= ranges(j);
+      if (consensus) {
+        dot_X_weights += X(j) * weights(j);
+        dot_weights_consensus += weights(j);
+        X_consensus_vec.push_back(X(j));
+      } else {
+        ranges_inverse_sum += ranges(j);
+      }
+    }
+
+    if (j == N - 1) {
+      // x_hat(i) = dot(X(consensus), weights(consensus)) / dot(weights, consensus);
+      x_hat(i) = dot_X_weights / dot_weights_consensus;
+
+      // residual = X(consensus)-x_hat(i);
+      Eigen::Map<Eigen::VectorXd> X_consensus(X_consensus_vec.data(), X_consensus_vec.size());
+      Eigen::VectorXd residual = X_consensus.array() - x_hat(i);
+
+      // x_cost(i) = dot(residual,residual) + sum(ranges(~consensus));
+      x_cost(i) = residual.squaredNorm() + ranges_inverse_sum;
+    }
+  };
+
+#pragma omp parallel for default(none) shared(                                                     \
+    jh_bound, ih_bound, ranges_inverse_sum_vec, dot_X_weights_vec, dot_weights_consensus_vec,      \
+    X_consensus_table, h_centers, weights, N, X, x_hat, x_cost, s, ranges, inner_loop_f)
+  for (size_t ih = 0; ih < ih_bound; ih += s) {
+    for (size_t jh = 0; jh < jh_bound; jh += s) {
+      for (size_t il = 0; il < s; ++il) {
+        size_t i = ih + il;
+        inner_loop_f(i, jh, 0, s);
+      }
+    }
+  }
+
+  // finish the left over entries
+  // 1. Finish the unfinished js
+#pragma omp parallel for default(none)                                                             \
+    shared(jh_bound, ih_bound, ranges_inverse_sum_vec, dot_X_weights_vec,                          \
+           dot_weights_consensus_vec, X_consensus_table, h_centers, weights, N, X, x_hat, x_cost,  \
+           s, ranges, nr_centers, inner_loop_f)
+  for (size_t i = 0; i < nr_centers; ++i) {
+    inner_loop_f(i, 0, jh_bound, N);
+  }
+
+  // 2. Finish the unfinished is
+#pragma omp parallel for default(none)                                                             \
+    shared(jh_bound, ih_bound, ranges_inverse_sum_vec, dot_X_weights_vec,                          \
+           dot_weights_consensus_vec, X_consensus_table, h_centers, weights, N, X, x_hat, x_cost,  \
+           s, ranges, nr_centers, inner_loop_f)
+  for (size_t i = ih_bound; i < nr_centers; ++i) {
+    inner_loop_f(i, 0, 0, N);
+  }
+
+  size_t min_idx;
+  x_cost.minCoeff(&min_idx);
+  double estimate_temp = x_hat(min_idx);
+  if (estimate) {
+    // update estimate output if it's not nullptr
+    *estimate = estimate_temp;
+  }
+  if (inliers) {
+    // update inlier output if it's not nullptr
+    *inliers = (X.array() - estimate_temp).array().abs() <= ranges.array();
+  }
+}
+
+void teaser::FastGlobalRegistrationSolver::solveForRotation(
+    const Eigen::Matrix<double, 3, Eigen::Dynamic>& src,
+    const Eigen::Matrix<double, 3, Eigen::Dynamic>& dst, Eigen::Matrix3d* rotation,
+    Eigen::Matrix<bool, 1, Eigen::Dynamic>* inliers) {
+  assert(rotation);                 // make sure R is not a nullptr
+  assert(src.cols() == dst.cols()); // check dimensions of input data
+  assert(params_.gnc_factor > 1);   // make sure mu will decrease
+  assert(params_.noise_bound != 0); // make sure noise bound is not zero
+  if (inliers) {
+    assert(inliers->cols() == src.cols());
+  }
+
+  // Prepare some intermediate variables
+  double noise_bound_sq = std::pow(params_.noise_bound, 2);
+  size_t match_size = src.cols();
+  cost_ = std::numeric_limits<double>::infinity();
+
+  // Calculate the initial mu
+  double src_diameter = teaser::utils::calculateDiameter<double, 3>(src);
+  double dest_diameter = teaser::utils::calculateDiameter<double, 3>(dst);
+  double global_scale = src_diameter > dest_diameter ? src_diameter : dest_diameter;
+  global_scale /= noise_bound_sq;
+  double mu = std::pow(global_scale, 2) / noise_bound_sq;
+
+  // stopping condition for mu
+  double min_mu = 1.0;
+  *rotation = Eigen::Matrix3d::Identity(3, 3); // rotation matrix
+  Eigen::Matrix<double, 1, Eigen::Dynamic> l_pq(1, match_size);
+  l_pq.setOnes(1, match_size);
+
+  // Assumptions of the two inputs:
+  // they should be of the same scale,
+  // outliers should be removed as much as possible
+  // input vectors should contain TIM vectors (if only estimating rotation)
+  for (size_t i = 0; i < params_.max_iterations; ++i) {
+    double scaled_mu = mu * noise_bound_sq;
+
+    // 1. Optimize for line processes weights
+    Eigen::Matrix<double, 3, 1> q, p, rpq;
+    for (size_t j = 0; j < match_size; ++j) {
+      // p = Rq
+      q = src.col(j);
+      p = dst.col(j);
+      rpq = p - (*rotation) * q;
+      l_pq(j) = std::pow(scaled_mu / (scaled_mu + rpq.squaredNorm()), 2);
+    }
+
+    // 2. Optimize for Rotation Matrix
+    *rotation = teaser::utils::svdRot(src, dst, l_pq);
+
+    // update cost
+    Eigen::Matrix<double, 3, Eigen::Dynamic> diff = (dst - (*rotation) * src).array().square();
+    cost_ = ((scaled_mu * diff.colwise().sum()).array() /
+             (scaled_mu + diff.colwise().sum().array()).array())
+                .sum();
+
+    // additional termination conditions
+    if (cost_ < params_.cost_threshold || mu < min_mu) {
+      TEASER_DEBUG_INFO_MSG("Convergence condition met.");
+      TEASER_DEBUG_INFO_MSG("Iterations: " << i);
+      TEASER_DEBUG_INFO_MSG("Mu: " << mu);
+      TEASER_DEBUG_INFO_MSG("Cost: " << cost_);
+      break;
+    }
+
+    // update mu
+    mu /= params_.gnc_factor;
+  }
+
+  if (inliers) {
+    *inliers = l_pq.cast<bool>();
+  }
+}
+
+void teaser::TLSScaleSolver::solveForScale(const Eigen::Matrix<double, 3, Eigen::Dynamic>& src,
+                                           const Eigen::Matrix<double, 3, Eigen::Dynamic>& dst,
+                                           double* scale,
+                                           Eigen::Matrix<bool, 1, Eigen::Dynamic>* inliers) {
+
+  Eigen::Matrix<double, 1, Eigen::Dynamic> v1_dist =
+      src.array().square().colwise().sum().array().sqrt();
+  Eigen::Matrix<double, 1, Eigen::Dynamic> v2_dist =
+      dst.array().square().colwise().sum().array().sqrt();
+
+  Eigen::Matrix<double, 1, Eigen::Dynamic> raw_scales = v2_dist.array() / v1_dist.array();
+  double beta = 2 * noise_bound_ * sqrt(cbar2_);
+  Eigen::Matrix<double, 1, Eigen::Dynamic> alphas = beta * v1_dist.cwiseInverse();
+
+  tls_estimator_.estimate(raw_scales, alphas, scale, inliers);
+}
+
+void teaser::ScaleInliersSelector::solveForScale(
+    const Eigen::Matrix<double, 3, Eigen::Dynamic>& src,
+    const Eigen::Matrix<double, 3, Eigen::Dynamic>& dst, double* scale,
+    Eigen::Matrix<bool, 1, Eigen::Dynamic>* inliers) {
+  // We assume no scale difference between the two vectors of points.
+  *scale = 1;
+
+  Eigen::Matrix<double, 1, Eigen::Dynamic> v1_dist =
+      src.array().square().colwise().sum().array().sqrt();
+  Eigen::Matrix<double, 1, Eigen::Dynamic> v2_dist =
+      dst.array().square().colwise().sum().array().sqrt();
+  double beta = 2 * noise_bound_ * sqrt(cbar2_);
+
+  // A pair-wise correspondence is an inlier if it passes the following test:
+  // abs(|dst| - |src|) is within maximum allowed error
+  *inliers = (v1_dist.array() - v2_dist.array()).array().abs() <= beta;
+}
+
+void teaser::TLSTranslationSolver::solveForTranslation(
+    const Eigen::Matrix<double, 3, Eigen::Dynamic>& src,
+    const Eigen::Matrix<double, 3, Eigen::Dynamic>& dst, Eigen::Vector3d* translation,
+    Eigen::Matrix<bool, 1, Eigen::Dynamic>* inliers) {
+  assert(src.cols() == dst.cols());
+  if (inliers) {
+    assert(inliers->cols() == src.cols());
+  }
+
+  // Raw translation
+  Eigen::Matrix<double, 3, Eigen::Dynamic> raw_translation = dst - src;
+
+  // Error bounds for each measurements
+  int N = src.cols();
+  double beta = noise_bound_ * sqrt(cbar2_);
+  Eigen::Matrix<double, 1, Eigen::Dynamic> alphas = beta * Eigen::MatrixXd::Ones(1, N);
+
+  // Estimate x, y, and z component of translation: perform TLS on each row
+  *inliers = Eigen::Matrix<bool, 1, Eigen::Dynamic>::Ones(1, N);
+  Eigen::Matrix<bool, 1, Eigen::Dynamic> inliers_temp(1, N);
+  for (size_t i = 0; i < raw_translation.rows(); ++i) {
+    tls_estimator_.estimate(raw_translation.row(i), alphas, &((*translation)(i)), &inliers_temp);
+    // element-wise AND using component-wise product (Eigen 3.2 compatible)
+    // a point is an inlier iff. x,y,z are all inliers
+    *inliers = (*inliers).cwiseProduct(inliers_temp);
+  }
+}
+
+teaser::RobustRegistrationSolver::RobustRegistrationSolver(
+    const teaser::RobustRegistrationSolver::Params& params) {
+  reset(params);
+}
+
+Eigen::Matrix<double, 3, Eigen::Dynamic>
+teaser::RobustRegistrationSolver::computeTIMs(const Eigen::Matrix<double, 3, Eigen::Dynamic>& v,
+                                              Eigen::Matrix<int, 2, Eigen::Dynamic>* map) {
+
+  auto N = v.cols();
+  Eigen::Matrix<double, 3, Eigen::Dynamic> vtilde(3, N * (N - 1) / 2);
+  map->resize(2, N * (N - 1) / 2);
+
+#pragma omp parallel for default(none) shared(N, v, vtilde, map)
+  for (size_t i = 0; i < N - 1; i++) {
+    // Calculate some important indices
+    // For each measurement, we compute the TIMs between itself and all the measurements after it.
+    // For example:
+    // i=0: add N-1 TIMs
+    // i=1: add N-2 TIMs
+    // etc..
+    // i=k: add N-1-k TIMs
+    // And by arithmatic series, we can get the starting index of each segment be:
+    // k*N - k*(k+1)/2
+    size_t segment_start_idx = i * N - i * (i + 1) / 2;
+    size_t segment_cols = N - 1 - i;
+
+    // calculate TIM
+    Eigen::Matrix<double, 3, 1> m = v.col(i);
+    Eigen::Matrix<double, 3, Eigen::Dynamic> temp = v - m * Eigen::MatrixXd::Ones(1, N);
+
+    // concatenate to the end of the tilde vector
+    vtilde.middleCols(segment_start_idx, segment_cols) = temp.rightCols(segment_cols);
+
+    // populate the index map
+    Eigen::Matrix<int, 2, Eigen::Dynamic> map_addition(2, N);
+    for (size_t j = 0; j < N; ++j) {
+      map_addition(0, j) = i;
+      map_addition(1, j) = j;
+    }
+    map->middleCols(segment_start_idx, segment_cols) = map_addition.rightCols(segment_cols);
+  }
+
+  return vtilde;
+}
+
+teaser::RegistrationSolution
+teaser::RobustRegistrationSolver::solve(const teaser::PointCloud& src_cloud,
+                                        const teaser::PointCloud& dst_cloud,
+                                        const std::vector<std::pair<int, int>> correspondences) {
+  Eigen::Matrix<double, 3, Eigen::Dynamic> src(3, correspondences.size());
+  Eigen::Matrix<double, 3, Eigen::Dynamic> dst(3, correspondences.size());
+  for (size_t i = 0; i < correspondences.size(); ++i) {
+    auto src_idx = std::get<0>(correspondences[i]);
+    auto dst_idx = std::get<1>(correspondences[i]);
+    src.col(i) << src_cloud[src_idx].x, src_cloud[src_idx].y, src_cloud[src_idx].z;
+    dst.col(i) << dst_cloud[dst_idx].x, dst_cloud[dst_idx].y, dst_cloud[dst_idx].z;
+  }
+  return solve(src, dst);
+}
+
+teaser::RegistrationSolution
+teaser::RobustRegistrationSolver::solve(const Eigen::Matrix<double, 3, Eigen::Dynamic>& src,
+                                        const Eigen::Matrix<double, 3, Eigen::Dynamic>& dst) {
+  assert(scale_solver_ && rotation_solver_ && translation_solver_);
+
+  // Handle deprecated params
+  if (!params_.use_max_clique) {
+    TEASER_DEBUG_INFO_MSG(
+        "Using deprecated param field use_max_clique. Switch to inlier_selection_mode instead.");
+    params_.inlier_selection_mode = INLIER_SELECTION_MODE::NONE;
+  }
+  if (!params_.max_clique_exact_solution) {
+    TEASER_DEBUG_INFO_MSG("Using deprecated param field max_clique_exact_solution. Switch to "
+                          "inlier_selection_mode instead.");
+    params_.inlier_selection_mode = INLIER_SELECTION_MODE::PMC_HEU;
+  }
+
+  /**
+   * Steps to estimate T/R/s
+   *
+   * Estimate Scale
+   * -- compute TIMs
+   *
+   * Remove outliers
+   * De-scale the TIMs
+   *        v2tilde = v2tilde/s_est; % correct scale from v2 side, more stable
+   *
+   * Estimate rotation
+   *
+   * Estimate Translation
+   */
+  src_tims_ = computeTIMs(src, &src_tims_map_);
+  dst_tims_ = computeTIMs(dst, &dst_tims_map_);
+  TEASER_DEBUG_INFO_MSG("Starting scale solver.");
+  solveForScale(src_tims_, dst_tims_);
+  TEASER_DEBUG_INFO_MSG("Scale estimation complete.");
+
+  // Calculate Maximum Clique
+  // Note: the max_clique_ vector holds the indices of original measurements that are within the
+  // max clique of the built inlier graph.
+  if (params_.inlier_selection_mode != INLIER_SELECTION_MODE::NONE) {
+
+    // Create inlier graph: A graph with (indices of) original measurements as vertices, and edges
+    // only when the TIM between two measurements are inliers. Note: src_tims_map_ is the same as
+    // dst_tim_map_
+    inlier_graph_.populateVertices(src.cols());
+    for (size_t i = 0; i < scale_inliers_mask_.cols(); ++i) {
+      if (scale_inliers_mask_(0, i)) {
+        inlier_graph_.addEdge(src_tims_map_(0, i), src_tims_map_(1, i));
+      }
+    }
+
+    teaser::MaxCliqueSolver::Params clique_params;
+
+    if (params_.inlier_selection_mode == INLIER_SELECTION_MODE::PMC_EXACT) {
+      clique_params.solver_mode = teaser::MaxCliqueSolver::CLIQUE_SOLVER_MODE::PMC_EXACT;
+    } else if (params_.inlier_selection_mode == INLIER_SELECTION_MODE::PMC_HEU) {
+      clique_params.solver_mode = teaser::MaxCliqueSolver::CLIQUE_SOLVER_MODE::PMC_HEU;
+    } else {
+      clique_params.solver_mode = teaser::MaxCliqueSolver::CLIQUE_SOLVER_MODE::KCORE_HEU;
+    }
+    clique_params.time_limit = params_.max_clique_time_limit;
+    clique_params.kcore_heuristic_threshold = params_.kcore_heuristic_threshold;
+
+    teaser::MaxCliqueSolver clique_solver(clique_params);
+    max_clique_ = clique_solver.findMaxClique(inlier_graph_);
+    std::sort(max_clique_.begin(), max_clique_.end());
+    TEASER_DEBUG_INFO_MSG("Max Clique of scale estimation inliers: ");
+#ifndef NDEBUG
+    std::copy(max_clique_.begin(), max_clique_.end(), std::ostream_iterator<int>(std::cout, " "));
+    std::cout << std::endl;
+#endif
+    // Abort if max clique size <= 1
+    if (max_clique_.size() <= 1) {
+      TEASER_DEBUG_INFO_MSG("Clique size too small. Abort.");
+      solution_.valid = false;
+      return solution_;
+    }
+  } else {
+    // not using clique filtering is equivalent to saying all measurements are in the max clique
+    max_clique_.reserve(src.cols());
+    for (size_t i = 0; i < src.cols(); ++i) {
+      max_clique_.push_back(i);
+    }
+  }
+
+  // Calculate new measurements & TIMs based on max clique inliers
+  if (params_.rotation_tim_graph == INLIER_GRAPH_FORMULATION::CHAIN) {
+    // chain graph
+    TEASER_DEBUG_INFO_MSG("Using chain graph for GNC rotation.");
+    pruned_src_tims_.resize(3, max_clique_.size());
+    pruned_dst_tims_.resize(3, max_clique_.size());
+    src_tims_map_rotation_.resize(2, max_clique_.size());
+    dst_tims_map_rotation_.resize(2, max_clique_.size());
+    for (size_t i = 0; i < max_clique_.size(); ++i) {
+      const auto& root = max_clique_[i];
+      int leaf;
+      if (i != max_clique_.size() - 1) {
+        leaf = max_clique_[i + 1];
+      } else {
+        leaf = max_clique_[0];
+      }
+      pruned_src_tims_.col(i) = src.col(leaf) - src.col(root);
+      pruned_dst_tims_.col(i) = dst.col(leaf) - dst.col(root);
+
+      // populate the TIMs map
+      dst_tims_map_rotation_(0,i) = leaf;
+      dst_tims_map_rotation_(1,i) = root;
+      src_tims_map_rotation_(0,i) = leaf;
+      src_tims_map_rotation_(1,i) = root;
+    }
+  } else {
+    // complete graph
+    TEASER_DEBUG_INFO_MSG("Using complete graph for GNC rotation.");
+    // select the inlier measurements with max clique
+    Eigen::Matrix<double, 3, Eigen::Dynamic> src_inliers(3, max_clique_.size());
+    Eigen::Matrix<double, 3, Eigen::Dynamic> dst_inliers(3, max_clique_.size());
+    for (size_t i = 0; i < max_clique_.size(); ++i) {
+      src_inliers.col(i) = src.col(max_clique_[i]);
+      dst_inliers.col(i) = dst.col(max_clique_[i]);
+    }
+    // construct the TIMs
+    pruned_dst_tims_ = computeTIMs(dst_inliers, &dst_tims_map_rotation_);
+    pruned_src_tims_ = computeTIMs(src_inliers, &src_tims_map_rotation_);
+  }
+
+  // Remove scaling for rotation estimation
+  pruned_dst_tims_ *= (1 / solution_.scale);
+
+  // Update GNC rotation solver's noise bound with the new information
+  // Note: this implicitly assumes that rotation_solver_'s noise bound
+  // is set to the original noise bound of the measurements.
+  auto params = rotation_solver_->getParams();
+  params.noise_bound *= (2 / solution_.scale);
+  rotation_solver_->setParams(params);
+
+  // Solve for rotation
+  TEASER_DEBUG_INFO_MSG("Starting rotation solver.");
+  solveForRotation(pruned_src_tims_, pruned_dst_tims_);
+  TEASER_DEBUG_INFO_MSG("Rotation estimation complete.");
+
+  // Save indices of inlier TIMs from GNC rotation estimation
+  for (size_t i = 0; i < rotation_inliers_mask_.cols(); ++i) {
+    if (rotation_inliers_mask_[i]) {
+      rotation_inliers_.emplace_back(i);
+    }
+  }
+  Eigen::Matrix<double, 3, Eigen::Dynamic> rotation_pruned_src(3, max_clique_.size());
+  Eigen::Matrix<double, 3, Eigen::Dynamic> rotation_pruned_dst(3, max_clique_.size());
+  for (size_t i = 0; i < max_clique_.size(); ++i) {
+    rotation_pruned_src.col(i) = src.col(max_clique_[i]);
+    rotation_pruned_dst.col(i) = dst.col(max_clique_[i]);
+  }
+
+  // Solve for translation
+  TEASER_DEBUG_INFO_MSG("Starting translation solver.");
+  solveForTranslation(solution_.scale * solution_.rotation * rotation_pruned_src,
+                      rotation_pruned_dst);
+  TEASER_DEBUG_INFO_MSG("Translation estimation complete.");
+
+  // Find the final inliers
+  translation_inliers_ = utils::findNonzero<bool>(translation_inliers_mask_);
+
+  // Update validity flag
+  solution_.valid = true;
+
+  return solution_;
+}
+
+double teaser::RobustRegistrationSolver::solveForScale(
+    const Eigen::Matrix<double, 3, Eigen::Dynamic>& v1,
+    const Eigen::Matrix<double, 3, Eigen::Dynamic>& v2) {
+  scale_inliers_mask_.resize(1, v1.cols());
+  scale_solver_->solveForScale(v1, v2, &(solution_.scale), &scale_inliers_mask_);
+  return solution_.scale;
+}
+
+Eigen::Vector3d teaser::RobustRegistrationSolver::solveForTranslation(
+    const Eigen::Matrix<double, 3, Eigen::Dynamic>& v1,
+    const Eigen::Matrix<double, 3, Eigen::Dynamic>& v2) {
+  translation_inliers_mask_.resize(1, v1.cols());
+  translation_solver_->solveForTranslation(v1, v2, &(solution_.translation),
+                                           &translation_inliers_mask_);
+  return solution_.translation;
+}
+
+Eigen::Matrix3d teaser::RobustRegistrationSolver::solveForRotation(
+    const Eigen::Matrix<double, 3, Eigen::Dynamic>& v1,
+    const Eigen::Matrix<double, 3, Eigen::Dynamic>& v2) {
+  rotation_inliers_mask_.resize(1, v1.cols());
+  rotation_solver_->solveForRotation(v1, v2, &(solution_.rotation), &rotation_inliers_mask_);
+  return solution_.rotation;
+}
+
+void teaser::GNCTLSRotationSolver::solveForRotation(
+    const Eigen::Matrix<double, 3, Eigen::Dynamic>& src,
+    const Eigen::Matrix<double, 3, Eigen::Dynamic>& dst, Eigen::Matrix3d* rotation,
+    Eigen::Matrix<bool, 1, Eigen::Dynamic>* inliers) {
+  assert(rotation);                 // make sure R is not a nullptr
+  assert(src.cols() == dst.cols()); // check dimensions of input data
+  assert(params_.gnc_factor > 1);   // make sure mu will increase
+  assert(params_.noise_bound != 0); // make sure noise sigma is not zero
+  if (inliers) {
+    assert(inliers->cols() == src.cols());
+  }
+
+  /**
+   * Loop: terminate when:
+   *    1. the change in cost in two consecutive runs is smaller than a user-defined threshold
+   *    2. # iterations exceeds the maximum allowed
+   *
+   * Within each loop:
+   * 1. fix weights and solve for R
+   * 2. fix R and solve for weights
+   */
+
+  // Prepare some variables
+  size_t match_size = src.cols(); // number of correspondences
+
+  double mu = 1; // arbitrary starting mu
+
+  double prev_cost = std::numeric_limits<double>::infinity();
+  cost_ = std::numeric_limits<double>::infinity();
+  double noise_bound_sq = std::pow(params_.noise_bound, 2);
+  if (noise_bound_sq < 1e-16) {
+    noise_bound_sq = 1e-2;
+  }
+  TEASER_DEBUG_INFO_MSG("GNC rotation estimation noise bound:" << params_.noise_bound);
+  TEASER_DEBUG_INFO_MSG("GNC rotation estimation noise bound squared:" << noise_bound_sq);
+
+  Eigen::Matrix<double, 3, Eigen::Dynamic> diffs(3, match_size);
+  Eigen::Matrix<double, 1, Eigen::Dynamic> weights(1, match_size);
+  weights.setOnes(1, match_size);
+  Eigen::Matrix<double, 1, Eigen::Dynamic> residuals_sq(1, match_size);
+
+  // Loop for performing GNC-TLS
+  for (size_t i = 0; i < params_.max_iterations; ++i) {
+
+    // Fix weights and perform SVD rotation estimation
+    *rotation = teaser::utils::svdRot(src, dst, weights);
+
+    // Calculate residuals squared
+    diffs = (dst - (*rotation) * src).array().square();
+    residuals_sq = diffs.colwise().sum();
+    if (i == 0) {
+      // Initialize rule for mu
+      double max_residual = residuals_sq.maxCoeff();
+      mu = 1 / (2 * max_residual / noise_bound_sq - 1);
+      // Degenerate case: mu = -1 because max_residual is very small
+      // i.e., little to none noise
+      if (mu <= 0) {
+        TEASER_DEBUG_INFO_MSG(
+            "GNC-TLS terminated because maximum residual at initialization is very small.");
+        break;
+      }
+    }
+
+    // Fix R and solve for weights in closed form
+    double th1 = (mu + 1) / mu * noise_bound_sq;
+    double th2 = mu / (mu + 1) * noise_bound_sq;
+    cost_ = 0;
+    for (size_t j = 0; j < match_size; ++j) {
+      // Also calculate cost in this loop
+      // Note: the cost calculated is using the previously solved weights
+      cost_ += weights(j) * residuals_sq(j);
+
+      if (residuals_sq(j) >= th1) {
+        weights(j) = 0;
+      } else if (residuals_sq(j) <= th2) {
+        weights(j) = 1;
+      } else {
+        weights(j) = sqrt(noise_bound_sq * mu * (mu + 1) / residuals_sq(j)) - mu;
+        assert(weights(j) >= 0 && weights(j) <= 1);
+      }
+    }
+
+    // Calculate cost
+    double cost_diff = std::abs(cost_ - prev_cost);
+
+    // Increase mu
+    mu = mu * params_.gnc_factor;
+    prev_cost = cost_;
+
+    if (cost_diff < params_.cost_threshold) {
+      TEASER_DEBUG_INFO_MSG("GNC-TLS solver terminated due to cost convergence.");
+      TEASER_DEBUG_INFO_MSG("Cost diff: " << cost_diff);
+      TEASER_DEBUG_INFO_MSG("Iterations: " << i);
+      break;
+    }
+  }
+
+  if (inliers) {
+    for (size_t i = 0; i < weights.cols(); ++i) {
+      (*inliers)(0, i) = weights(0, i) >= 0.5;
+    }
+  }
+}

+ 5 - 0
src/detection/detection_localization_global_hdl/tinyply/tinyply.cpp

@@ -0,0 +1,5 @@
+// This file exists to create a nice static or shared library via cmake
+// but can otherwise be omitted if you prefer to compile tinyply
+// directly into your own project.
+#define TINYPLY_IMPLEMENTATION
+#include "tinyply.h"