1 year ago · 2ad138b83e
--- a/src/detection/detection_lidar_centerpoint/detection_lidar_centerpoint.pro
+++ b/src/detection/detection_lidar_centerpoint/detection_lidar_centerpoint.pro
@@ -70,6 +70,10 @@ LIBS += -lrt -ldl -lnvinfer -lcudnn  -lcudart -lnvparsers -lnvonnxparser -lnvinf
 
				     error( "Couldn't find the ivprotobuf.pri file!" )
			
 
				 }
			
 
				 
			
 
				+!include(../../../include/ivyaml-cpp.pri ) {
			
 
				+    error( "Couldn't find the ivyaml-cpp.pri file!" )
			
 
				+}
			
 
				+
			
 
				 INCLUDEPATH += $$PWD/../../include/msgtype
			
 
				 
			
 
				 
			
@@ -174,3 +178,25 @@ HEADERS += \
 
				     ../../include/msgtype/object.pb.h \
			
 
				     ../../include/msgtype/objectarray.pb.h
			
 
				 
			
 
				+
			
 
				+
			
 
				+
			
 
				+unix:LIBS +=  -lpcl_common\
			
 
				+        -lpcl_features\
			
 
				+        -lpcl_filters\
			
 
				+        -lpcl_io\
			
 
				+        -lpcl_io_ply\
			
 
				+        -lpcl_kdtree\
			
 
				+        -lpcl_keypoints\
			
 
				+        -lpcl_octree\
			
 
				+        -lpcl_outofcore\
			
 
				+        -lpcl_people\
			
 
				+        -lpcl_recognition\
			
 
				+        -lpcl_registration\
			
 
				+        -lpcl_sample_consensus\
			
 
				+        -lpcl_search\
			
 
				+        -lpcl_segmentation\
			
 
				+        -lpcl_surface\
			
 
				+        -lpcl_tracking\
			
 
				+        -lpcl_visualization
			
 
				+
			
--- a/src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/centerpoint_trt.hpp
+++ b/src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/centerpoint_trt.hpp
@@ -32,6 +32,7 @@
 
				 
			
 
				 namespace centerpoint
			
 
				 {
			
 
				+static constexpr size_t CAPACITY_POINT = 1000000;
			
 
				 class NetworkParam
			
 
				 {
			
 
				 public:
			
@@ -88,14 +89,16 @@ protected:
 
				   std::unique_ptr<PostProcessCUDA> post_proc_ptr_{nullptr};
			
 
				   cudaStream_t stream_{nullptr};
			
 
				 
			
 
				+
			
 
				   std::size_t class_size_{0};
			
 
				   CenterPointConfig config_;
			
 
				-  std::size_t num_voxels_{0};
			
 
				   std::size_t encoder_in_feature_size_{0};
			
 
				   std::size_t spatial_features_size_{0};
			
 
				-  std::vector<float> voxels_;
			
 
				-  std::vector<int> coordinates_;
			
 
				-  std::vector<float> num_points_per_voxel_;
			
 
				+  std::size_t voxels_buffer_size_{0};
			
 
				+  std::size_t mask_size_{0};
			
 
				+  std::size_t voxels_size_{0};
			
 
				+  std::size_t coordinates_size_{0};
			
 
				+  std::vector<float> points_;
			
 
				   cuda::unique_ptr<float[]> voxels_d_{nullptr};
			
 
				   cuda::unique_ptr<int[]> coordinates_d_{nullptr};
			
 
				   cuda::unique_ptr<float[]> num_points_per_voxel_d_{nullptr};
			
@@ -108,6 +111,31 @@ protected:
 
				   cuda::unique_ptr<float[]> head_out_dim_d_{nullptr};
			
 
				   cuda::unique_ptr<float[]> head_out_rot_d_{nullptr};
			
 
				   cuda::unique_ptr<float[]> head_out_vel_d_{nullptr};
			
 
				+  cuda::unique_ptr<float[]> points_d_{nullptr};
			
 
				+  cuda::unique_ptr<float[]> voxels_buffer_d_{nullptr};
			
 
				+  cuda::unique_ptr<unsigned int[]> mask_d_{nullptr};
			
 
				+  cuda::unique_ptr<unsigned int[]> num_voxels_d_{nullptr};
			
 
				+
			
 
				+//  std::size_t class_size_{0};
			
 
				+//  CenterPointConfig config_;
			
 
				+//  std::size_t num_voxels_{0};
			
 
				+//  std::size_t encoder_in_feature_size_{0};
			
 
				+//  std::size_t spatial_features_size_{0};
			
 
				+//  std::vector<float> voxels_;
			
 
				+//  std::vector<int> coordinates_;
			
 
				+//  std::vector<float> num_points_per_voxel_;
			
 
				+//  cuda::unique_ptr<float[]> voxels_d_{nullptr};
			
 
				+//  cuda::unique_ptr<int[]> coordinates_d_{nullptr};
			
 
				+//  cuda::unique_ptr<float[]> num_points_per_voxel_d_{nullptr};
			
 
				+//  cuda::unique_ptr<float[]> encoder_in_features_d_{nullptr};
			
 
				+//  cuda::unique_ptr<float[]> pillar_features_d_{nullptr};
			
 
				+//  cuda::unique_ptr<float[]> spatial_features_d_{nullptr};
			
 
				+//  cuda::unique_ptr<float[]> head_out_heatmap_d_{nullptr};
			
 
				+//  cuda::unique_ptr<float[]> head_out_offset_d_{nullptr};
			
 
				+//  cuda::unique_ptr<float[]> head_out_z_d_{nullptr};
			
 
				+//  cuda::unique_ptr<float[]> head_out_dim_d_{nullptr};
			
 
				+//  cuda::unique_ptr<float[]> head_out_rot_d_{nullptr};
			
 
				+//  cuda::unique_ptr<float[]> head_out_vel_d_{nullptr};
			
 
				 };
			
 
				 
			
 
				 }  // namespace centerpoint
			
--- a/src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/network/scatter_kernel.hpp
+++ b/src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/network/scatter_kernel.hpp
@@ -21,7 +21,7 @@
 
				 namespace centerpoint
			
 
				 {
			
 
				 cudaError_t scatterFeatures_launch(
			
 
				-  const float * pillar_features, const int * coords, const std::size_t num_pillars,
			
 
				+  const float * pillar_features, const int * coords, const unsigned int * num_pillars,
			
 
				   const std::size_t max_voxel_size, const std::size_t encoder_out_feature_size,
			
 
				   const std::size_t grid_size_x, const std::size_t grid_size_y, float * scattered_features,
			
 
				   cudaStream_t stream);
			
--- a/src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/preprocess/pointcloud_densification.hpp
+++ b/src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/preprocess/pointcloud_densification.hpp
@@ -52,6 +52,7 @@ private:
 
				 struct PointCloudWithTransform
			
 
				 {
			
 
				   pcl::PointCloud<pcl::PointXYZI>::Ptr pc_ptr;
			
 
				+  Eigen::Affine3f affine_past2world;
			
 
				 };
			
 
				 
			
 
				 
			
--- a/src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/preprocess/preprocess_kernel.hpp
+++ b/src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/preprocess/preprocess_kernel.hpp
@@ -20,9 +20,20 @@
 
				 
			
 
				 namespace centerpoint
			
 
				 {
			
 
				+cudaError_t generateVoxels_random_launch(
			
 
				+  const float * points, size_t points_size, float min_x_range, float max_x_range, float min_y_range,
			
 
				+  float max_y_range, float min_z_range, float max_z_range, float pillar_x_size, float pillar_y_size,
			
 
				+  float pillar_z_size, int grid_y_size, int grid_x_size, unsigned int * mask, float * voxels,
			
 
				+  cudaStream_t stream);
			
 
				+
			
 
				+cudaError_t generateBaseFeatures_launch(
			
 
				+  unsigned int * mask, float * voxels, int grid_y_size, int grid_x_size, int max_voxel_size,
			
 
				+  unsigned int * pillar_num, float * voxel_features, float * voxel_num, int * voxel_idxs,
			
 
				+  cudaStream_t stream);
			
 
				+
			
 
				 cudaError_t generateFeatures_launch(
			
 
				   const float * voxel_features, const float * voxel_num_points, const int * coords,
			
 
				-  const std::size_t num_voxels, const std::size_t max_voxel_size, const float voxel_size_x,
			
 
				+  const unsigned int * num_voxels, const std::size_t max_voxel_size, const float voxel_size_x,
			
 
				   const float voxel_size_y, const float voxel_size_z, const float range_min_x,
			
 
				   const float range_min_y, const float range_min_z, float * features, cudaStream_t stream);
			
 
				 
			
--- a/src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/preprocess/voxel_generator.hpp
+++ b/src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/preprocess/voxel_generator.hpp
@@ -31,9 +31,10 @@ public:
 
				   explicit VoxelGeneratorTemplate(
			
 
				     const DensificationParam & param, const CenterPointConfig & config);
			
 
				 
			
 
				-  virtual std::size_t pointsToVoxels(
			
 
				-    std::vector<float> & voxels, std::vector<int> & coordinates,
			
 
				-    std::vector<float> & num_points_per_voxel) = 0;
			
 
				+    virtual std::size_t generateSweepPoints(std::vector<float> & points) = 0;
			
 
				+//  virtual std::size_t pointsToVoxels(
			
 
				+//    std::vector<float> & voxels, std::vector<int> & coordinates,
			
 
				+//    std::vector<float> & num_points_per_voxel) = 0;
			
 
				 
			
 
				     bool enqueuePointCloud(
			
 
				       const pcl::PointCloud<pcl::PointXYZI>::Ptr);
			
@@ -55,9 +56,11 @@ class VoxelGenerator : public VoxelGeneratorTemplate
 
				 public:
			
 
				   using VoxelGeneratorTemplate::VoxelGeneratorTemplate;
			
 
				 
			
 
				-  std::size_t pointsToVoxels(
			
 
				-    std::vector<float> & voxels, std::vector<int> & coordinates,
			
 
				-    std::vector<float> & num_points_per_voxel) override;
			
 
				+  std::size_t generateSweepPoints(std::vector<float> & points) override;
			
 
				+
			
 
				+//  std::size_t pointsToVoxels(
			
 
				+//    std::vector<float> & voxels, std::vector<int> & coordinates,
			
 
				+//    std::vector<float> & num_points_per_voxel) override;
			
 
				 };
			
 
				 
			
 
				 }  // namespace centerpoint
			
--- a/src/detection/detection_lidar_centerpoint/lib/centerpoint_trt.cpp
+++ b/src/detection/detection_lidar_centerpoint/lib/centerpoint_trt.cpp
@@ -69,34 +69,40 @@ CenterPointTRT::~CenterPointTRT()
 
				 
			
 
				 void CenterPointTRT::initPtr()
			
 
				 {
			
 
				-  const auto voxels_size =
			
 
				-    config_.max_voxel_size_ * config_.max_point_in_voxel_size_ * config_.point_feature_size_;
			
 
				-  const auto coordinates_size = config_.max_voxel_size_ * config_.point_dim_size_;
			
 
				-  encoder_in_feature_size_ =
			
 
				-    config_.max_voxel_size_ * config_.max_point_in_voxel_size_ * config_.encoder_in_feature_size_;
			
 
				-  const auto pillar_features_size = config_.max_voxel_size_ * config_.encoder_out_feature_size_;
			
 
				-  spatial_features_size_ =
			
 
				-    config_.grid_size_x_ * config_.grid_size_y_ * config_.encoder_out_feature_size_;
			
 
				-  const auto grid_xy_size = config_.down_grid_size_x_ * config_.down_grid_size_y_;
			
 
				-
			
 
				-  // host
			
 
				-  voxels_.resize(voxels_size);
			
 
				-  coordinates_.resize(coordinates_size);
			
 
				-  num_points_per_voxel_.resize(config_.max_voxel_size_);
			
 
				-
			
 
				-  // device
			
 
				-  voxels_d_ = cuda::make_unique<float[]>(voxels_size);
			
 
				-  coordinates_d_ = cuda::make_unique<int[]>(coordinates_size);
			
 
				-  num_points_per_voxel_d_ = cuda::make_unique<float[]>(config_.max_voxel_size_);
			
 
				-  encoder_in_features_d_ = cuda::make_unique<float[]>(encoder_in_feature_size_);
			
 
				-  pillar_features_d_ = cuda::make_unique<float[]>(pillar_features_size);
			
 
				-  spatial_features_d_ = cuda::make_unique<float[]>(spatial_features_size_);
			
 
				-  head_out_heatmap_d_ = cuda::make_unique<float[]>(grid_xy_size * config_.class_size_);
			
 
				-  head_out_offset_d_ = cuda::make_unique<float[]>(grid_xy_size * config_.head_out_offset_size_);
			
 
				-  head_out_z_d_ = cuda::make_unique<float[]>(grid_xy_size * config_.head_out_z_size_);
			
 
				-  head_out_dim_d_ = cuda::make_unique<float[]>(grid_xy_size * config_.head_out_dim_size_);
			
 
				-  head_out_rot_d_ = cuda::make_unique<float[]>(grid_xy_size * config_.head_out_rot_size_);
			
 
				-  head_out_vel_d_ = cuda::make_unique<float[]>(grid_xy_size * config_.head_out_vel_size_);
			
 
				+    voxels_size_ =
			
 
				+      config_.max_voxel_size_ * config_.max_point_in_voxel_size_ * config_.point_feature_size_;
			
 
				+    coordinates_size_ = config_.max_voxel_size_ * config_.point_dim_size_;
			
 
				+    encoder_in_feature_size_ =
			
 
				+      config_.max_voxel_size_ * config_.max_point_in_voxel_size_ * config_.encoder_in_feature_size_;
			
 
				+    const auto pillar_features_size = config_.max_voxel_size_ * config_.encoder_out_feature_size_;
			
 
				+    spatial_features_size_ =
			
 
				+      config_.grid_size_x_ * config_.grid_size_y_ * config_.encoder_out_feature_size_;
			
 
				+    const auto grid_xy_size = config_.down_grid_size_x_ * config_.down_grid_size_y_;
			
 
				+
			
 
				+    voxels_buffer_size_ = config_.grid_size_x_ * config_.grid_size_y_ *
			
 
				+                          config_.max_point_in_voxel_size_ * config_.point_feature_size_;
			
 
				+    mask_size_ = config_.grid_size_x_ * config_.grid_size_y_;
			
 
				+
			
 
				+    // host
			
 
				+    points_.resize(CAPACITY_POINT * config_.point_feature_size_);
			
 
				+
			
 
				+    // device
			
 
				+    voxels_d_ = cuda::make_unique<float[]>(voxels_size_);
			
 
				+    coordinates_d_ = cuda::make_unique<int[]>(coordinates_size_);
			
 
				+    num_points_per_voxel_d_ = cuda::make_unique<float[]>(config_.max_voxel_size_);
			
 
				+    encoder_in_features_d_ = cuda::make_unique<float[]>(encoder_in_feature_size_);
			
 
				+    pillar_features_d_ = cuda::make_unique<float[]>(pillar_features_size);
			
 
				+    spatial_features_d_ = cuda::make_unique<float[]>(spatial_features_size_);
			
 
				+    head_out_heatmap_d_ = cuda::make_unique<float[]>(grid_xy_size * config_.class_size_);
			
 
				+    head_out_offset_d_ = cuda::make_unique<float[]>(grid_xy_size * config_.head_out_offset_size_);
			
 
				+    head_out_z_d_ = cuda::make_unique<float[]>(grid_xy_size * config_.head_out_z_size_);
			
 
				+    head_out_dim_d_ = cuda::make_unique<float[]>(grid_xy_size * config_.head_out_dim_size_);
			
 
				+    head_out_rot_d_ = cuda::make_unique<float[]>(grid_xy_size * config_.head_out_rot_size_);
			
 
				+    head_out_vel_d_ = cuda::make_unique<float[]>(grid_xy_size * config_.head_out_vel_size_);
			
 
				+    points_d_ = cuda::make_unique<float[]>(CAPACITY_POINT * config_.point_feature_size_);
			
 
				+    voxels_buffer_d_ = cuda::make_unique<float[]>(voxels_buffer_size_);
			
 
				+    mask_d_ = cuda::make_unique<unsigned int[]>(mask_size_);
			
 
				+    num_voxels_d_ = cuda::make_unique<unsigned int[]>(1);
			
 
				 }
			
 
				 
			
 
				 bool CenterPointTRT::detect(
			
@@ -105,13 +111,13 @@ bool CenterPointTRT::detect(
 
				 {
			
 
				 
			
 
				     int64_t time1 = std::chrono::system_clock::now().time_since_epoch().count()/1000000;
			
 
				-    std::fill(voxels_.begin(), voxels_.end(), 0);
			
 
				-    std::fill(coordinates_.begin(), coordinates_.end(), -1);
			
 
				-    std::fill(num_points_per_voxel_.begin(), num_points_per_voxel_.end(), 0);
			
 
				-    CHECK_CUDA_ERROR(cudaMemsetAsync(
			
 
				-      encoder_in_features_d_.get(), 0, encoder_in_feature_size_ * sizeof(float), stream_));
			
 
				-    CHECK_CUDA_ERROR(
			
 
				-      cudaMemsetAsync(spatial_features_d_.get(), 0, spatial_features_size_ * sizeof(float), stream_));
			
 
				+//    std::fill(voxels_.begin(), voxels_.end(), 0);
			
 
				+//    std::fill(coordinates_.begin(), coordinates_.end(), -1);
			
 
				+//    std::fill(num_points_per_voxel_.begin(), num_points_per_voxel_.end(), 0);
			
 
				+//    CHECK_CUDA_ERROR(cudaMemsetAsync(
			
 
				+//      encoder_in_features_d_.get(), 0, encoder_in_feature_size_ * sizeof(float), stream_));
			
 
				+//    CHECK_CUDA_ERROR(
			
 
				+//      cudaMemsetAsync(spatial_features_d_.get(), 0, spatial_features_size_ * sizeof(float), stream_));
			
 
				 
			
 
				     if (!preprocess(pc_ptr)) {
			
 
				         std::cout<<"Fail to preprocess and skip to detect."<<std::endl;
			
@@ -162,27 +168,33 @@ bool CenterPointTRT::preprocess(
 
				     if (!is_success) {
			
 
				       return false;
			
 
				     }
			
 
				-    num_voxels_ = vg_ptr_->pointsToVoxels(voxels_, coordinates_, num_points_per_voxel_);
			
 
				-    if (num_voxels_ == 0) {
			
 
				-      return false;
			
 
				-    }
			
 
				-
			
 
				-    const auto voxels_size =
			
 
				-      num_voxels_ * config_.max_point_in_voxel_size_ * config_.point_feature_size_;
			
 
				-    const auto coordinates_size = num_voxels_ * config_.point_dim_size_;
			
 
				-    // memcpy from host to device (not copy empty voxels)
			
 
				-    CHECK_CUDA_ERROR(cudaMemcpyAsync(
			
 
				-      voxels_d_.get(), voxels_.data(), voxels_size * sizeof(float), cudaMemcpyHostToDevice));
			
 
				-    CHECK_CUDA_ERROR(cudaMemcpyAsync(
			
 
				-      coordinates_d_.get(), coordinates_.data(), coordinates_size * sizeof(int),
			
 
				-      cudaMemcpyHostToDevice));
			
 
				+    const auto count = vg_ptr_->generateSweepPoints(points_);
			
 
				     CHECK_CUDA_ERROR(cudaMemcpyAsync(
			
 
				-      num_points_per_voxel_d_.get(), num_points_per_voxel_.data(), num_voxels_ * sizeof(float),
			
 
				-      cudaMemcpyHostToDevice));
			
 
				-    CHECK_CUDA_ERROR(cudaStreamSynchronize(stream_));
			
 
				+      points_d_.get(), points_.data(), count * config_.point_feature_size_ * sizeof(float),
			
 
				+      cudaMemcpyHostToDevice, stream_));
			
 
				+    CHECK_CUDA_ERROR(cudaMemsetAsync(num_voxels_d_.get(), 0, sizeof(unsigned int), stream_));
			
 
				+    CHECK_CUDA_ERROR(
			
 
				+      cudaMemsetAsync(voxels_buffer_d_.get(), 0, voxels_buffer_size_ * sizeof(float), stream_));
			
 
				+    CHECK_CUDA_ERROR(cudaMemsetAsync(mask_d_.get(), 0, mask_size_ * sizeof(int), stream_));
			
 
				+    CHECK_CUDA_ERROR(cudaMemsetAsync(voxels_d_.get(), 0, voxels_size_ * sizeof(float), stream_));
			
 
				+    CHECK_CUDA_ERROR(
			
 
				+      cudaMemsetAsync(coordinates_d_.get(), 0, coordinates_size_ * sizeof(int), stream_));
			
 
				+    CHECK_CUDA_ERROR(cudaMemsetAsync(
			
 
				+      num_points_per_voxel_d_.get(), 0, config_.max_voxel_size_ * sizeof(float), stream_));
			
 
				+
			
 
				+    CHECK_CUDA_ERROR(generateVoxels_random_launch(
			
 
				+      points_d_.get(), count, config_.range_min_x_, config_.range_max_x_, config_.range_min_y_,
			
 
				+      config_.range_max_y_, config_.range_min_z_, config_.range_max_z_, config_.voxel_size_x_,
			
 
				+      config_.voxel_size_y_, config_.voxel_size_z_, config_.grid_size_y_, config_.grid_size_x_,
			
 
				+      mask_d_.get(), voxels_buffer_d_.get(), stream_));
			
 
				+
			
 
				+    CHECK_CUDA_ERROR(generateBaseFeatures_launch(
			
 
				+      mask_d_.get(), voxels_buffer_d_.get(), config_.grid_size_y_, config_.grid_size_x_,
			
 
				+      config_.max_voxel_size_, num_voxels_d_.get(), voxels_d_.get(), num_points_per_voxel_d_.get(),
			
 
				+      coordinates_d_.get(), stream_));
			
 
				 
			
 
				     CHECK_CUDA_ERROR(generateFeatures_launch(
			
 
				-      voxels_d_.get(), num_points_per_voxel_d_.get(), coordinates_d_.get(), num_voxels_,
			
 
				+      voxels_d_.get(), num_points_per_voxel_d_.get(), coordinates_d_.get(), num_voxels_d_.get(),
			
 
				       config_.max_voxel_size_, config_.voxel_size_x_, config_.voxel_size_y_, config_.voxel_size_z_,
			
 
				       config_.range_min_x_, config_.range_min_y_, config_.range_min_z_, encoder_in_features_d_.get(),
			
 
				       stream_));
			
@@ -227,26 +239,26 @@ bool CenterPointTRT::preprocess(
 
				 
			
 
				 void CenterPointTRT::inference()
			
 
				 {
			
 
				-  if (!encoder_trt_ptr_->context_ || !head_trt_ptr_->context_) {
			
 
				-    throw std::runtime_error("Failed to create tensorrt context.");
			
 
				-  }
			
 
				+    if (!encoder_trt_ptr_->context_ || !head_trt_ptr_->context_) {
			
 
				+      throw std::runtime_error("Failed to create tensorrt context.");
			
 
				+    }
			
 
				 
			
 
				-  // pillar encoder network
			
 
				-  std::vector<void *> encoder_buffers{encoder_in_features_d_.get(), pillar_features_d_.get()};
			
 
				-  encoder_trt_ptr_->context_->enqueueV2(encoder_buffers.data(), stream_, nullptr);
			
 
				-
			
 
				-  // scatter
			
 
				-  CHECK_CUDA_ERROR(scatterFeatures_launch(
			
 
				-    pillar_features_d_.get(), coordinates_d_.get(), num_voxels_, config_.max_voxel_size_,
			
 
				-    config_.encoder_out_feature_size_, config_.grid_size_x_, config_.grid_size_y_,
			
 
				-    spatial_features_d_.get(), stream_));
			
 
				-
			
 
				-  // head network
			
 
				-  std::vector<void *> head_buffers = {spatial_features_d_.get(), head_out_heatmap_d_.get(),
			
 
				-                                      head_out_offset_d_.get(),  head_out_z_d_.get(),
			
 
				-                                      head_out_dim_d_.get(),     head_out_rot_d_.get(),
			
 
				-                                      head_out_vel_d_.get()};
			
 
				-  head_trt_ptr_->context_->enqueueV2(head_buffers.data(), stream_, nullptr);
			
 
				+    // pillar encoder network
			
 
				+    std::vector<void *> encoder_buffers{encoder_in_features_d_.get(), pillar_features_d_.get()};
			
 
				+    encoder_trt_ptr_->context_->enqueueV2(encoder_buffers.data(), stream_, nullptr);
			
 
				+
			
 
				+    // scatter
			
 
				+    CHECK_CUDA_ERROR(scatterFeatures_launch(
			
 
				+      pillar_features_d_.get(), coordinates_d_.get(), num_voxels_d_.get(), config_.max_voxel_size_,
			
 
				+      config_.encoder_out_feature_size_, config_.grid_size_x_, config_.grid_size_y_,
			
 
				+      spatial_features_d_.get(), stream_));
			
 
				+
			
 
				+    // head network
			
 
				+    std::vector<void *> head_buffers = {spatial_features_d_.get(), head_out_heatmap_d_.get(),
			
 
				+                                        head_out_offset_d_.get(),  head_out_z_d_.get(),
			
 
				+                                        head_out_dim_d_.get(),     head_out_rot_d_.get(),
			
 
				+                                        head_out_vel_d_.get()};
			
 
				+    head_trt_ptr_->context_->enqueueV2(head_buffers.data(), stream_, nullptr);
			
 
				 }
			
 
				 
			
 
				 void CenterPointTRT::postProcess(std::vector<Box3D> & det_boxes3d)
			
--- a/src/detection/detection_lidar_centerpoint/lib/network/network_trt.cpp
+++ b/src/detection/detection_lidar_centerpoint/lib/network/network_trt.cpp
@@ -59,6 +59,14 @@ bool HeadTRT::setProfile(
 
				 
			
 
				   for (std::size_t ci = 0; ci < out_channel_sizes_.size(); ci++) {
			
 
				     auto out_name = network.getOutput(ci)->getName();
			
 
				+
			
 
				+    if (
			
 
				+      out_name == std::string("heatmap") &&
			
 
				+      network.getOutput(ci)->getDimensions().d[1] != static_cast<int32_t>(out_channel_sizes_[ci])) {
			
 
				+      std::cout
			
 
				+        << "Expected and actual number of classes do not match" << std::endl;
			
 
				+      return false;
			
 
				+    }
			
 
				     auto out_dims = nvinfer1::Dims4(
			
 
				       config_.batch_size_, out_channel_sizes_[ci], config_.down_grid_size_y_,
			
 
				       config_.down_grid_size_x_);
			
--- a/src/detection/detection_lidar_centerpoint/lib/network/scatter_kernel.cu
+++ b/src/detection/detection_lidar_centerpoint/lib/network/scatter_kernel.cu
@@ -24,7 +24,7 @@ const std::size_t THREADS_PER_BLOCK = 32;
 
				 namespace centerpoint
			
 
				 {
			
 
				 __global__ void scatterFeatures_kernel(
			
 
				-  const float * pillar_features, const int * coords, const std::size_t num_pillars,
			
 
				+  const float * pillar_features, const int * coords, const unsigned int * num_pillars,
			
 
				   const std::size_t pillar_feature_size, const std::size_t grid_size_x,
			
 
				   const std::size_t grid_size_y, float * scattered_features)
			
 
				 {
			
@@ -34,7 +34,7 @@ __global__ void scatterFeatures_kernel(
 
				   const auto pillar_i = blockIdx.x * THREADS_PER_BLOCK + threadIdx.x;
			
 
				   const auto feature_i = blockIdx.y * THREADS_PER_BLOCK + threadIdx.y;
			
 
				 
			
 
				-  if (pillar_i >= num_pillars || feature_i >= pillar_feature_size) {
			
 
				+  if (pillar_i >= num_pillars[0] || feature_i >= pillar_feature_size) {
			
 
				     return;
			
 
				   }
			
 
				 
			
@@ -50,7 +50,7 @@ __global__ void scatterFeatures_kernel(
 
				 
			
 
				 // cspell: ignore divup
			
 
				 cudaError_t scatterFeatures_launch(
			
 
				-  const float * pillar_features, const int * coords, const std::size_t num_pillars,
			
 
				+  const float * pillar_features, const int * coords, const unsigned int * num_pillars,
			
 
				   const std::size_t max_voxel_size, const std::size_t encoder_out_feature_size,
			
 
				   const std::size_t grid_size_x, const std::size_t grid_size_y, float * scattered_features,
			
 
				   cudaStream_t stream)
			
--- a/src/detection/detection_lidar_centerpoint/lib/network/tensorrt_wrapper.cpp
+++ b/src/detection/detection_lidar_centerpoint/lib/network/tensorrt_wrapper.cpp
@@ -22,7 +22,9 @@
 
				 
			
 
				 namespace centerpoint
			
 
				 {
			
 
				-TensorRTWrapper::TensorRTWrapper(const CenterPointConfig & config) : config_(config) {}
			
 
				+TensorRTWrapper::TensorRTWrapper(const CenterPointConfig & config) : config_(config)
			
 
				+{
			
 
				+}
			
 
				 
			
 
				 TensorRTWrapper::~TensorRTWrapper()
			
 
				 {
			
@@ -38,7 +40,7 @@ bool TensorRTWrapper::init(
 
				   runtime_ =
			
 
				     tensorrt_common::TrtUniquePtr<nvinfer1::IRuntime>(nvinfer1::createInferRuntime(logger_));
			
 
				   if (!runtime_) {
			
 
				-    std::cout << "Fail to create runtime" << std::endl;
			
 
				+    std::cout << "Failed to create runtime" << std::endl;
			
 
				     return false;
			
 
				   }
			
 
				 
			
@@ -47,7 +49,12 @@ bool TensorRTWrapper::init(
 
				   if (engine_file.is_open()) {
			
 
				     success = loadEngine(engine_path);
			
 
				   } else {
			
 
				+      std::cout<<"Applying optimizations and building TRT CUDA engine. Please wait"<<std::endl;
			
 
				+//    auto log_thread = logger_.log_throttle(
			
 
				+//      nvinfer1::ILogger::Severity::kINFO,
			
 
				+//      "Applying optimizations and building TRT CUDA engine. Please wait a minutes...", 5);
			
 
				     success = parseONNX(onnx_path, engine_path, precision);
			
 
				+//    logger_.stop_throttle(log_thread);
			
 
				   }
			
 
				   success &= createContext();
			
 
				 
			
@@ -57,14 +64,15 @@ bool TensorRTWrapper::init(
 
				 bool TensorRTWrapper::createContext()
			
 
				 {
			
 
				   if (!engine_) {
			
 
				-    std::cout << "Fail to create context: Engine isn't created" << std::endl;
			
 
				+    std::cout
			
 
				+      << "Failed to create context: Engine was not created" << std::endl;
			
 
				     return false;
			
 
				   }
			
 
				 
			
 
				   context_ =
			
 
				     tensorrt_common::TrtUniquePtr<nvinfer1::IExecutionContext>(engine_->createExecutionContext());
			
 
				   if (!context_) {
			
 
				-    std::cout << "Fail to create context" << std::endl;
			
 
				+    std::cout << "Failed to create context" << std::endl;
			
 
				     return false;
			
 
				   }
			
 
				 
			
@@ -78,14 +86,14 @@ bool TensorRTWrapper::parseONNX(
 
				   auto builder =
			
 
				     tensorrt_common::TrtUniquePtr<nvinfer1::IBuilder>(nvinfer1::createInferBuilder(logger_));
			
 
				   if (!builder) {
			
 
				-    std::cout << "Fail to create builder" << std::endl;
			
 
				+    std::cout << "Failed to create builder" << std::endl;
			
 
				     return false;
			
 
				   }
			
 
				 
			
 
				   auto config =
			
 
				     tensorrt_common::TrtUniquePtr<nvinfer1::IBuilderConfig>(builder->createBuilderConfig());
			
 
				   if (!config) {
			
 
				-    std::cout << "Fail to create config" << std::endl;
			
 
				+    std::cout << "Failed to create config" << std::endl;
			
 
				     return false;
			
 
				   }
			
 
				 #if (NV_TENSORRT_MAJOR * 1000) + (NV_TENSORRT_MINOR * 100) + NV_TENSOR_PATCH >= 8400
			
@@ -95,10 +103,11 @@ bool TensorRTWrapper::parseONNX(
 
				 #endif
			
 
				   if (precision == "fp16") {
			
 
				     if (builder->platformHasFastFp16()) {
			
 
				-      std::cout << "use TensorRT FP16 Inference" << std::endl;
			
 
				+      std::cout << "Using TensorRT FP16 Inference" << std::endl;
			
 
				       config->setFlag(nvinfer1::BuilderFlag::kFP16);
			
 
				     } else {
			
 
				-      std::cout << "TensorRT FP16 Inference isn't supported in this environment" << std::endl;
			
 
				+       std::cout
			
 
				+        << "TensorRT FP16 Inference isn't supported in this environment" << std::endl;
			
 
				     }
			
 
				   }
			
 
				 
			
@@ -107,7 +116,7 @@ bool TensorRTWrapper::parseONNX(
 
				   auto network =
			
 
				     tensorrt_common::TrtUniquePtr<nvinfer1::INetworkDefinition>(builder->createNetworkV2(flag));
			
 
				   if (!network) {
			
 
				-    std::cout << "Fail to create network" << std::endl;
			
 
				+    std::cout << "Failed to create network" << std::endl;
			
 
				     return false;
			
 
				   }
			
 
				 
			
@@ -116,22 +125,20 @@ bool TensorRTWrapper::parseONNX(
 
				   parser->parseFromFile(onnx_path.c_str(), static_cast<int>(nvinfer1::ILogger::Severity::kERROR));
			
 
				 
			
 
				   if (!setProfile(*builder, *network, *config)) {
			
 
				-    std::cout << "Fail to set profile" << std::endl;
			
 
				+    std::cout << "Failed to set profile" << std::endl;
			
 
				     return false;
			
 
				   }
			
 
				 
			
 
				-  std::cout << "Applying optimizations and building TRT CUDA engine (" << onnx_path << ") ..."
			
 
				-            << std::endl;
			
 
				   plan_ = tensorrt_common::TrtUniquePtr<nvinfer1::IHostMemory>(
			
 
				     builder->buildSerializedNetwork(*network, *config));
			
 
				   if (!plan_) {
			
 
				-    std::cout << "Fail to create serialized network" << std::endl;
			
 
				+    std::cout << "Failed to create serialized network" << std::endl;
			
 
				     return false;
			
 
				   }
			
 
				   engine_ = tensorrt_common::TrtUniquePtr<nvinfer1::ICudaEngine>(
			
 
				     runtime_->deserializeCudaEngine(plan_->data(), plan_->size()));
			
 
				   if (!engine_) {
			
 
				-    std::cout << "Fail to create engine" << std::endl;
			
 
				+    std::cout << "Failed to create engine" << std::endl;
			
 
				     return false;
			
 
				   }
			
 
				 
			
--- a/src/detection/detection_lidar_centerpoint/lib/postprocess/non_maximum_suppression.cpp
+++ b/src/detection/detection_lidar_centerpoint/lib/postprocess/non_maximum_suppression.cpp
@@ -14,10 +14,9 @@
 
				 
			
 
				 #include "lidar_centerpoint/postprocess/non_maximum_suppression.hpp"
			
 
				 
			
 
				-#include "perception_utils/geometry.hpp"
			
 
				-#include "perception_utils/perception_utils.hpp"
			
 
				-//#include "tier4_autoware_utils/tier4_autoware_utils.hpp"
			
 
				-
			
 
				+#include "object_recognition_utils/geometry.hpp"
			
 
				+#include "object_recognition_utils/object_recognition_utils.hpp"
			
 
				+#include "tier4_autoware_utils/geometry/geometry.hpp"
			
 
				 namespace centerpoint
			
 
				 {
			
 
				 
			
@@ -41,8 +40,8 @@ bool NonMaximumSuppression::isTargetLabel(const uint8_t label)
 
				 bool NonMaximumSuppression::isTargetPairObject(
			
 
				   const DetectedObject & object1, const DetectedObject & object2)
			
 
				 {
			
 
				-  const auto label1 = perception_utils::getHighestProbLabel(object1.classification);
			
 
				-  const auto label2 = perception_utils::getHighestProbLabel(object2.classification);
			
 
				+  const auto label1 = object_recognition_utils::getHighestProbLabel(object1.classification);
			
 
				+  const auto label2 = object_recognition_utils::getHighestProbLabel(object2.classification);
			
 
				 
			
 
				   if (isTargetLabel(label1) && isTargetLabel(label2)) {
			
 
				     return true;
			
@@ -50,7 +49,7 @@ bool NonMaximumSuppression::isTargetPairObject(
 
				 
			
 
				   const auto search_sqr_dist_2d = params_.search_distance_2d_ * params_.search_distance_2d_;
			
 
				   const auto sqr_dist_2d = tier4_autoware_utils::calcSquaredDistance2d(
			
 
				-    perception_utils::getPose(object1), perception_utils::getPose(object2));
			
 
				+    object_recognition_utils::getPose(object1), object_recognition_utils::getPose(object2));
			
 
				   return sqr_dist_2d <= search_sqr_dist_2d;
			
 
				 }
			
 
				 
			
@@ -69,7 +68,7 @@ Eigen::MatrixXd NonMaximumSuppression::generateIoUMatrix(
 
				       }
			
 
				 
			
 
				       if (params_.nms_type_ == NMS_TYPE::IoU_BEV) {
			
 
				-        const double iou = perception_utils::get2dIoU(target_obj, source_obj);
			
 
				+        const double iou = object_recognition_utils::get2dIoU(target_obj, source_obj);
			
 
				         triangular_matrix(target_i, source_i) = iou;
			
 
				         // NOTE: If the target object has any objects with iou > iou_threshold, it
			
 
				         // will be suppressed regardless of later results.
			
--- a/src/detection/detection_lidar_centerpoint/lib/postprocess/postprocess_kernel.cu
+++ b/src/detection/detection_lidar_centerpoint/lib/postprocess/postprocess_kernel.cu
@@ -48,7 +48,10 @@ struct score_greater
 
				   __device__ bool operator()(const Box3D & lb, const Box3D & rb) { return lb.score > rb.score; }
			
 
				 };
			
 
				 
			
 
				-__device__ inline float sigmoid(float x) { return 1.0f / (1.0f + expf(-x)); }
			
 
				+// Logistic sigmoid: maps x to 1 / (1 + e^-x) in single precision.
			
 
				+__device__ inline float sigmoid(float x)
			
 
				+{
			
 
				+  const float neg_exp = expf(-x);
			
 
				+  return 1.0f / (1.0f + neg_exp);
			
 
				+}
			
 
				 
			
 
				 __global__ void generateBoxes3D_kernel(
			
 
				   const float * out_heatmap, const float * out_offset, const float * out_z, const float * out_dim,
			
--- a/src/detection/detection_lidar_centerpoint/lib/preprocess/pointcloud_densification.cpp
+++ b/src/detection/detection_lidar_centerpoint/lib/preprocess/pointcloud_densification.cpp
@@ -96,7 +96,8 @@ bool PointCloudDensification::enqueuePointCloud(
 
				 
			
 
				 void PointCloudDensification::enqueue(const pcl::PointCloud<pcl::PointXYZI>::Ptr & pc_ptr)  // Caches pc_ptr together with the stored transform.
			
 
				 {
			
 
				-    PointCloudWithTransform pointcloud = {pc_ptr};
			
 
				+ //   affine_world2current_ = affine_world2current;
			
 
				+    PointCloudWithTransform pointcloud = {pc_ptr,affine_world2current_};  // pair the cloud with the member affine_world2current_
			
 
				     pointcloud_cache_.push_front(pointcloud);  // newest entry goes to the front of the cache
			
 
				 }
			
 
				 //void PointCloudDensification::enqueue(
			
--- a/src/detection/detection_lidar_centerpoint/lib/preprocess/preprocess_kernel.cu
+++ b/src/detection/detection_lidar_centerpoint/lib/preprocess/preprocess_kernel.cu
@@ -41,9 +41,104 @@ const std::size_t ENCODER_IN_FEATURE_SIZE = 9;  // the same as encoder_in_featur
 
				 
			
 
				 namespace centerpoint
			
 
				 {
			
 
				+// Bins every in-range point into its pillar. Expects a 1D launch with one thread per point.
			
 
				+// `points` is read as packed float4 records (x, y, z, w). `mask[pillar]` is atomically
			
 
				+// incremented for every accepted point (so it may exceed MAX_POINT_IN_VOXEL_SIZE), while
			
 
				+// `voxels` receives at most MAX_POINT_IN_VOXEL_SIZE points per pillar, 4 floats each.
			
 
				+// pillar_z_size is not used by this kernel.
			
 
				+__global__ void generateVoxels_random_kernel(
			
 
				+  const float * points, size_t points_size, float min_x_range, float max_x_range, float min_y_range,
			
 
				+  float max_y_range, float min_z_range, float max_z_range, float pillar_x_size, float pillar_y_size,
			
 
				+  float pillar_z_size, int grid_y_size, int grid_x_size, unsigned int * mask, float * voxels)
			
 
				+{
			
 
				+  // Index in size_t: avoids int overflow and the signed/unsigned compare against points_size.
			
 
				+  const size_t point_idx = static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x;
			
 
				+  if (point_idx >= points_size) return;
			
 
				+
			
 
				+  const float4 point = reinterpret_cast<const float4 *>(points)[point_idx];
			
 
				+
			
 
				+  // Written as a negated "inside" test so that NaN coordinates are rejected as well.
			
 
				+  const bool in_range = point.x >= min_x_range && point.x < max_x_range &&
			
 
				+                        point.y >= min_y_range && point.y < max_y_range &&
			
 
				+                        point.z >= min_z_range && point.z < max_z_range;
			
 
				+  if (!in_range) return;
			
 
				+
			
 
				+  const int voxel_idx = static_cast<int>(floorf((point.x - min_x_range) / pillar_x_size));
			
 
				+  const int voxel_idy = static_cast<int>(floorf((point.y - min_y_range) / pillar_y_size));
			
 
				+  // Float rounding can map a point just below the max range onto index == grid size;
			
 
				+  // drop it instead of writing past the end of mask/voxels.
			
 
				+  if (voxel_idx < 0 || voxel_idx >= grid_x_size || voxel_idy < 0 || voxel_idy >= grid_y_size) {
			
 
				+    return;
			
 
				+  }
			
 
				+  const unsigned int voxel_index = voxel_idy * grid_x_size + voxel_idx;
			
 
				+
			
 
				+  const unsigned int point_id = atomicAdd(&(mask[voxel_index]), 1);
			
 
				+
			
 
				+  // Pillar already full: the point stays counted in mask but is not stored.
			
 
				+  if (point_id >= MAX_POINT_IN_VOXEL_SIZE) return;
			
 
				+  float * address = voxels + (voxel_index * MAX_POINT_IN_VOXEL_SIZE + point_id) * 4;
			
 
				+  atomicExch(address + 0, point.x);
			
 
				+  atomicExch(address + 1, point.y);
			
 
				+  atomicExch(address + 2, point.z);
			
 
				+  atomicExch(address + 3, point.w);
			
 
				+}
			
 
				+
			
 
				+// Host wrapper: launches generateVoxels_random_kernel on `stream` with 256-thread blocks,
			
 
				+// one thread per point. Returns the launch status from cudaGetLastError(); errors raised
			
 
				+// while the kernel executes only surface at a later synchronizing call.
			
 
				+cudaError_t generateVoxels_random_launch(
			
 
				+  const float * points, size_t points_size, float min_x_range, float max_x_range, float min_y_range,
			
 
				+  float max_y_range, float min_z_range, float max_z_range, float pillar_x_size, float pillar_y_size,
			
 
				+  float pillar_z_size, int grid_y_size, int grid_x_size, unsigned int * mask, float * voxels,
			
 
				+  cudaStream_t stream)
			
 
				+{
			
 
				+  // An empty cloud would produce a zero-block grid, which CUDA rejects as an invalid
			
 
				+  // configuration. There is nothing to do in that case, so report success.
			
 
				+  if (points_size == 0) return cudaSuccess;
			
 
				+
			
 
				+  dim3 blocks((points_size + 256 - 1) / 256);
			
 
				+  dim3 threads(256);
			
 
				+  generateVoxels_random_kernel<<<blocks, threads, 0, stream>>>(
			
 
				+    points, points_size, min_x_range, max_x_range, min_y_range, max_y_range, min_z_range,
			
 
				+    max_z_range, pillar_x_size, pillar_y_size, pillar_z_size, grid_y_size, grid_x_size, mask,
			
 
				+    voxels);
			
 
				+  cudaError_t err = cudaGetLastError();
			
 
				+  return err;
			
 
				+}
			
 
				+
			
 
				+// Compacts the non-empty pillars of the grid into dense output arrays.
			
 
				+// Expects a 2D launch with one thread per (x, y) grid cell. For each cell whose mask count
			
 
				+// is non-zero, a slot is reserved with atomicAdd on *pillar_num, then the point count
			
 
				+// (voxel_num), the cell coordinates as (0, y, x) (voxel_idxs) and up to
			
 
				+// MAX_POINT_IN_VOXEL_SIZE float4 points (voxel_features) are written to that slot.
			
 
				+// NOTE: *pillar_num counts every non-empty cell and can therefore exceed max_voxel_size;
			
 
				+// only the first max_voxel_size reserved slots are actually written.
			
 
				+__global__ void generateBaseFeatures_kernel(
			
 
				+  unsigned int * mask, float * voxels, int grid_y_size, int grid_x_size, int max_voxel_size,
			
 
				+  unsigned int * pillar_num, float * voxel_features, float * voxel_num, int * voxel_idxs)
			
 
				+{
			
 
				+  unsigned int voxel_idx = blockIdx.x * blockDim.x + threadIdx.x;
			
 
				+  unsigned int voxel_idy = blockIdx.y * blockDim.y + threadIdx.y;
			
 
				+
			
 
				+  if (voxel_idx >= grid_x_size || voxel_idy >= grid_y_size) return;
			
 
				+
			
 
				+  unsigned int voxel_index = voxel_idy * grid_x_size + voxel_idx;
			
 
				+  unsigned int count = mask[voxel_index];
			
 
				+  if (!(count > 0)) return;
			
 
				+  count = count < MAX_POINT_IN_VOXEL_SIZE ? count : MAX_POINT_IN_VOXEL_SIZE;
			
 
				+
			
 
				+  // clear buffer for next infer; done before the capacity check so that cells dropped
			
 
				+  // because the output is full do not keep a stale count.
			
 
				+  atomicExch(mask + voxel_index, 0);
			
 
				+
			
 
				+  const unsigned int current_pillarId = atomicAdd(pillar_num, 1);
			
 
				+  // Compare in unsigned space with an explicit guard: `max_voxel_size - 1` would wrap to
			
 
				+  // UINT_MAX for max_voxel_size == 0 and let every thread write out of bounds.
			
 
				+  if (max_voxel_size <= 0 || current_pillarId >= static_cast<unsigned int>(max_voxel_size)) {
			
 
				+    return;
			
 
				+  }
			
 
				+
			
 
				+  voxel_num[current_pillarId] = count;
			
 
				+
			
 
				+  uint3 idx = {0, voxel_idy, voxel_idx};
			
 
				+  ((uint3 *)voxel_idxs)[current_pillarId] = idx;
			
 
				+
			
 
				+  for (unsigned int i = 0; i < count; i++) {
			
 
				+    const unsigned int inIndex = voxel_index * MAX_POINT_IN_VOXEL_SIZE + i;
			
 
				+    const unsigned int outIndex = current_pillarId * MAX_POINT_IN_VOXEL_SIZE + i;
			
 
				+    ((float4 *)voxel_features)[outIndex] = ((float4 *)voxels)[inIndex];
			
 
				+  }
			
 
				+}
			
 
				+
			
 
				+// create 4 channels
			
 
				+// Host-side launcher for generateBaseFeatures_kernel on `stream`.
			
 
				+// Returns the launch status reported by cudaGetLastError().
			
 
				+cudaError_t generateBaseFeatures_launch(
			
 
				+  unsigned int * mask, float * voxels, int grid_y_size, int grid_x_size, int max_voxel_size,
			
 
				+  unsigned int * pillar_num, float * voxel_features, float * voxel_num, int * voxel_idxs,
			
 
				+  cudaStream_t stream)
			
 
				+{
			
 
				+  // 32 x 32 threads per block, with enough blocks to tile grid_x_size x grid_y_size cells.
			
 
				+  const dim3 threads(32, 32);
			
 
				+  const unsigned int blocks_x = (grid_x_size + threads.x - 1) / threads.x;
			
 
				+  const unsigned int blocks_y = (grid_y_size + threads.y - 1) / threads.y;
			
 
				+  const dim3 blocks(blocks_x, blocks_y);
			
 
				+
			
 
				+  generateBaseFeatures_kernel<<<blocks, threads, 0, stream>>>(
			
 
				+    mask, voxels, grid_y_size, grid_x_size, max_voxel_size, pillar_num, voxel_features, voxel_num,
			
 
				+    voxel_idxs);
			
 
				+
			
 
				+  return cudaGetLastError();
			
 
				+}
			
 
				+
			
 
				 __global__ void generateFeatures_kernel(
			
 
				   const float * voxel_features, const float * voxel_num_points, const int * coords,
			
 
				-  const std::size_t num_voxels, const float voxel_x, const float voxel_y, const float voxel_z,
			
 
				+  const unsigned int * num_voxels, const float voxel_x, const float voxel_y, const float voxel_z,
			
 
				   const float range_min_x, const float range_min_y, const float range_min_z, float * features)
			
 
				 {
			
 
				   // voxel_features (float): (max_voxel_size, max_point_in_voxel_size, point_feature_size)
			
@@ -53,7 +148,8 @@ __global__ void generateFeatures_kernel(
 
				   int point_idx = threadIdx.x % MAX_POINT_IN_VOXEL_SIZE;
			
 
				   int pillar_idx_inBlock = threadIdx.x / MAX_POINT_IN_VOXEL_SIZE;  // max_point_in_voxel_size
			
 
				 
			
 
				-  if (pillar_idx >= num_voxels) return;
			
 
				+  unsigned int num_pillars = num_voxels[0];
			
 
				+  if (pillar_idx >= num_pillars) return;
			
 
				 
			
 
				   // load src
			
 
				   __shared__ float4 pillarSM[WARPS_PER_BLOCK][MAX_POINT_IN_VOXEL_SIZE];
			
@@ -144,7 +240,7 @@ __global__ void generateFeatures_kernel(
 
				 // cspell: ignore divup
			
 
				 cudaError_t generateFeatures_launch(
			
 
				   const float * voxel_features, const float * voxel_num_points, const int * coords,
			
 
				-  const std::size_t num_voxels, const std::size_t max_voxel_size, const float voxel_size_x,
			
 
				+  const unsigned int * num_voxels, const std::size_t max_voxel_size, const float voxel_size_x,
			
 
				   const float voxel_size_y, const float voxel_size_z, const float range_min_x,
			
 
				   const float range_min_y, const float range_min_z, float * features, cudaStream_t stream)
			
 
				 {
			
--- a/src/detection/detection_lidar_centerpoint/lib/preprocess/voxel_generator.cpp
+++ b/src/detection/detection_lidar_centerpoint/lib/preprocess/voxel_generator.cpp
@@ -54,87 +54,130 @@ bool VoxelGeneratorTemplate::enqueuePointCloud(
 
				 //}
			
 
				 
			
 
				 
			
 
				-std::size_t VoxelGenerator::pointsToVoxels(
			
 
				-  std::vector<float> & voxels, std::vector<int> & coordinates,
			
 
				-  std::vector<float> & num_points_per_voxel)
			
 
				+std::size_t VoxelGenerator::generateSweepPoints(std::vector<float> & points)
			
 
				 {
			
 
				-  // voxels (float): (max_voxel_size * max_point_in_voxel_size * point_feature_size)
			
 
				-  // coordinates (int): (max_voxel_size * point_dim_size)
			
 
				-  // num_points_per_voxel (float): (max_voxel_size)
			
 
				-
			
 
				-  const std::size_t grid_size = config_.grid_size_z_ * config_.grid_size_y_ * config_.grid_size_x_;
			
 
				-  std::vector<int> coord_to_voxel_idx(grid_size, -1);
			
 
				-
			
 
				-  std::size_t voxel_cnt = 0;  // @return
			
 
				-  std::vector<float> point;
			
 
				-  point.resize(config_.point_feature_size_);
			
 
				-  std::vector<float> coord_zyx;
			
 
				-  coord_zyx.resize(config_.point_dim_size_);
			
 
				-  bool out_of_range;
			
 
				-  std::size_t point_cnt;
			
 
				-  int c, coord_idx, voxel_idx;
			
 
				   Eigen::Vector3f point_current, point_past;
			
 
				-
			
 
				+  size_t point_counter{};
			
 
				   for (auto pc_cache_iter = pd_ptr_->getPointCloudCacheIter(); !pd_ptr_->isCacheEnd(pc_cache_iter);
			
 
				        pc_cache_iter++) {
			
 
				-    pcl::PointCloud<pcl::PointXYZI>::Ptr pc_ptr = pc_cache_iter->pc_ptr;
			
 
				+    auto pc_msg = pc_cache_iter->pc_ptr;
			
 
				+    auto affine_past2current =
			
 
				+      pd_ptr_->getAffineWorldToCurrent();// * pc_cache_iter->affine_past2world;   //Do not use last point
			
 
				+//    float time_lag = static_cast<float>(
			
 
				+//      pd_ptr_->getCurrentTimestamp() - rclcpp::Time(pc_msg.header.stamp).seconds());
			
 
				+
			
 
				+    float time_lag = 0;
			
 
				 
			
 
				+    pcl::PointCloud<pcl::PointXYZI>::Ptr pc_ptr = pc_cache_iter->pc_ptr;
			
 
				     int nsize = pc_ptr->points.size();
			
 
				     int i;
			
 
				 
			
 
				     for (i=0;i<nsize;i++) {
			
 
				+        points.at(point_counter * config_.point_feature_size_) = pc_ptr->at(i)._PointXYZI::x;//   point_current.x();
			
 
				+        points.at(point_counter * config_.point_feature_size_ + 1) = pc_ptr->at(i)._PointXYZI::y;// point_current.y();
			
 
				+        points.at(point_counter * config_.point_feature_size_ + 2) = pc_ptr->at(i)._PointXYZI::z;// point_current.z();
			
 
				+        points.at(point_counter * config_.point_feature_size_ + 3) = time_lag;
			
 
				+        ++point_counter;
			
 
				+    }
			
 
				+
			
 
				+//    for (sensor_msgs::PointCloud2ConstIterator<float> x_iter(pc_msg, "x"), y_iter(pc_msg, "y"),
			
 
				+//         z_iter(pc_msg, "z");
			
 
				+//         x_iter != x_iter.end(); ++x_iter, ++y_iter, ++z_iter) {
			
 
				+//      point_past << *x_iter, *y_iter, *z_iter;
			
 
				+//      point_current = affine_past2current * point_past;
			
 
				 
			
 
				+//      points.at(point_counter * config_.point_feature_size_) = point_current.x();
			
 
				+//      points.at(point_counter * config_.point_feature_size_ + 1) = point_current.y();
			
 
				+//      points.at(point_counter * config_.point_feature_size_ + 2) = point_current.z();
			
 
				+//      points.at(point_counter * config_.point_feature_size_ + 3) = time_lag;
			
 
				+//      ++point_counter;
			
 
				 
			
 
				-      point[0] = pc_ptr->points.at(i).x;
			
 
				-      point[1] = pc_ptr->points.at(i).y;
			
 
				-      point[2] = pc_ptr->points.at(i).z;
			
 
				-      point[3] = 0;
			
 
				-
			
 
				-      out_of_range = false;
			
 
				-      for (std::size_t di = 0; di < config_.point_dim_size_; di++) {
			
 
				-        c = static_cast<int>((point[di] - range_[di]) * recip_voxel_size_[di]);
			
 
				-        if (c < 0 || c >= grid_size_[di]) {
			
 
				-          out_of_range = true;
			
 
				-          break;
			
 
				-        }
			
 
				-        coord_zyx[config_.point_dim_size_ - di - 1] = c;
			
 
				-      }
			
 
				-      if (out_of_range) {
			
 
				-        continue;
			
 
				-      }
			
 
				-
			
 
				-      coord_idx = coord_zyx[0] * config_.grid_size_y_ * config_.grid_size_x_ +
			
 
				-                  coord_zyx[1] * config_.grid_size_x_ + coord_zyx[2];
			
 
				-      voxel_idx = coord_to_voxel_idx[coord_idx];
			
 
				-      if (voxel_idx == -1) {
			
 
				-        voxel_idx = voxel_cnt;
			
 
				-        if (voxel_cnt >= config_.max_voxel_size_) {
			
 
				-          continue;
			
 
				-        }
			
 
				-
			
 
				-        voxel_cnt++;
			
 
				-        coord_to_voxel_idx[coord_idx] = voxel_idx;
			
 
				-        for (std::size_t di = 0; di < config_.point_dim_size_; di++) {
			
 
				-          coordinates[voxel_idx * config_.point_dim_size_ + di] = coord_zyx[di];
			
 
				-        }
			
 
				-      }
			
 
				-
			
 
				-      point_cnt = num_points_per_voxel[voxel_idx];
			
 
				-      if (point_cnt < config_.max_point_in_voxel_size_) {
			
 
				-        for (std::size_t fi = 0; fi < config_.point_feature_size_; fi++) {
			
 
				-          voxels
			
 
				-            [voxel_idx * config_.max_point_in_voxel_size_ * config_.point_feature_size_ +
			
 
				-             point_cnt * config_.point_feature_size_ + fi] = point[fi];
			
 
				-        }
			
 
				-        num_points_per_voxel[voxel_idx]++;
			
 
				-      }
			
 
				-    }
			
 
				   }
			
 
				-
			
 
				-  return voxel_cnt;
			
 
				+  return point_counter;
			
 
				 }
			
 
				 
			
 
				 
			
 
				+//std::size_t VoxelGenerator::pointsToVoxels(
			
 
				+//  std::vector<float> & voxels, std::vector<int> & coordinates,
			
 
				+//  std::vector<float> & num_points_per_voxel)
			
 
				+//{
			
 
				+//  // voxels (float): (max_voxel_size * max_point_in_voxel_size * point_feature_size)
			
 
				+//  // coordinates (int): (max_voxel_size * point_dim_size)
			
 
				+//  // num_points_per_voxel (float): (max_voxel_size)
			
 
				+
			
 
				+//  const std::size_t grid_size = config_.grid_size_z_ * config_.grid_size_y_ * config_.grid_size_x_;
			
 
				+//  std::vector<int> coord_to_voxel_idx(grid_size, -1);
			
 
				+
			
 
				+//  std::size_t voxel_cnt = 0;  // @return
			
 
				+//  std::vector<float> point;
			
 
				+//  point.resize(config_.point_feature_size_);
			
 
				+//  std::vector<float> coord_zyx;
			
 
				+//  coord_zyx.resize(config_.point_dim_size_);
			
 
				+//  bool out_of_range;
			
 
				+//  std::size_t point_cnt;
			
 
				+//  int c, coord_idx, voxel_idx;
			
 
				+//  Eigen::Vector3f point_current, point_past;
			
 
				+
			
 
				+//  for (auto pc_cache_iter = pd_ptr_->getPointCloudCacheIter(); !pd_ptr_->isCacheEnd(pc_cache_iter);
			
 
				+//       pc_cache_iter++) {
			
 
				+//    pcl::PointCloud<pcl::PointXYZI>::Ptr pc_ptr = pc_cache_iter->pc_ptr;
			
 
				+
			
 
				+//    int nsize = pc_ptr->points.size();
			
 
				+//    int i;
			
 
				+
			
 
				+//    for (i=0;i<nsize;i++) {
			
 
				+
			
 
				+
			
 
				+//      point[0] = pc_ptr->points.at(i).x;
			
 
				+//      point[1] = pc_ptr->points.at(i).y;
			
 
				+//      point[2] = pc_ptr->points.at(i).z;
			
 
				+//      point[3] = 0;
			
 
				+
			
 
				+//      out_of_range = false;
			
 
				+//      for (std::size_t di = 0; di < config_.point_dim_size_; di++) {
			
 
				+//        c = static_cast<int>((point[di] - range_[di]) * recip_voxel_size_[di]);
			
 
				+//        if (c < 0 || c >= grid_size_[di]) {
			
 
				+//          out_of_range = true;
			
 
				+//          break;
			
 
				+//        }
			
 
				+//        coord_zyx[config_.point_dim_size_ - di - 1] = c;
			
 
				+//      }
			
 
				+//      if (out_of_range) {
			
 
				+//        continue;
			
 
				+//      }
			
 
				+
			
 
				+//      coord_idx = coord_zyx[0] * config_.grid_size_y_ * config_.grid_size_x_ +
			
 
				+//                  coord_zyx[1] * config_.grid_size_x_ + coord_zyx[2];
			
 
				+//      voxel_idx = coord_to_voxel_idx[coord_idx];
			
 
				+//      if (voxel_idx == -1) {
			
 
				+//        voxel_idx = voxel_cnt;
			
 
				+//        if (voxel_cnt >= config_.max_voxel_size_) {
			
 
				+//          continue;
			
 
				+//        }
			
 
				+
			
 
				+//        voxel_cnt++;
			
 
				+//        coord_to_voxel_idx[coord_idx] = voxel_idx;
			
 
				+//        for (std::size_t di = 0; di < config_.point_dim_size_; di++) {
			
 
				+//          coordinates[voxel_idx * config_.point_dim_size_ + di] = coord_zyx[di];
			
 
				+//        }
			
 
				+//      }
			
 
				+
			
 
				+//      point_cnt = num_points_per_voxel[voxel_idx];
			
 
				+//      if (point_cnt < config_.max_point_in_voxel_size_) {
			
 
				+//        for (std::size_t fi = 0; fi < config_.point_feature_size_; fi++) {
			
 
				+//          voxels
			
 
				+//            [voxel_idx * config_.max_point_in_voxel_size_ * config_.point_feature_size_ +
			
 
				+//             point_cnt * config_.point_feature_size_ + fi] = point[fi];
			
 
				+//        }
			
 
				+//        num_points_per_voxel[voxel_idx]++;
			
 
				+//      }
			
 
				+//    }
			
 
				+//  }
			
 
				+
			
 
				+//  return voxel_cnt;
			
 
				+//}
			
 
				+
			
 
				+
			
 
				 //std::size_t VoxelGenerator::pointsToVoxels(
			
 
				 //  std::vector<float> & voxels, std::vector<int> & coordinates,
			
 
				 //  std::vector<float> & num_points_per_voxel)
			
--- a/src/detection/detection_lidar_centerpoint/main.cpp
+++ b/src/detection/detection_lidar_centerpoint/main.cpp
@@ -1,6 +1,6 @@
 
				 #include <QCoreApplication>
			
 
				 
			
 
				-
			
 
				+#include <yaml-cpp/yaml.h>
			
 
				 
			
 
				 #include <lidar_centerpoint/centerpoint_config.hpp>
			
 
				 #include <lidar_centerpoint/centerpoint_trt.hpp>
			
@@ -16,12 +16,19 @@ using namespace centerpoint;
 
				 
			
 
				 std::unique_ptr<CenterPointTRT> detector_ptr_{nullptr};
			
 
				 
			
 
				+void LoadYaml(std::string stryamlname,std::vector<int64_t> & allow_remapping_by_area_matrix
			
 
				+              )
			
 
				+{  // TODO: stub - the body is empty, so neither parameter is read or written yet
			
 
				+
			
 
				+}
			
 
				+
			
 
				 void init()
			
 
				 {
			
 
				     const float score_threshold = 0.35;
			
 
				-    const float circle_nms_dist_threshold =1.5;
			
 
				+    const float circle_nms_dist_threshold =0.5;
			
 
				   //    static_cast<float>(this->declare_parameter<double>("circle_nms_dist_threshold"));
			
 
				     std::vector<double> yaw_norm_thresholds ;
			
 
				+    yaw_norm_thresholds.push_back(0.3);yaw_norm_thresholds.push_back(0.3);yaw_norm_thresholds.push_back(0.3);yaw_norm_thresholds.push_back(0.0);
			
 
				     const std::string densification_world_frame_id = "map";
			
 
				     const int densification_num_past_frames = 1;
			
 
				     const std::string trt_precision = "fp16";
			
@@ -119,9 +126,22 @@ void ListenPointCloud(const char * strdata,const unsigned int nSize,const unsign
 
				 
			
 
				 }
			
 
				 
			
 
				-void testdet()
			
 
				+
			
 
				+#include <pcl/io/pcd_io.h>
			
 
				+// Offline smoke test: loads the PCD file at `path` and runs the detector on it 10 times,
			
 
				+// printing the number of boxes returned by each run. Returns early, with a message on
			
 
				+// stderr, if the file cannot be loaded or the detector has not been created.
			
 
				+void testdet(std::string & path)
			
 
				 {
			
 
				+    pcl::PointCloud<pcl::PointXYZI>::Ptr point_cloud(
			
 
				+                new pcl::PointCloud<pcl::PointXYZI>());
			
 
				 
			
 
				+    // loadPCDFile signals failure with a negative return value; do not run inference
			
 
				+    // on a cloud that was never filled.
			
 
				+    if (pcl::io::loadPCDFile<pcl::PointXYZI>(path,*point_cloud) < 0)
			
 
				+    {
			
 
				+        std::cerr<<"testdet: failed to load "<<path<<std::endl;
			
 
				+        return;
			
 
				+    }
			
 
				+
			
 
				+    // detector_ptr_ starts out as nullptr; bail out instead of dereferencing it.
			
 
				+    if (!detector_ptr_)
			
 
				+    {
			
 
				+        std::cerr<<"testdet: detector is not initialized"<<std::endl;
			
 
				+        return;
			
 
				+    }
			
 
				+
			
 
				+    std::vector<Box3D> det_boxes3d;
			
 
				+    for(int i=0;i<10;i++)
			
 
				+    {
			
 
				+        detector_ptr_ ->detect(point_cloud,det_boxes3d);
			
 
				+        std::cout<<" box size: "<<det_boxes3d.size()<<std::endl;
			
 
				+    }
			
 
				 }
				 }
			
 
				 
			
 
				 
			
@@ -130,6 +150,9 @@ int main(int argc, char *argv[])
 
				     QCoreApplication a(argc, argv);
			
 
				     init();
			
 
				 
			
 
				+    std::string path = "/home/nvidia/1.pcd";
			
 
				+    testdet(path);
			
 
				+
			
 
				     gpa = iv::modulecomm::RegisterRecv("lidarpc_center",ListenPointCloud);
			
 
				 
			
 
				     return a.exec();