10 months ago · 2ad138b83e
--- a/src/detection/detection_lidar_centerpoint/detection_lidar_centerpoint.pro
+++ b/src/detection/detection_lidar_centerpoint/detection_lidar_centerpoint.pro
@@ -70,6 +70,10 @@ LIBS += -lrt -ldl -lnvinfer -lcudnn  -lcudart -lnvparsers -lnvonnxparser -lnvinf
 
															     error( "Couldn't find the ivprotobuf.pri file!" )
														
 
															 }
														
 
															+!include(../../../include/ivyaml-cpp.pri ) {
														
 
															+    error( "Couldn't find the ivyaml-cpp.pri file!" )
														
 
															+}
														
 
															+
														
 
															 INCLUDEPATH += $$PWD/../../include/msgtype
														
@@ -174,3 +178,25 @@ HEADERS += \
 
															     ../../include/msgtype/object.pb.h \
														
 
															     ../../include/msgtype/objectarray.pb.h
														
 
															+
														
 
															+
														
 
															+
														
 
															+unix:LIBS +=  -lpcl_common\
														
 
															+        -lpcl_features\
														
 
															+        -lpcl_filters\
														
 
															+        -lpcl_io\
														
 
															+        -lpcl_io_ply\
														
 
															+        -lpcl_kdtree\
														
 
															+        -lpcl_keypoints\
														
 
															+        -lpcl_octree\
														
 
															+        -lpcl_outofcore\
														
 
															+        -lpcl_people\
														
 
															+        -lpcl_recognition\
														
 
															+        -lpcl_registration\
														
 
															+        -lpcl_sample_consensus\
														
 
															+        -lpcl_search\
														
 
															+        -lpcl_segmentation\
														
 
															+        -lpcl_surface\
														
 
															+        -lpcl_tracking\
														
 
															+        -lpcl_visualization
														
 
															+
														
--- a/src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/centerpoint_trt.hpp
+++ b/src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/centerpoint_trt.hpp
@@ -32,6 +32,7 @@
 
															 namespace centerpoint
														
 
															 {
														
 
															+static constexpr size_t CAPACITY_POINT = 1000000;
														
 
															 class NetworkParam
														
 
															 {
														
 
															 public:
														
@@ -88,14 +89,16 @@ protected:
 
															   std::unique_ptr<PostProcessCUDA> post_proc_ptr_{nullptr};
														
 
															   cudaStream_t stream_{nullptr};
														
 
															+
														
 
															   std::size_t class_size_{0};
														
 
															   CenterPointConfig config_;
														
 
															-  std::size_t num_voxels_{0};
														
 
															   std::size_t encoder_in_feature_size_{0};
														
 
															   std::size_t spatial_features_size_{0};
														
 
															-  std::vector<float> voxels_;
														
 
															-  std::vector<int> coordinates_;
														
 
															-  std::vector<float> num_points_per_voxel_;
														
 
															+  std::size_t voxels_buffer_size_{0};
														
 
															+  std::size_t mask_size_{0};
														
 
															+  std::size_t voxels_size_{0};
														
 
															+  std::size_t coordinates_size_{0};
														
 
															+  std::vector<float> points_;
														
 
															   cuda::unique_ptr<float[]> voxels_d_{nullptr};
														
 
															   cuda::unique_ptr<int[]> coordinates_d_{nullptr};
														
 
															   cuda::unique_ptr<float[]> num_points_per_voxel_d_{nullptr};
														
@@ -108,6 +111,31 @@ protected:
 
															   cuda::unique_ptr<float[]> head_out_dim_d_{nullptr};
														
 
															   cuda::unique_ptr<float[]> head_out_rot_d_{nullptr};
														
 
															   cuda::unique_ptr<float[]> head_out_vel_d_{nullptr};
														
 
															+  cuda::unique_ptr<float[]> points_d_{nullptr};
														
 
															+  cuda::unique_ptr<float[]> voxels_buffer_d_{nullptr};
														
 
															+  cuda::unique_ptr<unsigned int[]> mask_d_{nullptr};
														
 
															+  cuda::unique_ptr<unsigned int[]> num_voxels_d_{nullptr};
														
 
															+
														
 
															+//  std::size_t class_size_{0};
														
 
															+//  CenterPointConfig config_;
														
 
															+//  std::size_t num_voxels_{0};
														
 
															+//  std::size_t encoder_in_feature_size_{0};
														
 
															+//  std::size_t spatial_features_size_{0};
														
 
															+//  std::vector<float> voxels_;
														
 
															+//  std::vector<int> coordinates_;
														
 
															+//  std::vector<float> num_points_per_voxel_;
														
 
															+//  cuda::unique_ptr<float[]> voxels_d_{nullptr};
														
 
															+//  cuda::unique_ptr<int[]> coordinates_d_{nullptr};
														
 
															+//  cuda::unique_ptr<float[]> num_points_per_voxel_d_{nullptr};
														
 
															+//  cuda::unique_ptr<float[]> encoder_in_features_d_{nullptr};
														
 
															+//  cuda::unique_ptr<float[]> pillar_features_d_{nullptr};
														
 
															+//  cuda::unique_ptr<float[]> spatial_features_d_{nullptr};
														
 
															+//  cuda::unique_ptr<float[]> head_out_heatmap_d_{nullptr};
														
 
															+//  cuda::unique_ptr<float[]> head_out_offset_d_{nullptr};
														
 
															+//  cuda::unique_ptr<float[]> head_out_z_d_{nullptr};
														
 
															+//  cuda::unique_ptr<float[]> head_out_dim_d_{nullptr};
														
 
															+//  cuda::unique_ptr<float[]> head_out_rot_d_{nullptr};
														
 
															+//  cuda::unique_ptr<float[]> head_out_vel_d_{nullptr};
														
 
															 };
														
 
															 }  // namespace centerpoint
														
--- a/src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/network/scatter_kernel.hpp
+++ b/src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/network/scatter_kernel.hpp
@@ -21,7 +21,7 @@
 
															 namespace centerpoint
														
 
															 {
														
 
															 cudaError_t scatterFeatures_launch(
														
 
															-  const float * pillar_features, const int * coords, const std::size_t num_pillars,
														
 
															+  const float * pillar_features, const int * coords, const unsigned int * num_pillars,
														
 
															   const std::size_t max_voxel_size, const std::size_t encoder_out_feature_size,
														
 
															   const std::size_t grid_size_x, const std::size_t grid_size_y, float * scattered_features,
														
 
															   cudaStream_t stream);
														
--- a/src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/preprocess/pointcloud_densification.hpp
+++ b/src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/preprocess/pointcloud_densification.hpp
@@ -52,6 +52,7 @@ private:
 
															 struct PointCloudWithTransform
														
 
															 {
														
 
															   pcl::PointCloud<pcl::PointXYZI>::Ptr pc_ptr;
														
 
															+  Eigen::Affine3f affine_past2world;
														
 
															 };
														
--- a/src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/preprocess/preprocess_kernel.hpp
+++ b/src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/preprocess/preprocess_kernel.hpp
@@ -20,9 +20,20 @@
 
															 namespace centerpoint
														
 
															 {
														
 
															+cudaError_t generateVoxels_random_launch(
														
 
															+  const float * points, size_t points_size, float min_x_range, float max_x_range, float min_y_range,
														
 
															+  float max_y_range, float min_z_range, float max_z_range, float pillar_x_size, float pillar_y_size,
														
 
															+  float pillar_z_size, int grid_y_size, int grid_x_size, unsigned int * mask, float * voxels,
														
 
															+  cudaStream_t stream);
														
 
															+
														
 
															+cudaError_t generateBaseFeatures_launch(
														
 
															+  unsigned int * mask, float * voxels, int grid_y_size, int grid_x_size, int max_voxel_size,
														
 
															+  unsigned int * pillar_num, float * voxel_features, float * voxel_num, int * voxel_idxs,
														
 
															+  cudaStream_t stream);
														
 
															+
														
 
															 cudaError_t generateFeatures_launch(
														
 
															   const float * voxel_features, const float * voxel_num_points, const int * coords,
														
 
															-  const std::size_t num_voxels, const std::size_t max_voxel_size, const float voxel_size_x,
														
 
															+  const unsigned int * num_voxels, const std::size_t max_voxel_size, const float voxel_size_x,
														
 
															   const float voxel_size_y, const float voxel_size_z, const float range_min_x,
														
 
															   const float range_min_y, const float range_min_z, float * features, cudaStream_t stream);
														
--- a/src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/preprocess/voxel_generator.hpp
+++ b/src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/preprocess/voxel_generator.hpp
@@ -31,9 +31,10 @@ public:
 
															   explicit VoxelGeneratorTemplate(
														
 
															     const DensificationParam & param, const CenterPointConfig & config);
														
 
															-  virtual std::size_t pointsToVoxels(
														
 
															-    std::vector<float> & voxels, std::vector<int> & coordinates,
														
 
															-    std::vector<float> & num_points_per_voxel) = 0;
														
 
															+    virtual std::size_t generateSweepPoints(std::vector<float> & points) = 0;
														
 
															+//  virtual std::size_t pointsToVoxels(
														
 
															+//    std::vector<float> & voxels, std::vector<int> & coordinates,
														
 
															+//    std::vector<float> & num_points_per_voxel) = 0;
														
 
															     bool enqueuePointCloud(
														
 
															       const pcl::PointCloud<pcl::PointXYZI>::Ptr);
														
@@ -55,9 +56,11 @@ class VoxelGenerator : public VoxelGeneratorTemplate
 
															 public:
														
 
															   using VoxelGeneratorTemplate::VoxelGeneratorTemplate;
														
 
															-  std::size_t pointsToVoxels(
														
 
															-    std::vector<float> & voxels, std::vector<int> & coordinates,
														
 
															-    std::vector<float> & num_points_per_voxel) override;
														
 
															+  std::size_t generateSweepPoints(std::vector<float> & points) override;
														
 
															+
														
 
															+//  std::size_t pointsToVoxels(
														
 
															+//    std::vector<float> & voxels, std::vector<int> & coordinates,
														
 
															+//    std::vector<float> & num_points_per_voxel) override;
														
 
															 };
														
 
															 }  // namespace centerpoint
														
--- a/src/detection/detection_lidar_centerpoint/lib/centerpoint_trt.cpp
+++ b/src/detection/detection_lidar_centerpoint/lib/centerpoint_trt.cpp
@@ -69,34 +69,40 @@ CenterPointTRT::~CenterPointTRT()
 
															 void CenterPointTRT::initPtr()
														
 
															 {
														
 
															-  const auto voxels_size =
														
 
															-    config_.max_voxel_size_ * config_.max_point_in_voxel_size_ * config_.point_feature_size_;
														
 
															-  const auto coordinates_size = config_.max_voxel_size_ * config_.point_dim_size_;
														
 
															-  encoder_in_feature_size_ =
														
 
															-    config_.max_voxel_size_ * config_.max_point_in_voxel_size_ * config_.encoder_in_feature_size_;
														
 
															-  const auto pillar_features_size = config_.max_voxel_size_ * config_.encoder_out_feature_size_;
														
 
															-  spatial_features_size_ =
														
 
															-    config_.grid_size_x_ * config_.grid_size_y_ * config_.encoder_out_feature_size_;
														
 
															-  const auto grid_xy_size = config_.down_grid_size_x_ * config_.down_grid_size_y_;
														
 
															-
														
 
															-  // host
														
 
															-  voxels_.resize(voxels_size);
														
 
															-  coordinates_.resize(coordinates_size);
														
 
															-  num_points_per_voxel_.resize(config_.max_voxel_size_);
														
 
															-
														
 
															-  // device
														
 
															-  voxels_d_ = cuda::make_unique<float[]>(voxels_size);
														
 
															-  coordinates_d_ = cuda::make_unique<int[]>(coordinates_size);
														
 
															-  num_points_per_voxel_d_ = cuda::make_unique<float[]>(config_.max_voxel_size_);
														
 
															-  encoder_in_features_d_ = cuda::make_unique<float[]>(encoder_in_feature_size_);
														
 
															-  pillar_features_d_ = cuda::make_unique<float[]>(pillar_features_size);
														
 
															-  spatial_features_d_ = cuda::make_unique<float[]>(spatial_features_size_);
														
 
															-  head_out_heatmap_d_ = cuda::make_unique<float[]>(grid_xy_size * config_.class_size_);
														
 
															-  head_out_offset_d_ = cuda::make_unique<float[]>(grid_xy_size * config_.head_out_offset_size_);
														
 
															-  head_out_z_d_ = cuda::make_unique<float[]>(grid_xy_size * config_.head_out_z_size_);
														
 
															-  head_out_dim_d_ = cuda::make_unique<float[]>(grid_xy_size * config_.head_out_dim_size_);
														
 
															-  head_out_rot_d_ = cuda::make_unique<float[]>(grid_xy_size * config_.head_out_rot_size_);
														
 
															-  head_out_vel_d_ = cuda::make_unique<float[]>(grid_xy_size * config_.head_out_vel_size_);
														
 
															+    voxels_size_ =
														
 
															+      config_.max_voxel_size_ * config_.max_point_in_voxel_size_ * config_.point_feature_size_;
														
 
															+    coordinates_size_ = config_.max_voxel_size_ * config_.point_dim_size_;
														
 
															+    encoder_in_feature_size_ =
														
 
															+      config_.max_voxel_size_ * config_.max_point_in_voxel_size_ * config_.encoder_in_feature_size_;
														
 
															+    const auto pillar_features_size = config_.max_voxel_size_ * config_.encoder_out_feature_size_;
														
 
															+    spatial_features_size_ =
														
 
															+      config_.grid_size_x_ * config_.grid_size_y_ * config_.encoder_out_feature_size_;
														
 
															+    const auto grid_xy_size = config_.down_grid_size_x_ * config_.down_grid_size_y_;
														
 
															+
														
 
															+    voxels_buffer_size_ = config_.grid_size_x_ * config_.grid_size_y_ *
														
 
															+                          config_.max_point_in_voxel_size_ * config_.point_feature_size_;
														
 
															+    mask_size_ = config_.grid_size_x_ * config_.grid_size_y_;
														
 
															+
														
 
															+    // host
														
 
															+    points_.resize(CAPACITY_POINT * config_.point_feature_size_);
														
 
															+
														
 
															+    // device
														
 
															+    voxels_d_ = cuda::make_unique<float[]>(voxels_size_);
														
 
															+    coordinates_d_ = cuda::make_unique<int[]>(coordinates_size_);
														
 
															+    num_points_per_voxel_d_ = cuda::make_unique<float[]>(config_.max_voxel_size_);
														
 
															+    encoder_in_features_d_ = cuda::make_unique<float[]>(encoder_in_feature_size_);
														
 
															+    pillar_features_d_ = cuda::make_unique<float[]>(pillar_features_size);
														
 
															+    spatial_features_d_ = cuda::make_unique<float[]>(spatial_features_size_);
														
 
															+    head_out_heatmap_d_ = cuda::make_unique<float[]>(grid_xy_size * config_.class_size_);
														
 
															+    head_out_offset_d_ = cuda::make_unique<float[]>(grid_xy_size * config_.head_out_offset_size_);
														
 
															+    head_out_z_d_ = cuda::make_unique<float[]>(grid_xy_size * config_.head_out_z_size_);
														
 
															+    head_out_dim_d_ = cuda::make_unique<float[]>(grid_xy_size * config_.head_out_dim_size_);
														
 
															+    head_out_rot_d_ = cuda::make_unique<float[]>(grid_xy_size * config_.head_out_rot_size_);
														
 
															+    head_out_vel_d_ = cuda::make_unique<float[]>(grid_xy_size * config_.head_out_vel_size_);
														
 
															+    points_d_ = cuda::make_unique<float[]>(CAPACITY_POINT * config_.point_feature_size_);
														
 
															+    voxels_buffer_d_ = cuda::make_unique<float[]>(voxels_buffer_size_);
														
 
															+    mask_d_ = cuda::make_unique<unsigned int[]>(mask_size_);
														
 
															+    num_voxels_d_ = cuda::make_unique<unsigned int[]>(1);
														
 
															 }
														
 
															 bool CenterPointTRT::detect(
														
@@ -105,13 +111,13 @@ bool CenterPointTRT::detect(
 
															 {
														
 
															     int64_t time1 = std::chrono::system_clock::now().time_since_epoch().count()/1000000;
														
 
															-    std::fill(voxels_.begin(), voxels_.end(), 0);
														
 
															-    std::fill(coordinates_.begin(), coordinates_.end(), -1);
														
 
															-    std::fill(num_points_per_voxel_.begin(), num_points_per_voxel_.end(), 0);
														
 
															-    CHECK_CUDA_ERROR(cudaMemsetAsync(
														
 
															-      encoder_in_features_d_.get(), 0, encoder_in_feature_size_ * sizeof(float), stream_));
														
 
															-    CHECK_CUDA_ERROR(
														
 
															-      cudaMemsetAsync(spatial_features_d_.get(), 0, spatial_features_size_ * sizeof(float), stream_));
														
 
															+//    std::fill(voxels_.begin(), voxels_.end(), 0);
														
 
															+//    std::fill(coordinates_.begin(), coordinates_.end(), -1);
														
 
															+//    std::fill(num_points_per_voxel_.begin(), num_points_per_voxel_.end(), 0);
														
 
															+//    CHECK_CUDA_ERROR(cudaMemsetAsync(
														
 
															+//      encoder_in_features_d_.get(), 0, encoder_in_feature_size_ * sizeof(float), stream_));
														
 
															+//    CHECK_CUDA_ERROR(
														
 
															+//      cudaMemsetAsync(spatial_features_d_.get(), 0, spatial_features_size_ * sizeof(float), stream_));
														
 
															     if (!preprocess(pc_ptr)) {
														
 
															         std::cout<<"Fail to preprocess and skip to detect."<<std::endl;
														
@@ -162,27 +168,33 @@ bool CenterPointTRT::preprocess(
 
															     if (!is_success) {
														
 
															       return false;
														
 
															     }
														
 
															-    num_voxels_ = vg_ptr_->pointsToVoxels(voxels_, coordinates_, num_points_per_voxel_);
														
 
															-    if (num_voxels_ == 0) {
														
 
															-      return false;
														
 
															-    }
														
 
															-
														
 
															-    const auto voxels_size =
														
 
															-      num_voxels_ * config_.max_point_in_voxel_size_ * config_.point_feature_size_;
														
 
															-    const auto coordinates_size = num_voxels_ * config_.point_dim_size_;
														
 
															-    // memcpy from host to device (not copy empty voxels)
														
 
															-    CHECK_CUDA_ERROR(cudaMemcpyAsync(
														
 
															-      voxels_d_.get(), voxels_.data(), voxels_size * sizeof(float), cudaMemcpyHostToDevice));
														
 
															-    CHECK_CUDA_ERROR(cudaMemcpyAsync(
														
 
															-      coordinates_d_.get(), coordinates_.data(), coordinates_size * sizeof(int),
														
 
															-      cudaMemcpyHostToDevice));
														
 
															+    const auto count = vg_ptr_->generateSweepPoints(points_);
														
 
															     CHECK_CUDA_ERROR(cudaMemcpyAsync(
														
 
															-      num_points_per_voxel_d_.get(), num_points_per_voxel_.data(), num_voxels_ * sizeof(float),
														
 
															-      cudaMemcpyHostToDevice));
														
 
															-    CHECK_CUDA_ERROR(cudaStreamSynchronize(stream_));
														
 
															+      points_d_.get(), points_.data(), count * config_.point_feature_size_ * sizeof(float),
														
 
															+      cudaMemcpyHostToDevice, stream_));
														
 
															+    CHECK_CUDA_ERROR(cudaMemsetAsync(num_voxels_d_.get(), 0, sizeof(unsigned int), stream_));
														
 
															+    CHECK_CUDA_ERROR(
														
 
															+      cudaMemsetAsync(voxels_buffer_d_.get(), 0, voxels_buffer_size_ * sizeof(float), stream_));
														
 
															+    CHECK_CUDA_ERROR(cudaMemsetAsync(mask_d_.get(), 0, mask_size_ * sizeof(int), stream_));
														
 
															+    CHECK_CUDA_ERROR(cudaMemsetAsync(voxels_d_.get(), 0, voxels_size_ * sizeof(float), stream_));
														
 
															+    CHECK_CUDA_ERROR(
														
 
															+      cudaMemsetAsync(coordinates_d_.get(), 0, coordinates_size_ * sizeof(int), stream_));
														
 
															+    CHECK_CUDA_ERROR(cudaMemsetAsync(
														
 
															+      num_points_per_voxel_d_.get(), 0, config_.max_voxel_size_ * sizeof(float), stream_));
														
 
															+
														
 
															+    CHECK_CUDA_ERROR(generateVoxels_random_launch(
														
 
															+      points_d_.get(), count, config_.range_min_x_, config_.range_max_x_, config_.range_min_y_,
														
 
															+      config_.range_max_y_, config_.range_min_z_, config_.range_max_z_, config_.voxel_size_x_,
														
 
															+      config_.voxel_size_y_, config_.voxel_size_z_, config_.grid_size_y_, config_.grid_size_x_,
														
 
															+      mask_d_.get(), voxels_buffer_d_.get(), stream_));
														
 
															+
														
 
															+    CHECK_CUDA_ERROR(generateBaseFeatures_launch(
														
 
															+      mask_d_.get(), voxels_buffer_d_.get(), config_.grid_size_y_, config_.grid_size_x_,
														
 
															+      config_.max_voxel_size_, num_voxels_d_.get(), voxels_d_.get(), num_points_per_voxel_d_.get(),
														
 
															+      coordinates_d_.get(), stream_));
														
 
															     CHECK_CUDA_ERROR(generateFeatures_launch(
														
 
															-      voxels_d_.get(), num_points_per_voxel_d_.get(), coordinates_d_.get(), num_voxels_,
														
 
															+      voxels_d_.get(), num_points_per_voxel_d_.get(), coordinates_d_.get(), num_voxels_d_.get(),
														
 
															       config_.max_voxel_size_, config_.voxel_size_x_, config_.voxel_size_y_, config_.voxel_size_z_,
														
 
															       config_.range_min_x_, config_.range_min_y_, config_.range_min_z_, encoder_in_features_d_.get(),
														
 
															       stream_));
														
@@ -227,26 +239,26 @@ bool CenterPointTRT::preprocess(
 
															 void CenterPointTRT::inference()
														
 
															 {
														
 
															-  if (!encoder_trt_ptr_->context_ || !head_trt_ptr_->context_) {
														
 
															-    throw std::runtime_error("Failed to create tensorrt context.");
														
 
															-  }
														
 
															+    if (!encoder_trt_ptr_->context_ || !head_trt_ptr_->context_) {
														
 
															+      throw std::runtime_error("Failed to create tensorrt context.");
														
 
															+    }
														
 
															-  // pillar encoder network
														
 
															-  std::vector<void *> encoder_buffers{encoder_in_features_d_.get(), pillar_features_d_.get()};
														
 
															-  encoder_trt_ptr_->context_->enqueueV2(encoder_buffers.data(), stream_, nullptr);
														
 
															-
														
 
															-  // scatter
														
 
															-  CHECK_CUDA_ERROR(scatterFeatures_launch(
														
 
															-    pillar_features_d_.get(), coordinates_d_.get(), num_voxels_, config_.max_voxel_size_,
														
 
															-    config_.encoder_out_feature_size_, config_.grid_size_x_, config_.grid_size_y_,
														
 
															-    spatial_features_d_.get(), stream_));
														
 
															-
														
 
															-  // head network
														
 
															-  std::vector<void *> head_buffers = {spatial_features_d_.get(), head_out_heatmap_d_.get(),
														
 
															-                                      head_out_offset_d_.get(),  head_out_z_d_.get(),
														
 
															-                                      head_out_dim_d_.get(),     head_out_rot_d_.get(),
														
 
															-                                      head_out_vel_d_.get()};
														
 
															-  head_trt_ptr_->context_->enqueueV2(head_buffers.data(), stream_, nullptr);
														
 
															+    // pillar encoder network
														
 
															+    std::vector<void *> encoder_buffers{encoder_in_features_d_.get(), pillar_features_d_.get()};
														
 
															+    encoder_trt_ptr_->context_->enqueueV2(encoder_buffers.data(), stream_, nullptr);
														
 
															+
														
 
															+    // scatter
														
 
															+    CHECK_CUDA_ERROR(scatterFeatures_launch(
														
 
															+      pillar_features_d_.get(), coordinates_d_.get(), num_voxels_d_.get(), config_.max_voxel_size_,
														
 
															+      config_.encoder_out_feature_size_, config_.grid_size_x_, config_.grid_size_y_,
														
 
															+      spatial_features_d_.get(), stream_));
														
 
															+
														
 
															+    // head network
														
 
															+    std::vector<void *> head_buffers = {spatial_features_d_.get(), head_out_heatmap_d_.get(),
														
 
															+                                        head_out_offset_d_.get(),  head_out_z_d_.get(),
														
 
															+                                        head_out_dim_d_.get(),     head_out_rot_d_.get(),
														
 
															+                                        head_out_vel_d_.get()};
														
 
															+    head_trt_ptr_->context_->enqueueV2(head_buffers.data(), stream_, nullptr);
														
 
															 }
														
 
															 void CenterPointTRT::postProcess(std::vector<Box3D> & det_boxes3d)
														
--- a/src/detection/detection_lidar_centerpoint/lib/network/network_trt.cpp
+++ b/src/detection/detection_lidar_centerpoint/lib/network/network_trt.cpp
@@ -59,6 +59,14 @@ bool HeadTRT::setProfile(
 
															   for (std::size_t ci = 0; ci < out_channel_sizes_.size(); ci++) {
														
 
															     auto out_name = network.getOutput(ci)->getName();
														
 
															+
														
 
															+    if (
														
 
															+      out_name == std::string("heatmap") &&
														
 
															+      network.getOutput(ci)->getDimensions().d[1] != static_cast<int32_t>(out_channel_sizes_[ci])) {
														
 
															+      std::cout
														
 
															+        << "Expected and actual number of classes do not match" << std::endl;
														
 
															+      return false;
														
 
															+    }
														
 
															     auto out_dims = nvinfer1::Dims4(
														
 
															       config_.batch_size_, out_channel_sizes_[ci], config_.down_grid_size_y_,
														
 
															       config_.down_grid_size_x_);
														
--- a/src/detection/detection_lidar_centerpoint/lib/network/scatter_kernel.cu
+++ b/src/detection/detection_lidar_centerpoint/lib/network/scatter_kernel.cu
@@ -24,7 +24,7 @@ const std::size_t THREADS_PER_BLOCK = 32;
 
															 namespace centerpoint
														
 
															 {
														
 
															 __global__ void scatterFeatures_kernel(
														
 
															-  const float * pillar_features, const int * coords, const std::size_t num_pillars,
														
 
															+  const float * pillar_features, const int * coords, const unsigned int * num_pillars,
														
 
															   const std::size_t pillar_feature_size, const std::size_t grid_size_x,
														
 
															   const std::size_t grid_size_y, float * scattered_features)
														
 
															 {
														
@@ -34,7 +34,7 @@ __global__ void scatterFeatures_kernel(
 
															   const auto pillar_i = blockIdx.x * THREADS_PER_BLOCK + threadIdx.x;
														
 
															   const auto feature_i = blockIdx.y * THREADS_PER_BLOCK + threadIdx.y;
														
 
															-  if (pillar_i >= num_pillars || feature_i >= pillar_feature_size) {
														
 
															+  if (pillar_i >= num_pillars[0] || feature_i >= pillar_feature_size) {
														
 
															     return;
														
 
															   }
														
@@ -50,7 +50,7 @@ __global__ void scatterFeatures_kernel(
 
															 // cspell: ignore divup
														
 
															 cudaError_t scatterFeatures_launch(
														
 
															-  const float * pillar_features, const int * coords, const std::size_t num_pillars,
														
 
															+  const float * pillar_features, const int * coords, const unsigned int * num_pillars,
														
 
															   const std::size_t max_voxel_size, const std::size_t encoder_out_feature_size,
														
 
															   const std::size_t grid_size_x, const std::size_t grid_size_y, float * scattered_features,
														
 
															   cudaStream_t stream)
														
--- a/src/detection/detection_lidar_centerpoint/lib/network/tensorrt_wrapper.cpp
+++ b/src/detection/detection_lidar_centerpoint/lib/network/tensorrt_wrapper.cpp
@@ -22,7 +22,9 @@
 
															 namespace centerpoint
														
 
															 {
														
 
															-TensorRTWrapper::TensorRTWrapper(const CenterPointConfig & config) : config_(config) {}
														
 
															+TensorRTWrapper::TensorRTWrapper(const CenterPointConfig & config) : config_(config)
														
 
															+{
														
 
															+}
														
 
															 TensorRTWrapper::~TensorRTWrapper()
														
 
															 {
														
@@ -38,7 +40,7 @@ bool TensorRTWrapper::init(
 
															   runtime_ =
														
 
															     tensorrt_common::TrtUniquePtr<nvinfer1::IRuntime>(nvinfer1::createInferRuntime(logger_));
														
 
															   if (!runtime_) {
														
 
															-    std::cout << "Fail to create runtime" << std::endl;
														
 
															+    std::cout << "Failed to create runtime" << std::endl;
														
 
															     return false;
														
 
															   }
														
@@ -47,7 +49,12 @@ bool TensorRTWrapper::init(
 
															   if (engine_file.is_open()) {
														
 
															     success = loadEngine(engine_path);
														
 
															   } else {
														
 
															+      std::cout<<"Applying optimizations and building TRT CUDA engine. Please wait"<<std::endl;
														
 
															+//    auto log_thread = logger_.log_throttle(
														
 
															+//      nvinfer1::ILogger::Severity::kINFO,
														
 
															+//      "Applying optimizations and building TRT CUDA engine. Please wait a minutes...", 5);
														
 
															     success = parseONNX(onnx_path, engine_path, precision);
														
 
															+//    logger_.stop_throttle(log_thread);
														
 
															   }
														
 
															   success &= createContext();
														
@@ -57,14 +64,15 @@ bool TensorRTWrapper::init(
 
															 bool TensorRTWrapper::createContext()
														
 
															 {
														
 
															   if (!engine_) {
														
 
															-    std::cout << "Fail to create context: Engine isn't created" << std::endl;
														
 
															+    std::cout
														
 
															+      << "Failed to create context: Engine was not created" << std::endl;
														
 
															     return false;
														
 
															   }
														
 
															   context_ =
														
 
															     tensorrt_common::TrtUniquePtr<nvinfer1::IExecutionContext>(engine_->createExecutionContext());
														
 
															   if (!context_) {
														
 
															-    std::cout << "Fail to create context" << std::endl;
														
 
															+    std::cout << "Failed to create context" << std::endl;
														
 
															     return false;
														
 
															   }
														
@@ -78,14 +86,14 @@ bool TensorRTWrapper::parseONNX(
 
															   auto builder =
														
 
															     tensorrt_common::TrtUniquePtr<nvinfer1::IBuilder>(nvinfer1::createInferBuilder(logger_));
														
 
															   if (!builder) {
														
 
															-    std::cout << "Fail to create builder" << std::endl;
														
 
															+    std::cout << "Failed to create builder" << std::endl;
														
 
															     return false;
														
 
															   }
														
 
															   auto config =
														
 
															     tensorrt_common::TrtUniquePtr<nvinfer1::IBuilderConfig>(builder->createBuilderConfig());
														
 
															   if (!config) {
														
 
															-    std::cout << "Fail to create config" << std::endl;
														
 
															+    std::cout << "Failed to create config" << std::endl;
														
 
															     return false;
														
 
															   }
														
 
															 #if (NV_TENSORRT_MAJOR * 1000) + (NV_TENSORRT_MINOR * 100) + NV_TENSOR_PATCH >= 8400
														
@@ -95,10 +103,11 @@ bool TensorRTWrapper::parseONNX(
 
															 #endif
														
 
															   if (precision == "fp16") {
														
 
															     if (builder->platformHasFastFp16()) {
														
 
															-      std::cout << "use TensorRT FP16 Inference" << std::endl;
														
 
															+      std::cout << "Using TensorRT FP16 Inference" << std::endl;
														
 
															       config->setFlag(nvinfer1::BuilderFlag::kFP16);
														
 
															     } else {
														
 
															-      std::cout << "TensorRT FP16 Inference isn't supported in this environment" << std::endl;
														
 
															+       std::cout
														
 
															+        << "TensorRT FP16 Inference isn't supported in this environment" << std::endl;
														
 
															     }
														
 
															   }
														
@@ -107,7 +116,7 @@ bool TensorRTWrapper::parseONNX(
 
															   auto network =
														
 
															     tensorrt_common::TrtUniquePtr<nvinfer1::INetworkDefinition>(builder->createNetworkV2(flag));
														
 
															   if (!network) {
														
 
															-    std::cout << "Fail to create network" << std::endl;
														
 
															+    std::cout << "Failed to create network" << std::endl;
														
 
															     return false;
														
 
															   }
														
@@ -116,22 +125,20 @@ bool TensorRTWrapper::parseONNX(
 
															   parser->parseFromFile(onnx_path.c_str(), static_cast<int>(nvinfer1::ILogger::Severity::kERROR));
														
 
															   if (!setProfile(*builder, *network, *config)) {
														
 
															-    std::cout << "Fail to set profile" << std::endl;
														
 
															+    std::cout << "Failed to set profile" << std::endl;
														
 
															     return false;
														
 
															   }
														
 
															-  std::cout << "Applying optimizations and building TRT CUDA engine (" << onnx_path << ") ..."
														
 
															-            << std::endl;
														
 
															   plan_ = tensorrt_common::TrtUniquePtr<nvinfer1::IHostMemory>(
														
 
															     builder->buildSerializedNetwork(*network, *config));
														
 
															   if (!plan_) {
														
 
															-    std::cout << "Fail to create serialized network" << std::endl;
														
 
															+    std::cout << "Failed to create serialized network" << std::endl;
														
 
															     return false;
														
 
															   }
														
 
															   engine_ = tensorrt_common::TrtUniquePtr<nvinfer1::ICudaEngine>(
														
 
															     runtime_->deserializeCudaEngine(plan_->data(), plan_->size()));
														
 
															   if (!engine_) {
														
 
															-    std::cout << "Fail to create engine" << std::endl;
														
 
															+    std::cout << "Failed to create engine" << std::endl;
														
 
															     return false;
														
 
															   }
														
--- a/src/detection/detection_lidar_centerpoint/lib/postprocess/non_maximum_suppression.cpp
+++ b/src/detection/detection_lidar_centerpoint/lib/postprocess/non_maximum_suppression.cpp
@@ -14,10 +14,9 @@
 
															 #include "lidar_centerpoint/postprocess/non_maximum_suppression.hpp"
														
 
															-#include "perception_utils/geometry.hpp"
														
 
															-#include "perception_utils/perception_utils.hpp"
														
 
															-//#include "tier4_autoware_utils/tier4_autoware_utils.hpp"
														
 
															-
														
 
															+#include "object_recognition_utils/geometry.hpp"
														
 
															+#include "object_recognition_utils/object_recognition_utils.hpp"
														
 
															+#include "tier4_autoware_utils/geometry/geometry.hpp"
														
 
															 namespace centerpoint
														
 
															 {
														
@@ -41,8 +40,8 @@ bool NonMaximumSuppression::isTargetLabel(const uint8_t label)
 
															 bool NonMaximumSuppression::isTargetPairObject(
														
 
															   const DetectedObject & object1, const DetectedObject & object2)
														
 
															 {
														
 
															-  const auto label1 = perception_utils::getHighestProbLabel(object1.classification);
														
 
															-  const auto label2 = perception_utils::getHighestProbLabel(object2.classification);
														
 
															+  const auto label1 = object_recognition_utils::getHighestProbLabel(object1.classification);
														
 
															+  const auto label2 = object_recognition_utils::getHighestProbLabel(object2.classification);
														
 
															   if (isTargetLabel(label1) && isTargetLabel(label2)) {
														
 
															     return true;
														
@@ -50,7 +49,7 @@ bool NonMaximumSuppression::isTargetPairObject(
 
															   const auto search_sqr_dist_2d = params_.search_distance_2d_ * params_.search_distance_2d_;
														
 
															   const auto sqr_dist_2d = tier4_autoware_utils::calcSquaredDistance2d(
														
 
															-    perception_utils::getPose(object1), perception_utils::getPose(object2));
														
 
															+    object_recognition_utils::getPose(object1), object_recognition_utils::getPose(object2));
														
 
															   return sqr_dist_2d <= search_sqr_dist_2d;
														
 
															 }
														
@@ -69,7 +68,7 @@ Eigen::MatrixXd NonMaximumSuppression::generateIoUMatrix(
 
															       }
														
 
															       if (params_.nms_type_ == NMS_TYPE::IoU_BEV) {
														
 
															-        const double iou = perception_utils::get2dIoU(target_obj, source_obj);
														
 
															+        const double iou = object_recognition_utils::get2dIoU(target_obj, source_obj);
														
 
															         triangular_matrix(target_i, source_i) = iou;
														
 
															         // NOTE: If the target object has any objects with iou > iou_threshold, it
														
 
															         // will be suppressed regardless of later results.
														
--- a/src/detection/detection_lidar_centerpoint/lib/postprocess/postprocess_kernel.cu
+++ b/src/detection/detection_lidar_centerpoint/lib/postprocess/postprocess_kernel.cu
@@ -48,7 +48,10 @@ struct score_greater
 
															   __device__ bool operator()(const Box3D & lb, const Box3D & rb) { return lb.score > rb.score; }
														
 
															 };
														
 
															-__device__ inline float sigmoid(float x) { return 1.0f / (1.0f + expf(-x)); }
														
 
															+__device__ inline float sigmoid(float x)
														
 
															+{
														
 
															+  return 1.0f / (1.0f + expf(-x));
														
 
															+}
														
 
															 __global__ void generateBoxes3D_kernel(
														
 
															   const float * out_heatmap, const float * out_offset, const float * out_z, const float * out_dim,
														
--- a/src/detection/detection_lidar_centerpoint/lib/preprocess/pointcloud_densification.cpp
+++ b/src/detection/detection_lidar_centerpoint/lib/preprocess/pointcloud_densification.cpp
@@ -96,7 +96,8 @@ bool PointCloudDensification::enqueuePointCloud(
 
															 void PointCloudDensification::enqueue(const pcl::PointCloud<pcl::PointXYZI>::Ptr & pc_ptr)
														
 
															 {
														
 
															-    PointCloudWithTransform pointcloud = {pc_ptr};
														
 
															+ //   affine_world2current_ = affine_world2current;
														
 
															+    PointCloudWithTransform pointcloud = {pc_ptr,affine_world2current_};
														
 
															     pointcloud_cache_.push_front(pointcloud);
														
 
															 }
														
 
															 //void PointCloudDensification::enqueue(
														
--- a/src/detection/detection_lidar_centerpoint/lib/preprocess/preprocess_kernel.cu
+++ b/src/detection/detection_lidar_centerpoint/lib/preprocess/preprocess_kernel.cu
@@ -41,9 +41,104 @@ const std::size_t ENCODER_IN_FEATURE_SIZE = 9;  // the same as encoder_in_featur
 
															 namespace centerpoint
														
 
															 {
														
 
															+__global__ void generateVoxels_random_kernel(
														
 
															+  const float * points, size_t points_size, float min_x_range, float max_x_range, float min_y_range,
														
 
															+  float max_y_range, float min_z_range, float max_z_range, float pillar_x_size, float pillar_y_size,
														
 
															+  float pillar_z_size, int grid_y_size, int grid_x_size, unsigned int * mask, float * voxels)
														
 
															+{
														
 
															+  int point_idx = blockIdx.x * blockDim.x + threadIdx.x;
														
 
															+  if (point_idx >= points_size) return;
														
 
															+
														
 
															+  float4 point = ((float4 *)points)[point_idx];
														
 
															+
														
 
															+  if (
														
 
															+    point.x < min_x_range || point.x >= max_x_range || point.y < min_y_range ||
														
 
															+    point.y >= max_y_range || point.z < min_z_range || point.z >= max_z_range)
														
 
															+    return;
														
 
															+
														
 
															+  int voxel_idx = floorf((point.x - min_x_range) / pillar_x_size);
														
 
															+  int voxel_idy = floorf((point.y - min_y_range) / pillar_y_size);
														
 
															+  unsigned int voxel_index = voxel_idy * grid_x_size + voxel_idx;
														
 
															+
														
 
															+  unsigned int point_id = atomicAdd(&(mask[voxel_index]), 1);
														
 
															+
														
 
															+  if (point_id >= MAX_POINT_IN_VOXEL_SIZE) return;
														
 
															+  float * address = voxels + (voxel_index * MAX_POINT_IN_VOXEL_SIZE + point_id) * 4;
														
 
															+  atomicExch(address + 0, point.x);
														
 
															+  atomicExch(address + 1, point.y);
														
 
															+  atomicExch(address + 2, point.z);
														
 
															+  atomicExch(address + 3, point.w);
														
 
															+}
														
 
															+
														
 
															+cudaError_t generateVoxels_random_launch(
														
 
															+  const float * points, size_t points_size, float min_x_range, float max_x_range, float min_y_range,
														
 
															+  float max_y_range, float min_z_range, float max_z_range, float pillar_x_size, float pillar_y_size,
														
 
															+  float pillar_z_size, int grid_y_size, int grid_x_size, unsigned int * mask, float * voxels,
														
 
															+  cudaStream_t stream)
														
 
															+{
														
 
															+  dim3 blocks((points_size + 256 - 1) / 256);
														
 
															+  dim3 threads(256);
														
 
															+  generateVoxels_random_kernel<<<blocks, threads, 0, stream>>>(
														
 
															+    points, points_size, min_x_range, max_x_range, min_y_range, max_y_range, min_z_range,
														
 
															+    max_z_range, pillar_x_size, pillar_y_size, pillar_z_size, grid_y_size, grid_x_size, mask,
														
 
															+    voxels);
														
 
															+  cudaError_t err = cudaGetLastError();
														
 
															+  return err;
														
 
															+}
														
 
															+
														
 
															+__global__ void generateBaseFeatures_kernel(
														
 
															+  unsigned int * mask, float * voxels, int grid_y_size, int grid_x_size, int max_voxel_size,
														
 
															+  unsigned int * pillar_num, float * voxel_features, float * voxel_num, int * voxel_idxs)
														
 
															+{
														
 
															+  unsigned int voxel_idx = blockIdx.x * blockDim.x + threadIdx.x;
														
 
															+  unsigned int voxel_idy = blockIdx.y * blockDim.y + threadIdx.y;
														
 
															+
														
 
															+  if (voxel_idx >= grid_x_size || voxel_idy >= grid_y_size) return;
														
 
															+
														
 
															+  unsigned int voxel_index = voxel_idy * grid_x_size + voxel_idx;
														
 
															+  unsigned int count = mask[voxel_index];
														
 
															+  if (!(count > 0)) return;
														
 
															+  count = count < MAX_POINT_IN_VOXEL_SIZE ? count : MAX_POINT_IN_VOXEL_SIZE;
														
 
															+
														
 
															+  unsigned int current_pillarId = 0;
														
 
															+  current_pillarId = atomicAdd(pillar_num, 1);
														
 
															+  if (current_pillarId > max_voxel_size - 1) return;
														
 
															+
														
 
															+  voxel_num[current_pillarId] = count;
														
 
															+
														
 
															+  uint3 idx = {0, voxel_idy, voxel_idx};
														
 
															+  ((uint3 *)voxel_idxs)[current_pillarId] = idx;
														
 
															+
														
 
															+  for (int i = 0; i < count; i++) {
														
 
															+    int inIndex = voxel_index * MAX_POINT_IN_VOXEL_SIZE + i;
														
 
															+    int outIndex = current_pillarId * MAX_POINT_IN_VOXEL_SIZE + i;
														
 
															+    ((float4 *)voxel_features)[outIndex] = ((float4 *)voxels)[inIndex];
														
 
															+  }
														
 
															+
														
 
															+  // clear buffer for next infer
														
 
															+  atomicExch(mask + voxel_index, 0);
														
 
															+}
														
 
															+
														
 
															+// create 4 channels
														
 
															+cudaError_t generateBaseFeatures_launch(
														
 
															+  unsigned int * mask, float * voxels, int grid_y_size, int grid_x_size, int max_voxel_size,
														
 
															+  unsigned int * pillar_num, float * voxel_features, float * voxel_num, int * voxel_idxs,
														
 
															+  cudaStream_t stream)
														
 
															+{
														
 
															+  dim3 threads = {32, 32};
														
 
															+  dim3 blocks = {
														
 
															+    (grid_x_size + threads.x - 1) / threads.x, (grid_y_size + threads.y - 1) / threads.y};
														
 
															+
														
 
															+  generateBaseFeatures_kernel<<<blocks, threads, 0, stream>>>(
														
 
															+    mask, voxels, grid_y_size, grid_x_size, max_voxel_size, pillar_num, voxel_features, voxel_num,
														
 
															+    voxel_idxs);
														
 
															+  cudaError_t err = cudaGetLastError();
														
 
															+  return err;
														
 
															+}
														
 
															+
														
 
															 __global__ void generateFeatures_kernel(
														
 
															   const float * voxel_features, const float * voxel_num_points, const int * coords,
														
 
															-  const std::size_t num_voxels, const float voxel_x, const float voxel_y, const float voxel_z,
														
 
															+  const unsigned int * num_voxels, const float voxel_x, const float voxel_y, const float voxel_z,
														
 
															   const float range_min_x, const float range_min_y, const float range_min_z, float * features)
														
 
															 {
														
 
															   // voxel_features (float): (max_voxel_size, max_point_in_voxel_size, point_feature_size)
														
@@ -53,7 +148,8 @@ __global__ void generateFeatures_kernel(
 
															   int point_idx = threadIdx.x % MAX_POINT_IN_VOXEL_SIZE;
														
 
															   int pillar_idx_inBlock = threadIdx.x / MAX_POINT_IN_VOXEL_SIZE;  // max_point_in_voxel_size
														
 
															-  if (pillar_idx >= num_voxels) return;
														
 
															+  unsigned int num_pillars = num_voxels[0];
														
 
															+  if (pillar_idx >= num_pillars) return;
														
 
															   // load src
														
 
															   __shared__ float4 pillarSM[WARPS_PER_BLOCK][MAX_POINT_IN_VOXEL_SIZE];
														
@@ -144,7 +240,7 @@ __global__ void generateFeatures_kernel(
 
															 // cspell: ignore divup
														
 
															 cudaError_t generateFeatures_launch(
														
 
															   const float * voxel_features, const float * voxel_num_points, const int * coords,
														
 
															-  const std::size_t num_voxels, const std::size_t max_voxel_size, const float voxel_size_x,
														
 
															+  const unsigned int * num_voxels, const std::size_t max_voxel_size, const float voxel_size_x,
														
 
															   const float voxel_size_y, const float voxel_size_z, const float range_min_x,
														
 
															   const float range_min_y, const float range_min_z, float * features, cudaStream_t stream)
														
 
															 {
														
--- a/src/detection/detection_lidar_centerpoint/lib/preprocess/voxel_generator.cpp
+++ b/src/detection/detection_lidar_centerpoint/lib/preprocess/voxel_generator.cpp
@@ -54,87 +54,130 @@ bool VoxelGeneratorTemplate::enqueuePointCloud(
 
															 //}
														
 
															-std::size_t VoxelGenerator::pointsToVoxels(
														
 
															-  std::vector<float> & voxels, std::vector<int> & coordinates,
														
 
															-  std::vector<float> & num_points_per_voxel)
														
 
															+std::size_t VoxelGenerator::generateSweepPoints(std::vector<float> & points)
														
 
															 {
														
 
															-  // voxels (float): (max_voxel_size * max_point_in_voxel_size * point_feature_size)
														
 
															-  // coordinates (int): (max_voxel_size * point_dim_size)
														
 
															-  // num_points_per_voxel (float): (max_voxel_size)
														
 
															-
														
 
															-  const std::size_t grid_size = config_.grid_size_z_ * config_.grid_size_y_ * config_.grid_size_x_;
														
 
															-  std::vector<int> coord_to_voxel_idx(grid_size, -1);
														
 
															-
														
 
															-  std::size_t voxel_cnt = 0;  // @return
														
 
															-  std::vector<float> point;
														
 
															-  point.resize(config_.point_feature_size_);
														
 
															-  std::vector<float> coord_zyx;
														
 
															-  coord_zyx.resize(config_.point_dim_size_);
														
 
															-  bool out_of_range;
														
 
															-  std::size_t point_cnt;
														
 
															-  int c, coord_idx, voxel_idx;
														
 
															   Eigen::Vector3f point_current, point_past;
														
 
															-
														
 
															+  size_t point_counter{};
														
 
															   for (auto pc_cache_iter = pd_ptr_->getPointCloudCacheIter(); !pd_ptr_->isCacheEnd(pc_cache_iter);
														
 
															        pc_cache_iter++) {
														
 
															-    pcl::PointCloud<pcl::PointXYZI>::Ptr pc_ptr = pc_cache_iter->pc_ptr;
														
 
															+    auto pc_msg = pc_cache_iter->pc_ptr;
														
 
															+    auto affine_past2current =
														
 
															+      pd_ptr_->getAffineWorldToCurrent();// * pc_cache_iter->affine_past2world;   //Do not use last point
														
 
															+//    float time_lag = static_cast<float>(
														
 
															+//      pd_ptr_->getCurrentTimestamp() - rclcpp::Time(pc_msg.header.stamp).seconds());
														
 
															+
														
 
															+    float time_lag = 0;
														
 
															+    pcl::PointCloud<pcl::PointXYZI>::Ptr pc_ptr = pc_cache_iter->pc_ptr;
														
 
															     int nsize = pc_ptr->points.size();
														
 
															     int i;
														
 
															     for (i=0;i<nsize;i++) {
														
 
															+        points.at(point_counter * config_.point_feature_size_) = pc_ptr->at(i)._PointXYZI::x;//   point_current.x();
														
 
															+        points.at(point_counter * config_.point_feature_size_ + 1) = pc_ptr->at(i)._PointXYZI::y;// point_current.y();
														
 
															+        points.at(point_counter * config_.point_feature_size_ + 2) = pc_ptr->at(i)._PointXYZI::z;// point_current.z();
														
 
															+        points.at(point_counter * config_.point_feature_size_ + 3) = time_lag;
														
 
															+        ++point_counter;
														
 
															+    }
														
 
															+
														
 
															+//    for (sensor_msgs::PointCloud2ConstIterator<float> x_iter(pc_msg, "x"), y_iter(pc_msg, "y"),
														
 
															+//         z_iter(pc_msg, "z");
														
 
															+//         x_iter != x_iter.end(); ++x_iter, ++y_iter, ++z_iter) {
														
 
															+//      point_past << *x_iter, *y_iter, *z_iter;
														
 
															+//      point_current = affine_past2current * point_past;
														
 
															+//      points.at(point_counter * config_.point_feature_size_) = point_current.x();
														
 
															+//      points.at(point_counter * config_.point_feature_size_ + 1) = point_current.y();
														
 
															+//      points.at(point_counter * config_.point_feature_size_ + 2) = point_current.z();
														
 
															+//      points.at(point_counter * config_.point_feature_size_ + 3) = time_lag;
														
 
															+//      ++point_counter;
														
 
															-      point[0] = pc_ptr->points.at(i).x;
														
 
															-      point[1] = pc_ptr->points.at(i).y;
														
 
															-      point[2] = pc_ptr->points.at(i).z;
														
 
															-      point[3] = 0;
														
 
															-
														
 
															-      out_of_range = false;
														
 
															-      for (std::size_t di = 0; di < config_.point_dim_size_; di++) {
														
 
															-        c = static_cast<int>((point[di] - range_[di]) * recip_voxel_size_[di]);
														
 
															-        if (c < 0 || c >= grid_size_[di]) {
														
 
															-          out_of_range = true;
														
 
															-          break;
														
 
															-        }
														
 
															-        coord_zyx[config_.point_dim_size_ - di - 1] = c;
														
 
															-      }
														
 
															-      if (out_of_range) {
														
 
															-        continue;
														
 
															-      }
														
 
															-
														
 
															-      coord_idx = coord_zyx[0] * config_.grid_size_y_ * config_.grid_size_x_ +
														
 
															-                  coord_zyx[1] * config_.grid_size_x_ + coord_zyx[2];
														
 
															-      voxel_idx = coord_to_voxel_idx[coord_idx];
														
 
															-      if (voxel_idx == -1) {
														
 
															-        voxel_idx = voxel_cnt;
														
 
															-        if (voxel_cnt >= config_.max_voxel_size_) {
														
 
															-          continue;
														
 
															-        }
														
 
															-
														
 
															-        voxel_cnt++;
														
 
															-        coord_to_voxel_idx[coord_idx] = voxel_idx;
														
 
															-        for (std::size_t di = 0; di < config_.point_dim_size_; di++) {
														
 
															-          coordinates[voxel_idx * config_.point_dim_size_ + di] = coord_zyx[di];
														
 
															-        }
														
 
															-      }
														
 
															-
														
 
															-      point_cnt = num_points_per_voxel[voxel_idx];
														
 
															-      if (point_cnt < config_.max_point_in_voxel_size_) {
														
 
															-        for (std::size_t fi = 0; fi < config_.point_feature_size_; fi++) {
														
 
															-          voxels
														
 
															-            [voxel_idx * config_.max_point_in_voxel_size_ * config_.point_feature_size_ +
														
 
															-             point_cnt * config_.point_feature_size_ + fi] = point[fi];
														
 
															-        }
														
 
															-        num_points_per_voxel[voxel_idx]++;
														
 
															-      }
														
 
															-    }
														
 
															   }
														
 
															-
														
 
															-  return voxel_cnt;
														
 
															+  return point_counter;
														
 
															 }
														
 
															+//std::size_t VoxelGenerator::pointsToVoxels(
														
 
															+//  std::vector<float> & voxels, std::vector<int> & coordinates,
														
 
															+//  std::vector<float> & num_points_per_voxel)
														
 
															+//{
														
 
															+//  // voxels (float): (max_voxel_size * max_point_in_voxel_size * point_feature_size)
														
 
															+//  // coordinates (int): (max_voxel_size * point_dim_size)
														
 
															+//  // num_points_per_voxel (float): (max_voxel_size)
														
 
															+
														
 
															+//  const std::size_t grid_size = config_.grid_size_z_ * config_.grid_size_y_ * config_.grid_size_x_;
														
 
															+//  std::vector<int> coord_to_voxel_idx(grid_size, -1);
														
 
															+
														
 
															+//  std::size_t voxel_cnt = 0;  // @return
														
 
															+//  std::vector<float> point;
														
 
															+//  point.resize(config_.point_feature_size_);
														
 
															+//  std::vector<float> coord_zyx;
														
 
															+//  coord_zyx.resize(config_.point_dim_size_);
														
 
															+//  bool out_of_range;
														
 
															+//  std::size_t point_cnt;
														
 
															+//  int c, coord_idx, voxel_idx;
														
 
															+//  Eigen::Vector3f point_current, point_past;
														
 
															+
														
 
															+//  for (auto pc_cache_iter = pd_ptr_->getPointCloudCacheIter(); !pd_ptr_->isCacheEnd(pc_cache_iter);
														
 
															+//       pc_cache_iter++) {
														
 
															+//    pcl::PointCloud<pcl::PointXYZI>::Ptr pc_ptr = pc_cache_iter->pc_ptr;
														
 
															+
														
 
															+//    int nsize = pc_ptr->points.size();
														
 
															+//    int i;
														
 
															+
														
 
															+//    for (i=0;i<nsize;i++) {
														
 
															+
														
 
															+
														
 
															+//      point[0] = pc_ptr->points.at(i).x;
														
 
															+//      point[1] = pc_ptr->points.at(i).y;
														
 
															+//      point[2] = pc_ptr->points.at(i).z;
														
 
															+//      point[3] = 0;
														
 
															+
														
 
															+//      out_of_range = false;
														
 
															+//      for (std::size_t di = 0; di < config_.point_dim_size_; di++) {
														
 
															+//        c = static_cast<int>((point[di] - range_[di]) * recip_voxel_size_[di]);
														
 
															+//        if (c < 0 || c >= grid_size_[di]) {
														
 
															+//          out_of_range = true;
														
 
															+//          break;
														
 
															+//        }
														
 
															+//        coord_zyx[config_.point_dim_size_ - di - 1] = c;
														
 
															+//      }
														
 
															+//      if (out_of_range) {
														
 
															+//        continue;
														
 
															+//      }
														
 
															+
														
 
															+//      coord_idx = coord_zyx[0] * config_.grid_size_y_ * config_.grid_size_x_ +
														
 
															+//                  coord_zyx[1] * config_.grid_size_x_ + coord_zyx[2];
														
 
															+//      voxel_idx = coord_to_voxel_idx[coord_idx];
														
 
															+//      if (voxel_idx == -1) {
														
 
															+//        voxel_idx = voxel_cnt;
														
 
															+//        if (voxel_cnt >= config_.max_voxel_size_) {
														
 
															+//          continue;
														
 
															+//        }
														
 
															+
														
 
															+//        voxel_cnt++;
														
 
															+//        coord_to_voxel_idx[coord_idx] = voxel_idx;
														
 
															+//        for (std::size_t di = 0; di < config_.point_dim_size_; di++) {
														
 
															+//          coordinates[voxel_idx * config_.point_dim_size_ + di] = coord_zyx[di];
														
 
															+//        }
														
 
															+//      }
														
 
															+
														
 
															+//      point_cnt = num_points_per_voxel[voxel_idx];
														
 
															+//      if (point_cnt < config_.max_point_in_voxel_size_) {
														
 
															+//        for (std::size_t fi = 0; fi < config_.point_feature_size_; fi++) {
														
 
															+//          voxels
														
 
															+//            [voxel_idx * config_.max_point_in_voxel_size_ * config_.point_feature_size_ +
														
 
															+//             point_cnt * config_.point_feature_size_ + fi] = point[fi];
														
 
															+//        }
														
 
															+//        num_points_per_voxel[voxel_idx]++;
														
 
															+//      }
														
 
															+//    }
														
 
															+//  }
														
 
															+
														
 
															+//  return voxel_cnt;
														
 
															+//}
														
 
															+
														
 
															+
														
 
															 //std::size_t VoxelGenerator::pointsToVoxels(
														
 
															 //  std::vector<float> & voxels, std::vector<int> & coordinates,
														
 
															 //  std::vector<float> & num_points_per_voxel)
														
--- a/src/detection/detection_lidar_centerpoint/main.cpp
+++ b/src/detection/detection_lidar_centerpoint/main.cpp
@@ -1,6 +1,6 @@
 
															 #include <QCoreApplication>
														
 
															-
														
 
															+#include <yaml-cpp/yaml.h>
														
 
															 #include <lidar_centerpoint/centerpoint_config.hpp>
														
 
															 #include <lidar_centerpoint/centerpoint_trt.hpp>
														
@@ -16,12 +16,19 @@ using namespace centerpoint;
 
															 std::unique_ptr<CenterPointTRT> detector_ptr_{nullptr};
														
 
															+void LoadYaml(std::string stryamlname,std::vector<int64_t> & allow_remapping_by_area_matrix
														
 
															+              )
														
 
															+{
														
 
															+
														
 
															+}
														
 
															+
														
 
															 void init()
														
 
															 {
														
 
															     const float score_threshold = 0.35;
														
 
															-    const float circle_nms_dist_threshold =1.5;
														
 
															+    const float circle_nms_dist_threshold =0.5;
														
 
															   //    static_cast<float>(this->declare_parameter<double>("circle_nms_dist_threshold"));
														
 
															     std::vector<double> yaw_norm_thresholds ;
														
 
															+    yaw_norm_thresholds.push_back(0.3);yaw_norm_thresholds.push_back(0.3);yaw_norm_thresholds.push_back(0.3);yaw_norm_thresholds.push_back(0.0);
														
 
															     const std::string densification_world_frame_id = "map";
														
 
															     const int densification_num_past_frames = 1;
														
 
															     const std::string trt_precision = "fp16";
														
@@ -119,9 +126,22 @@ void ListenPointCloud(const char * strdata,const unsigned int nSize,const unsign
 
															 }
														
 
															-void testdet()
														
 
															+
														
 
															+#include <pcl/io/pcd_io.h>
														
 
															+void testdet(std::string & path)
														
 
															 {
														
 
															+    pcl::PointCloud<pcl::PointXYZI>::Ptr point_cloud(
														
 
															+                new pcl::PointCloud<pcl::PointXYZI>());
														
 
															+    pcl::io::loadPCDFile<pcl::PointXYZI>(path,*point_cloud);
														
 
															+
														
 
															+    std::vector<Box3D> det_boxes3d;
														
 
															+    int i;
														
 
															+    for(i=0;i<10;i++)
														
 
															+    {
														
 
															+    detector_ptr_ ->detect(point_cloud,det_boxes3d);
														
 
															+    std::cout<<" box size: "<<det_boxes3d.size()<<std::endl;
														
 
															+    }
														
 
															 }
														
@@ -130,6 +150,9 @@ int main(int argc, char *argv[])
 
															     QCoreApplication a(argc, argv);
														
 
															     init();
														
 
															+    std::string path = "/home/nvidia/1.pcd";
														
 
															+    testdet(path);
														
 
															+
														
 
															     gpa = iv::modulecomm::RegisterRecv("lidarpc_center",ListenPointCloud);
														
 
															     return a.exec();