Browse Source

change detection_lidar_centerpoint, test run ok, not complete.

yuchuli 10 months ago
parent
commit
2ad138b83e
16 changed files with 444 additions and 183 deletions
  1. 26 0
      src/detection/detection_lidar_centerpoint/detection_lidar_centerpoint.pro
  2. 32 4
      src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/centerpoint_trt.hpp
  3. 1 1
      src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/network/scatter_kernel.hpp
  4. 1 0
      src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/preprocess/pointcloud_densification.hpp
  5. 12 1
      src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/preprocess/preprocess_kernel.hpp
  6. 9 6
      src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/preprocess/voxel_generator.hpp
  7. 84 72
      src/detection/detection_lidar_centerpoint/lib/centerpoint_trt.cpp
  8. 8 0
      src/detection/detection_lidar_centerpoint/lib/network/network_trt.cpp
  9. 3 3
      src/detection/detection_lidar_centerpoint/lib/network/scatter_kernel.cu
  10. 21 14
      src/detection/detection_lidar_centerpoint/lib/network/tensorrt_wrapper.cpp
  11. 7 8
      src/detection/detection_lidar_centerpoint/lib/postprocess/non_maximum_suppression.cpp
  12. 4 1
      src/detection/detection_lidar_centerpoint/lib/postprocess/postprocess_kernel.cu
  13. 2 1
      src/detection/detection_lidar_centerpoint/lib/preprocess/pointcloud_densification.cpp
  14. 99 3
      src/detection/detection_lidar_centerpoint/lib/preprocess/preprocess_kernel.cu
  15. 109 66
      src/detection/detection_lidar_centerpoint/lib/preprocess/voxel_generator.cpp
  16. 26 3
      src/detection/detection_lidar_centerpoint/main.cpp

+ 26 - 0
src/detection/detection_lidar_centerpoint/detection_lidar_centerpoint.pro

@@ -70,6 +70,10 @@ LIBS += -lrt -ldl -lnvinfer -lcudnn  -lcudart -lnvparsers -lnvonnxparser -lnvinf
     error( "Couldn't find the ivprotobuf.pri file!" )
 }
 
+!include(../../../include/ivyaml-cpp.pri ) {
+    error( "Couldn't find the ivyaml-cpp.pri file!" )
+}
+
 INCLUDEPATH += $$PWD/../../include/msgtype
 
 
@@ -174,3 +178,25 @@ HEADERS += \
     ../../include/msgtype/object.pb.h \
     ../../include/msgtype/objectarray.pb.h
 
+
+
+
+unix:LIBS +=  -lpcl_common\
+        -lpcl_features\
+        -lpcl_filters\
+        -lpcl_io\
+        -lpcl_io_ply\
+        -lpcl_kdtree\
+        -lpcl_keypoints\
+        -lpcl_octree\
+        -lpcl_outofcore\
+        -lpcl_people\
+        -lpcl_recognition\
+        -lpcl_registration\
+        -lpcl_sample_consensus\
+        -lpcl_search\
+        -lpcl_segmentation\
+        -lpcl_surface\
+        -lpcl_tracking\
+        -lpcl_visualization
+

+ 32 - 4
src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/centerpoint_trt.hpp

@@ -32,6 +32,7 @@
 
 namespace centerpoint
 {
+static constexpr size_t CAPACITY_POINT = 1000000;
 class NetworkParam
 {
 public:
@@ -88,14 +89,16 @@ protected:
   std::unique_ptr<PostProcessCUDA> post_proc_ptr_{nullptr};
   cudaStream_t stream_{nullptr};
 
+
   std::size_t class_size_{0};
   CenterPointConfig config_;
-  std::size_t num_voxels_{0};
   std::size_t encoder_in_feature_size_{0};
   std::size_t spatial_features_size_{0};
-  std::vector<float> voxels_;
-  std::vector<int> coordinates_;
-  std::vector<float> num_points_per_voxel_;
+  std::size_t voxels_buffer_size_{0};
+  std::size_t mask_size_{0};
+  std::size_t voxels_size_{0};
+  std::size_t coordinates_size_{0};
+  std::vector<float> points_;
   cuda::unique_ptr<float[]> voxels_d_{nullptr};
   cuda::unique_ptr<int[]> coordinates_d_{nullptr};
   cuda::unique_ptr<float[]> num_points_per_voxel_d_{nullptr};
@@ -108,6 +111,31 @@ protected:
   cuda::unique_ptr<float[]> head_out_dim_d_{nullptr};
   cuda::unique_ptr<float[]> head_out_rot_d_{nullptr};
   cuda::unique_ptr<float[]> head_out_vel_d_{nullptr};
+  cuda::unique_ptr<float[]> points_d_{nullptr};
+  cuda::unique_ptr<float[]> voxels_buffer_d_{nullptr};
+  cuda::unique_ptr<unsigned int[]> mask_d_{nullptr};
+  cuda::unique_ptr<unsigned int[]> num_voxels_d_{nullptr};
+
+//  std::size_t class_size_{0};
+//  CenterPointConfig config_;
+//  std::size_t num_voxels_{0};
+//  std::size_t encoder_in_feature_size_{0};
+//  std::size_t spatial_features_size_{0};
+//  std::vector<float> voxels_;
+//  std::vector<int> coordinates_;
+//  std::vector<float> num_points_per_voxel_;
+//  cuda::unique_ptr<float[]> voxels_d_{nullptr};
+//  cuda::unique_ptr<int[]> coordinates_d_{nullptr};
+//  cuda::unique_ptr<float[]> num_points_per_voxel_d_{nullptr};
+//  cuda::unique_ptr<float[]> encoder_in_features_d_{nullptr};
+//  cuda::unique_ptr<float[]> pillar_features_d_{nullptr};
+//  cuda::unique_ptr<float[]> spatial_features_d_{nullptr};
+//  cuda::unique_ptr<float[]> head_out_heatmap_d_{nullptr};
+//  cuda::unique_ptr<float[]> head_out_offset_d_{nullptr};
+//  cuda::unique_ptr<float[]> head_out_z_d_{nullptr};
+//  cuda::unique_ptr<float[]> head_out_dim_d_{nullptr};
+//  cuda::unique_ptr<float[]> head_out_rot_d_{nullptr};
+//  cuda::unique_ptr<float[]> head_out_vel_d_{nullptr};
 };
 
 }  // namespace centerpoint

+ 1 - 1
src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/network/scatter_kernel.hpp

@@ -21,7 +21,7 @@
 namespace centerpoint
 {
 cudaError_t scatterFeatures_launch(
-  const float * pillar_features, const int * coords, const std::size_t num_pillars,
+  const float * pillar_features, const int * coords, const unsigned int * num_pillars,
   const std::size_t max_voxel_size, const std::size_t encoder_out_feature_size,
   const std::size_t grid_size_x, const std::size_t grid_size_y, float * scattered_features,
   cudaStream_t stream);

+ 1 - 0
src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/preprocess/pointcloud_densification.hpp

@@ -52,6 +52,7 @@ private:
 struct PointCloudWithTransform
 {
   pcl::PointCloud<pcl::PointXYZI>::Ptr pc_ptr;
+  Eigen::Affine3f affine_past2world;
 };
 
 

+ 12 - 1
src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/preprocess/preprocess_kernel.hpp

@@ -20,9 +20,20 @@
 
 namespace centerpoint
 {
+cudaError_t generateVoxels_random_launch(
+  const float * points, size_t points_size, float min_x_range, float max_x_range, float min_y_range,
+  float max_y_range, float min_z_range, float max_z_range, float pillar_x_size, float pillar_y_size,
+  float pillar_z_size, int grid_y_size, int grid_x_size, unsigned int * mask, float * voxels,
+  cudaStream_t stream);
+
+cudaError_t generateBaseFeatures_launch(
+  unsigned int * mask, float * voxels, int grid_y_size, int grid_x_size, int max_voxel_size,
+  unsigned int * pillar_num, float * voxel_features, float * voxel_num, int * voxel_idxs,
+  cudaStream_t stream);
+
 cudaError_t generateFeatures_launch(
   const float * voxel_features, const float * voxel_num_points, const int * coords,
-  const std::size_t num_voxels, const std::size_t max_voxel_size, const float voxel_size_x,
+  const unsigned int * num_voxels, const std::size_t max_voxel_size, const float voxel_size_x,
   const float voxel_size_y, const float voxel_size_z, const float range_min_x,
   const float range_min_y, const float range_min_z, float * features, cudaStream_t stream);
 

+ 9 - 6
src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/preprocess/voxel_generator.hpp

@@ -31,9 +31,10 @@ public:
   explicit VoxelGeneratorTemplate(
     const DensificationParam & param, const CenterPointConfig & config);
 
-  virtual std::size_t pointsToVoxels(
-    std::vector<float> & voxels, std::vector<int> & coordinates,
-    std::vector<float> & num_points_per_voxel) = 0;
+    virtual std::size_t generateSweepPoints(std::vector<float> & points) = 0;
+//  virtual std::size_t pointsToVoxels(
+//    std::vector<float> & voxels, std::vector<int> & coordinates,
+//    std::vector<float> & num_points_per_voxel) = 0;
 
     bool enqueuePointCloud(
       const pcl::PointCloud<pcl::PointXYZI>::Ptr);
@@ -55,9 +56,11 @@ class VoxelGenerator : public VoxelGeneratorTemplate
 public:
   using VoxelGeneratorTemplate::VoxelGeneratorTemplate;
 
-  std::size_t pointsToVoxels(
-    std::vector<float> & voxels, std::vector<int> & coordinates,
-    std::vector<float> & num_points_per_voxel) override;
+  std::size_t generateSweepPoints(std::vector<float> & points) override;
+
+//  std::size_t pointsToVoxels(
+//    std::vector<float> & voxels, std::vector<int> & coordinates,
+//    std::vector<float> & num_points_per_voxel) override;
 };
 
 }  // namespace centerpoint

+ 84 - 72
src/detection/detection_lidar_centerpoint/lib/centerpoint_trt.cpp

@@ -69,34 +69,40 @@ CenterPointTRT::~CenterPointTRT()
 
 void CenterPointTRT::initPtr()
 {
-  const auto voxels_size =
-    config_.max_voxel_size_ * config_.max_point_in_voxel_size_ * config_.point_feature_size_;
-  const auto coordinates_size = config_.max_voxel_size_ * config_.point_dim_size_;
-  encoder_in_feature_size_ =
-    config_.max_voxel_size_ * config_.max_point_in_voxel_size_ * config_.encoder_in_feature_size_;
-  const auto pillar_features_size = config_.max_voxel_size_ * config_.encoder_out_feature_size_;
-  spatial_features_size_ =
-    config_.grid_size_x_ * config_.grid_size_y_ * config_.encoder_out_feature_size_;
-  const auto grid_xy_size = config_.down_grid_size_x_ * config_.down_grid_size_y_;
-
-  // host
-  voxels_.resize(voxels_size);
-  coordinates_.resize(coordinates_size);
-  num_points_per_voxel_.resize(config_.max_voxel_size_);
-
-  // device
-  voxels_d_ = cuda::make_unique<float[]>(voxels_size);
-  coordinates_d_ = cuda::make_unique<int[]>(coordinates_size);
-  num_points_per_voxel_d_ = cuda::make_unique<float[]>(config_.max_voxel_size_);
-  encoder_in_features_d_ = cuda::make_unique<float[]>(encoder_in_feature_size_);
-  pillar_features_d_ = cuda::make_unique<float[]>(pillar_features_size);
-  spatial_features_d_ = cuda::make_unique<float[]>(spatial_features_size_);
-  head_out_heatmap_d_ = cuda::make_unique<float[]>(grid_xy_size * config_.class_size_);
-  head_out_offset_d_ = cuda::make_unique<float[]>(grid_xy_size * config_.head_out_offset_size_);
-  head_out_z_d_ = cuda::make_unique<float[]>(grid_xy_size * config_.head_out_z_size_);
-  head_out_dim_d_ = cuda::make_unique<float[]>(grid_xy_size * config_.head_out_dim_size_);
-  head_out_rot_d_ = cuda::make_unique<float[]>(grid_xy_size * config_.head_out_rot_size_);
-  head_out_vel_d_ = cuda::make_unique<float[]>(grid_xy_size * config_.head_out_vel_size_);
+    voxels_size_ =
+      config_.max_voxel_size_ * config_.max_point_in_voxel_size_ * config_.point_feature_size_;
+    coordinates_size_ = config_.max_voxel_size_ * config_.point_dim_size_;
+    encoder_in_feature_size_ =
+      config_.max_voxel_size_ * config_.max_point_in_voxel_size_ * config_.encoder_in_feature_size_;
+    const auto pillar_features_size = config_.max_voxel_size_ * config_.encoder_out_feature_size_;
+    spatial_features_size_ =
+      config_.grid_size_x_ * config_.grid_size_y_ * config_.encoder_out_feature_size_;
+    const auto grid_xy_size = config_.down_grid_size_x_ * config_.down_grid_size_y_;
+
+    voxels_buffer_size_ = config_.grid_size_x_ * config_.grid_size_y_ *
+                          config_.max_point_in_voxel_size_ * config_.point_feature_size_;
+    mask_size_ = config_.grid_size_x_ * config_.grid_size_y_;
+
+    // host
+    points_.resize(CAPACITY_POINT * config_.point_feature_size_);
+
+    // device
+    voxels_d_ = cuda::make_unique<float[]>(voxels_size_);
+    coordinates_d_ = cuda::make_unique<int[]>(coordinates_size_);
+    num_points_per_voxel_d_ = cuda::make_unique<float[]>(config_.max_voxel_size_);
+    encoder_in_features_d_ = cuda::make_unique<float[]>(encoder_in_feature_size_);
+    pillar_features_d_ = cuda::make_unique<float[]>(pillar_features_size);
+    spatial_features_d_ = cuda::make_unique<float[]>(spatial_features_size_);
+    head_out_heatmap_d_ = cuda::make_unique<float[]>(grid_xy_size * config_.class_size_);
+    head_out_offset_d_ = cuda::make_unique<float[]>(grid_xy_size * config_.head_out_offset_size_);
+    head_out_z_d_ = cuda::make_unique<float[]>(grid_xy_size * config_.head_out_z_size_);
+    head_out_dim_d_ = cuda::make_unique<float[]>(grid_xy_size * config_.head_out_dim_size_);
+    head_out_rot_d_ = cuda::make_unique<float[]>(grid_xy_size * config_.head_out_rot_size_);
+    head_out_vel_d_ = cuda::make_unique<float[]>(grid_xy_size * config_.head_out_vel_size_);
+    points_d_ = cuda::make_unique<float[]>(CAPACITY_POINT * config_.point_feature_size_);
+    voxels_buffer_d_ = cuda::make_unique<float[]>(voxels_buffer_size_);
+    mask_d_ = cuda::make_unique<unsigned int[]>(mask_size_);
+    num_voxels_d_ = cuda::make_unique<unsigned int[]>(1);
 }
 
 bool CenterPointTRT::detect(
@@ -105,13 +111,13 @@ bool CenterPointTRT::detect(
 {
 
     int64_t time1 = std::chrono::system_clock::now().time_since_epoch().count()/1000000;
-    std::fill(voxels_.begin(), voxels_.end(), 0);
-    std::fill(coordinates_.begin(), coordinates_.end(), -1);
-    std::fill(num_points_per_voxel_.begin(), num_points_per_voxel_.end(), 0);
-    CHECK_CUDA_ERROR(cudaMemsetAsync(
-      encoder_in_features_d_.get(), 0, encoder_in_feature_size_ * sizeof(float), stream_));
-    CHECK_CUDA_ERROR(
-      cudaMemsetAsync(spatial_features_d_.get(), 0, spatial_features_size_ * sizeof(float), stream_));
+//    std::fill(voxels_.begin(), voxels_.end(), 0);
+//    std::fill(coordinates_.begin(), coordinates_.end(), -1);
+//    std::fill(num_points_per_voxel_.begin(), num_points_per_voxel_.end(), 0);
+//    CHECK_CUDA_ERROR(cudaMemsetAsync(
+//      encoder_in_features_d_.get(), 0, encoder_in_feature_size_ * sizeof(float), stream_));
+//    CHECK_CUDA_ERROR(
+//      cudaMemsetAsync(spatial_features_d_.get(), 0, spatial_features_size_ * sizeof(float), stream_));
 
     if (!preprocess(pc_ptr)) {
         std::cout<<"Fail to preprocess and skip to detect."<<std::endl;
@@ -162,27 +168,33 @@ bool CenterPointTRT::preprocess(
     if (!is_success) {
       return false;
     }
-    num_voxels_ = vg_ptr_->pointsToVoxels(voxels_, coordinates_, num_points_per_voxel_);
-    if (num_voxels_ == 0) {
-      return false;
-    }
-
-    const auto voxels_size =
-      num_voxels_ * config_.max_point_in_voxel_size_ * config_.point_feature_size_;
-    const auto coordinates_size = num_voxels_ * config_.point_dim_size_;
-    // memcpy from host to device (not copy empty voxels)
-    CHECK_CUDA_ERROR(cudaMemcpyAsync(
-      voxels_d_.get(), voxels_.data(), voxels_size * sizeof(float), cudaMemcpyHostToDevice));
-    CHECK_CUDA_ERROR(cudaMemcpyAsync(
-      coordinates_d_.get(), coordinates_.data(), coordinates_size * sizeof(int),
-      cudaMemcpyHostToDevice));
+    const auto count = vg_ptr_->generateSweepPoints(points_);
     CHECK_CUDA_ERROR(cudaMemcpyAsync(
-      num_points_per_voxel_d_.get(), num_points_per_voxel_.data(), num_voxels_ * sizeof(float),
-      cudaMemcpyHostToDevice));
-    CHECK_CUDA_ERROR(cudaStreamSynchronize(stream_));
+      points_d_.get(), points_.data(), count * config_.point_feature_size_ * sizeof(float),
+      cudaMemcpyHostToDevice, stream_));
+    CHECK_CUDA_ERROR(cudaMemsetAsync(num_voxels_d_.get(), 0, sizeof(unsigned int), stream_));
+    CHECK_CUDA_ERROR(
+      cudaMemsetAsync(voxels_buffer_d_.get(), 0, voxels_buffer_size_ * sizeof(float), stream_));
+    CHECK_CUDA_ERROR(cudaMemsetAsync(mask_d_.get(), 0, mask_size_ * sizeof(int), stream_));
+    CHECK_CUDA_ERROR(cudaMemsetAsync(voxels_d_.get(), 0, voxels_size_ * sizeof(float), stream_));
+    CHECK_CUDA_ERROR(
+      cudaMemsetAsync(coordinates_d_.get(), 0, coordinates_size_ * sizeof(int), stream_));
+    CHECK_CUDA_ERROR(cudaMemsetAsync(
+      num_points_per_voxel_d_.get(), 0, config_.max_voxel_size_ * sizeof(float), stream_));
+
+    CHECK_CUDA_ERROR(generateVoxels_random_launch(
+      points_d_.get(), count, config_.range_min_x_, config_.range_max_x_, config_.range_min_y_,
+      config_.range_max_y_, config_.range_min_z_, config_.range_max_z_, config_.voxel_size_x_,
+      config_.voxel_size_y_, config_.voxel_size_z_, config_.grid_size_y_, config_.grid_size_x_,
+      mask_d_.get(), voxels_buffer_d_.get(), stream_));
+
+    CHECK_CUDA_ERROR(generateBaseFeatures_launch(
+      mask_d_.get(), voxels_buffer_d_.get(), config_.grid_size_y_, config_.grid_size_x_,
+      config_.max_voxel_size_, num_voxels_d_.get(), voxels_d_.get(), num_points_per_voxel_d_.get(),
+      coordinates_d_.get(), stream_));
 
     CHECK_CUDA_ERROR(generateFeatures_launch(
-      voxels_d_.get(), num_points_per_voxel_d_.get(), coordinates_d_.get(), num_voxels_,
+      voxels_d_.get(), num_points_per_voxel_d_.get(), coordinates_d_.get(), num_voxels_d_.get(),
       config_.max_voxel_size_, config_.voxel_size_x_, config_.voxel_size_y_, config_.voxel_size_z_,
       config_.range_min_x_, config_.range_min_y_, config_.range_min_z_, encoder_in_features_d_.get(),
       stream_));
@@ -227,26 +239,26 @@ bool CenterPointTRT::preprocess(
 
 void CenterPointTRT::inference()
 {
-  if (!encoder_trt_ptr_->context_ || !head_trt_ptr_->context_) {
-    throw std::runtime_error("Failed to create tensorrt context.");
-  }
+    if (!encoder_trt_ptr_->context_ || !head_trt_ptr_->context_) {
+      throw std::runtime_error("Failed to create tensorrt context.");
+    }
 
-  // pillar encoder network
-  std::vector<void *> encoder_buffers{encoder_in_features_d_.get(), pillar_features_d_.get()};
-  encoder_trt_ptr_->context_->enqueueV2(encoder_buffers.data(), stream_, nullptr);
-
-  // scatter
-  CHECK_CUDA_ERROR(scatterFeatures_launch(
-    pillar_features_d_.get(), coordinates_d_.get(), num_voxels_, config_.max_voxel_size_,
-    config_.encoder_out_feature_size_, config_.grid_size_x_, config_.grid_size_y_,
-    spatial_features_d_.get(), stream_));
-
-  // head network
-  std::vector<void *> head_buffers = {spatial_features_d_.get(), head_out_heatmap_d_.get(),
-                                      head_out_offset_d_.get(),  head_out_z_d_.get(),
-                                      head_out_dim_d_.get(),     head_out_rot_d_.get(),
-                                      head_out_vel_d_.get()};
-  head_trt_ptr_->context_->enqueueV2(head_buffers.data(), stream_, nullptr);
+    // pillar encoder network
+    std::vector<void *> encoder_buffers{encoder_in_features_d_.get(), pillar_features_d_.get()};
+    encoder_trt_ptr_->context_->enqueueV2(encoder_buffers.data(), stream_, nullptr);
+
+    // scatter
+    CHECK_CUDA_ERROR(scatterFeatures_launch(
+      pillar_features_d_.get(), coordinates_d_.get(), num_voxels_d_.get(), config_.max_voxel_size_,
+      config_.encoder_out_feature_size_, config_.grid_size_x_, config_.grid_size_y_,
+      spatial_features_d_.get(), stream_));
+
+    // head network
+    std::vector<void *> head_buffers = {spatial_features_d_.get(), head_out_heatmap_d_.get(),
+                                        head_out_offset_d_.get(),  head_out_z_d_.get(),
+                                        head_out_dim_d_.get(),     head_out_rot_d_.get(),
+                                        head_out_vel_d_.get()};
+    head_trt_ptr_->context_->enqueueV2(head_buffers.data(), stream_, nullptr);
 }
 
 void CenterPointTRT::postProcess(std::vector<Box3D> & det_boxes3d)

+ 8 - 0
src/detection/detection_lidar_centerpoint/lib/network/network_trt.cpp

@@ -59,6 +59,14 @@ bool HeadTRT::setProfile(
 
   for (std::size_t ci = 0; ci < out_channel_sizes_.size(); ci++) {
     auto out_name = network.getOutput(ci)->getName();
+
+    if (
+      out_name == std::string("heatmap") &&
+      network.getOutput(ci)->getDimensions().d[1] != static_cast<int32_t>(out_channel_sizes_[ci])) {
+      std::cout
+        << "Expected and actual number of classes do not match" << std::endl;
+      return false;
+    }
     auto out_dims = nvinfer1::Dims4(
       config_.batch_size_, out_channel_sizes_[ci], config_.down_grid_size_y_,
       config_.down_grid_size_x_);

+ 3 - 3
src/detection/detection_lidar_centerpoint/lib/network/scatter_kernel.cu

@@ -24,7 +24,7 @@ const std::size_t THREADS_PER_BLOCK = 32;
 namespace centerpoint
 {
 __global__ void scatterFeatures_kernel(
-  const float * pillar_features, const int * coords, const std::size_t num_pillars,
+  const float * pillar_features, const int * coords, const unsigned int * num_pillars,
   const std::size_t pillar_feature_size, const std::size_t grid_size_x,
   const std::size_t grid_size_y, float * scattered_features)
 {
@@ -34,7 +34,7 @@ __global__ void scatterFeatures_kernel(
   const auto pillar_i = blockIdx.x * THREADS_PER_BLOCK + threadIdx.x;
   const auto feature_i = blockIdx.y * THREADS_PER_BLOCK + threadIdx.y;
 
-  if (pillar_i >= num_pillars || feature_i >= pillar_feature_size) {
+  if (pillar_i >= num_pillars[0] || feature_i >= pillar_feature_size) {
     return;
   }
 
@@ -50,7 +50,7 @@ __global__ void scatterFeatures_kernel(
 
 // cspell: ignore divup
 cudaError_t scatterFeatures_launch(
-  const float * pillar_features, const int * coords, const std::size_t num_pillars,
+  const float * pillar_features, const int * coords, const unsigned int * num_pillars,
   const std::size_t max_voxel_size, const std::size_t encoder_out_feature_size,
   const std::size_t grid_size_x, const std::size_t grid_size_y, float * scattered_features,
   cudaStream_t stream)

+ 21 - 14
src/detection/detection_lidar_centerpoint/lib/network/tensorrt_wrapper.cpp

@@ -22,7 +22,9 @@
 
 namespace centerpoint
 {
-TensorRTWrapper::TensorRTWrapper(const CenterPointConfig & config) : config_(config) {}
+TensorRTWrapper::TensorRTWrapper(const CenterPointConfig & config) : config_(config)
+{
+}
 
 TensorRTWrapper::~TensorRTWrapper()
 {
@@ -38,7 +40,7 @@ bool TensorRTWrapper::init(
   runtime_ =
     tensorrt_common::TrtUniquePtr<nvinfer1::IRuntime>(nvinfer1::createInferRuntime(logger_));
   if (!runtime_) {
-    std::cout << "Fail to create runtime" << std::endl;
+    std::cout << "Failed to create runtime" << std::endl;
     return false;
   }
 
@@ -47,7 +49,12 @@ bool TensorRTWrapper::init(
   if (engine_file.is_open()) {
     success = loadEngine(engine_path);
   } else {
+      std::cout<<"Applying optimizations and building TRT CUDA engine. Please wait"<<std::endl;
+//    auto log_thread = logger_.log_throttle(
+//      nvinfer1::ILogger::Severity::kINFO,
+//      "Applying optimizations and building TRT CUDA engine. Please wait a minutes...", 5);
     success = parseONNX(onnx_path, engine_path, precision);
+//    logger_.stop_throttle(log_thread);
   }
   success &= createContext();
 
@@ -57,14 +64,15 @@ bool TensorRTWrapper::init(
 bool TensorRTWrapper::createContext()
 {
   if (!engine_) {
-    std::cout << "Fail to create context: Engine isn't created" << std::endl;
+    std::cout
+      << "Failed to create context: Engine was not created" << std::endl;
     return false;
   }
 
   context_ =
     tensorrt_common::TrtUniquePtr<nvinfer1::IExecutionContext>(engine_->createExecutionContext());
   if (!context_) {
-    std::cout << "Fail to create context" << std::endl;
+    std::cout << "Failed to create context" << std::endl;
     return false;
   }
 
@@ -78,14 +86,14 @@ bool TensorRTWrapper::parseONNX(
   auto builder =
     tensorrt_common::TrtUniquePtr<nvinfer1::IBuilder>(nvinfer1::createInferBuilder(logger_));
   if (!builder) {
-    std::cout << "Fail to create builder" << std::endl;
+    std::cout << "Failed to create builder" << std::endl;
     return false;
   }
 
   auto config =
     tensorrt_common::TrtUniquePtr<nvinfer1::IBuilderConfig>(builder->createBuilderConfig());
   if (!config) {
-    std::cout << "Fail to create config" << std::endl;
+    std::cout << "Failed to create config" << std::endl;
     return false;
   }
 #if (NV_TENSORRT_MAJOR * 1000) + (NV_TENSORRT_MINOR * 100) + NV_TENSOR_PATCH >= 8400
@@ -95,10 +103,11 @@ bool TensorRTWrapper::parseONNX(
 #endif
   if (precision == "fp16") {
     if (builder->platformHasFastFp16()) {
-      std::cout << "use TensorRT FP16 Inference" << std::endl;
+      std::cout << "Using TensorRT FP16 Inference" << std::endl;
       config->setFlag(nvinfer1::BuilderFlag::kFP16);
     } else {
-      std::cout << "TensorRT FP16 Inference isn't supported in this environment" << std::endl;
+       std::cout
+        << "TensorRT FP16 Inference isn't supported in this environment" << std::endl;
     }
   }
 
@@ -107,7 +116,7 @@ bool TensorRTWrapper::parseONNX(
   auto network =
     tensorrt_common::TrtUniquePtr<nvinfer1::INetworkDefinition>(builder->createNetworkV2(flag));
   if (!network) {
-    std::cout << "Fail to create network" << std::endl;
+    std::cout << "Failed to create network" << std::endl;
     return false;
   }
 
@@ -116,22 +125,20 @@ bool TensorRTWrapper::parseONNX(
   parser->parseFromFile(onnx_path.c_str(), static_cast<int>(nvinfer1::ILogger::Severity::kERROR));
 
   if (!setProfile(*builder, *network, *config)) {
-    std::cout << "Fail to set profile" << std::endl;
+    std::cout << "Failed to set profile" << std::endl;
     return false;
   }
 
-  std::cout << "Applying optimizations and building TRT CUDA engine (" << onnx_path << ") ..."
-            << std::endl;
   plan_ = tensorrt_common::TrtUniquePtr<nvinfer1::IHostMemory>(
     builder->buildSerializedNetwork(*network, *config));
   if (!plan_) {
-    std::cout << "Fail to create serialized network" << std::endl;
+    std::cout << "Failed to create serialized network" << std::endl;
     return false;
   }
   engine_ = tensorrt_common::TrtUniquePtr<nvinfer1::ICudaEngine>(
     runtime_->deserializeCudaEngine(plan_->data(), plan_->size()));
   if (!engine_) {
-    std::cout << "Fail to create engine" << std::endl;
+    std::cout << "Failed to create engine" << std::endl;
     return false;
   }
 

+ 7 - 8
src/detection/detection_lidar_centerpoint/lib/postprocess/non_maximum_suppression.cpp

@@ -14,10 +14,9 @@
 
 #include "lidar_centerpoint/postprocess/non_maximum_suppression.hpp"
 
-#include "perception_utils/geometry.hpp"
-#include "perception_utils/perception_utils.hpp"
-//#include "tier4_autoware_utils/tier4_autoware_utils.hpp"
-
+#include "object_recognition_utils/geometry.hpp"
+#include "object_recognition_utils/object_recognition_utils.hpp"
+#include "tier4_autoware_utils/geometry/geometry.hpp"
 namespace centerpoint
 {
 
@@ -41,8 +40,8 @@ bool NonMaximumSuppression::isTargetLabel(const uint8_t label)
 bool NonMaximumSuppression::isTargetPairObject(
   const DetectedObject & object1, const DetectedObject & object2)
 {
-  const auto label1 = perception_utils::getHighestProbLabel(object1.classification);
-  const auto label2 = perception_utils::getHighestProbLabel(object2.classification);
+  const auto label1 = object_recognition_utils::getHighestProbLabel(object1.classification);
+  const auto label2 = object_recognition_utils::getHighestProbLabel(object2.classification);
 
   if (isTargetLabel(label1) && isTargetLabel(label2)) {
     return true;
@@ -50,7 +49,7 @@ bool NonMaximumSuppression::isTargetPairObject(
 
   const auto search_sqr_dist_2d = params_.search_distance_2d_ * params_.search_distance_2d_;
   const auto sqr_dist_2d = tier4_autoware_utils::calcSquaredDistance2d(
-    perception_utils::getPose(object1), perception_utils::getPose(object2));
+    object_recognition_utils::getPose(object1), object_recognition_utils::getPose(object2));
   return sqr_dist_2d <= search_sqr_dist_2d;
 }
 
@@ -69,7 +68,7 @@ Eigen::MatrixXd NonMaximumSuppression::generateIoUMatrix(
       }
 
       if (params_.nms_type_ == NMS_TYPE::IoU_BEV) {
-        const double iou = perception_utils::get2dIoU(target_obj, source_obj);
+        const double iou = object_recognition_utils::get2dIoU(target_obj, source_obj);
         triangular_matrix(target_i, source_i) = iou;
         // NOTE: If the target object has any objects with iou > iou_threshold, it
         // will be suppressed regardless of later results.

+ 4 - 1
src/detection/detection_lidar_centerpoint/lib/postprocess/postprocess_kernel.cu

@@ -48,7 +48,10 @@ struct score_greater
   __device__ bool operator()(const Box3D & lb, const Box3D & rb) { return lb.score > rb.score; }
 };
 
-__device__ inline float sigmoid(float x) { return 1.0f / (1.0f + expf(-x)); }
+__device__ inline float sigmoid(float x)
+{
+  return 1.0f / (1.0f + expf(-x));
+}
 
 __global__ void generateBoxes3D_kernel(
   const float * out_heatmap, const float * out_offset, const float * out_z, const float * out_dim,

+ 2 - 1
src/detection/detection_lidar_centerpoint/lib/preprocess/pointcloud_densification.cpp

@@ -96,7 +96,8 @@ bool PointCloudDensification::enqueuePointCloud(
 
 void PointCloudDensification::enqueue(const pcl::PointCloud<pcl::PointXYZI>::Ptr & pc_ptr)
 {
-    PointCloudWithTransform pointcloud = {pc_ptr};
+ //   affine_world2current_ = affine_world2current;
+    PointCloudWithTransform pointcloud = {pc_ptr,affine_world2current_};
     pointcloud_cache_.push_front(pointcloud);
 }
 //void PointCloudDensification::enqueue(

+ 99 - 3
src/detection/detection_lidar_centerpoint/lib/preprocess/preprocess_kernel.cu

@@ -41,9 +41,104 @@ const std::size_t ENCODER_IN_FEATURE_SIZE = 9;  // the same as encoder_in_featur
 
 namespace centerpoint
 {
+__global__ void generateVoxels_random_kernel(
+  const float * points, size_t points_size, float min_x_range, float max_x_range, float min_y_range,
+  float max_y_range, float min_z_range, float max_z_range, float pillar_x_size, float pillar_y_size,
+  float pillar_z_size, int grid_y_size, int grid_x_size, unsigned int * mask, float * voxels)
+{
+  int point_idx = blockIdx.x * blockDim.x + threadIdx.x;
+  if (point_idx >= points_size) return;
+
+  float4 point = ((float4 *)points)[point_idx];
+
+  if (
+    point.x < min_x_range || point.x >= max_x_range || point.y < min_y_range ||
+    point.y >= max_y_range || point.z < min_z_range || point.z >= max_z_range)
+    return;
+
+  int voxel_idx = floorf((point.x - min_x_range) / pillar_x_size);
+  int voxel_idy = floorf((point.y - min_y_range) / pillar_y_size);
+  unsigned int voxel_index = voxel_idy * grid_x_size + voxel_idx;
+
+  unsigned int point_id = atomicAdd(&(mask[voxel_index]), 1);
+
+  if (point_id >= MAX_POINT_IN_VOXEL_SIZE) return;
+  float * address = voxels + (voxel_index * MAX_POINT_IN_VOXEL_SIZE + point_id) * 4;
+  atomicExch(address + 0, point.x);
+  atomicExch(address + 1, point.y);
+  atomicExch(address + 2, point.z);
+  atomicExch(address + 3, point.w);
+}
+
+cudaError_t generateVoxels_random_launch(
+  const float * points, size_t points_size, float min_x_range, float max_x_range, float min_y_range,
+  float max_y_range, float min_z_range, float max_z_range, float pillar_x_size, float pillar_y_size,
+  float pillar_z_size, int grid_y_size, int grid_x_size, unsigned int * mask, float * voxels,
+  cudaStream_t stream)
+{
+  dim3 blocks((points_size + 256 - 1) / 256);
+  dim3 threads(256);
+  generateVoxels_random_kernel<<<blocks, threads, 0, stream>>>(
+    points, points_size, min_x_range, max_x_range, min_y_range, max_y_range, min_z_range,
+    max_z_range, pillar_x_size, pillar_y_size, pillar_z_size, grid_y_size, grid_x_size, mask,
+    voxels);
+  cudaError_t err = cudaGetLastError();
+  return err;
+}
+
+__global__ void generateBaseFeatures_kernel(
+  unsigned int * mask, float * voxels, int grid_y_size, int grid_x_size, int max_voxel_size,
+  unsigned int * pillar_num, float * voxel_features, float * voxel_num, int * voxel_idxs)
+{
+  unsigned int voxel_idx = blockIdx.x * blockDim.x + threadIdx.x;
+  unsigned int voxel_idy = blockIdx.y * blockDim.y + threadIdx.y;
+
+  if (voxel_idx >= grid_x_size || voxel_idy >= grid_y_size) return;
+
+  unsigned int voxel_index = voxel_idy * grid_x_size + voxel_idx;
+  unsigned int count = mask[voxel_index];
+  if (!(count > 0)) return;
+  count = count < MAX_POINT_IN_VOXEL_SIZE ? count : MAX_POINT_IN_VOXEL_SIZE;
+
+  unsigned int current_pillarId = 0;
+  current_pillarId = atomicAdd(pillar_num, 1);
+  if (current_pillarId > max_voxel_size - 1) return;
+
+  voxel_num[current_pillarId] = count;
+
+  uint3 idx = {0, voxel_idy, voxel_idx};
+  ((uint3 *)voxel_idxs)[current_pillarId] = idx;
+
+  for (int i = 0; i < count; i++) {
+    int inIndex = voxel_index * MAX_POINT_IN_VOXEL_SIZE + i;
+    int outIndex = current_pillarId * MAX_POINT_IN_VOXEL_SIZE + i;
+    ((float4 *)voxel_features)[outIndex] = ((float4 *)voxels)[inIndex];
+  }
+
+  // clear buffer for next infer
+  atomicExch(mask + voxel_index, 0);
+}
+
+// create 4 channels
+cudaError_t generateBaseFeatures_launch(
+  unsigned int * mask, float * voxels, int grid_y_size, int grid_x_size, int max_voxel_size,
+  unsigned int * pillar_num, float * voxel_features, float * voxel_num, int * voxel_idxs,
+  cudaStream_t stream)
+{
+  dim3 threads = {32, 32};
+  dim3 blocks = {
+    (grid_x_size + threads.x - 1) / threads.x, (grid_y_size + threads.y - 1) / threads.y};
+
+  generateBaseFeatures_kernel<<<blocks, threads, 0, stream>>>(
+    mask, voxels, grid_y_size, grid_x_size, max_voxel_size, pillar_num, voxel_features, voxel_num,
+    voxel_idxs);
+  cudaError_t err = cudaGetLastError();
+  return err;
+}
+
 __global__ void generateFeatures_kernel(
   const float * voxel_features, const float * voxel_num_points, const int * coords,
-  const std::size_t num_voxels, const float voxel_x, const float voxel_y, const float voxel_z,
+  const unsigned int * num_voxels, const float voxel_x, const float voxel_y, const float voxel_z,
   const float range_min_x, const float range_min_y, const float range_min_z, float * features)
 {
   // voxel_features (float): (max_voxel_size, max_point_in_voxel_size, point_feature_size)
@@ -53,7 +148,8 @@ __global__ void generateFeatures_kernel(
   int point_idx = threadIdx.x % MAX_POINT_IN_VOXEL_SIZE;
   int pillar_idx_inBlock = threadIdx.x / MAX_POINT_IN_VOXEL_SIZE;  // max_point_in_voxel_size
 
-  if (pillar_idx >= num_voxels) return;
+  unsigned int num_pillars = num_voxels[0];
+  if (pillar_idx >= num_pillars) return;
 
   // load src
   __shared__ float4 pillarSM[WARPS_PER_BLOCK][MAX_POINT_IN_VOXEL_SIZE];
@@ -144,7 +240,7 @@ __global__ void generateFeatures_kernel(
 // cspell: ignore divup
 cudaError_t generateFeatures_launch(
   const float * voxel_features, const float * voxel_num_points, const int * coords,
-  const std::size_t num_voxels, const std::size_t max_voxel_size, const float voxel_size_x,
+  const unsigned int * num_voxels, const std::size_t max_voxel_size, const float voxel_size_x,
   const float voxel_size_y, const float voxel_size_z, const float range_min_x,
   const float range_min_y, const float range_min_z, float * features, cudaStream_t stream)
 {

+ 109 - 66
src/detection/detection_lidar_centerpoint/lib/preprocess/voxel_generator.cpp

@@ -54,87 +54,130 @@ bool VoxelGeneratorTemplate::enqueuePointCloud(
 //}
 
 
-std::size_t VoxelGenerator::pointsToVoxels(
-  std::vector<float> & voxels, std::vector<int> & coordinates,
-  std::vector<float> & num_points_per_voxel)
+std::size_t VoxelGenerator::generateSweepPoints(std::vector<float> & points)
 {
-  // voxels (float): (max_voxel_size * max_point_in_voxel_size * point_feature_size)
-  // coordinates (int): (max_voxel_size * point_dim_size)
-  // num_points_per_voxel (float): (max_voxel_size)
-
-  const std::size_t grid_size = config_.grid_size_z_ * config_.grid_size_y_ * config_.grid_size_x_;
-  std::vector<int> coord_to_voxel_idx(grid_size, -1);
-
-  std::size_t voxel_cnt = 0;  // @return
-  std::vector<float> point;
-  point.resize(config_.point_feature_size_);
-  std::vector<float> coord_zyx;
-  coord_zyx.resize(config_.point_dim_size_);
-  bool out_of_range;
-  std::size_t point_cnt;
-  int c, coord_idx, voxel_idx;
   Eigen::Vector3f point_current, point_past;
-
+  size_t point_counter{};
   for (auto pc_cache_iter = pd_ptr_->getPointCloudCacheIter(); !pd_ptr_->isCacheEnd(pc_cache_iter);
        pc_cache_iter++) {
-    pcl::PointCloud<pcl::PointXYZI>::Ptr pc_ptr = pc_cache_iter->pc_ptr;
+    auto pc_msg = pc_cache_iter->pc_ptr;
+    auto affine_past2current =
+      pd_ptr_->getAffineWorldToCurrent();// * pc_cache_iter->affine_past2world;   //Do not use last point
+//    float time_lag = static_cast<float>(
+//      pd_ptr_->getCurrentTimestamp() - rclcpp::Time(pc_msg.header.stamp).seconds());
+
+    float time_lag = 0;
 
+    pcl::PointCloud<pcl::PointXYZI>::Ptr pc_ptr = pc_cache_iter->pc_ptr;
     int nsize = pc_ptr->points.size();
     int i;
 
     for (i=0;i<nsize;i++) {
+        points.at(point_counter * config_.point_feature_size_) = pc_ptr->at(i)._PointXYZI::x;//   point_current.x();
+        points.at(point_counter * config_.point_feature_size_ + 1) = pc_ptr->at(i)._PointXYZI::y;// point_current.y();
+        points.at(point_counter * config_.point_feature_size_ + 2) = pc_ptr->at(i)._PointXYZI::z;// point_current.z();
+        points.at(point_counter * config_.point_feature_size_ + 3) = time_lag;
+        ++point_counter;
+    }
+
+//    for (sensor_msgs::PointCloud2ConstIterator<float> x_iter(pc_msg, "x"), y_iter(pc_msg, "y"),
+//         z_iter(pc_msg, "z");
+//         x_iter != x_iter.end(); ++x_iter, ++y_iter, ++z_iter) {
+//      point_past << *x_iter, *y_iter, *z_iter;
+//      point_current = affine_past2current * point_past;
 
+//      points.at(point_counter * config_.point_feature_size_) = point_current.x();
+//      points.at(point_counter * config_.point_feature_size_ + 1) = point_current.y();
+//      points.at(point_counter * config_.point_feature_size_ + 2) = point_current.z();
+//      points.at(point_counter * config_.point_feature_size_ + 3) = time_lag;
+//      ++point_counter;
 
-      point[0] = pc_ptr->points.at(i).x;
-      point[1] = pc_ptr->points.at(i).y;
-      point[2] = pc_ptr->points.at(i).z;
-      point[3] = 0;
-
-      out_of_range = false;
-      for (std::size_t di = 0; di < config_.point_dim_size_; di++) {
-        c = static_cast<int>((point[di] - range_[di]) * recip_voxel_size_[di]);
-        if (c < 0 || c >= grid_size_[di]) {
-          out_of_range = true;
-          break;
-        }
-        coord_zyx[config_.point_dim_size_ - di - 1] = c;
-      }
-      if (out_of_range) {
-        continue;
-      }
-
-      coord_idx = coord_zyx[0] * config_.grid_size_y_ * config_.grid_size_x_ +
-                  coord_zyx[1] * config_.grid_size_x_ + coord_zyx[2];
-      voxel_idx = coord_to_voxel_idx[coord_idx];
-      if (voxel_idx == -1) {
-        voxel_idx = voxel_cnt;
-        if (voxel_cnt >= config_.max_voxel_size_) {
-          continue;
-        }
-
-        voxel_cnt++;
-        coord_to_voxel_idx[coord_idx] = voxel_idx;
-        for (std::size_t di = 0; di < config_.point_dim_size_; di++) {
-          coordinates[voxel_idx * config_.point_dim_size_ + di] = coord_zyx[di];
-        }
-      }
-
-      point_cnt = num_points_per_voxel[voxel_idx];
-      if (point_cnt < config_.max_point_in_voxel_size_) {
-        for (std::size_t fi = 0; fi < config_.point_feature_size_; fi++) {
-          voxels
-            [voxel_idx * config_.max_point_in_voxel_size_ * config_.point_feature_size_ +
-             point_cnt * config_.point_feature_size_ + fi] = point[fi];
-        }
-        num_points_per_voxel[voxel_idx]++;
-      }
-    }
   }
-
-  return voxel_cnt;
+  return point_counter;
 }
 
 
+//std::size_t VoxelGenerator::pointsToVoxels(
+//  std::vector<float> & voxels, std::vector<int> & coordinates,
+//  std::vector<float> & num_points_per_voxel)
+//{
+//  // voxels (float): (max_voxel_size * max_point_in_voxel_size * point_feature_size)
+//  // coordinates (int): (max_voxel_size * point_dim_size)
+//  // num_points_per_voxel (float): (max_voxel_size)
+
+//  const std::size_t grid_size = config_.grid_size_z_ * config_.grid_size_y_ * config_.grid_size_x_;
+//  std::vector<int> coord_to_voxel_idx(grid_size, -1);
+
+//  std::size_t voxel_cnt = 0;  // @return
+//  std::vector<float> point;
+//  point.resize(config_.point_feature_size_);
+//  std::vector<float> coord_zyx;
+//  coord_zyx.resize(config_.point_dim_size_);
+//  bool out_of_range;
+//  std::size_t point_cnt;
+//  int c, coord_idx, voxel_idx;
+//  Eigen::Vector3f point_current, point_past;
+
+//  for (auto pc_cache_iter = pd_ptr_->getPointCloudCacheIter(); !pd_ptr_->isCacheEnd(pc_cache_iter);
+//       pc_cache_iter++) {
+//    pcl::PointCloud<pcl::PointXYZI>::Ptr pc_ptr = pc_cache_iter->pc_ptr;
+
+//    int nsize = pc_ptr->points.size();
+//    int i;
+
+//    for (i=0;i<nsize;i++) {
+
+
+//      point[0] = pc_ptr->points.at(i).x;
+//      point[1] = pc_ptr->points.at(i).y;
+//      point[2] = pc_ptr->points.at(i).z;
+//      point[3] = 0;
+
+//      out_of_range = false;
+//      for (std::size_t di = 0; di < config_.point_dim_size_; di++) {
+//        c = static_cast<int>((point[di] - range_[di]) * recip_voxel_size_[di]);
+//        if (c < 0 || c >= grid_size_[di]) {
+//          out_of_range = true;
+//          break;
+//        }
+//        coord_zyx[config_.point_dim_size_ - di - 1] = c;
+//      }
+//      if (out_of_range) {
+//        continue;
+//      }
+
+//      coord_idx = coord_zyx[0] * config_.grid_size_y_ * config_.grid_size_x_ +
+//                  coord_zyx[1] * config_.grid_size_x_ + coord_zyx[2];
+//      voxel_idx = coord_to_voxel_idx[coord_idx];
+//      if (voxel_idx == -1) {
+//        voxel_idx = voxel_cnt;
+//        if (voxel_cnt >= config_.max_voxel_size_) {
+//          continue;
+//        }
+
+//        voxel_cnt++;
+//        coord_to_voxel_idx[coord_idx] = voxel_idx;
+//        for (std::size_t di = 0; di < config_.point_dim_size_; di++) {
+//          coordinates[voxel_idx * config_.point_dim_size_ + di] = coord_zyx[di];
+//        }
+//      }
+
+//      point_cnt = num_points_per_voxel[voxel_idx];
+//      if (point_cnt < config_.max_point_in_voxel_size_) {
+//        for (std::size_t fi = 0; fi < config_.point_feature_size_; fi++) {
+//          voxels
+//            [voxel_idx * config_.max_point_in_voxel_size_ * config_.point_feature_size_ +
+//             point_cnt * config_.point_feature_size_ + fi] = point[fi];
+//        }
+//        num_points_per_voxel[voxel_idx]++;
+//      }
+//    }
+//  }
+
+//  return voxel_cnt;
+//}
+
+
 //std::size_t VoxelGenerator::pointsToVoxels(
 //  std::vector<float> & voxels, std::vector<int> & coordinates,
 //  std::vector<float> & num_points_per_voxel)

+ 26 - 3
src/detection/detection_lidar_centerpoint/main.cpp

@@ -1,6 +1,6 @@
 #include <QCoreApplication>
 
-
+#include <yaml-cpp/yaml.h>
 
 #include <lidar_centerpoint/centerpoint_config.hpp>
 #include <lidar_centerpoint/centerpoint_trt.hpp>
@@ -16,12 +16,19 @@ using namespace centerpoint;
 
 std::unique_ptr<CenterPointTRT> detector_ptr_{nullptr};
 
+void LoadYaml(std::string stryamlname,std::vector<int64_t> & allow_remapping_by_area_matrix
+              )
+{
+
+}
+
 void init()
 {
     const float score_threshold = 0.35;
-    const float circle_nms_dist_threshold =1.5;
+    const float circle_nms_dist_threshold =0.5;
   //    static_cast<float>(this->declare_parameter<double>("circle_nms_dist_threshold"));
     std::vector<double> yaw_norm_thresholds ;
+    yaw_norm_thresholds.push_back(0.3);yaw_norm_thresholds.push_back(0.3);yaw_norm_thresholds.push_back(0.3);yaw_norm_thresholds.push_back(0.0);
     const std::string densification_world_frame_id = "map";
     const int densification_num_past_frames = 1;
     const std::string trt_precision = "fp16";
@@ -119,9 +126,22 @@ void ListenPointCloud(const char * strdata,const unsigned int nSize,const unsign
 
 }
 
-void testdet()
+
+#include <pcl/io/pcd_io.h>
+void testdet(std::string & path)
 {
+    pcl::PointCloud<pcl::PointXYZI>::Ptr point_cloud(
+                new pcl::PointCloud<pcl::PointXYZI>());
 
+    pcl::io::loadPCDFile<pcl::PointXYZI>(path,*point_cloud);
+
+    std::vector<Box3D> det_boxes3d;
+    int i;
+    for(i=0;i<10;i++)
+    {
+    detector_ptr_ ->detect(point_cloud,det_boxes3d);
+    std::cout<<" box size: "<<det_boxes3d.size()<<std::endl;
+    }
 }
 
 
@@ -130,6 +150,9 @@ int main(int argc, char *argv[])
     QCoreApplication a(argc, argv);
     init();
 
+    std::string path = "/home/nvidia/1.pcd";
+    testdet(path);
+
     gpa = iv::modulecomm::RegisterRecv("lidarpc_center",ListenPointCloud);
 
     return a.exec();