Browse Source

add centerpoint in src1/perception. but not compile ok in agx.

yuchuli 2 years ago
parent
commit
e2ead73810
28 changed files with 2352 additions and 5 deletions
  1. 6 5
      src/driver/vtd_pilot_if/vtd_pilot.cpp
  2. 73 0
      src1/perception/perception_lidar_centerpoint/.gitignore
  3. 1 0
      src1/perception/perception_lidar_centerpoint/Readme.md
  4. 196 0
      src1/perception/perception_lidar_centerpoint/centerpoint_trt.cpp
  5. 114 0
      src1/perception/perception_lidar_centerpoint/include/lidar_centerpoint/centerpoint_config.hpp
  6. 109 0
      src1/perception/perception_lidar_centerpoint/include/lidar_centerpoint/centerpoint_trt.hpp
  7. 120 0
      src1/perception/perception_lidar_centerpoint/include/lidar_centerpoint/cuda_utils.hpp
  8. 55 0
      src1/perception/perception_lidar_centerpoint/include/lidar_centerpoint/network/network_trt.hpp
  9. 31 0
      src1/perception/perception_lidar_centerpoint/include/lidar_centerpoint/network/scatter_kernel.hpp
  10. 94 0
      src1/perception/perception_lidar_centerpoint/include/lidar_centerpoint/network/tensorrt_wrapper.hpp
  11. 58 0
      src1/perception/perception_lidar_centerpoint/include/lidar_centerpoint/node.hpp
  12. 32 0
      src1/perception/perception_lidar_centerpoint/include/lidar_centerpoint/postprocess/circle_nms_kernel.hpp
  13. 46 0
      src1/perception/perception_lidar_centerpoint/include/lidar_centerpoint/postprocess/postprocess_kernel.hpp
  14. 86 0
      src1/perception/perception_lidar_centerpoint/include/lidar_centerpoint/preprocess/pointcloud_densification.hpp
  15. 31 0
      src1/perception/perception_lidar_centerpoint/include/lidar_centerpoint/preprocess/preprocess_kernel.hpp
  16. 62 0
      src1/perception/perception_lidar_centerpoint/include/lidar_centerpoint/preprocess/voxel_generator.hpp
  17. 44 0
      src1/perception/perception_lidar_centerpoint/include/lidar_centerpoint/ros_utils.hpp
  18. 42 0
      src1/perception/perception_lidar_centerpoint/include/lidar_centerpoint/utils.hpp
  19. 8 0
      src1/perception/perception_lidar_centerpoint/main.cpp
  20. 75 0
      src1/perception/perception_lidar_centerpoint/network/network_trt.cpp
  21. 67 0
      src1/perception/perception_lidar_centerpoint/network/scatter_kernel.cu
  22. 154 0
      src1/perception/perception_lidar_centerpoint/network/tensorrt_wrapper.cpp
  23. 158 0
      src1/perception/perception_lidar_centerpoint/perception_lidar_centerpoint.pro
  24. 144 0
      src1/perception/perception_lidar_centerpoint/postprocess/circle_nms_kernel.cu
  25. 160 0
      src1/perception/perception_lidar_centerpoint/postprocess/postprocess_kernel.cu
  26. 97 0
      src1/perception/perception_lidar_centerpoint/preprocess/pointcloud_densification.cpp
  27. 159 0
      src1/perception/perception_lidar_centerpoint/preprocess/preprocess_kernel.cu
  28. 130 0
      src1/perception/perception_lidar_centerpoint/preprocess/voxel_generator.cpp

+ 6 - 5
src/driver/vtd_pilot_if/vtd_pilot.cpp

@@ -144,6 +144,7 @@ void vtd_pilot::ConvertToObjectArray(std::vector<iv::vtd::rdbdata> &xvectorrdbda
                     dimy = xobj.base.geo.dimY;
                     dimz = xobj.base.geo.dimZ;
   //                  std::cout<<" dimx: "<<dimx<<" dimy: "<<dimy<<std::endl;
+                    std::cout<<"relx : "<<relx<<" rely: "<<rely<<std::endl;
                     double xoff,yoff;
                     std::cout<<" obj hedad: "<<xobj.base.pos.h<<" our head: "<<mfHeading<<" rel heading: "<<fobjheading<<std::endl;
                     for(xoff = (dimx*(-0.5));xoff<=(dimx*0.5);xoff=xoff+0.2)
@@ -151,12 +152,12 @@ void vtd_pilot::ConvertToObjectArray(std::vector<iv::vtd::rdbdata> &xvectorrdbda
                         for(yoff = (dimy*(-0.5));yoff<=(dimy*0.5);yoff = yoff+0.2)
                         {
                             double reloffx,reloffy;
-                            if(fabs(fobjheading) < 0.1)
-                            {
-                                for(int m=0;m<1000;m++)
-                                std::cout<<" hello;"<<std::endl;
+//                            if(fabs(fobjheading) < 0.1)
+//                            {
+//                                for(int m=0;m<1000;m++)
+//                                std::cout<<" hello;"<<std::endl;
 
-                            }
+//                            }
     //                        fobjheading = 0;
     //                        std::cout<<"xoff: "<<xoff<<" yoff: "<<yoff<<std::endl;
                             reloffx = xoff*cos(fobjheading) - yoff*sin(fobjheading) + relx;

+ 73 - 0
src1/perception/perception_lidar_centerpoint/.gitignore

@@ -0,0 +1,73 @@
+# This file is used to ignore files which are generated
+# ----------------------------------------------------------------------------
+
+*~
+*.autosave
+*.a
+*.core
+*.moc
+*.o
+*.obj
+*.orig
+*.rej
+*.so
+*.so.*
+*_pch.h.cpp
+*_resource.rc
+*.qm
+.#*
+*.*#
+core
+!core/
+tags
+.DS_Store
+.directory
+*.debug
+Makefile*
+*.prl
+*.app
+moc_*.cpp
+ui_*.h
+qrc_*.cpp
+Thumbs.db
+*.res
+*.rc
+/.qmake.cache
+/.qmake.stash
+
+# qtcreator generated files
+*.pro.user*
+
+# xemacs temporary files
+*.flc
+
+# Vim temporary files
+.*.swp
+
+# Visual Studio generated files
+*.ib_pdb_index
+*.idb
+*.ilk
+*.pdb
+*.sln
+*.suo
+*.vcproj
+*vcproj.*.*.user
+*.ncb
+*.sdf
+*.opensdf
+*.vcxproj
+*vcxproj.*
+
+# MinGW generated files
+*.Debug
+*.Release
+
+# Python byte code
+*.pyc
+
+# Binaries
+# --------
+*.dll
+*.exe
+

+ 1 - 0
src1/perception/perception_lidar_centerpoint/Readme.md

@@ -0,0 +1 @@
+from autowrare, but not compile ok in agx xvier.

+ 196 - 0
src1/perception/perception_lidar_centerpoint/centerpoint_trt.cpp

@@ -0,0 +1,196 @@
+// Copyright 2021 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "lidar_centerpoint/centerpoint_trt.hpp"
+
+#include <lidar_centerpoint/centerpoint_config.hpp>
+#include <lidar_centerpoint/network/scatter_kernel.hpp>
+#include <lidar_centerpoint/preprocess/preprocess_kernel.hpp>
+//#include <tier4_autoware_utils/math/constants.hpp>
+
+#include <iostream>
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace centerpoint
+{
+CenterPointTRT::CenterPointTRT(
+  const NetworkParam & encoder_param, const NetworkParam & head_param,
+  const DensificationParam & densification_param, const CenterPointConfig & config)
+: config_(config)
+{
+  vg_ptr_ = std::make_unique<VoxelGenerator>(densification_param, config_);
+  post_proc_ptr_ = std::make_unique<PostProcessCUDA>(config_);
+
+  // encoder
+  encoder_trt_ptr_ = std::make_unique<VoxelEncoderTRT>(config_, verbose_);
+  encoder_trt_ptr_->init(
+    encoder_param.onnx_path(), encoder_param.engine_path(), encoder_param.trt_precision());
+  encoder_trt_ptr_->context_->setBindingDimensions(
+    0,
+    nvinfer1::Dims3(
+      config_.max_voxel_size_, config_.max_point_in_voxel_size_, config_.encoder_in_feature_size_));
+
+  // head
+  std::vector<std::size_t> out_channel_sizes = {
+    config_.class_size_,        config_.head_out_offset_size_, config_.head_out_z_size_,
+    config_.head_out_dim_size_, config_.head_out_rot_size_,    config_.head_out_vel_size_};
+  head_trt_ptr_ = std::make_unique<HeadTRT>(out_channel_sizes, config_, verbose_);
+  head_trt_ptr_->init(head_param.onnx_path(), head_param.engine_path(), head_param.trt_precision());
+  head_trt_ptr_->context_->setBindingDimensions(
+    0, nvinfer1::Dims4(
+         config_.batch_size_, config_.encoder_out_feature_size_, config_.grid_size_y_,
+         config_.grid_size_x_));
+
+  initPtr();
+
+  cudaStreamCreate(&stream_);
+}
+
+CenterPointTRT::~CenterPointTRT()
+{
+  if (stream_) {
+    cudaStreamSynchronize(stream_);
+    cudaStreamDestroy(stream_);
+  }
+}
+
+void CenterPointTRT::initPtr()
+{
+  const auto voxels_size =
+    config_.max_voxel_size_ * config_.max_point_in_voxel_size_ * config_.point_feature_size_;
+  const auto coordinates_size = config_.max_voxel_size_ * config_.point_dim_size_;
+  encoder_in_feature_size_ =
+    config_.max_voxel_size_ * config_.max_point_in_voxel_size_ * config_.encoder_in_feature_size_;
+  const auto pillar_features_size = config_.max_voxel_size_ * config_.encoder_out_feature_size_;
+  spatial_features_size_ =
+    config_.grid_size_x_ * config_.grid_size_y_ * config_.encoder_out_feature_size_;
+  const auto grid_xy_size = config_.down_grid_size_x_ * config_.down_grid_size_y_;
+
+  // host
+  voxels_.resize(voxels_size);
+  coordinates_.resize(coordinates_size);
+  num_points_per_voxel_.resize(config_.max_voxel_size_);
+
+  // device
+  voxels_d_ = cuda::make_unique<float[]>(voxels_size);
+  coordinates_d_ = cuda::make_unique<int[]>(coordinates_size);
+  num_points_per_voxel_d_ = cuda::make_unique<float[]>(config_.max_voxel_size_);
+  encoder_in_features_d_ = cuda::make_unique<float[]>(encoder_in_feature_size_);
+  pillar_features_d_ = cuda::make_unique<float[]>(pillar_features_size);
+  spatial_features_d_ = cuda::make_unique<float[]>(spatial_features_size_);
+  head_out_heatmap_d_ = cuda::make_unique<float[]>(grid_xy_size * config_.class_size_);
+  head_out_offset_d_ = cuda::make_unique<float[]>(grid_xy_size * config_.head_out_offset_size_);
+  head_out_z_d_ = cuda::make_unique<float[]>(grid_xy_size * config_.head_out_z_size_);
+  head_out_dim_d_ = cuda::make_unique<float[]>(grid_xy_size * config_.head_out_dim_size_);
+  head_out_rot_d_ = cuda::make_unique<float[]>(grid_xy_size * config_.head_out_rot_size_);
+  head_out_vel_d_ = cuda::make_unique<float[]>(grid_xy_size * config_.head_out_vel_size_);
+}
+
+bool CenterPointTRT::detect(
+  const sensor_msgs::msg::PointCloud2 & input_pointcloud_msg, const tf2_ros::Buffer & tf_buffer,
+  std::vector<Box3D> & det_boxes3d)
+{
+  std::fill(voxels_.begin(), voxels_.end(), 0);
+  std::fill(coordinates_.begin(), coordinates_.end(), -1);
+  std::fill(num_points_per_voxel_.begin(), num_points_per_voxel_.end(), 0);
+  CHECK_CUDA_ERROR(cudaMemsetAsync(
+    encoder_in_features_d_.get(), 0, encoder_in_feature_size_ * sizeof(float), stream_));
+  CHECK_CUDA_ERROR(
+    cudaMemsetAsync(spatial_features_d_.get(), 0, spatial_features_size_ * sizeof(float), stream_));
+
+  if (!preprocess(input_pointcloud_msg, tf_buffer)) {
+    RCLCPP_WARN_STREAM(
+      rclcpp::get_logger("lidar_centerpoint"), "Fail to preprocess and skip to detect.");
+    return false;
+  }
+
+  inference();
+
+  postProcess(det_boxes3d);
+
+  return true;
+}
+
+bool CenterPointTRT::preprocess(
+  const sensor_msgs::msg::PointCloud2 & input_pointcloud_msg, const tf2_ros::Buffer & tf_buffer)
+{
+  bool is_success = vg_ptr_->enqueuePointCloud(input_pointcloud_msg, tf_buffer);
+  if (!is_success) {
+    return false;
+  }
+  num_voxels_ = vg_ptr_->pointsToVoxels(voxels_, coordinates_, num_points_per_voxel_);
+  if (num_voxels_ == 0) {
+    return false;
+  }
+
+  const auto voxels_size =
+    num_voxels_ * config_.max_point_in_voxel_size_ * config_.point_feature_size_;
+  const auto coordinates_size = num_voxels_ * config_.point_dim_size_;
+  // memcpy from host to device (not copy empty voxels)
+  CHECK_CUDA_ERROR(cudaMemcpyAsync(
+    voxels_d_.get(), voxels_.data(), voxels_size * sizeof(float), cudaMemcpyHostToDevice));
+  CHECK_CUDA_ERROR(cudaMemcpyAsync(
+    coordinates_d_.get(), coordinates_.data(), coordinates_size * sizeof(int),
+    cudaMemcpyHostToDevice));
+  CHECK_CUDA_ERROR(cudaMemcpyAsync(
+    num_points_per_voxel_d_.get(), num_points_per_voxel_.data(), num_voxels_ * sizeof(float),
+    cudaMemcpyHostToDevice));
+  CHECK_CUDA_ERROR(cudaStreamSynchronize(stream_));
+
+  CHECK_CUDA_ERROR(generateFeatures_launch(
+    voxels_d_.get(), num_points_per_voxel_d_.get(), coordinates_d_.get(), num_voxels_,
+    config_.max_voxel_size_, config_.voxel_size_x_, config_.voxel_size_y_, config_.voxel_size_z_,
+    config_.range_min_x_, config_.range_min_y_, config_.range_min_z_, encoder_in_features_d_.get(),
+    stream_));
+
+  return true;
+}
+
+void CenterPointTRT::inference()
+{
+  if (!encoder_trt_ptr_->context_ || !head_trt_ptr_->context_) {
+    throw std::runtime_error("Failed to create tensorrt context.");
+  }
+
+  // pillar encoder network
+  std::vector<void *> encoder_buffers{encoder_in_features_d_.get(), pillar_features_d_.get()};
+  encoder_trt_ptr_->context_->enqueueV2(encoder_buffers.data(), stream_, nullptr);
+
+  // scatter
+  CHECK_CUDA_ERROR(scatterFeatures_launch(
+    pillar_features_d_.get(), coordinates_d_.get(), num_voxels_, config_.max_voxel_size_,
+    config_.encoder_out_feature_size_, config_.grid_size_x_, config_.grid_size_y_,
+    spatial_features_d_.get(), stream_));
+
+  // head network
+  std::vector<void *> head_buffers = {spatial_features_d_.get(), head_out_heatmap_d_.get(),
+                                      head_out_offset_d_.get(),  head_out_z_d_.get(),
+                                      head_out_dim_d_.get(),     head_out_rot_d_.get(),
+                                      head_out_vel_d_.get()};
+  head_trt_ptr_->context_->enqueueV2(head_buffers.data(), stream_, nullptr);
+}
+
+void CenterPointTRT::postProcess(std::vector<Box3D> & det_boxes3d)
+{
+  CHECK_CUDA_ERROR(post_proc_ptr_->generateDetectedBoxes3D_launch(
+    head_out_heatmap_d_.get(), head_out_offset_d_.get(), head_out_z_d_.get(), head_out_dim_d_.get(),
+    head_out_rot_d_.get(), head_out_vel_d_.get(), det_boxes3d, stream_));
+  if (det_boxes3d.size() == 0) {
+    RCLCPP_WARN_STREAM(rclcpp::get_logger("lidar_centerpoint"), "No detected boxes.");
+  }
+}
+
+}  // namespace centerpoint

+ 114 - 0
src1/perception/perception_lidar_centerpoint/include/lidar_centerpoint/centerpoint_config.hpp

@@ -0,0 +1,114 @@
+// Copyright 2021 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef LIDAR_CENTERPOINT__CENTERPOINT_CONFIG_HPP_
+#define LIDAR_CENTERPOINT__CENTERPOINT_CONFIG_HPP_
+
+#include <cstddef>
+#include <vector>
+
+namespace centerpoint
+{
+class CenterPointConfig
+{
+public:
+  explicit CenterPointConfig(
+    const std::size_t class_size, const float point_feature_size, const std::size_t max_voxel_size,
+    const std::vector<double> & point_cloud_range, const std::vector<double> & voxel_size,
+    const std::size_t downsample_factor, const std::size_t encoder_in_feature_size,
+    const float score_threshold, const float circle_nms_dist_threshold)
+  {
+    class_size_ = class_size;
+    point_feature_size_ = point_feature_size;
+    max_voxel_size_ = max_voxel_size;
+    if (point_cloud_range.size() == 6) {
+      range_min_x_ = static_cast<float>(point_cloud_range[0]);
+      range_min_y_ = static_cast<float>(point_cloud_range[1]);
+      range_min_z_ = static_cast<float>(point_cloud_range[2]);
+      range_max_x_ = static_cast<float>(point_cloud_range[3]);
+      range_max_y_ = static_cast<float>(point_cloud_range[4]);
+      range_max_z_ = static_cast<float>(point_cloud_range[5]);
+    }
+    if (voxel_size.size() == 3) {
+      voxel_size_x_ = static_cast<float>(voxel_size[0]);
+      voxel_size_y_ = static_cast<float>(voxel_size[1]);
+      voxel_size_z_ = static_cast<float>(voxel_size[2]);
+    }
+    downsample_factor_ = downsample_factor;
+    encoder_in_feature_size_ = encoder_in_feature_size;
+
+    if (score_threshold > 0 && score_threshold < 1) {
+      score_threshold_ = score_threshold;
+    }
+
+    if (circle_nms_dist_threshold > 0) {
+      circle_nms_dist_threshold_ = circle_nms_dist_threshold;
+    }
+
+    grid_size_x_ = static_cast<std::size_t>((range_max_x_ - range_min_x_) / voxel_size_x_);
+    grid_size_y_ = static_cast<std::size_t>((range_max_y_ - range_min_y_) / voxel_size_y_);
+    grid_size_z_ = static_cast<std::size_t>((range_max_z_ - range_min_z_) / voxel_size_z_);
+    offset_x_ = range_min_x_ + voxel_size_x_ / 2;
+    offset_y_ = range_min_y_ + voxel_size_y_ / 2;
+    offset_z_ = range_min_z_ + voxel_size_z_ / 2;
+    down_grid_size_x_ = grid_size_x_ / downsample_factor_;
+    down_grid_size_y_ = grid_size_y_ / downsample_factor_;
+  };
+
+  // input params
+  std::size_t class_size_{3};
+  const std::size_t point_dim_size_{3};  // x, y and z
+  std::size_t point_feature_size_{4};    // x, y, z and timelag
+  std::size_t max_point_in_voxel_size_{32};
+  std::size_t max_voxel_size_{40000};
+  float range_min_x_{-89.6f};
+  float range_min_y_{-89.6f};
+  float range_min_z_{-3.0f};
+  float range_max_x_{89.6f};
+  float range_max_y_{89.6f};
+  float range_max_z_{5.0f};
+  float voxel_size_x_{0.32f};
+  float voxel_size_y_{0.32f};
+  float voxel_size_z_{8.0f};
+
+  // network params
+  const std::size_t batch_size_{1};
+  std::size_t downsample_factor_{2};
+  std::size_t encoder_in_feature_size_{9};
+  const std::size_t encoder_out_feature_size_{32};
+  const std::size_t head_out_size_{6};
+  const std::size_t head_out_offset_size_{2};
+  const std::size_t head_out_z_size_{1};
+  const std::size_t head_out_dim_size_{3};
+  const std::size_t head_out_rot_size_{2};
+  const std::size_t head_out_vel_size_{2};
+
+  // post-process params
+  float score_threshold_{0.4f};
+  float circle_nms_dist_threshold_{1.5f};
+
+  // calculated params
+  std::size_t grid_size_x_ = (range_max_x_ - range_min_x_) / voxel_size_x_;
+  std::size_t grid_size_y_ = (range_max_y_ - range_min_y_) / voxel_size_y_;
+  std::size_t grid_size_z_ = (range_max_z_ - range_min_z_) / voxel_size_z_;
+  float offset_x_ = range_min_x_ + voxel_size_x_ / 2;
+  float offset_y_ = range_min_y_ + voxel_size_y_ / 2;
+  float offset_z_ = range_min_z_ + voxel_size_z_ / 2;
+  std::size_t down_grid_size_x_ = grid_size_x_ / downsample_factor_;
+  std::size_t down_grid_size_y_ = grid_size_y_ / downsample_factor_;
+};
+
+}  // namespace centerpoint
+
+#endif  // LIDAR_CENTERPOINT__CENTERPOINT_CONFIG_HPP_

+ 109 - 0
src1/perception/perception_lidar_centerpoint/include/lidar_centerpoint/centerpoint_trt.hpp

@@ -0,0 +1,109 @@
+// Copyright 2021 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef LIDAR_CENTERPOINT__CENTERPOINT_TRT_HPP_
+#define LIDAR_CENTERPOINT__CENTERPOINT_TRT_HPP_
+
+#include <lidar_centerpoint/cuda_utils.hpp>
+#include <lidar_centerpoint/network/network_trt.hpp>
+#include <lidar_centerpoint/postprocess/postprocess_kernel.hpp>
+#include <lidar_centerpoint/preprocess/voxel_generator.hpp>
+
+#include <sensor_msgs/msg/point_cloud2.hpp>
+
+#include <pcl/point_cloud.h>
+#include <pcl/point_types.h>
+
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+namespace centerpoint
+{
+class NetworkParam
+{
+public:
+  NetworkParam(std::string onnx_path, std::string engine_path, std::string trt_precision)
+  : onnx_path_(std::move(onnx_path)),
+    engine_path_(std::move(engine_path)),
+    trt_precision_(std::move(trt_precision))
+  {
+  }
+
+  std::string onnx_path() const { return onnx_path_; }
+  std::string engine_path() const { return engine_path_; }
+  std::string trt_precision() const { return trt_precision_; }
+
+private:
+  std::string onnx_path_;
+  std::string engine_path_;
+  std::string trt_precision_;
+};
+
+class CenterPointTRT
+{
+public:
+  explicit CenterPointTRT(
+    const NetworkParam & encoder_param, const NetworkParam & head_param,
+    const DensificationParam & densification_param, const CenterPointConfig & config);
+
+  ~CenterPointTRT();
+
+  bool detect(
+    const sensor_msgs::msg::PointCloud2 & input_pointcloud_msg, const tf2_ros::Buffer & tf_buffer,
+    std::vector<Box3D> & det_boxes3d);
+
+protected:
+  void initPtr();
+
+  virtual bool preprocess(
+    const sensor_msgs::msg::PointCloud2 & input_pointcloud_msg, const tf2_ros::Buffer & tf_buffer);
+
+  void inference();
+
+  void postProcess(std::vector<Box3D> & det_boxes3d);
+
+  std::unique_ptr<VoxelGeneratorTemplate> vg_ptr_{nullptr};
+  std::unique_ptr<VoxelEncoderTRT> encoder_trt_ptr_{nullptr};
+  std::unique_ptr<HeadTRT> head_trt_ptr_{nullptr};
+  std::unique_ptr<PostProcessCUDA> post_proc_ptr_{nullptr};
+  cudaStream_t stream_{nullptr};
+
+  bool verbose_{false};
+  std::size_t class_size_{0};
+  CenterPointConfig config_;
+  std::size_t num_voxels_{0};
+  std::size_t encoder_in_feature_size_{0};
+  std::size_t spatial_features_size_{0};
+  std::vector<float> voxels_;
+  std::vector<int> coordinates_;
+  std::vector<float> num_points_per_voxel_;
+  cuda::unique_ptr<float[]> voxels_d_{nullptr};
+  cuda::unique_ptr<int[]> coordinates_d_{nullptr};
+  cuda::unique_ptr<float[]> num_points_per_voxel_d_{nullptr};
+  cuda::unique_ptr<float[]> encoder_in_features_d_{nullptr};
+  cuda::unique_ptr<float[]> pillar_features_d_{nullptr};
+  cuda::unique_ptr<float[]> spatial_features_d_{nullptr};
+  cuda::unique_ptr<float[]> head_out_heatmap_d_{nullptr};
+  cuda::unique_ptr<float[]> head_out_offset_d_{nullptr};
+  cuda::unique_ptr<float[]> head_out_z_d_{nullptr};
+  cuda::unique_ptr<float[]> head_out_dim_d_{nullptr};
+  cuda::unique_ptr<float[]> head_out_rot_d_{nullptr};
+  cuda::unique_ptr<float[]> head_out_vel_d_{nullptr};
+};
+
+}  // namespace centerpoint
+
+#endif  // LIDAR_CENTERPOINT__CENTERPOINT_TRT_HPP_

+ 120 - 0
src1/perception/perception_lidar_centerpoint/include/lidar_centerpoint/cuda_utils.hpp

@@ -0,0 +1,120 @@
+// Copyright 2020 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+/*
+ * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * This code is licensed under CC0 1.0 Universal (Public Domain).
+ * You can use this without any limitation.
+ * https://creativecommons.org/publicdomain/zero/1.0/deed.en
+ */
+
+#ifndef LIDAR_CENTERPOINT__CUDA_UTILS_HPP_
+#define LIDAR_CENTERPOINT__CUDA_UTILS_HPP_
+
+#include <cuda_runtime_api.h>
+
+#include <memory>
+#include <sstream>
+#include <stdexcept>
+#include <type_traits>
+
+#define CHECK_CUDA_ERROR(e) (cuda::check_error(e, __FILE__, __LINE__))
+
+namespace cuda
+{
+inline void check_error(const ::cudaError_t e, const char * f, int n)
+{
+  if (e != ::cudaSuccess) {
+    std::stringstream s;
+    s << ::cudaGetErrorName(e) << " (" << e << ")@" << f << "#L" << n << ": "
+      << ::cudaGetErrorString(e);
+    throw std::runtime_error{s.str()};
+  }
+}
+
+struct deleter
+{
+  void operator()(void * p) const { CHECK_CUDA_ERROR(::cudaFree(p)); }
+};
+
+template <typename T>
+using unique_ptr = std::unique_ptr<T, deleter>;
+
+template <typename T>
+typename std::enable_if<std::is_array<T>::value, cuda::unique_ptr<T>>::type make_unique(
+  const std::size_t n)
+{
+  using U = typename std::remove_extent<T>::type;
+  U * p;
+  CHECK_CUDA_ERROR(::cudaMalloc(reinterpret_cast<void **>(&p), sizeof(U) * n));
+  return cuda::unique_ptr<T>{p};
+}
+
+template <typename T>
+cuda::unique_ptr<T> make_unique()
+{
+  T * p;
+  CHECK_CUDA_ERROR(::cudaMalloc(reinterpret_cast<void **>(&p), sizeof(T)));
+  return cuda::unique_ptr<T>{p};
+}
+
+constexpr size_t CUDA_ALIGN = 256;
+
+template <typename T>
+inline size_t get_size_aligned(size_t num_elem)
+{
+  size_t size = num_elem * sizeof(T);
+  size_t extra_align = 0;
+  if (size % CUDA_ALIGN != 0) {
+    extra_align = CUDA_ALIGN - size % CUDA_ALIGN;
+  }
+  return size + extra_align;
+}
+
+template <typename T>
+inline T * get_next_ptr(size_t num_elem, void *& workspace, size_t & workspace_size)
+{
+  size_t size = get_size_aligned<T>(num_elem);
+  if (size > workspace_size) {
+    throw std::runtime_error("Workspace is too small!");
+  }
+  workspace_size -= size;
+  T * ptr = reinterpret_cast<T *>(workspace);
+  workspace = reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(workspace) + size);
+  return ptr;
+}
+
+}  // namespace cuda
+
+#endif  // LIDAR_CENTERPOINT__CUDA_UTILS_HPP_

+ 55 - 0
src1/perception/perception_lidar_centerpoint/include/lidar_centerpoint/network/network_trt.hpp

@@ -0,0 +1,55 @@
+// Copyright 2021 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef LIDAR_CENTERPOINT__NETWORK__NETWORK_TRT_HPP_
+#define LIDAR_CENTERPOINT__NETWORK__NETWORK_TRT_HPP_
+
+#include <lidar_centerpoint/centerpoint_config.hpp>
+#include <lidar_centerpoint/network/tensorrt_wrapper.hpp>
+
+#include <vector>
+
+namespace centerpoint
+{
+class VoxelEncoderTRT : public TensorRTWrapper
+{
+public:
+  using TensorRTWrapper::TensorRTWrapper;
+
+protected:
+  bool setProfile(
+    nvinfer1::IBuilder & builder, nvinfer1::INetworkDefinition & network,
+    nvinfer1::IBuilderConfig & config) override;
+};
+
+class HeadTRT : public TensorRTWrapper
+{
+public:
+  using TensorRTWrapper::TensorRTWrapper;
+
+  HeadTRT(
+    const std::vector<std::size_t> & out_channel_sizes, const CenterPointConfig & config,
+    const bool verbose);
+
+protected:
+  bool setProfile(
+    nvinfer1::IBuilder & builder, nvinfer1::INetworkDefinition & network,
+    nvinfer1::IBuilderConfig & config) override;
+
+  std::vector<std::size_t> out_channel_sizes_;
+};
+
+}  // namespace centerpoint
+
+#endif  // LIDAR_CENTERPOINT__NETWORK__NETWORK_TRT_HPP_

+ 31 - 0
src1/perception/perception_lidar_centerpoint/include/lidar_centerpoint/network/scatter_kernel.hpp

@@ -0,0 +1,31 @@
+// Copyright 2022 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef LIDAR_CENTERPOINT__NETWORK__SCATTER_KERNEL_HPP_
+#define LIDAR_CENTERPOINT__NETWORK__SCATTER_KERNEL_HPP_
+
+#include <cuda.h>
+#include <cuda_runtime_api.h>
+
+namespace centerpoint
+{
+cudaError_t scatterFeatures_launch(
+  const float * pillar_features, const int * coords, const std::size_t num_pillars,
+  const std::size_t max_voxel_size, const std::size_t encoder_out_feature_size,
+  const std::size_t grid_size_x, const std::size_t grid_size_y, float * scattered_features,
+  cudaStream_t stream);
+
+}  // namespace centerpoint
+
+#endif  // LIDAR_CENTERPOINT__NETWORK__SCATTER_KERNEL_HPP_

+ 94 - 0
src1/perception/perception_lidar_centerpoint/include/lidar_centerpoint/network/tensorrt_wrapper.hpp

@@ -0,0 +1,94 @@
+// Copyright 2021 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef LIDAR_CENTERPOINT__NETWORK__TENSORRT_WRAPPER_HPP_
+#define LIDAR_CENTERPOINT__NETWORK__TENSORRT_WRAPPER_HPP_
+
+#include <lidar_centerpoint/centerpoint_config.hpp>
+
+#include <NvInfer.h>
+
+#include <iostream>
+#include <memory>
+#include <string>
+
+namespace centerpoint
+{
+struct Deleter
+{
+  template <typename T>
+  void operator()(T * obj) const
+  {
+    if (obj) {
+      delete obj;
+    }
+  }
+};
+
+template <typename T>
+using unique_ptr = std::unique_ptr<T, Deleter>;
+
+class Logger : public nvinfer1::ILogger
+{
+public:
+  explicit Logger(bool verbose) : verbose_(verbose) {}
+
+  void log(Severity severity, const char * msg) noexcept override
+  {
+    if (verbose_ || ((severity != Severity::kINFO) && (severity != Severity::kVERBOSE))) {
+      std::cout << msg << std::endl;
+    }
+  }
+
+private:
+  bool verbose_{false};
+};
+
+class TensorRTWrapper
+{
+public:
+  explicit TensorRTWrapper(const CenterPointConfig & config, const bool verbose);
+
+  bool init(
+    const std::string & onnx_path, const std::string & engine_path, const std::string & precision);
+
+  unique_ptr<nvinfer1::IExecutionContext> context_ = nullptr;
+
+protected:
+  virtual bool setProfile(
+    nvinfer1::IBuilder & builder, nvinfer1::INetworkDefinition & network,
+    nvinfer1::IBuilderConfig & config) = 0;
+
+  CenterPointConfig config_;
+  Logger logger_;
+
+private:
+  bool parseONNX(
+    const std::string & onnx_path, const std::string & engine_path, const std::string & precision,
+    size_t workspace_size = (1ULL << 30));
+
+  bool saveEngine(const std::string & engine_path);
+
+  bool loadEngine(const std::string & engine_path);
+
+  bool createContext();
+
+  unique_ptr<nvinfer1::IRuntime> runtime_ = nullptr;
+  unique_ptr<nvinfer1::IHostMemory> plan_ = nullptr;
+  unique_ptr<nvinfer1::ICudaEngine> engine_ = nullptr;
+};
+
+}  // namespace centerpoint
+
+#endif  // LIDAR_CENTERPOINT__NETWORK__TENSORRT_WRAPPER_HPP_

+ 58 - 0
src1/perception/perception_lidar_centerpoint/include/lidar_centerpoint/node.hpp

@@ -0,0 +1,58 @@
+// Copyright 2021 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef LIDAR_CENTERPOINT__NODE_HPP_
+#define LIDAR_CENTERPOINT__NODE_HPP_
+
+#include <lidar_centerpoint/centerpoint_trt.hpp>
+#include <rclcpp/rclcpp.hpp>
+
+#include <autoware_auto_perception_msgs/msg/detected_object_kinematics.hpp>
+#include <autoware_auto_perception_msgs/msg/detected_objects.hpp>
+#include <autoware_auto_perception_msgs/msg/object_classification.hpp>
+#include <autoware_auto_perception_msgs/msg/shape.hpp>
+#include <sensor_msgs/msg/point_cloud2.hpp>
+
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace centerpoint
+{
+
+class LidarCenterPointNode : public rclcpp::Node
+{
+public:
+  explicit LidarCenterPointNode(const rclcpp::NodeOptions & node_options);
+
+private:
+  void pointCloudCallback(const sensor_msgs::msg::PointCloud2::ConstSharedPtr input_pointcloud_msg);
+
+  tf2_ros::Buffer tf_buffer_;
+  tf2_ros::TransformListener tf_listener_{tf_buffer_};
+
+  rclcpp::Subscription<sensor_msgs::msg::PointCloud2>::SharedPtr pointcloud_sub_;
+  rclcpp::Publisher<autoware_auto_perception_msgs::msg::DetectedObjects>::SharedPtr objects_pub_;
+
+  float score_threshold_{0.0};
+  std::vector<std::string> class_names_;
+  bool rename_car_to_truck_and_bus_{false};
+  bool has_twist_{false};
+
+  std::unique_ptr<CenterPointTRT> detector_ptr_{nullptr};
+};
+
+}  // namespace centerpoint
+
+#endif  // LIDAR_CENTERPOINT__NODE_HPP_

+ 32 - 0
src1/perception/perception_lidar_centerpoint/include/lidar_centerpoint/postprocess/circle_nms_kernel.hpp

@@ -0,0 +1,32 @@
+// Copyright 2022 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef LIDAR_CENTERPOINT__POSTPROCESS__CIRCLE_NMS_KERNEL_HPP_
+#define LIDAR_CENTERPOINT__POSTPROCESS__CIRCLE_NMS_KERNEL_HPP_
+
+#include <lidar_centerpoint/utils.hpp>
+
+#include <thrust/device_vector.h>
+
+namespace centerpoint
+{
+// Non-maximum suppression (NMS) uses the distance on the xy plane instead of
+// intersection over union (IoU) to suppress overlapped objects.
+std::size_t circleNMS(
+  thrust::device_vector<Box3D> & boxes3d, const float distance_threshold,
+  thrust::device_vector<bool> & keep_mask, cudaStream_t stream);
+
+}  // namespace centerpoint
+
+#endif  // LIDAR_CENTERPOINT__POSTPROCESS__CIRCLE_NMS_KERNEL_HPP_

+ 46 - 0
src1/perception/perception_lidar_centerpoint/include/lidar_centerpoint/postprocess/postprocess_kernel.hpp

@@ -0,0 +1,46 @@
+// Copyright 2022 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef LIDAR_CENTERPOINT__POSTPROCESS__POSTPROCESS_KERNEL_HPP_
+#define LIDAR_CENTERPOINT__POSTPROCESS__POSTPROCESS_KERNEL_HPP_
+
+#include <lidar_centerpoint/centerpoint_config.hpp>
+#include <lidar_centerpoint/utils.hpp>
+
+#include <cuda.h>
+#include <cuda_runtime_api.h>
+#include <thrust/device_vector.h>
+
+#include <vector>
+
+namespace centerpoint
+{
+class PostProcessCUDA
+{
+public:
+  explicit PostProcessCUDA(const CenterPointConfig & config);
+
+  cudaError_t generateDetectedBoxes3D_launch(
+    const float * out_heatmap, const float * out_offset, const float * out_z, const float * out_dim,
+    const float * out_rot, const float * out_vel, std::vector<Box3D> & det_boxes3d,
+    cudaStream_t stream);
+
+private:
+  CenterPointConfig config_;
+  thrust::device_vector<Box3D> boxes3d_d_;
+};
+
+}  // namespace centerpoint
+
+#endif  // LIDAR_CENTERPOINT__POSTPROCESS__POSTPROCESS_KERNEL_HPP_

+ 86 - 0
src1/perception/perception_lidar_centerpoint/include/lidar_centerpoint/preprocess/pointcloud_densification.hpp

@@ -0,0 +1,86 @@
+// Copyright 2021 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef LIDAR_CENTERPOINT__PREPROCESS__POINTCLOUD_DENSIFICATION_HPP_
+#define LIDAR_CENTERPOINT__PREPROCESS__POINTCLOUD_DENSIFICATION_HPP_
+
+#include <tf2_ros/buffer.h>
+#include <tf2_ros/transform_listener.h>
+#ifdef ROS_DISTRO_GALACTIC
+#include <tf2_sensor_msgs/tf2_sensor_msgs.h>
+#else
+#include <tf2_sensor_msgs/tf2_sensor_msgs.hpp>
+#endif
+
+#include <list>
+#include <string>
+#include <utility>
+
+namespace centerpoint
+{
+class DensificationParam
+{
+public:
+  DensificationParam(const std::string & world_frame_id, const unsigned int num_past_frames)
+  : world_frame_id_(std::move(world_frame_id)),
+    pointcloud_cache_size_(num_past_frames + /*current frame*/ 1)
+  {
+  }
+
+  std::string world_frame_id() const { return world_frame_id_; }
+  unsigned int pointcloud_cache_size() const { return pointcloud_cache_size_; }
+
+private:
+  std::string world_frame_id_;
+  unsigned int pointcloud_cache_size_{1};
+};
+
+struct PointCloudWithTransform
+{
+  sensor_msgs::msg::PointCloud2 pointcloud_msg;
+  Eigen::Affine3f affine_past2world;
+};
+
+class PointCloudDensification
+{
+public:
+  explicit PointCloudDensification(const DensificationParam & param);
+
+  bool enqueuePointCloud(
+    const sensor_msgs::msg::PointCloud2 & input_pointcloud_msg, const tf2_ros::Buffer & tf_buffer);
+
+  double getCurrentTimestamp() const { return current_timestamp_; }
+  Eigen::Affine3f getAffineWorldToCurrent() const { return affine_world2current_; }
+  std::list<PointCloudWithTransform>::iterator getPointCloudCacheIter()
+  {
+    return pointcloud_cache_.begin();
+  }
+  bool isCacheEnd(std::list<PointCloudWithTransform>::iterator iter)
+  {
+    return iter == pointcloud_cache_.end();
+  }
+
+private:
+  void enqueue(const sensor_msgs::msg::PointCloud2 & msg, const Eigen::Affine3f & affine);
+  void dequeue();
+
+  DensificationParam param_;
+  double current_timestamp_{0.0};
+  Eigen::Affine3f affine_world2current_;
+  std::list<PointCloudWithTransform> pointcloud_cache_;
+};
+
+}  // namespace centerpoint
+
+#endif  // LIDAR_CENTERPOINT__PREPROCESS__POINTCLOUD_DENSIFICATION_HPP_

+ 31 - 0
src1/perception/perception_lidar_centerpoint/include/lidar_centerpoint/preprocess/preprocess_kernel.hpp

@@ -0,0 +1,31 @@
+// Copyright 2022 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef LIDAR_CENTERPOINT__PREPROCESS__PREPROCESS_KERNEL_HPP_
+#define LIDAR_CENTERPOINT__PREPROCESS__PREPROCESS_KERNEL_HPP_
+
+#include <cuda.h>
+#include <cuda_runtime_api.h>
+
+namespace centerpoint
+{
+cudaError_t generateFeatures_launch(
+  const float * voxel_features, const float * voxel_num_points, const int * coords,
+  const std::size_t num_voxels, const std::size_t max_voxel_size, const float voxel_size_x,
+  const float voxel_size_y, const float voxel_size_z, const float range_min_x,
+  const float range_min_y, const float range_min_z, float * features, cudaStream_t stream);
+
+}  // namespace centerpoint
+
+#endif  // LIDAR_CENTERPOINT__PREPROCESS__PREPROCESS_KERNEL_HPP_

+ 62 - 0
src1/perception/perception_lidar_centerpoint/include/lidar_centerpoint/preprocess/voxel_generator.hpp

@@ -0,0 +1,62 @@
+// Copyright 2021 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef LIDAR_CENTERPOINT__PREPROCESS__VOXEL_GENERATOR_HPP_
+#define LIDAR_CENTERPOINT__PREPROCESS__VOXEL_GENERATOR_HPP_
+
+#include <lidar_centerpoint/centerpoint_config.hpp>
+#include <lidar_centerpoint/preprocess/pointcloud_densification.hpp>
+
+#include <sensor_msgs/msg/point_cloud2.hpp>
+
+#include <memory>
+#include <vector>
+
+namespace centerpoint
+{
+class VoxelGeneratorTemplate
+{
+public:
+  explicit VoxelGeneratorTemplate(
+    const DensificationParam & param, const CenterPointConfig & config);
+
+  virtual std::size_t pointsToVoxels(
+    std::vector<float> & voxels, std::vector<int> & coordinates,
+    std::vector<float> & num_points_per_voxel) = 0;
+
+  bool enqueuePointCloud(
+    const sensor_msgs::msg::PointCloud2 & input_pointcloud_msg, const tf2_ros::Buffer & tf_buffer);
+
+protected:
+  std::unique_ptr<PointCloudDensification> pd_ptr_{nullptr};
+
+  CenterPointConfig config_;
+  std::array<float, 6> range_;
+  std::array<int, 3> grid_size_;
+  std::array<float, 3> recip_voxel_size_;
+};
+
+class VoxelGenerator : public VoxelGeneratorTemplate
+{
+public:
+  using VoxelGeneratorTemplate::VoxelGeneratorTemplate;
+
+  std::size_t pointsToVoxels(
+    std::vector<float> & voxels, std::vector<int> & coordinates,
+    std::vector<float> & num_points_per_voxel) override;
+};
+
+}  // namespace centerpoint
+
+#endif  // LIDAR_CENTERPOINT__PREPROCESS__VOXEL_GENERATOR_HPP_

+ 44 - 0
src1/perception/perception_lidar_centerpoint/include/lidar_centerpoint/ros_utils.hpp

@@ -0,0 +1,44 @@
+// Copyright 2022 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef LIDAR_CENTERPOINT__ROS_UTILS_HPP_
+#define LIDAR_CENTERPOINT__ROS_UTILS_HPP_
+
+// ros packages cannot be included from cuda.
+
+#include <lidar_centerpoint/utils.hpp>
+
+#include <autoware_auto_perception_msgs/msg/detected_object_kinematics.hpp>
+#include <autoware_auto_perception_msgs/msg/detected_objects.hpp>
+#include <autoware_auto_perception_msgs/msg/object_classification.hpp>
+#include <autoware_auto_perception_msgs/msg/shape.hpp>
+
+#include <string>
+#include <vector>
+
+namespace centerpoint
+{
+
+void box3DToDetectedObject(
+  const Box3D & box3d, const std::vector<std::string> & class_names,
+  const bool rename_car_to_truck_and_bus, const bool has_twist,
+  autoware_auto_perception_msgs::msg::DetectedObject & obj);
+
+uint8_t getSemanticType(const std::string & class_name);
+
+bool isCarLikeVehicleLabel(const uint8_t label);
+
+}  // namespace centerpoint
+
+#endif  // LIDAR_CENTERPOINT__ROS_UTILS_HPP_

+ 42 - 0
src1/perception/perception_lidar_centerpoint/include/lidar_centerpoint/utils.hpp

@@ -0,0 +1,42 @@
+// Copyright 2022 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef LIDAR_CENTERPOINT__UTILS_HPP_
+#define LIDAR_CENTERPOINT__UTILS_HPP_
+
+#include <cstddef>
+
+namespace centerpoint
+{
+struct Box3D
+{
+  // initializer not allowed for __shared__ variable
+  int label;
+  float score;
+  float x;
+  float y;
+  float z;
+  float length;
+  float width;
+  float height;
+  float yaw;
+  float vel_x;
+  float vel_y;
+};
+
+std::size_t divup(const std::size_t a, const std::size_t b);
+
+}  // namespace centerpoint
+
+#endif  // LIDAR_CENTERPOINT__UTILS_HPP_

+ 8 - 0
src1/perception/perception_lidar_centerpoint/main.cpp

@@ -0,0 +1,8 @@
+#include <QCoreApplication>
+
+int main(int argc, char *argv[])
+{
+    QCoreApplication a(argc, argv);
+
+    return a.exec();
+}

+ 75 - 0
src1/perception/perception_lidar_centerpoint/network/network_trt.cpp

@@ -0,0 +1,75 @@
+// Copyright 2021 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "lidar_centerpoint/network/network_trt.hpp"
+
+namespace centerpoint
+{
+bool VoxelEncoderTRT::setProfile(
+  nvinfer1::IBuilder & builder, nvinfer1::INetworkDefinition & network,
+  nvinfer1::IBuilderConfig & config)
+{
+  auto profile = builder.createOptimizationProfile();
+  auto in_name = network.getInput(0)->getName();
+  auto in_dims = nvinfer1::Dims3(
+    config_.max_voxel_size_, config_.max_point_in_voxel_size_, config_.encoder_in_feature_size_);
+  profile->setDimensions(in_name, nvinfer1::OptProfileSelector::kMIN, in_dims);
+  profile->setDimensions(in_name, nvinfer1::OptProfileSelector::kOPT, in_dims);
+  profile->setDimensions(in_name, nvinfer1::OptProfileSelector::kMAX, in_dims);
+
+  auto out_name = network.getOutput(0)->getName();
+  auto out_dims = nvinfer1::Dims2(config_.max_voxel_size_, config_.encoder_out_feature_size_);
+  profile->setDimensions(out_name, nvinfer1::OptProfileSelector::kMIN, out_dims);
+  profile->setDimensions(out_name, nvinfer1::OptProfileSelector::kOPT, out_dims);
+  profile->setDimensions(out_name, nvinfer1::OptProfileSelector::kMAX, out_dims);
+  config.addOptimizationProfile(profile);
+
+  return true;
+}
+
+HeadTRT::HeadTRT(
+  const std::vector<std::size_t> & out_channel_sizes, const CenterPointConfig & config,
+  const bool verbose)
+: TensorRTWrapper(config, verbose), out_channel_sizes_(out_channel_sizes)
+{
+}
+
+bool HeadTRT::setProfile(
+  nvinfer1::IBuilder & builder, nvinfer1::INetworkDefinition & network,
+  nvinfer1::IBuilderConfig & config)
+{
+  auto profile = builder.createOptimizationProfile();
+  auto in_name = network.getInput(0)->getName();
+  auto in_dims = nvinfer1::Dims4(
+    config_.batch_size_, config_.encoder_out_feature_size_, config_.grid_size_y_,
+    config_.grid_size_x_);
+  profile->setDimensions(in_name, nvinfer1::OptProfileSelector::kMIN, in_dims);
+  profile->setDimensions(in_name, nvinfer1::OptProfileSelector::kOPT, in_dims);
+  profile->setDimensions(in_name, nvinfer1::OptProfileSelector::kMAX, in_dims);
+
+  for (std::size_t ci = 0; ci < out_channel_sizes_.size(); ci++) {
+    auto out_name = network.getOutput(ci)->getName();
+    auto out_dims = nvinfer1::Dims4(
+      config_.batch_size_, out_channel_sizes_[ci], config_.down_grid_size_y_,
+      config_.down_grid_size_x_);
+    profile->setDimensions(out_name, nvinfer1::OptProfileSelector::kMIN, out_dims);
+    profile->setDimensions(out_name, nvinfer1::OptProfileSelector::kOPT, out_dims);
+    profile->setDimensions(out_name, nvinfer1::OptProfileSelector::kMAX, out_dims);
+  }
+  config.addOptimizationProfile(profile);
+
+  return true;
+}
+
+}  // namespace centerpoint

+ 67 - 0
src1/perception/perception_lidar_centerpoint/network/scatter_kernel.cu

@@ -0,0 +1,67 @@
+// Copyright 2022 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "lidar_centerpoint/network/scatter_kernel.hpp"
+
+#include <lidar_centerpoint/utils.hpp>
+
+namespace
+{
+const std::size_t THREADS_PER_BLOCK = 32;
+}  // namespace
+
+namespace centerpoint
+{
+__global__ void scatterFeatures_kernel(
+  const float * pillar_features, const int * coords, const std::size_t num_pillars,
+  const std::size_t pillar_feature_size, const std::size_t grid_size_x,
+  const std::size_t grid_size_y, float * scattered_features)
+{
+  // pillar_features: shape of (max_num_pillars, pillar_feature_size)
+  // coords: shape of (max_num_pillars, 3)
+  // scattered_features: shape of (num_pillars, grid_size_y, grid_size_x)
+  const auto pillar_i = blockIdx.x * THREADS_PER_BLOCK + threadIdx.x;
+  const auto feature_i = blockIdx.y * THREADS_PER_BLOCK + threadIdx.y;
+
+  if (pillar_i >= num_pillars || feature_i >= pillar_feature_size) {
+    return;
+  }
+
+  const int3 coord = ((int3 *)coords)[pillar_i];  // zyx
+  if (coord.x < 0) {
+    return;
+  }
+
+  const auto feature = pillar_features[pillar_feature_size * pillar_i + feature_i];
+  scattered_features[grid_size_y * grid_size_x * feature_i + grid_size_x * coord.y + coord.z] =
+    feature;
+}
+
+cudaError_t scatterFeatures_launch(
+  const float * pillar_features, const int * coords, const std::size_t num_pillars,
+  const std::size_t max_voxel_size, const std::size_t encoder_out_feature_size,
+  const std::size_t grid_size_x, const std::size_t grid_size_y, float * scattered_features,
+  cudaStream_t stream)
+{
+  dim3 blocks(
+    divup(max_voxel_size, THREADS_PER_BLOCK), divup(encoder_out_feature_size, THREADS_PER_BLOCK));
+  dim3 threads(THREADS_PER_BLOCK, THREADS_PER_BLOCK);
+  scatterFeatures_kernel<<<blocks, threads, 0, stream>>>(
+    pillar_features, coords, num_pillars, encoder_out_feature_size, grid_size_x, grid_size_y,
+    scattered_features);
+
+  return cudaGetLastError();
+}
+
+}  // namespace centerpoint

+ 154 - 0
src1/perception/perception_lidar_centerpoint/network/tensorrt_wrapper.cpp

@@ -0,0 +1,154 @@
+// Copyright 2021 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "lidar_centerpoint/network/tensorrt_wrapper.hpp"
+
+#include <NvOnnxParser.h>
+
+#include <fstream>
+#include <memory>
+#include <string>
+
+namespace centerpoint
+{
+TensorRTWrapper::TensorRTWrapper(const CenterPointConfig & config, const bool verbose)
+: config_(config), logger_(Logger(verbose))
+{
+}
+
+bool TensorRTWrapper::init(
+  const std::string & onnx_path, const std::string & engine_path, const std::string & precision)
+{
+  runtime_ = unique_ptr<nvinfer1::IRuntime>(nvinfer1::createInferRuntime(logger_));
+  if (!runtime_) {
+    std::cout << "Fail to create runtime" << std::endl;
+    return false;
+  }
+
+  bool success;
+  std::ifstream engine_file(engine_path);
+  if (engine_file.is_open()) {
+    success = loadEngine(engine_path);
+  } else {
+    success = parseONNX(onnx_path, engine_path, precision);
+  }
+  success &= createContext();
+
+  return success;
+}
+
+bool TensorRTWrapper::createContext()
+{
+  if (!engine_) {
+    std::cout << "Fail to create context: Engine isn't created" << std::endl;
+    return false;
+  }
+
+  context_ = unique_ptr<nvinfer1::IExecutionContext>(engine_->createExecutionContext());
+  if (!context_) {
+    std::cout << "Fail to create context" << std::endl;
+    return false;
+  }
+
+  return true;
+}
+
+bool TensorRTWrapper::parseONNX(
+  const std::string & onnx_path, const std::string & engine_path, const std::string & precision,
+  const size_t workspace_size)
+{
+  auto builder = unique_ptr<nvinfer1::IBuilder>(nvinfer1::createInferBuilder(logger_));
+  if (!builder) {
+    std::cout << "Fail to create builder" << std::endl;
+    return false;
+  }
+
+  auto config = unique_ptr<nvinfer1::IBuilderConfig>(builder->createBuilderConfig());
+  if (!config) {
+    std::cout << "Fail to create config" << std::endl;
+    return false;
+  }
+  config->setMaxWorkspaceSize(workspace_size);
+  if (precision == "fp16") {
+    if (builder->platformHasFastFp16()) {
+      std::cout << "use TensorRT FP16 Inference" << std::endl;
+      config->setFlag(nvinfer1::BuilderFlag::kFP16);
+    } else {
+      std::cout << "TensorRT FP16 Inference isn't supported in this environment" << std::endl;
+    }
+  }
+
+  const auto flag =
+    1U << static_cast<uint32_t>(nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
+  auto network = unique_ptr<nvinfer1::INetworkDefinition>(builder->createNetworkV2(flag));
+  if (!network) {
+    std::cout << "Fail to create network" << std::endl;
+    return false;
+  }
+
+  auto parser = unique_ptr<nvonnxparser::IParser>(nvonnxparser::createParser(*network, logger_));
+  parser->parseFromFile(onnx_path.c_str(), static_cast<int>(nvinfer1::ILogger::Severity::kERROR));
+
+  if (!setProfile(*builder, *network, *config)) {
+    std::cout << "Fail to set profile" << std::endl;
+    return false;
+  }
+
+  std::cout << "Applying optimizations and building TRT CUDA engine (" << onnx_path << ") ..."
+            << std::endl;
+  plan_ = unique_ptr<nvinfer1::IHostMemory>(builder->buildSerializedNetwork(*network, *config));
+  if (!plan_) {
+    std::cout << "Fail to create serialized network" << std::endl;
+    return false;
+  }
+  engine_ = unique_ptr<nvinfer1::ICudaEngine>(
+    runtime_->deserializeCudaEngine(plan_->data(), plan_->size()));
+  if (!engine_) {
+    std::cout << "Fail to create engine" << std::endl;
+    return false;
+  }
+
+  return saveEngine(engine_path);
+}
+
+bool TensorRTWrapper::saveEngine(const std::string & engine_path)
+{
+  std::cout << "Writing to " << engine_path << std::endl;
+  std::ofstream file(engine_path, std::ios::out | std::ios::binary);
+  file.write(reinterpret_cast<const char *>(plan_->data()), plan_->size());
+  return true;
+}
+
+bool TensorRTWrapper::loadEngine(const std::string & engine_path)
+{
+  std::ifstream file(engine_path, std::ios::in | std::ios::binary);
+  file.seekg(0, std::ifstream::end);
+  const size_t size = file.tellg();
+  file.seekg(0, std::ifstream::beg);
+
+  std::unique_ptr<char[]> buffer{new char[size]};
+  file.read(buffer.get(), size);
+  file.close();
+
+  if (!runtime_) {
+    std::cout << "Fail to load engine: Runtime isn't created" << std::endl;
+    return false;
+  }
+
+  std::cout << "Loading from " << engine_path << std::endl;
+  engine_ = unique_ptr<nvinfer1::ICudaEngine>(runtime_->deserializeCudaEngine(buffer.get(), size));
+  return true;
+}
+
+}  // namespace centerpoint

+ 158 - 0
src1/perception/perception_lidar_centerpoint/perception_lidar_centerpoint.pro

@@ -0,0 +1,158 @@
+QT -= gui
+
+CONFIG += c++17 console
+CONFIG -= app_bundle
+
+# The following define makes your compiler emit warnings if you use
+# any feature of Qt which as been marked deprecated (the exact warnings
+# depend on your compiler). Please consult the documentation of the
+# deprecated API in order to know how to port your code away from it.
+DEFINES += QT_DEPRECATED_WARNINGS
+
+# You can also make your code fail to compile if you use deprecated APIs.
+# In order to do so, uncomment the following line.
+# You can also select to disable deprecated APIs only up to a certain version of Qt.
+#DEFINES += QT_DISABLE_DEPRECATED_BEFORE=0x060000    # disables all the APIs deprecated before Qt 6.0.0
+
+SOURCES += main.cpp \
+    centerpoint_trt.cpp \
+    network/network_trt.cpp \
+    network/tensorrt_wrapper.cpp \
+    preprocess/pointcloud_densification.cpp \
+    preprocess/voxel_generator.cpp
+
+INCLUDEPATH += $$PWD/include
+INCLUDEPATH += /opt/ros/melodic/include
+
+DISTFILES += \
+    network/scatter_kernel.cu \
+    postprocess/circle_nms_kernel.cu \
+    postprocess/postprocess_kernel.cu \
+    preprocess/preprocess_kernel.cu
+
+CUDA_SOURCES += \
+    network/scatter_kernel.cu \
+    postprocess/circle_nms_kernel.cu \
+    postprocess/postprocess_kernel.cu \
+    preprocess/preprocess_kernel.cu
+
+
+CUDA_SDK = "/usr/local/cuda/"   # cudaSDK路径
+
+CUDA_DIR = "/usr/local/cuda/"            # CUDA tookit路径
+
+SYSTEM_NAME = linux         # 自己系统环境 'Win32', 'x64', or 'Win64'
+
+SYSTEM_TYPE = 64           #操作系统位数 '32' or '64',
+
+CUDA_ARCH = sm_72         # cuda架构, for example 'compute_10', 'compute_11', 'sm_10'
+
+NVCC_OPTIONS = --use_fast_math --compiler-options "-fPIC"
+
+
+# include paths
+
+INCLUDEPATH += $$CUDA_DIR/include
+#INCLUDEPATH += /usr/local/cuda-10.0/targets/aarch64-linux/include/crt
+
+# library directories
+
+QMAKE_LIBDIR += $$CUDA_DIR/lib/
+
+CUDA_OBJECTS_DIR = ./
+
+# The following library conflicts with something in Cuda
+
+#QMAKE_LFLAGS_RELEASE = /NODEFAULTLIB:msvcrt.lib
+
+#QMAKE_LFLAGS_DEBUG   = /NODEFAULTLIB:msvcrtd.lib
+
+# Add the necessary libraries
+
+CUDA_LIBS =  cudart cufft
+
+# The following makes sure all path names (which often include spaces) are put between quotation marks
+
+CUDA_INC = $$join(INCLUDEPATH,'" -I"','-I"','"')
+
+NVCC_LIBS = $$join(CUDA_LIBS,' -l','-l', '')
+
+#LIBS += $$join(CUDA_LIBS,'.so ', '', '.so')
+
+# Configuration of the Cuda compiler
+
+CONFIG(debug, debug|release) {
+
+    # Debug mode
+
+    cuda_d.input = CUDA_SOURCES
+
+    cuda_d.output = $$CUDA_OBJECTS_DIR/${QMAKE_FILE_BASE}_cuda.o
+
+    cuda_d.commands = $$CUDA_DIR/bin/nvcc -D_DEBUG $$NVCC_OPTIONS $$CUDA_INC $$NVCC_LIBS --machine $$SYSTEM_TYPE -arch=$$CUDA_ARCH -c -o ${QMAKE_FILE_OUT} ${QMAKE_FILE_NAME}
+
+    cuda_d.dependency_type = TYPE_C
+
+    QMAKE_EXTRA_COMPILERS += cuda_d
+
+}
+
+else {
+
+    # Release mode
+
+    cuda.input = CUDA_SOURCES
+
+    cuda.output = $$CUDA_OBJECTS_DIR/${QMAKE_FILE_BASE}_cuda.o
+
+    cuda.commands = $$CUDA_DIR/bin/nvcc $$NVCC_OPTIONS $$CUDA_INC $$NVCC_LIBS --machine $$SYSTEM_TYPE -arch=$$CUDA_ARCH -O3 -c -o ${QMAKE_FILE_OUT} ${QMAKE_FILE_NAME}
+
+    cuda.dependency_type = TYPE_C
+
+    QMAKE_EXTRA_COMPILERS += cuda
+
+}
+
+
+LIBS += -L/usr/local/cuda-10.2/targets/aarch64-linux/lib
+
+LIBS += -lcudart -lcufft -lyaml-cpp
+
+#LIBS += -L/home/adc/soft/cudnn-10.2-linux-x64-v7.6.5.32/cuda/lib64 -lcudnn
+
+LIBS +=  -lmyelin -lnvinfer -lnvonnxparser -lnvcaffe_parser
+
+#LIBS += -L/home/nvidia/git/libtorch_gpu-1.6.0-linux-aarch64/lib -ltorch_cuda  -ltorch -lc10 -ltorch_cpu
+
+unix:INCLUDEPATH += /usr/include/eigen3
+unix:INCLUDEPATH += /usr/include/pcl-1.7
+unix:INCLUDEPATH += /usr/include/pcl-1.8
+
+!include(../../../include/common.pri ) {
+    error( "Couldn't find the common.pri file!" )
+}
+
+!include(../../../include/ivprotobuf.pri ) {
+    error( "Couldn't find the ivprotobuf.pri file!" )
+}
+
+LIBS += -lboost_system
+
+unix:LIBS +=  -lpcl_common\
+        -lpcl_features\
+        -lpcl_filters\
+        -lpcl_io\
+        -lpcl_io_ply\
+        -lpcl_kdtree\
+        -lpcl_keypoints\
+        -lpcl_octree\
+        -lpcl_outofcore\
+        -lpcl_people\
+        -lpcl_recognition\
+        -lpcl_registration\
+        -lpcl_sample_consensus\
+        -lpcl_search\
+        -lpcl_segmentation\
+        -lpcl_surface\
+        -lpcl_tracking\
+        -lpcl_visualization

+ 144 - 0
src1/perception/perception_lidar_centerpoint/postprocess/circle_nms_kernel.cu

@@ -0,0 +1,144 @@
+// Copyright 2022 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Modified from
+// https://github.com/open-mmlab/OpenPCDet/blob/master/pcdet/ops/iou3d_nms/src/iou3d_nms_kernel.cu
+
+/*
+3D IoU Calculation and Rotated NMS(modified from 2D NMS written by others)
+Written by Shaoshuai Shi
+All Rights Reserved 2019-2020.
+*/
+
+#include "lidar_centerpoint/postprocess/circle_nms_kernel.hpp"
+
+#include <lidar_centerpoint/cuda_utils.hpp>
+#include <lidar_centerpoint/utils.hpp>
+
+#include <thrust/host_vector.h>
+
+namespace
+{
+const std::size_t THREADS_PER_BLOCK_NMS = 16;
+}  // namespace
+
+namespace centerpoint
+{
+
+__device__ inline float dist2dPow(const Box3D * a, const Box3D * b)
+{
+  return powf(a->x - b->x, 2) + powf(a->y - b->y, 2);
+}
+
+__global__ void circleNMS_Kernel(
+  const Box3D * boxes, const std::size_t num_boxes3d, const std::size_t col_blocks,
+  const float dist2d_pow_threshold, std::uint64_t * mask)
+{
+  // params: boxes (N,)
+  // params: mask (N, divup(N/THREADS_PER_BLOCK_NMS))
+
+  const auto row_start = blockIdx.y;
+  const auto col_start = blockIdx.x;
+
+  if (row_start > col_start) return;
+
+  const std::size_t row_size =
+    fminf(num_boxes3d - row_start * THREADS_PER_BLOCK_NMS, THREADS_PER_BLOCK_NMS);
+  const std::size_t col_size =
+    fminf(num_boxes3d - col_start * THREADS_PER_BLOCK_NMS, THREADS_PER_BLOCK_NMS);
+
+  __shared__ Box3D block_boxes[THREADS_PER_BLOCK_NMS];
+
+  if (threadIdx.x < col_size) {
+    block_boxes[threadIdx.x] = boxes[THREADS_PER_BLOCK_NMS * col_start + threadIdx.x];
+  }
+  __syncthreads();
+
+  if (threadIdx.x < row_size) {
+    const std::size_t cur_box_idx = THREADS_PER_BLOCK_NMS * row_start + threadIdx.x;
+    const Box3D * cur_box = boxes + cur_box_idx;
+
+    std::uint64_t t = 0;
+    std::size_t start = 0;
+    if (row_start == col_start) {
+      start = threadIdx.x + 1;
+    }
+    for (std::size_t i = start; i < col_size; i++) {
+      if (dist2dPow(cur_box, block_boxes + i) < dist2d_pow_threshold) {
+        t |= 1ULL << i;
+      }
+    }
+    mask[cur_box_idx * col_blocks + col_start] = t;
+  }
+}
+
+cudaError_t circleNMS_launch(
+  const thrust::device_vector<Box3D> & boxes3d, const std::size_t num_boxes3d,
+  std::size_t col_blocks, const float distance_threshold,
+  thrust::device_vector<std::uint64_t> & mask, cudaStream_t stream)
+{
+  const float dist2d_pow_thres = powf(distance_threshold, 2);
+
+  dim3 blocks(col_blocks, col_blocks);
+  dim3 threads(THREADS_PER_BLOCK_NMS);
+  circleNMS_Kernel<<<blocks, threads, 0, stream>>>(
+    thrust::raw_pointer_cast(boxes3d.data()), num_boxes3d, col_blocks, dist2d_pow_thres,
+    thrust::raw_pointer_cast(mask.data()));
+
+  return cudaGetLastError();
+}
+
+std::size_t circleNMS(
+  thrust::device_vector<Box3D> & boxes3d, const float distance_threshold,
+  thrust::device_vector<bool> & keep_mask, cudaStream_t stream)
+{
+  const auto num_boxes3d = boxes3d.size();
+  const auto col_blocks = divup(num_boxes3d, THREADS_PER_BLOCK_NMS);
+  thrust::device_vector<std::uint64_t> mask_d(num_boxes3d * col_blocks);
+
+  CHECK_CUDA_ERROR(
+    circleNMS_launch(boxes3d, num_boxes3d, col_blocks, distance_threshold, mask_d, stream));
+
+  // memcpy device to host
+  thrust::host_vector<std::uint64_t> mask_h(mask_d.size());
+  thrust::copy(mask_d.begin(), mask_d.end(), mask_h.begin());
+  CHECK_CUDA_ERROR(cudaStreamSynchronize(stream));
+
+  // generate keep_mask
+  std::vector<std::uint64_t> remv_h(col_blocks);
+  thrust::host_vector<bool> keep_mask_h(keep_mask.size());
+  std::size_t num_to_keep = 0;
+  for (std::size_t i = 0; i < num_boxes3d; i++) {
+    auto nblock = i / THREADS_PER_BLOCK_NMS;
+    auto inblock = i % THREADS_PER_BLOCK_NMS;
+
+    if (!(remv_h[nblock] & (1ULL << inblock))) {
+      keep_mask_h[i] = true;
+      num_to_keep++;
+      std::uint64_t * p = &mask_h[0] + i * col_blocks;
+      for (std::size_t j = nblock; j < col_blocks; j++) {
+        remv_h[j] |= p[j];
+      }
+    } else {
+      keep_mask_h[i] = false;
+    }
+  }
+
+  // memcpy host to device
+  keep_mask = keep_mask_h;
+
+  return num_to_keep;
+}
+
+}  // namespace centerpoint

+ 160 - 0
src1/perception/perception_lidar_centerpoint/postprocess/postprocess_kernel.cu

@@ -0,0 +1,160 @@
+// Copyright 2022 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "lidar_centerpoint/postprocess/circle_nms_kernel.hpp"
+
+#include <lidar_centerpoint/postprocess/postprocess_kernel.hpp>
+
+#include <thrust/count.h>
+#include <thrust/sort.h>
+
+namespace
+{
+const std::size_t THREADS_PER_BLOCK = 32;
+}  // namespace
+
+namespace centerpoint
+{
+
+struct is_score_greater
+{
+  is_score_greater(float t) : t_(t) {}
+
+  __device__ bool operator()(const Box3D & b) { return b.score > t_; }
+
+private:
+  float t_{0.0};
+};
+
+struct is_kept
+{
+  __device__ bool operator()(const bool keep) { return keep; }
+};
+
+struct score_greater
+{
+  __device__ bool operator()(const Box3D & lb, const Box3D & rb) { return lb.score > rb.score; }
+};
+
+__device__ inline float sigmoid(float x) { return 1.0f / expf(-x); }
+
+__global__ void generateBoxes3D_kernel(
+  const float * out_heatmap, const float * out_offset, const float * out_z, const float * out_dim,
+  const float * out_rot, const float * out_vel, const float voxel_size_x, const float voxel_size_y,
+  const float range_min_x, const float range_min_y, const std::size_t down_grid_size_x,
+  const std::size_t down_grid_size_y, const std::size_t downsample_factor, const int class_size,
+  Box3D * det_boxes3d)
+{
+  // generate boxes3d from the outputs of the network.
+  // shape of out_*: (N, DOWN_GRID_SIZE_Y, DOWN_GRID_SIZE_X)
+  // heatmap: N = class_size, offset: N = 2, z: N = 1, dim: N = 3, rot: N = 2, vel: N = 2
+  const auto yi = blockIdx.x * THREADS_PER_BLOCK + threadIdx.x;
+  const auto xi = blockIdx.y * THREADS_PER_BLOCK + threadIdx.y;
+  const auto idx = down_grid_size_x * yi + xi;
+  const auto down_grid_size = down_grid_size_y * down_grid_size_x;
+
+  if (yi >= down_grid_size_y || xi >= down_grid_size_x) {
+    return;
+  }
+
+  int label = -1;
+  float max_score = -1;
+  for (int ci = 0; ci < class_size; ci++) {
+    float score = sigmoid(out_heatmap[down_grid_size * ci + idx]);
+    if (score > max_score) {
+      label = ci;
+      max_score = score;
+    }
+  }
+
+  const float offset_x = out_offset[down_grid_size * 0 + idx];
+  const float offset_y = out_offset[down_grid_size * 1 + idx];
+  const float x = voxel_size_x * downsample_factor * (xi + offset_x) + range_min_x;
+  const float y = voxel_size_y * downsample_factor * (yi + offset_y) + range_min_y;
+  const float z = out_z[idx];
+  const float w = out_dim[down_grid_size * 0 + idx];
+  const float l = out_dim[down_grid_size * 1 + idx];
+  const float h = out_dim[down_grid_size * 2 + idx];
+  const float yaw_sin = out_rot[down_grid_size * 0 + idx];
+  const float yaw_cos = out_rot[down_grid_size * 1 + idx];
+  const float vel_x = out_vel[down_grid_size * 0 + idx];
+  const float vel_y = out_vel[down_grid_size * 1 + idx];
+
+  det_boxes3d[idx].label = label;
+  det_boxes3d[idx].score = max_score;
+  det_boxes3d[idx].x = x;
+  det_boxes3d[idx].y = y;
+  det_boxes3d[idx].z = z;
+  det_boxes3d[idx].length = expf(l);
+  det_boxes3d[idx].width = expf(w);
+  det_boxes3d[idx].height = expf(h);
+  det_boxes3d[idx].yaw = atan2f(yaw_sin, yaw_cos);
+  det_boxes3d[idx].vel_x = vel_x;
+  det_boxes3d[idx].vel_y = vel_y;
+}
+
+PostProcessCUDA::PostProcessCUDA(const CenterPointConfig & config) : config_(config)
+{
+  const auto num_raw_boxes3d = config.down_grid_size_y_ * config.down_grid_size_x_;
+  boxes3d_d_ = thrust::device_vector<Box3D>(num_raw_boxes3d);
+}
+
+cudaError_t PostProcessCUDA::generateDetectedBoxes3D_launch(
+  const float * out_heatmap, const float * out_offset, const float * out_z, const float * out_dim,
+  const float * out_rot, const float * out_vel, std::vector<Box3D> & det_boxes3d,
+  cudaStream_t stream)
+{
+  dim3 blocks(
+    divup(config_.down_grid_size_y_, THREADS_PER_BLOCK),
+    divup(config_.down_grid_size_x_, THREADS_PER_BLOCK));
+  dim3 threads(THREADS_PER_BLOCK, THREADS_PER_BLOCK);
+  generateBoxes3D_kernel<<<blocks, threads, 0, stream>>>(
+    out_heatmap, out_offset, out_z, out_dim, out_rot, out_vel, config_.voxel_size_x_,
+    config_.voxel_size_y_, config_.range_min_x_, config_.range_min_y_, config_.down_grid_size_x_,
+    config_.down_grid_size_y_, config_.downsample_factor_, config_.class_size_,
+    thrust::raw_pointer_cast(boxes3d_d_.data()));
+
+  // suppress by socre
+  const auto num_det_boxes3d = thrust::count_if(
+    thrust::device, boxes3d_d_.begin(), boxes3d_d_.end(),
+    is_score_greater(config_.score_threshold_));
+  if (num_det_boxes3d == 0) {
+    return cudaGetLastError();
+  }
+  thrust::device_vector<Box3D> det_boxes3d_d(num_det_boxes3d);
+  thrust::copy_if(
+    thrust::device, boxes3d_d_.begin(), boxes3d_d_.end(), det_boxes3d_d.begin(),
+    is_score_greater(config_.score_threshold_));
+
+  // sort by score
+  thrust::sort(det_boxes3d_d.begin(), det_boxes3d_d.end(), score_greater());
+
+  // supress by NMS
+  thrust::device_vector<bool> final_keep_mask_d(num_det_boxes3d);
+  const auto num_final_det_boxes3d =
+    circleNMS(det_boxes3d_d, config_.circle_nms_dist_threshold_, final_keep_mask_d, stream);
+
+  thrust::device_vector<Box3D> final_det_boxes3d_d(num_final_det_boxes3d);
+  thrust::copy_if(
+    thrust::device, det_boxes3d_d.begin(), det_boxes3d_d.end(), final_keep_mask_d.begin(),
+    final_det_boxes3d_d.begin(), is_kept());
+
+  // memcpy device to host
+  det_boxes3d.resize(num_final_det_boxes3d);
+  thrust::copy(final_det_boxes3d_d.begin(), final_det_boxes3d_d.end(), det_boxes3d.begin());
+
+  return cudaGetLastError();
+}
+
+}  // namespace centerpoint

+ 97 - 0
src1/perception/perception_lidar_centerpoint/preprocess/pointcloud_densification.cpp

@@ -0,0 +1,97 @@
+// Copyright 2021 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "lidar_centerpoint/preprocess/pointcloud_densification.hpp"
+
+#include <pcl_ros/transforms.hpp>
+
+#include <boost/optional.hpp>
+
+#include <pcl_conversions/pcl_conversions.h>
+#ifdef ROS_DISTRO_GALACTIC
+#include <tf2_eigen/tf2_eigen.h>
+#else
+#include <tf2_eigen/tf2_eigen.hpp>
+#endif
+
+#include <string>
+#include <utility>
+
+namespace
+{
+boost::optional<geometry_msgs::msg::Transform> getTransform(
+  const tf2_ros::Buffer & tf_buffer, const std::string & target_frame_id,
+  const std::string & source_frame_id, const rclcpp::Time & time)
+{
+  try {
+    geometry_msgs::msg::TransformStamped transform_stamped;
+    transform_stamped = tf_buffer.lookupTransform(
+      target_frame_id, source_frame_id, time, rclcpp::Duration::from_seconds(0.5));
+    return transform_stamped.transform;
+  } catch (tf2::TransformException & ex) {
+    RCLCPP_WARN_STREAM(rclcpp::get_logger("lidar_centerpoint"), ex.what());
+    return boost::none;
+  }
+}
+
+Eigen::Affine3f transformToEigen(const geometry_msgs::msg::Transform & t)
+{
+  Eigen::Affine3f a;
+  a.matrix() = tf2::transformToEigen(t).matrix().cast<float>();
+  return a;
+}
+
+}  // namespace
+
+namespace centerpoint
+{
+PointCloudDensification::PointCloudDensification(const DensificationParam & param) : param_(param)
+{
+}
+
+bool PointCloudDensification::enqueuePointCloud(
+  const sensor_msgs::msg::PointCloud2 & pointcloud_msg, const tf2_ros::Buffer & tf_buffer)
+{
+  const auto header = pointcloud_msg.header;
+
+  auto transform_world2current =
+    getTransform(tf_buffer, header.frame_id, param_.world_frame_id(), header.stamp);
+  if (!transform_world2current) {
+    return false;
+  }
+  auto affine_world2current = transformToEigen(transform_world2current.get());
+
+  enqueue(pointcloud_msg, affine_world2current);
+  dequeue();
+
+  return true;
+}
+
+void PointCloudDensification::enqueue(
+  const sensor_msgs::msg::PointCloud2 & msg, const Eigen::Affine3f & affine_world2current)
+{
+  affine_world2current_ = affine_world2current;
+  current_timestamp_ = rclcpp::Time(msg.header.stamp).seconds();
+  PointCloudWithTransform pointcloud = {msg, affine_world2current.inverse()};
+  pointcloud_cache_.push_front(pointcloud);
+}
+
+void PointCloudDensification::dequeue()
+{
+  if (pointcloud_cache_.size() > param_.pointcloud_cache_size()) {
+    pointcloud_cache_.pop_back();
+  }
+}
+
+}  // namespace centerpoint

+ 159 - 0
src1/perception/perception_lidar_centerpoint/preprocess/preprocess_kernel.cu

@@ -0,0 +1,159 @@
+// Copyright 2022 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "lidar_centerpoint/preprocess/preprocess_kernel.hpp"
+
+#include <lidar_centerpoint/utils.hpp>
+
+namespace
+{
+const std::size_t MAX_POINT_IN_VOXEL_SIZE = 32;  // the same as max_point_in_voxel_size_ in config
+const std::size_t WARPS_PER_BLOCK = 4;
+const std::size_t ENCODER_IN_FEATURE_SIZE = 9;  // the same as encoder_in_feature_size_ in config
+}  // namespace
+
+namespace centerpoint
+{
+__global__ void generateFeatures_kernel(
+  const float * voxel_features, const float * voxel_num_points, const int * coords,
+  const std::size_t num_voxels, const float voxel_x, const float voxel_y, const float voxel_z,
+  const float range_min_x, const float range_min_y, const float range_min_z, float * features)
+{
+  // voxel_features (float): (max_voxel_size, max_point_in_voxel_size, point_feature_size)
+  // voxel_num_points (int): (max_voxel_size)
+  // coords (int): (max_voxel_size, point_dim_size)
+  int pillar_idx = blockIdx.x * WARPS_PER_BLOCK + threadIdx.x / MAX_POINT_IN_VOXEL_SIZE;
+  int point_idx = threadIdx.x % MAX_POINT_IN_VOXEL_SIZE;
+  int pillar_idx_inBlock = threadIdx.x / MAX_POINT_IN_VOXEL_SIZE;  // max_point_in_voxel_size
+
+  if (pillar_idx >= num_voxels) return;
+
+  // load src
+  __shared__ float4 pillarSM[WARPS_PER_BLOCK][MAX_POINT_IN_VOXEL_SIZE];
+  __shared__ float3 pillarSumSM[WARPS_PER_BLOCK];
+  __shared__ int3 cordsSM[WARPS_PER_BLOCK];
+  __shared__ int pointsNumSM[WARPS_PER_BLOCK];
+  __shared__ float pillarOutSM[WARPS_PER_BLOCK][MAX_POINT_IN_VOXEL_SIZE][ENCODER_IN_FEATURE_SIZE];
+
+  if (threadIdx.x < WARPS_PER_BLOCK) {
+    pointsNumSM[threadIdx.x] = voxel_num_points[blockIdx.x * WARPS_PER_BLOCK + threadIdx.x];
+    cordsSM[threadIdx.x] = ((int3 *)coords)[blockIdx.x * WARPS_PER_BLOCK + threadIdx.x];
+    pillarSumSM[threadIdx.x] = {0, 0, 0};
+  }
+
+  pillarSM[pillar_idx_inBlock][point_idx] =
+    ((float4 *)voxel_features)[pillar_idx * MAX_POINT_IN_VOXEL_SIZE + point_idx];
+  __syncthreads();
+
+  // calculate sm in a pillar
+  if (point_idx < pointsNumSM[pillar_idx_inBlock]) {
+    atomicAdd(&(pillarSumSM[pillar_idx_inBlock].x), pillarSM[pillar_idx_inBlock][point_idx].x);
+    atomicAdd(&(pillarSumSM[pillar_idx_inBlock].y), pillarSM[pillar_idx_inBlock][point_idx].y);
+    atomicAdd(&(pillarSumSM[pillar_idx_inBlock].z), pillarSM[pillar_idx_inBlock][point_idx].z);
+  }
+  __syncthreads();
+
+  // feature-mean
+  float3 mean;
+  float validPoints = pointsNumSM[pillar_idx_inBlock];
+  mean.x = pillarSumSM[pillar_idx_inBlock].x / validPoints;
+  mean.y = pillarSumSM[pillar_idx_inBlock].y / validPoints;
+  mean.z = pillarSumSM[pillar_idx_inBlock].z / validPoints;
+
+  mean.x = pillarSM[pillar_idx_inBlock][point_idx].x - mean.x;
+  mean.y = pillarSM[pillar_idx_inBlock][point_idx].y - mean.y;
+  mean.z = pillarSM[pillar_idx_inBlock][point_idx].z - mean.z;
+
+  // calculate offset
+  float x_offset = voxel_x / 2 + cordsSM[pillar_idx_inBlock].z * voxel_x + range_min_x;
+  float y_offset = voxel_y / 2 + cordsSM[pillar_idx_inBlock].y * voxel_y + range_min_y;
+  float z_offset = voxel_z / 2 + cordsSM[pillar_idx_inBlock].x * voxel_z + range_min_z;
+
+  // feature-offset
+  float3 center;
+  center.x = pillarSM[pillar_idx_inBlock][point_idx].x - x_offset;
+  center.y = pillarSM[pillar_idx_inBlock][point_idx].y - y_offset;
+  center.z = pillarSM[pillar_idx_inBlock][point_idx].z - z_offset;
+
+  // store output
+  if (point_idx < pointsNumSM[pillar_idx_inBlock]) {
+    pillarOutSM[pillar_idx_inBlock][point_idx][0] = pillarSM[pillar_idx_inBlock][point_idx].x;
+    pillarOutSM[pillar_idx_inBlock][point_idx][1] = pillarSM[pillar_idx_inBlock][point_idx].y;
+    pillarOutSM[pillar_idx_inBlock][point_idx][2] = pillarSM[pillar_idx_inBlock][point_idx].z;
+    pillarOutSM[pillar_idx_inBlock][point_idx][3] = pillarSM[pillar_idx_inBlock][point_idx].w;
+
+    pillarOutSM[pillar_idx_inBlock][point_idx][4] = mean.x;
+    pillarOutSM[pillar_idx_inBlock][point_idx][5] = mean.y;
+    pillarOutSM[pillar_idx_inBlock][point_idx][6] = mean.z;
+
+    pillarOutSM[pillar_idx_inBlock][point_idx][7] = center.x;
+    pillarOutSM[pillar_idx_inBlock][point_idx][8] = center.y;
+
+  } else {
+    pillarOutSM[pillar_idx_inBlock][point_idx][0] = 0;
+    pillarOutSM[pillar_idx_inBlock][point_idx][1] = 0;
+    pillarOutSM[pillar_idx_inBlock][point_idx][2] = 0;
+    pillarOutSM[pillar_idx_inBlock][point_idx][3] = 0;
+
+    pillarOutSM[pillar_idx_inBlock][point_idx][4] = 0;
+    pillarOutSM[pillar_idx_inBlock][point_idx][5] = 0;
+    pillarOutSM[pillar_idx_inBlock][point_idx][6] = 0;
+
+    pillarOutSM[pillar_idx_inBlock][point_idx][7] = 0;
+    pillarOutSM[pillar_idx_inBlock][point_idx][8] = 0;
+  }
+
+  __syncthreads();
+
+  for (int i = 0; i < ENCODER_IN_FEATURE_SIZE; i++) {
+    int outputSMId = pillar_idx_inBlock * MAX_POINT_IN_VOXEL_SIZE * ENCODER_IN_FEATURE_SIZE +
+                     i * MAX_POINT_IN_VOXEL_SIZE + point_idx;
+    int outputId = pillar_idx * MAX_POINT_IN_VOXEL_SIZE * ENCODER_IN_FEATURE_SIZE +
+                   i * MAX_POINT_IN_VOXEL_SIZE + point_idx;
+    features[outputId] = ((float *)pillarOutSM)[outputSMId];
+  }
+}
+
+cudaError_t generateFeatures_launch(
+  const float * voxel_features, const float * voxel_num_points, const int * coords,
+  const std::size_t num_voxels, const std::size_t max_voxel_size, const float voxel_size_x,
+  const float voxel_size_y, const float voxel_size_z, const float range_min_x,
+  const float range_min_y, const float range_min_z, float * features, cudaStream_t stream)
+{
+  dim3 blocks(divup(max_voxel_size, WARPS_PER_BLOCK));
+  dim3 threads(WARPS_PER_BLOCK * MAX_POINT_IN_VOXEL_SIZE);
+  generateFeatures_kernel<<<blocks, threads, 0, stream>>>(
+    voxel_features, voxel_num_points, coords, num_voxels, voxel_size_x, voxel_size_y, voxel_size_z,
+    range_min_x, range_min_y, range_min_z, features);
+
+  return cudaGetLastError();
+}
+
+}  // namespace centerpoint

+ 130 - 0
src1/perception/perception_lidar_centerpoint/preprocess/voxel_generator.cpp

@@ -0,0 +1,130 @@
+// Copyright 2021 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "lidar_centerpoint/preprocess/voxel_generator.hpp"
+
+#include <sensor_msgs/point_cloud2_iterator.hpp>
+
+namespace centerpoint
+{
+VoxelGeneratorTemplate::VoxelGeneratorTemplate(
+  const DensificationParam & param, const CenterPointConfig & config)
+: config_(config)
+{
+  pd_ptr_ = std::make_unique<PointCloudDensification>(param);
+  range_[0] = config.range_min_x_;
+  range_[1] = config.range_min_y_;
+  range_[2] = config.range_min_z_;
+  range_[3] = config.range_max_x_;
+  range_[4] = config.range_max_y_;
+  range_[5] = config.range_max_z_;
+  grid_size_[0] = config.grid_size_x_;
+  grid_size_[1] = config.grid_size_y_;
+  grid_size_[2] = config.grid_size_z_;
+  recip_voxel_size_[0] = 1 / config.voxel_size_x_;
+  recip_voxel_size_[1] = 1 / config.voxel_size_y_;
+  recip_voxel_size_[2] = 1 / config.voxel_size_z_;
+}
+
+bool VoxelGeneratorTemplate::enqueuePointCloud(
+  const sensor_msgs::msg::PointCloud2 & input_pointcloud_msg, const tf2_ros::Buffer & tf_buffer)
+{
+  return pd_ptr_->enqueuePointCloud(input_pointcloud_msg, tf_buffer);
+}
+
+std::size_t VoxelGenerator::pointsToVoxels(
+  std::vector<float> & voxels, std::vector<int> & coordinates,
+  std::vector<float> & num_points_per_voxel)
+{
+  // voxels (float): (max_voxel_size * max_point_in_voxel_size * point_feature_size)
+  // coordinates (int): (max_voxel_size * point_dim_size)
+  // num_points_per_voxel (float): (max_voxel_size)
+
+  const std::size_t grid_size = config_.grid_size_z_ * config_.grid_size_y_ * config_.grid_size_x_;
+  std::vector<int> coord_to_voxel_idx(grid_size, -1);
+
+  std::size_t voxel_cnt = 0;  // @return
+  std::vector<float> point;
+  point.resize(config_.point_feature_size_);
+  std::vector<float> coord_zyx;
+  coord_zyx.resize(config_.point_dim_size_);
+  bool out_of_range;
+  std::size_t point_cnt;
+  int c, coord_idx, voxel_idx;
+  Eigen::Vector3f point_current, point_past;
+
+  for (auto pc_cache_iter = pd_ptr_->getPointCloudCacheIter(); !pd_ptr_->isCacheEnd(pc_cache_iter);
+       pc_cache_iter++) {
+    auto pc_msg = pc_cache_iter->pointcloud_msg;
+    auto affine_past2current =
+      pd_ptr_->getAffineWorldToCurrent() * pc_cache_iter->affine_past2world;
+    float timelag = static_cast<float>(
+      pd_ptr_->getCurrentTimestamp() - rclcpp::Time(pc_msg.header.stamp).seconds());
+
+    for (sensor_msgs::PointCloud2ConstIterator<float> x_iter(pc_msg, "x"), y_iter(pc_msg, "y"),
+         z_iter(pc_msg, "z");
+         x_iter != x_iter.end(); ++x_iter, ++y_iter, ++z_iter) {
+      point_past << *x_iter, *y_iter, *z_iter;
+      point_current = affine_past2current * point_past;
+
+      point[0] = point_current.x();
+      point[1] = point_current.y();
+      point[2] = point_current.z();
+      point[3] = timelag;
+
+      out_of_range = false;
+      for (std::size_t di = 0; di < config_.point_dim_size_; di++) {
+        c = static_cast<int>((point[di] - range_[di]) * recip_voxel_size_[di]);
+        if (c < 0 || c >= grid_size_[di]) {
+          out_of_range = true;
+          break;
+        }
+        coord_zyx[config_.point_dim_size_ - di - 1] = c;
+      }
+      if (out_of_range) {
+        continue;
+      }
+
+      coord_idx = coord_zyx[0] * config_.grid_size_y_ * config_.grid_size_x_ +
+                  coord_zyx[1] * config_.grid_size_x_ + coord_zyx[2];
+      voxel_idx = coord_to_voxel_idx[coord_idx];
+      if (voxel_idx == -1) {
+        voxel_idx = voxel_cnt;
+        if (voxel_cnt >= config_.max_voxel_size_) {
+          continue;
+        }
+
+        voxel_cnt++;
+        coord_to_voxel_idx[coord_idx] = voxel_idx;
+        for (std::size_t di = 0; di < config_.point_dim_size_; di++) {
+          coordinates[voxel_idx * config_.point_dim_size_ + di] = coord_zyx[di];
+        }
+      }
+
+      point_cnt = num_points_per_voxel[voxel_idx];
+      if (point_cnt < config_.max_point_in_voxel_size_) {
+        for (std::size_t fi = 0; fi < config_.point_feature_size_; fi++) {
+          voxels
+            [voxel_idx * config_.max_point_in_voxel_size_ * config_.point_feature_size_ +
+             point_cnt * config_.point_feature_size_ + fi] = point[fi];
+        }
+        num_points_per_voxel[voxel_idx]++;
+      }
+    }
+  }
+
+  return voxel_cnt;
+}
+
+}  // namespace centerpoint