2 years ago · e2ead73810
--- a/src/driver/vtd_pilot_if/vtd_pilot.cpp
+++ b/src/driver/vtd_pilot_if/vtd_pilot.cpp
@@ -144,6 +144,7 @@ void vtd_pilot::ConvertToObjectArray(std::vector<iv::vtd::rdbdata> &xvectorrdbda
 
				                     dimy = xobj.base.geo.dimY;
			
 
				                     dimz = xobj.base.geo.dimZ;
			
 
				   //                  std::cout<<" dimx: "<<dimx<<" dimy: "<<dimy<<std::endl;
			
 
				+                    std::cout<<"relx : "<<relx<<" rely: "<<rely<<std::endl;
			
 
				                     double xoff,yoff;
			
 
				                     std::cout<<" obj hedad: "<<xobj.base.pos.h<<" our head: "<<mfHeading<<" rel heading: "<<fobjheading<<std::endl;
			
 
				                     for(xoff = (dimx*(-0.5));xoff<=(dimx*0.5);xoff=xoff+0.2)
			
@@ -151,12 +152,12 @@ void vtd_pilot::ConvertToObjectArray(std::vector<iv::vtd::rdbdata> &xvectorrdbda
 
				                         for(yoff = (dimy*(-0.5));yoff<=(dimy*0.5);yoff = yoff+0.2)
			
 
				                         {
			
 
				                             double reloffx,reloffy;
			
 
				-                            if(fabs(fobjheading) < 0.1)
			
 
				-                            {
			
 
				-                                for(int m=0;m<1000;m++)
			
 
				-                                std::cout<<" hello;"<<std::endl;
			
 
				+//                            if(fabs(fobjheading) < 0.1)
			
 
				+//                            {
			
 
				+//                                for(int m=0;m<1000;m++)
			
 
				+//                                std::cout<<" hello;"<<std::endl;
			
 
				 
			
 
				-                            }
			
 
				+//                            }
			
 
				     //                        fobjheading = 0;
			
 
				     //                        std::cout<<"xoff: "<<xoff<<" yoff: "<<yoff<<std::endl;
			
 
				                             reloffx = xoff*cos(fobjheading) - yoff*sin(fobjheading) + relx;
			
--- a/src1/perception/perception_lidar_centerpoint/.gitignore
+++ b/src1/perception/perception_lidar_centerpoint/.gitignore
@@ -0,0 +1,73 @@
 
				+# This file is used to ignore files which are generated
			
 
				+# ----------------------------------------------------------------------------
			
 
				+
			
 
				+*~
			
 
				+*.autosave
			
 
				+*.a
			
 
				+*.core
			
 
				+*.moc
			
 
				+*.o
			
 
				+*.obj
			
 
				+*.orig
			
 
				+*.rej
			
 
				+*.so
			
 
				+*.so.*
			
 
				+*_pch.h.cpp
			
 
				+*_resource.rc
			
 
				+*.qm
			
 
				+.#*
			
 
				+*.*#
			
 
				+core
			
 
				+!core/
			
 
				+tags
			
 
				+.DS_Store
			
 
				+.directory
			
 
				+*.debug
			
 
				+Makefile*
			
 
				+*.prl
			
 
				+*.app
			
 
				+moc_*.cpp
			
 
				+ui_*.h
			
 
				+qrc_*.cpp
			
 
				+Thumbs.db
			
 
				+*.res
			
 
				+*.rc
			
 
				+/.qmake.cache
			
 
				+/.qmake.stash
			
 
				+
			
 
				+# qtcreator generated files
			
 
				+*.pro.user*
			
 
				+
			
 
				+# xemacs temporary files
			
 
				+*.flc
			
 
				+
			
 
				+# Vim temporary files
			
 
				+.*.swp
			
 
				+
			
 
				+# Visual Studio generated files
			
 
				+*.ib_pdb_index
			
 
				+*.idb
			
 
				+*.ilk
			
 
				+*.pdb
			
 
				+*.sln
			
 
				+*.suo
			
 
				+*.vcproj
			
 
				+*vcproj.*.*.user
			
 
				+*.ncb
			
 
				+*.sdf
			
 
				+*.opensdf
			
 
				+*.vcxproj
			
 
				+*vcxproj.*
			
 
				+
			
 
				+# MinGW generated files
			
 
				+*.Debug
			
 
				+*.Release
			
 
				+
			
 
				+# Python byte code
			
 
				+*.pyc
			
 
				+
			
 
				+# Binaries
			
 
				+# --------
			
 
				+*.dll
			
 
				+*.exe
			
 
				+
			
--- a/src1/perception/perception_lidar_centerpoint/Readme.md
+++ b/src1/perception/perception_lidar_centerpoint/Readme.md
@@ -0,0 +1 @@
 
				+from autowrare, but not compile ok in agx xvier.
			
--- a/src1/perception/perception_lidar_centerpoint/centerpoint_trt.cpp
+++ b/src1/perception/perception_lidar_centerpoint/centerpoint_trt.cpp
@@ -0,0 +1,196 @@
 
				+// Copyright 2021 TIER IV, Inc.
			
 
				+//
			
 
				+// Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+// you may not use this file except in compliance with the License.
			
 
				+// You may obtain a copy of the License at
			
 
				+//
			
 
				+//     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+//
			
 
				+// Unless required by applicable law or agreed to in writing, software
			
 
				+// distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+// See the License for the specific language governing permissions and
			
 
				+// limitations under the License.
			
 
				+
			
 
				+#include "lidar_centerpoint/centerpoint_trt.hpp"
			
 
				+
			
 
				+#include <lidar_centerpoint/centerpoint_config.hpp>
			
 
				+#include <lidar_centerpoint/network/scatter_kernel.hpp>
			
 
				+#include <lidar_centerpoint/preprocess/preprocess_kernel.hpp>
			
 
				+//#include <tier4_autoware_utils/math/constants.hpp>
			
 
				+
			
 
				+#include <iostream>
			
 
				+#include <memory>
			
 
				+#include <string>
			
 
				+#include <vector>
			
 
				+
			
 
				+namespace centerpoint
			
 
				+{
			
 
				+CenterPointTRT::CenterPointTRT(
			
 
				+  const NetworkParam & encoder_param, const NetworkParam & head_param,
			
 
				+  const DensificationParam & densification_param, const CenterPointConfig & config)
			
 
				+: config_(config)
			
 
				+{
			
 
				+  vg_ptr_ = std::make_unique<VoxelGenerator>(densification_param, config_);
			
 
				+  post_proc_ptr_ = std::make_unique<PostProcessCUDA>(config_);
			
 
				+
			
 
				+  // encoder
			
 
				+  encoder_trt_ptr_ = std::make_unique<VoxelEncoderTRT>(config_, verbose_);
			
 
				+  encoder_trt_ptr_->init(
			
 
				+    encoder_param.onnx_path(), encoder_param.engine_path(), encoder_param.trt_precision());
			
 
				+  encoder_trt_ptr_->context_->setBindingDimensions(
			
 
				+    0,
			
 
				+    nvinfer1::Dims3(
			
 
				+      config_.max_voxel_size_, config_.max_point_in_voxel_size_, config_.encoder_in_feature_size_));
			
 
				+
			
 
				+  // head
			
 
				+  std::vector<std::size_t> out_channel_sizes = {
			
 
				+    config_.class_size_,        config_.head_out_offset_size_, config_.head_out_z_size_,
			
 
				+    config_.head_out_dim_size_, config_.head_out_rot_size_,    config_.head_out_vel_size_};
			
 
				+  head_trt_ptr_ = std::make_unique<HeadTRT>(out_channel_sizes, config_, verbose_);
			
 
				+  head_trt_ptr_->init(head_param.onnx_path(), head_param.engine_path(), head_param.trt_precision());
			
 
				+  head_trt_ptr_->context_->setBindingDimensions(
			
 
				+    0, nvinfer1::Dims4(
			
 
				+         config_.batch_size_, config_.encoder_out_feature_size_, config_.grid_size_y_,
			
 
				+         config_.grid_size_x_));
			
 
				+
			
 
				+  initPtr();
			
 
				+
			
 
				+  cudaStreamCreate(&stream_);
			
 
				+}
			
 
				+
			
 
				+CenterPointTRT::~CenterPointTRT()
			
 
				+{
			
 
				+  if (stream_) {
			
 
				+    cudaStreamSynchronize(stream_);
			
 
				+    cudaStreamDestroy(stream_);
			
 
				+  }
			
 
				+}
			
 
				+
			
 
				+void CenterPointTRT::initPtr()
			
 
				+{
			
 
				+  const auto voxels_size =
			
 
				+    config_.max_voxel_size_ * config_.max_point_in_voxel_size_ * config_.point_feature_size_;
			
 
				+  const auto coordinates_size = config_.max_voxel_size_ * config_.point_dim_size_;
			
 
				+  encoder_in_feature_size_ =
			
 
				+    config_.max_voxel_size_ * config_.max_point_in_voxel_size_ * config_.encoder_in_feature_size_;
			
 
				+  const auto pillar_features_size = config_.max_voxel_size_ * config_.encoder_out_feature_size_;
			
 
				+  spatial_features_size_ =
			
 
				+    config_.grid_size_x_ * config_.grid_size_y_ * config_.encoder_out_feature_size_;
			
 
				+  const auto grid_xy_size = config_.down_grid_size_x_ * config_.down_grid_size_y_;
			
 
				+
			
 
				+  // host
			
 
				+  voxels_.resize(voxels_size);
			
 
				+  coordinates_.resize(coordinates_size);
			
 
				+  num_points_per_voxel_.resize(config_.max_voxel_size_);
			
 
				+
			
 
				+  // device
			
 
				+  voxels_d_ = cuda::make_unique<float[]>(voxels_size);
			
 
				+  coordinates_d_ = cuda::make_unique<int[]>(coordinates_size);
			
 
				+  num_points_per_voxel_d_ = cuda::make_unique<float[]>(config_.max_voxel_size_);
			
 
				+  encoder_in_features_d_ = cuda::make_unique<float[]>(encoder_in_feature_size_);
			
 
				+  pillar_features_d_ = cuda::make_unique<float[]>(pillar_features_size);
			
 
				+  spatial_features_d_ = cuda::make_unique<float[]>(spatial_features_size_);
			
 
				+  head_out_heatmap_d_ = cuda::make_unique<float[]>(grid_xy_size * config_.class_size_);
			
 
				+  head_out_offset_d_ = cuda::make_unique<float[]>(grid_xy_size * config_.head_out_offset_size_);
			
 
				+  head_out_z_d_ = cuda::make_unique<float[]>(grid_xy_size * config_.head_out_z_size_);
			
 
				+  head_out_dim_d_ = cuda::make_unique<float[]>(grid_xy_size * config_.head_out_dim_size_);
			
 
				+  head_out_rot_d_ = cuda::make_unique<float[]>(grid_xy_size * config_.head_out_rot_size_);
			
 
				+  head_out_vel_d_ = cuda::make_unique<float[]>(grid_xy_size * config_.head_out_vel_size_);
			
 
				+}
			
 
				+
			
 
				+bool CenterPointTRT::detect(
			
 
				+  const sensor_msgs::msg::PointCloud2 & input_pointcloud_msg, const tf2_ros::Buffer & tf_buffer,
			
 
				+  std::vector<Box3D> & det_boxes3d)
			
 
				+{
			
 
				+  std::fill(voxels_.begin(), voxels_.end(), 0);
			
 
				+  std::fill(coordinates_.begin(), coordinates_.end(), -1);
			
 
				+  std::fill(num_points_per_voxel_.begin(), num_points_per_voxel_.end(), 0);
			
 
				+  CHECK_CUDA_ERROR(cudaMemsetAsync(
			
 
				+    encoder_in_features_d_.get(), 0, encoder_in_feature_size_ * sizeof(float), stream_));
			
 
				+  CHECK_CUDA_ERROR(
			
 
				+    cudaMemsetAsync(spatial_features_d_.get(), 0, spatial_features_size_ * sizeof(float), stream_));
			
 
				+
			
 
				+  if (!preprocess(input_pointcloud_msg, tf_buffer)) {
			
 
				+    RCLCPP_WARN_STREAM(
			
 
				+      rclcpp::get_logger("lidar_centerpoint"), "Fail to preprocess and skip to detect.");
			
 
				+    return false;
			
 
				+  }
			
 
				+
			
 
				+  inference();
			
 
				+
			
 
				+  postProcess(det_boxes3d);
			
 
				+
			
 
				+  return true;
			
 
				+}
			
 
				+
			
 
				+bool CenterPointTRT::preprocess(
			
 
				+  const sensor_msgs::msg::PointCloud2 & input_pointcloud_msg, const tf2_ros::Buffer & tf_buffer)
			
 
				+{
			
 
				+  bool is_success = vg_ptr_->enqueuePointCloud(input_pointcloud_msg, tf_buffer);
			
 
				+  if (!is_success) {
			
 
				+    return false;
			
 
				+  }
			
 
				+  num_voxels_ = vg_ptr_->pointsToVoxels(voxels_, coordinates_, num_points_per_voxel_);
			
 
				+  if (num_voxels_ == 0) {
			
 
				+    return false;
			
 
				+  }
			
 
				+
			
 
				+  const auto voxels_size =
			
 
				+    num_voxels_ * config_.max_point_in_voxel_size_ * config_.point_feature_size_;
			
 
				+  const auto coordinates_size = num_voxels_ * config_.point_dim_size_;
			
 
				+  // memcpy from host to device (not copy empty voxels)
			
 
				+  CHECK_CUDA_ERROR(cudaMemcpyAsync(
			
 
				+    voxels_d_.get(), voxels_.data(), voxels_size * sizeof(float), cudaMemcpyHostToDevice));
			
 
				+  CHECK_CUDA_ERROR(cudaMemcpyAsync(
			
 
				+    coordinates_d_.get(), coordinates_.data(), coordinates_size * sizeof(int),
			
 
				+    cudaMemcpyHostToDevice));
			
 
				+  CHECK_CUDA_ERROR(cudaMemcpyAsync(
			
 
				+    num_points_per_voxel_d_.get(), num_points_per_voxel_.data(), num_voxels_ * sizeof(float),
			
 
				+    cudaMemcpyHostToDevice));
			
 
				+  CHECK_CUDA_ERROR(cudaStreamSynchronize(stream_));
			
 
				+
			
 
				+  CHECK_CUDA_ERROR(generateFeatures_launch(
			
 
				+    voxels_d_.get(), num_points_per_voxel_d_.get(), coordinates_d_.get(), num_voxels_,
			
 
				+    config_.max_voxel_size_, config_.voxel_size_x_, config_.voxel_size_y_, config_.voxel_size_z_,
			
 
				+    config_.range_min_x_, config_.range_min_y_, config_.range_min_z_, encoder_in_features_d_.get(),
			
 
				+    stream_));
			
 
				+
			
 
				+  return true;
			
 
				+}
			
 
				+
			
 
				+void CenterPointTRT::inference()
			
 
				+{
			
 
				+  if (!encoder_trt_ptr_->context_ || !head_trt_ptr_->context_) {
			
 
				+    throw std::runtime_error("Failed to create tensorrt context.");
			
 
				+  }
			
 
				+
			
 
				+  // pillar encoder network
			
 
				+  std::vector<void *> encoder_buffers{encoder_in_features_d_.get(), pillar_features_d_.get()};
			
 
				+  encoder_trt_ptr_->context_->enqueueV2(encoder_buffers.data(), stream_, nullptr);
			
 
				+
			
 
				+  // scatter
			
 
				+  CHECK_CUDA_ERROR(scatterFeatures_launch(
			
 
				+    pillar_features_d_.get(), coordinates_d_.get(), num_voxels_, config_.max_voxel_size_,
			
 
				+    config_.encoder_out_feature_size_, config_.grid_size_x_, config_.grid_size_y_,
			
 
				+    spatial_features_d_.get(), stream_));
			
 
				+
			
 
				+  // head network
			
 
				+  std::vector<void *> head_buffers = {spatial_features_d_.get(), head_out_heatmap_d_.get(),
			
 
				+                                      head_out_offset_d_.get(),  head_out_z_d_.get(),
			
 
				+                                      head_out_dim_d_.get(),     head_out_rot_d_.get(),
			
 
				+                                      head_out_vel_d_.get()};
			
 
				+  head_trt_ptr_->context_->enqueueV2(head_buffers.data(), stream_, nullptr);
			
 
				+}
			
 
				+
			
 
				+void CenterPointTRT::postProcess(std::vector<Box3D> & det_boxes3d)
			
 
				+{
			
 
				+  CHECK_CUDA_ERROR(post_proc_ptr_->generateDetectedBoxes3D_launch(
			
 
				+    head_out_heatmap_d_.get(), head_out_offset_d_.get(), head_out_z_d_.get(), head_out_dim_d_.get(),
			
 
				+    head_out_rot_d_.get(), head_out_vel_d_.get(), det_boxes3d, stream_));
			
 
				+  if (det_boxes3d.size() == 0) {
			
 
				+    RCLCPP_WARN_STREAM(rclcpp::get_logger("lidar_centerpoint"), "No detected boxes.");
			
 
				+  }
			
 
				+}
			
 
				+
			
 
				+}  // namespace centerpoint
			
--- a/src1/perception/perception_lidar_centerpoint/include/lidar_centerpoint/centerpoint_config.hpp
+++ b/src1/perception/perception_lidar_centerpoint/include/lidar_centerpoint/centerpoint_config.hpp
@@ -0,0 +1,114 @@
 
				+// Copyright 2021 TIER IV, Inc.
			
 
				+//
			
 
				+// Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+// you may not use this file except in compliance with the License.
			
 
				+// You may obtain a copy of the License at
			
 
				+//
			
 
				+//     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+//
			
 
				+// Unless required by applicable law or agreed to in writing, software
			
 
				+// distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+// See the License for the specific language governing permissions and
			
 
				+// limitations under the License.
			
 
				+
			
 
				+#ifndef LIDAR_CENTERPOINT__CENTERPOINT_CONFIG_HPP_
			
 
				+#define LIDAR_CENTERPOINT__CENTERPOINT_CONFIG_HPP_
			
 
				+
			
 
				+#include <cstddef>
			
 
				+#include <vector>
			
 
				+
			
 
				+namespace centerpoint
			
 
				+{
			
 
				+class CenterPointConfig
			
 
				+{
			
 
				+public:
			
 
				+  explicit CenterPointConfig(
			
 
				+    const std::size_t class_size, const float point_feature_size, const std::size_t max_voxel_size,
			
 
				+    const std::vector<double> & point_cloud_range, const std::vector<double> & voxel_size,
			
 
				+    const std::size_t downsample_factor, const std::size_t encoder_in_feature_size,
			
 
				+    const float score_threshold, const float circle_nms_dist_threshold)
			
 
				+  {
			
 
				+    class_size_ = class_size;
			
 
				+    point_feature_size_ = point_feature_size;
			
 
				+    max_voxel_size_ = max_voxel_size;
			
 
				+    if (point_cloud_range.size() == 6) {
			
 
				+      range_min_x_ = static_cast<float>(point_cloud_range[0]);
			
 
				+      range_min_y_ = static_cast<float>(point_cloud_range[1]);
			
 
				+      range_min_z_ = static_cast<float>(point_cloud_range[2]);
			
 
				+      range_max_x_ = static_cast<float>(point_cloud_range[3]);
			
 
				+      range_max_y_ = static_cast<float>(point_cloud_range[4]);
			
 
				+      range_max_z_ = static_cast<float>(point_cloud_range[5]);
			
 
				+    }
			
 
				+    if (voxel_size.size() == 3) {
			
 
				+      voxel_size_x_ = static_cast<float>(voxel_size[0]);
			
 
				+      voxel_size_y_ = static_cast<float>(voxel_size[1]);
			
 
				+      voxel_size_z_ = static_cast<float>(voxel_size[2]);
			
 
				+    }
			
 
				+    downsample_factor_ = downsample_factor;
			
 
				+    encoder_in_feature_size_ = encoder_in_feature_size;
			
 
				+
			
 
				+    if (score_threshold > 0 && score_threshold < 1) {
			
 
				+      score_threshold_ = score_threshold;
			
 
				+    }
			
 
				+
			
 
				+    if (circle_nms_dist_threshold > 0) {
			
 
				+      circle_nms_dist_threshold_ = circle_nms_dist_threshold;
			
 
				+    }
			
 
				+
			
 
				+    grid_size_x_ = static_cast<std::size_t>((range_max_x_ - range_min_x_) / voxel_size_x_);
			
 
				+    grid_size_y_ = static_cast<std::size_t>((range_max_y_ - range_min_y_) / voxel_size_y_);
			
 
				+    grid_size_z_ = static_cast<std::size_t>((range_max_z_ - range_min_z_) / voxel_size_z_);
			
 
				+    offset_x_ = range_min_x_ + voxel_size_x_ / 2;
			
 
				+    offset_y_ = range_min_y_ + voxel_size_y_ / 2;
			
 
				+    offset_z_ = range_min_z_ + voxel_size_z_ / 2;
			
 
				+    down_grid_size_x_ = grid_size_x_ / downsample_factor_;
			
 
				+    down_grid_size_y_ = grid_size_y_ / downsample_factor_;
			
 
				+  };
			
 
				+
			
 
				+  // input params
			
 
				+  std::size_t class_size_{3};
			
 
				+  const std::size_t point_dim_size_{3};  // x, y and z
			
 
				+  std::size_t point_feature_size_{4};    // x, y, z and timelag
			
 
				+  std::size_t max_point_in_voxel_size_{32};
			
 
				+  std::size_t max_voxel_size_{40000};
			
 
				+  float range_min_x_{-89.6f};
			
 
				+  float range_min_y_{-89.6f};
			
 
				+  float range_min_z_{-3.0f};
			
 
				+  float range_max_x_{89.6f};
			
 
				+  float range_max_y_{89.6f};
			
 
				+  float range_max_z_{5.0f};
			
 
				+  float voxel_size_x_{0.32f};
			
 
				+  float voxel_size_y_{0.32f};
			
 
				+  float voxel_size_z_{8.0f};
			
 
				+
			
 
				+  // network params
			
 
				+  const std::size_t batch_size_{1};
			
 
				+  std::size_t downsample_factor_{2};
			
 
				+  std::size_t encoder_in_feature_size_{9};
			
 
				+  const std::size_t encoder_out_feature_size_{32};
			
 
				+  const std::size_t head_out_size_{6};
			
 
				+  const std::size_t head_out_offset_size_{2};
			
 
				+  const std::size_t head_out_z_size_{1};
			
 
				+  const std::size_t head_out_dim_size_{3};
			
 
				+  const std::size_t head_out_rot_size_{2};
			
 
				+  const std::size_t head_out_vel_size_{2};
			
 
				+
			
 
				+  // post-process params
			
 
				+  float score_threshold_{0.4f};
			
 
				+  float circle_nms_dist_threshold_{1.5f};
			
 
				+
			
 
				+  // calculated params
			
 
				+  std::size_t grid_size_x_ = (range_max_x_ - range_min_x_) / voxel_size_x_;
			
 
				+  std::size_t grid_size_y_ = (range_max_y_ - range_min_y_) / voxel_size_y_;
			
 
				+  std::size_t grid_size_z_ = (range_max_z_ - range_min_z_) / voxel_size_z_;
			
 
				+  float offset_x_ = range_min_x_ + voxel_size_x_ / 2;
			
 
				+  float offset_y_ = range_min_y_ + voxel_size_y_ / 2;
			
 
				+  float offset_z_ = range_min_z_ + voxel_size_z_ / 2;
			
 
				+  std::size_t down_grid_size_x_ = grid_size_x_ / downsample_factor_;
			
 
				+  std::size_t down_grid_size_y_ = grid_size_y_ / downsample_factor_;
			
 
				+};
			
 
				+
			
 
				+}  // namespace centerpoint
			
 
				+
			
 
				+#endif  // LIDAR_CENTERPOINT__CENTERPOINT_CONFIG_HPP_
			
--- a/src1/perception/perception_lidar_centerpoint/include/lidar_centerpoint/centerpoint_trt.hpp
+++ b/src1/perception/perception_lidar_centerpoint/include/lidar_centerpoint/centerpoint_trt.hpp
@@ -0,0 +1,109 @@
 
				+// Copyright 2021 TIER IV, Inc.
			
 
				+//
			
 
				+// Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+// you may not use this file except in compliance with the License.
			
 
				+// You may obtain a copy of the License at
			
 
				+//
			
 
				+//     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+//
			
 
				+// Unless required by applicable law or agreed to in writing, software
			
 
				+// distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+// See the License for the specific language governing permissions and
			
 
				+// limitations under the License.
			
 
				+
			
 
				+#ifndef LIDAR_CENTERPOINT__CENTERPOINT_TRT_HPP_
			
 
				+#define LIDAR_CENTERPOINT__CENTERPOINT_TRT_HPP_
			
 
				+
			
 
				+#include <lidar_centerpoint/cuda_utils.hpp>
			
 
				+#include <lidar_centerpoint/network/network_trt.hpp>
			
 
				+#include <lidar_centerpoint/postprocess/postprocess_kernel.hpp>
			
 
				+#include <lidar_centerpoint/preprocess/voxel_generator.hpp>
			
 
				+
			
 
				+#include <sensor_msgs/msg/point_cloud2.hpp>
			
 
				+
			
 
				+#include <pcl/point_cloud.h>
			
 
				+#include <pcl/point_types.h>
			
 
				+
			
 
				+#include <memory>
			
 
				+#include <string>
			
 
				+#include <utility>
			
 
				+#include <vector>
			
 
				+
			
 
				+namespace centerpoint
			
 
				+{
			
 
				+class NetworkParam
			
 
				+{
			
 
				+public:
			
 
				+  NetworkParam(std::string onnx_path, std::string engine_path, std::string trt_precision)
			
 
				+  : onnx_path_(std::move(onnx_path)),
			
 
				+    engine_path_(std::move(engine_path)),
			
 
				+    trt_precision_(std::move(trt_precision))
			
 
				+  {
			
 
				+  }
			
 
				+
			
 
				+  std::string onnx_path() const { return onnx_path_; }
			
 
				+  std::string engine_path() const { return engine_path_; }
			
 
				+  std::string trt_precision() const { return trt_precision_; }
			
 
				+
			
 
				+private:
			
 
				+  std::string onnx_path_;
			
 
				+  std::string engine_path_;
			
 
				+  std::string trt_precision_;
			
 
				+};
			
 
				+
			
 
				+class CenterPointTRT
			
 
				+{
			
 
				+public:
			
 
				+  explicit CenterPointTRT(
			
 
				+    const NetworkParam & encoder_param, const NetworkParam & head_param,
			
 
				+    const DensificationParam & densification_param, const CenterPointConfig & config);
			
 
				+
			
 
				+  ~CenterPointTRT();
			
 
				+
			
 
				+  bool detect(
			
 
				+    const sensor_msgs::msg::PointCloud2 & input_pointcloud_msg, const tf2_ros::Buffer & tf_buffer,
			
 
				+    std::vector<Box3D> & det_boxes3d);
			
 
				+
			
 
				+protected:
			
 
				+  void initPtr();
			
 
				+
			
 
				+  virtual bool preprocess(
			
 
				+    const sensor_msgs::msg::PointCloud2 & input_pointcloud_msg, const tf2_ros::Buffer & tf_buffer);
			
 
				+
			
 
				+  void inference();
			
 
				+
			
 
				+  void postProcess(std::vector<Box3D> & det_boxes3d);
			
 
				+
			
 
				+  std::unique_ptr<VoxelGeneratorTemplate> vg_ptr_{nullptr};
			
 
				+  std::unique_ptr<VoxelEncoderTRT> encoder_trt_ptr_{nullptr};
			
 
				+  std::unique_ptr<HeadTRT> head_trt_ptr_{nullptr};
			
 
				+  std::unique_ptr<PostProcessCUDA> post_proc_ptr_{nullptr};
			
 
				+  cudaStream_t stream_{nullptr};
			
 
				+
			
 
				+  bool verbose_{false};
			
 
				+  std::size_t class_size_{0};
			
 
				+  CenterPointConfig config_;
			
 
				+  std::size_t num_voxels_{0};
			
 
				+  std::size_t encoder_in_feature_size_{0};
			
 
				+  std::size_t spatial_features_size_{0};
			
 
				+  std::vector<float> voxels_;
			
 
				+  std::vector<int> coordinates_;
			
 
				+  std::vector<float> num_points_per_voxel_;
			
 
				+  cuda::unique_ptr<float[]> voxels_d_{nullptr};
			
 
				+  cuda::unique_ptr<int[]> coordinates_d_{nullptr};
			
 
				+  cuda::unique_ptr<float[]> num_points_per_voxel_d_{nullptr};
			
 
				+  cuda::unique_ptr<float[]> encoder_in_features_d_{nullptr};
			
 
				+  cuda::unique_ptr<float[]> pillar_features_d_{nullptr};
			
 
				+  cuda::unique_ptr<float[]> spatial_features_d_{nullptr};
			
 
				+  cuda::unique_ptr<float[]> head_out_heatmap_d_{nullptr};
			
 
				+  cuda::unique_ptr<float[]> head_out_offset_d_{nullptr};
			
 
				+  cuda::unique_ptr<float[]> head_out_z_d_{nullptr};
			
 
				+  cuda::unique_ptr<float[]> head_out_dim_d_{nullptr};
			
 
				+  cuda::unique_ptr<float[]> head_out_rot_d_{nullptr};
			
 
				+  cuda::unique_ptr<float[]> head_out_vel_d_{nullptr};
			
 
				+};
			
 
				+
			
 
				+}  // namespace centerpoint
			
 
				+
			
 
				+#endif  // LIDAR_CENTERPOINT__CENTERPOINT_TRT_HPP_
			
--- a/src1/perception/perception_lidar_centerpoint/include/lidar_centerpoint/cuda_utils.hpp
+++ b/src1/perception/perception_lidar_centerpoint/include/lidar_centerpoint/cuda_utils.hpp
@@ -0,0 +1,120 @@
 
				+// Copyright 2020 TIER IV, Inc.
			
 
				+//
			
 
				+// Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+// you may not use this file except in compliance with the License.
			
 
				+// You may obtain a copy of the License at
			
 
				+//
			
 
				+//     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+//
			
 
				+// Unless required by applicable law or agreed to in writing, software
			
 
				+// distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+// See the License for the specific language governing permissions and
			
 
				+// limitations under the License.
			
 
				+/*
			
 
				+ * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
			
 
				+ *
			
 
				+ * Permission is hereby granted, free of charge, to any person obtaining a
			
 
				+ * copy of this software and associated documentation files (the "Software"),
			
 
				+ * to deal in the Software without restriction, including without limitation
			
 
				+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
			
 
				+ * and/or sell copies of the Software, and to permit persons to whom the
			
 
				+ * Software is furnished to do so, subject to the following conditions:
			
 
				+ *
			
 
				+ * The above copyright notice and this permission notice shall be included in
			
 
				+ * all copies or substantial portions of the Software.
			
 
				+ *
			
 
				+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
			
 
				+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
			
 
				+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
			
 
				+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
			
 
				+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
			
 
				+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
			
 
				+ * DEALINGS IN THE SOFTWARE.
			
 
				+ */
			
 
				+
			
 
				+/*
			
 
				+ * This code is licensed under CC0 1.0 Universal (Public Domain).
			
 
				+ * You can use this without any limitation.
			
 
				+ * https://creativecommons.org/publicdomain/zero/1.0/deed.en
			
 
				+ */
			
 
				+
			
 
				+#ifndef LIDAR_CENTERPOINT__CUDA_UTILS_HPP_
			
 
				+#define LIDAR_CENTERPOINT__CUDA_UTILS_HPP_
			
 
				+
			
 
				+#include <cuda_runtime_api.h>
			
 
				+
			
 
				+#include <memory>
			
 
				+#include <sstream>
			
 
				+#include <stdexcept>
			
 
				+#include <type_traits>
			
 
				+
			
 
				+#define CHECK_CUDA_ERROR(e) (cuda::check_error(e, __FILE__, __LINE__))
			
 
				+
			
 
				+namespace cuda
			
 
				+{
			
 
				+inline void check_error(const ::cudaError_t e, const char * f, int n)
			
 
				+{
			
 
				+  if (e != ::cudaSuccess) {
			
 
				+    std::stringstream s;
			
 
				+    s << ::cudaGetErrorName(e) << " (" << e << ")@" << f << "#L" << n << ": "
			
 
				+      << ::cudaGetErrorString(e);
			
 
				+    throw std::runtime_error{s.str()};
			
 
				+  }
			
 
				+}
			
 
				+
			
 
				+struct deleter
			
 
				+{
			
 
				+  void operator()(void * p) const { CHECK_CUDA_ERROR(::cudaFree(p)); }
			
 
				+};
			
 
				+
			
 
				+template <typename T>
			
 
				+using unique_ptr = std::unique_ptr<T, deleter>;
			
 
				+
			
 
				+template <typename T>
			
 
				+typename std::enable_if<std::is_array<T>::value, cuda::unique_ptr<T>>::type make_unique(
			
 
				+  const std::size_t n)
			
 
				+{
			
 
				+  using U = typename std::remove_extent<T>::type;
			
 
				+  U * p;
			
 
				+  CHECK_CUDA_ERROR(::cudaMalloc(reinterpret_cast<void **>(&p), sizeof(U) * n));
			
 
				+  return cuda::unique_ptr<T>{p};
			
 
				+}
			
 
				+
			
 
				+template <typename T>
			
 
				+cuda::unique_ptr<T> make_unique()
			
 
				+{
			
 
				+  T * p;
			
 
				+  CHECK_CUDA_ERROR(::cudaMalloc(reinterpret_cast<void **>(&p), sizeof(T)));
			
 
				+  return cuda::unique_ptr<T>{p};
			
 
				+}
			
 
				+
			
 
				+constexpr size_t CUDA_ALIGN = 256;
			
 
				+
			
 
				+template <typename T>
			
 
				+inline size_t get_size_aligned(size_t num_elem)
			
 
				+{
			
 
				+  size_t size = num_elem * sizeof(T);
			
 
				+  size_t extra_align = 0;
			
 
				+  if (size % CUDA_ALIGN != 0) {
			
 
				+    extra_align = CUDA_ALIGN - size % CUDA_ALIGN;
			
 
				+  }
			
 
				+  return size + extra_align;
			
 
				+}
			
 
				+
			
 
				+template <typename T>
			
 
				+inline T * get_next_ptr(size_t num_elem, void *& workspace, size_t & workspace_size)
			
 
				+{
			
 
				+  size_t size = get_size_aligned<T>(num_elem);
			
 
				+  if (size > workspace_size) {
			
 
				+    throw std::runtime_error("Workspace is too small!");
			
 
				+  }
			
 
				+  workspace_size -= size;
			
 
				+  T * ptr = reinterpret_cast<T *>(workspace);
			
 
				+  workspace = reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(workspace) + size);
			
 
				+  return ptr;
			
 
				+}
			
 
				+
			
 
				+}  // namespace cuda
			
 
				+
			
 
				+#endif  // LIDAR_CENTERPOINT__CUDA_UTILS_HPP_
			
--- a/src1/perception/perception_lidar_centerpoint/include/lidar_centerpoint/network/network_trt.hpp
+++ b/src1/perception/perception_lidar_centerpoint/include/lidar_centerpoint/network/network_trt.hpp
@@ -0,0 +1,55 @@
 
				+// Copyright 2021 TIER IV, Inc.
			
 
				+//
			
 
				+// Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+// you may not use this file except in compliance with the License.
			
 
				+// You may obtain a copy of the License at
			
 
				+//
			
 
				+//     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+//
			
 
				+// Unless required by applicable law or agreed to in writing, software
			
 
				+// distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+// See the License for the specific language governing permissions and
			
 
				+// limitations under the License.
			
 
				+
			
 
				+#ifndef LIDAR_CENTERPOINT__NETWORK__NETWORK_TRT_HPP_
			
 
				+#define LIDAR_CENTERPOINT__NETWORK__NETWORK_TRT_HPP_
			
 
				+
			
 
				+#include <lidar_centerpoint/centerpoint_config.hpp>
			
 
				+#include <lidar_centerpoint/network/tensorrt_wrapper.hpp>
			
 
				+
			
 
				+#include <vector>
			
 
				+
			
 
				+namespace centerpoint
			
 
				+{
			
 
				+class VoxelEncoderTRT : public TensorRTWrapper
			
 
				+{
			
 
				+public:
			
 
				+  using TensorRTWrapper::TensorRTWrapper;
			
 
				+
			
 
				+protected:
			
 
				+  bool setProfile(
			
 
				+    nvinfer1::IBuilder & builder, nvinfer1::INetworkDefinition & network,
			
 
				+    nvinfer1::IBuilderConfig & config) override;
			
 
				+};
			
 
				+
			
 
				+class HeadTRT : public TensorRTWrapper
			
 
				+{
			
 
				+public:
			
 
				+  using TensorRTWrapper::TensorRTWrapper;
			
 
				+
			
 
				+  HeadTRT(
			
 
				+    const std::vector<std::size_t> & out_channel_sizes, const CenterPointConfig & config,
			
 
				+    const bool verbose);
			
 
				+
			
 
				+protected:
			
 
				+  bool setProfile(
			
 
				+    nvinfer1::IBuilder & builder, nvinfer1::INetworkDefinition & network,
			
 
				+    nvinfer1::IBuilderConfig & config) override;
			
 
				+
			
 
				+  std::vector<std::size_t> out_channel_sizes_;
			
 
				+};
			
 
				+
			
 
				+}  // namespace centerpoint
			
 
				+
			
 
				+#endif  // LIDAR_CENTERPOINT__NETWORK__NETWORK_TRT_HPP_
			
--- a/src1/perception/perception_lidar_centerpoint/include/lidar_centerpoint/network/scatter_kernel.hpp
+++ b/src1/perception/perception_lidar_centerpoint/include/lidar_centerpoint/network/scatter_kernel.hpp
@@ -0,0 +1,31 @@
 
				+// Copyright 2022 TIER IV, Inc.
			
 
				+//
			
 
				+// Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+// you may not use this file except in compliance with the License.
			
 
				+// You may obtain a copy of the License at
			
 
				+//
			
 
				+//     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+//
			
 
				+// Unless required by applicable law or agreed to in writing, software
			
 
				+// distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+// See the License for the specific language governing permissions and
			
 
				+// limitations under the License.
			
 
				+
			
 
				+#ifndef LIDAR_CENTERPOINT__NETWORK__SCATTER_KERNEL_HPP_
			
 
				+#define LIDAR_CENTERPOINT__NETWORK__SCATTER_KERNEL_HPP_
			
 
				+
			
 
				+#include <cuda.h>
			
 
				+#include <cuda_runtime_api.h>
			
 
				+
			
 
				+namespace centerpoint
			
 
				+{
			
 
				+cudaError_t scatterFeatures_launch(
			
 
				+  const float * pillar_features, const int * coords, const std::size_t num_pillars,
			
 
				+  const std::size_t max_voxel_size, const std::size_t encoder_out_feature_size,
			
 
				+  const std::size_t grid_size_x, const std::size_t grid_size_y, float * scattered_features,
			
 
				+  cudaStream_t stream);
			
 
				+
			
 
				+}  // namespace centerpoint
			
 
				+
			
 
				+#endif  // LIDAR_CENTERPOINT__NETWORK__SCATTER_KERNEL_HPP_
			
--- a/src1/perception/perception_lidar_centerpoint/include/lidar_centerpoint/network/tensorrt_wrapper.hpp
+++ b/src1/perception/perception_lidar_centerpoint/include/lidar_centerpoint/network/tensorrt_wrapper.hpp
@@ -0,0 +1,94 @@
 
				+// Copyright 2021 TIER IV, Inc.
			
 
				+//
			
 
				+// Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+// you may not use this file except in compliance with the License.
			
 
				+// You may obtain a copy of the License at
			
 
				+//
			
 
				+//     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+//
			
 
				+// Unless required by applicable law or agreed to in writing, software
			
 
				+// distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+// See the License for the specific language governing permissions and
			
 
				+// limitations under the License.
			
 
				+
			
 
				+#ifndef LIDAR_CENTERPOINT__NETWORK__TENSORRT_WRAPPER_HPP_
			
 
				+#define LIDAR_CENTERPOINT__NETWORK__TENSORRT_WRAPPER_HPP_
			
 
				+
			
 
				+#include <lidar_centerpoint/centerpoint_config.hpp>
			
 
				+
			
 
				+#include <NvInfer.h>
			
 
				+
			
 
				+#include <iostream>
			
 
				+#include <memory>
			
 
				+#include <string>
			
 
				+
			
 
				+namespace centerpoint
			
 
				+{
			
 
				+struct Deleter
			
 
				+{
			
 
				+  template <typename T>
			
 
				+  void operator()(T * obj) const
			
 
				+  {
			
 
				+    if (obj) {
			
 
				+      delete obj;
			
 
				+    }
			
 
				+  }
			
 
				+};
			
 
				+
			
 
				+template <typename T>
			
 
				+using unique_ptr = std::unique_ptr<T, Deleter>;
			
 
				+
			
 
				+class Logger : public nvinfer1::ILogger
			
 
				+{
			
 
				+public:
			
 
				+  explicit Logger(bool verbose) : verbose_(verbose) {}
			
 
				+
			
 
				+  void log(Severity severity, const char * msg) noexcept override
			
 
				+  {
			
 
				+    if (verbose_ || ((severity != Severity::kINFO) && (severity != Severity::kVERBOSE))) {
			
 
				+      std::cout << msg << std::endl;
			
 
				+    }
			
 
				+  }
			
 
				+
			
 
				+private:
			
 
				+  bool verbose_{false};
			
 
				+};
			
 
				+
			
 
				+class TensorRTWrapper
			
 
				+{
			
 
				+public:
			
 
				+  explicit TensorRTWrapper(const CenterPointConfig & config, const bool verbose);
			
 
				+
			
 
				+  bool init(
			
 
				+    const std::string & onnx_path, const std::string & engine_path, const std::string & precision);
			
 
				+
			
 
				+  unique_ptr<nvinfer1::IExecutionContext> context_ = nullptr;
			
 
				+
			
 
				+protected:
			
 
				+  virtual bool setProfile(
			
 
				+    nvinfer1::IBuilder & builder, nvinfer1::INetworkDefinition & network,
			
 
				+    nvinfer1::IBuilderConfig & config) = 0;
			
 
				+
			
 
				+  CenterPointConfig config_;
			
 
				+  Logger logger_;
			
 
				+
			
 
				+private:
			
 
				+  bool parseONNX(
			
 
				+    const std::string & onnx_path, const std::string & engine_path, const std::string & precision,
			
 
				+    size_t workspace_size = (1ULL << 30));
			
 
				+
			
 
				+  bool saveEngine(const std::string & engine_path);
			
 
				+
			
 
				+  bool loadEngine(const std::string & engine_path);
			
 
				+
			
 
				+  bool createContext();
			
 
				+
			
 
				+  unique_ptr<nvinfer1::IRuntime> runtime_ = nullptr;
			
 
				+  unique_ptr<nvinfer1::IHostMemory> plan_ = nullptr;
			
 
				+  unique_ptr<nvinfer1::ICudaEngine> engine_ = nullptr;
			
 
				+};
			
 
				+
			
 
				+}  // namespace centerpoint
			
 
				+
			
 
				+#endif  // LIDAR_CENTERPOINT__NETWORK__TENSORRT_WRAPPER_HPP_
			
--- a/src1/perception/perception_lidar_centerpoint/include/lidar_centerpoint/node.hpp
+++ b/src1/perception/perception_lidar_centerpoint/include/lidar_centerpoint/node.hpp
@@ -0,0 +1,58 @@
 
				+// Copyright 2021 TIER IV, Inc.
			
 
				+//
			
 
				+// Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+// you may not use this file except in compliance with the License.
			
 
				+// You may obtain a copy of the License at
			
 
				+//
			
 
				+//     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+//
			
 
				+// Unless required by applicable law or agreed to in writing, software
			
 
				+// distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+// See the License for the specific language governing permissions and
			
 
				+// limitations under the License.
			
 
				+
			
 
				+#ifndef LIDAR_CENTERPOINT__NODE_HPP_
			
 
				+#define LIDAR_CENTERPOINT__NODE_HPP_
			
 
				+
			
 
				+#include <lidar_centerpoint/centerpoint_trt.hpp>
			
 
				+#include <rclcpp/rclcpp.hpp>
			
 
				+
			
 
				+#include <autoware_auto_perception_msgs/msg/detected_object_kinematics.hpp>
			
 
				+#include <autoware_auto_perception_msgs/msg/detected_objects.hpp>
			
 
				+#include <autoware_auto_perception_msgs/msg/object_classification.hpp>
			
 
				+#include <autoware_auto_perception_msgs/msg/shape.hpp>
			
 
				+#include <sensor_msgs/msg/point_cloud2.hpp>
			
 
				+
			
 
				+#include <memory>
			
 
				+#include <string>
			
 
				+#include <vector>
			
 
				+
			
 
				+namespace centerpoint
			
 
				+{
			
 
				+
			
 
				+class LidarCenterPointNode : public rclcpp::Node
			
 
				+{
			
 
				+public:
			
 
				+  explicit LidarCenterPointNode(const rclcpp::NodeOptions & node_options);
			
 
				+
			
 
				+private:
			
 
				+  void pointCloudCallback(const sensor_msgs::msg::PointCloud2::ConstSharedPtr input_pointcloud_msg);
			
 
				+
			
 
				+  tf2_ros::Buffer tf_buffer_;
			
 
				+  tf2_ros::TransformListener tf_listener_{tf_buffer_};
			
 
				+
			
 
				+  rclcpp::Subscription<sensor_msgs::msg::PointCloud2>::SharedPtr pointcloud_sub_;
			
 
				+  rclcpp::Publisher<autoware_auto_perception_msgs::msg::DetectedObjects>::SharedPtr objects_pub_;
			
 
				+
			
 
				+  float score_threshold_{0.0};
			
 
				+  std::vector<std::string> class_names_;
			
 
				+  bool rename_car_to_truck_and_bus_{false};
			
 
				+  bool has_twist_{false};
			
 
				+
			
 
				+  std::unique_ptr<CenterPointTRT> detector_ptr_{nullptr};
			
 
				+};
			
 
				+
			
 
				+}  // namespace centerpoint
			
 
				+
			
 
				+#endif  // LIDAR_CENTERPOINT__NODE_HPP_
			
--- a/src1/perception/perception_lidar_centerpoint/include/lidar_centerpoint/postprocess/circle_nms_kernel.hpp
+++ b/src1/perception/perception_lidar_centerpoint/include/lidar_centerpoint/postprocess/circle_nms_kernel.hpp
@@ -0,0 +1,32 @@
 
				+// Copyright 2022 TIER IV, Inc.
			
 
				+//
			
 
				+// Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+// you may not use this file except in compliance with the License.
			
 
				+// You may obtain a copy of the License at
			
 
				+//
			
 
				+//     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+//
			
 
				+// Unless required by applicable law or agreed to in writing, software
			
 
				+// distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+// See the License for the specific language governing permissions and
			
 
				+// limitations under the License.
			
 
				+
			
 
				+#ifndef LIDAR_CENTERPOINT__POSTPROCESS__CIRCLE_NMS_KERNEL_HPP_
			
 
				+#define LIDAR_CENTERPOINT__POSTPROCESS__CIRCLE_NMS_KERNEL_HPP_
			
 
				+
			
 
				+#include <lidar_centerpoint/utils.hpp>
			
 
				+
			
 
				+#include <thrust/device_vector.h>
			
 
				+
			
 
				+namespace centerpoint
			
 
				+{
			
 
				+// Non-maximum suppression (NMS) uses the distance on the xy plane instead of
			
 
				+// intersection over union (IoU) to suppress overlapped objects.
			
 
				+std::size_t circleNMS(
			
 
				+  thrust::device_vector<Box3D> & boxes3d, const float distance_threshold,
			
 
				+  thrust::device_vector<bool> & keep_mask, cudaStream_t stream);
			
 
				+
			
 
				+}  // namespace centerpoint
			
 
				+
			
 
				+#endif  // LIDAR_CENTERPOINT__POSTPROCESS__CIRCLE_NMS_KERNEL_HPP_
			
--- a/src1/perception/perception_lidar_centerpoint/include/lidar_centerpoint/postprocess/postprocess_kernel.hpp
+++ b/src1/perception/perception_lidar_centerpoint/include/lidar_centerpoint/postprocess/postprocess_kernel.hpp
@@ -0,0 +1,46 @@
 
				+// Copyright 2022 TIER IV, Inc.
			
 
				+//
			
 
				+// Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+// you may not use this file except in compliance with the License.
			
 
				+// You may obtain a copy of the License at
			
 
				+//
			
 
				+//     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+//
			
 
				+// Unless required by applicable law or agreed to in writing, software
			
 
				+// distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+// See the License for the specific language governing permissions and
			
 
				+// limitations under the License.
			
 
				+
			
 
				+#ifndef LIDAR_CENTERPOINT__POSTPROCESS__POSTPROCESS_KERNEL_HPP_
			
 
				+#define LIDAR_CENTERPOINT__POSTPROCESS__POSTPROCESS_KERNEL_HPP_
			
 
				+
			
 
				+#include <lidar_centerpoint/centerpoint_config.hpp>
			
 
				+#include <lidar_centerpoint/utils.hpp>
			
 
				+
			
 
				+#include <cuda.h>
			
 
				+#include <cuda_runtime_api.h>
			
 
				+#include <thrust/device_vector.h>
			
 
				+
			
 
				+#include <vector>
			
 
				+
			
 
				+namespace centerpoint
			
 
				+{
			
 
				+class PostProcessCUDA
			
 
				+{
			
 
				+public:
			
 
				+  explicit PostProcessCUDA(const CenterPointConfig & config);
			
 
				+
			
 
				+  cudaError_t generateDetectedBoxes3D_launch(
			
 
				+    const float * out_heatmap, const float * out_offset, const float * out_z, const float * out_dim,
			
 
				+    const float * out_rot, const float * out_vel, std::vector<Box3D> & det_boxes3d,
			
 
				+    cudaStream_t stream);
			
 
				+
			
 
				+private:
			
 
				+  CenterPointConfig config_;
			
 
				+  thrust::device_vector<Box3D> boxes3d_d_;
			
 
				+};
			
 
				+
			
 
				+}  // namespace centerpoint
			
 
				+
			
 
				+#endif  // LIDAR_CENTERPOINT__POSTPROCESS__POSTPROCESS_KERNEL_HPP_
			
--- a/src1/perception/perception_lidar_centerpoint/include/lidar_centerpoint/preprocess/pointcloud_densification.hpp
+++ b/src1/perception/perception_lidar_centerpoint/include/lidar_centerpoint/preprocess/pointcloud_densification.hpp
@@ -0,0 +1,86 @@
 
				+// Copyright 2021 TIER IV, Inc.
			
 
				+//
			
 
				+// Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+// you may not use this file except in compliance with the License.
			
 
				+// You may obtain a copy of the License at
			
 
				+//
			
 
				+//     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+//
			
 
				+// Unless required by applicable law or agreed to in writing, software
			
 
				+// distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+// See the License for the specific language governing permissions and
			
 
				+// limitations under the License.
			
 
				+
			
 
				+#ifndef LIDAR_CENTERPOINT__PREPROCESS__POINTCLOUD_DENSIFICATION_HPP_
			
 
				+#define LIDAR_CENTERPOINT__PREPROCESS__POINTCLOUD_DENSIFICATION_HPP_
			
 
				+
			
 
				+#include <tf2_ros/buffer.h>
			
 
				+#include <tf2_ros/transform_listener.h>
			
 
				+#ifdef ROS_DISTRO_GALACTIC
			
 
				+#include <tf2_sensor_msgs/tf2_sensor_msgs.h>
			
 
				+#else
			
 
				+#include <tf2_sensor_msgs/tf2_sensor_msgs.hpp>
			
 
				+#endif
			
 
				+
			
 
				+#include <list>
			
 
				+#include <string>
			
 
				+#include <utility>
			
 
				+
			
 
				+namespace centerpoint
			
 
				+{
			
 
				+class DensificationParam
			
 
				+{
			
 
				+public:
			
 
				+  DensificationParam(const std::string & world_frame_id, const unsigned int num_past_frames)
			
 
				+  : world_frame_id_(std::move(world_frame_id)),
			
 
				+    pointcloud_cache_size_(num_past_frames + /*current frame*/ 1)
			
 
				+  {
			
 
				+  }
			
 
				+
			
 
				+  std::string world_frame_id() const { return world_frame_id_; }
			
 
				+  unsigned int pointcloud_cache_size() const { return pointcloud_cache_size_; }
			
 
				+
			
 
				+private:
			
 
				+  std::string world_frame_id_;
			
 
				+  unsigned int pointcloud_cache_size_{1};
			
 
				+};
			
 
				+
			
 
				+struct PointCloudWithTransform
			
 
				+{
			
 
				+  sensor_msgs::msg::PointCloud2 pointcloud_msg;
			
 
				+  Eigen::Affine3f affine_past2world;
			
 
				+};
			
 
				+
			
 
				+class PointCloudDensification
			
 
				+{
			
 
				+public:
			
 
				+  explicit PointCloudDensification(const DensificationParam & param);
			
 
				+
			
 
				+  bool enqueuePointCloud(
			
 
				+    const sensor_msgs::msg::PointCloud2 & input_pointcloud_msg, const tf2_ros::Buffer & tf_buffer);
			
 
				+
			
 
				+  double getCurrentTimestamp() const { return current_timestamp_; }
			
 
				+  Eigen::Affine3f getAffineWorldToCurrent() const { return affine_world2current_; }
			
 
				+  std::list<PointCloudWithTransform>::iterator getPointCloudCacheIter()
			
 
				+  {
			
 
				+    return pointcloud_cache_.begin();
			
 
				+  }
			
 
				+  bool isCacheEnd(std::list<PointCloudWithTransform>::iterator iter)
			
 
				+  {
			
 
				+    return iter == pointcloud_cache_.end();
			
 
				+  }
			
 
				+
			
 
				+private:
			
 
				+  void enqueue(const sensor_msgs::msg::PointCloud2 & msg, const Eigen::Affine3f & affine);
			
 
				+  void dequeue();
			
 
				+
			
 
				+  DensificationParam param_;
			
 
				+  double current_timestamp_{0.0};
			
 
				+  Eigen::Affine3f affine_world2current_;
			
 
				+  std::list<PointCloudWithTransform> pointcloud_cache_;
			
 
				+};
			
 
				+
			
 
				+}  // namespace centerpoint
			
 
				+
			
 
				+#endif  // LIDAR_CENTERPOINT__PREPROCESS__POINTCLOUD_DENSIFICATION_HPP_
			
--- a/src1/perception/perception_lidar_centerpoint/include/lidar_centerpoint/preprocess/preprocess_kernel.hpp
+++ b/src1/perception/perception_lidar_centerpoint/include/lidar_centerpoint/preprocess/preprocess_kernel.hpp
@@ -0,0 +1,31 @@
 
				+// Copyright 2022 TIER IV, Inc.
			
 
				+//
			
 
				+// Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+// you may not use this file except in compliance with the License.
			
 
				+// You may obtain a copy of the License at
			
 
				+//
			
 
				+//     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+//
			
 
				+// Unless required by applicable law or agreed to in writing, software
			
 
				+// distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+// See the License for the specific language governing permissions and
			
 
				+// limitations under the License.
			
 
				+
			
 
				+#ifndef LIDAR_CENTERPOINT__PREPROCESS__PREPROCESS_KERNEL_HPP_
			
 
				+#define LIDAR_CENTERPOINT__PREPROCESS__PREPROCESS_KERNEL_HPP_
			
 
				+
			
 
				+#include <cuda.h>
			
 
				+#include <cuda_runtime_api.h>
			
 
				+
			
 
				+namespace centerpoint
			
 
				+{
			
 
				+cudaError_t generateFeatures_launch(
			
 
				+  const float * voxel_features, const float * voxel_num_points, const int * coords,
			
 
				+  const std::size_t num_voxels, const std::size_t max_voxel_size, const float voxel_size_x,
			
 
				+  const float voxel_size_y, const float voxel_size_z, const float range_min_x,
			
 
				+  const float range_min_y, const float range_min_z, float * features, cudaStream_t stream);
			
 
				+
			
 
				+}  // namespace centerpoint
			
 
				+
			
 
				+#endif  // LIDAR_CENTERPOINT__PREPROCESS__PREPROCESS_KERNEL_HPP_
			
--- a/src1/perception/perception_lidar_centerpoint/include/lidar_centerpoint/preprocess/voxel_generator.hpp
+++ b/src1/perception/perception_lidar_centerpoint/include/lidar_centerpoint/preprocess/voxel_generator.hpp
@@ -0,0 +1,62 @@
 
				+// Copyright 2021 TIER IV, Inc.
			
 
				+//
			
 
				+// Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+// you may not use this file except in compliance with the License.
			
 
				+// You may obtain a copy of the License at
			
 
				+//
			
 
				+//     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+//
			
 
				+// Unless required by applicable law or agreed to in writing, software
			
 
				+// distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+// See the License for the specific language governing permissions and
			
 
				+// limitations under the License.
			
 
				+
			
 
				+#ifndef LIDAR_CENTERPOINT__PREPROCESS__VOXEL_GENERATOR_HPP_
			
 
				+#define LIDAR_CENTERPOINT__PREPROCESS__VOXEL_GENERATOR_HPP_
			
 
				+
			
 
				+#include <lidar_centerpoint/centerpoint_config.hpp>
			
 
				+#include <lidar_centerpoint/preprocess/pointcloud_densification.hpp>
			
 
				+
			
 
				+#include <sensor_msgs/msg/point_cloud2.hpp>
			
 
				+
			
 
				+#include <memory>
			
 
				+#include <vector>
			
 
				+
			
 
				+namespace centerpoint
			
 
				+{
			
 
				+class VoxelGeneratorTemplate
			
 
				+{
			
 
				+public:
			
 
				+  explicit VoxelGeneratorTemplate(
			
 
				+    const DensificationParam & param, const CenterPointConfig & config);
			
 
				+
			
 
				+  virtual std::size_t pointsToVoxels(
			
 
				+    std::vector<float> & voxels, std::vector<int> & coordinates,
			
 
				+    std::vector<float> & num_points_per_voxel) = 0;
			
 
				+
			
 
				+  bool enqueuePointCloud(
			
 
				+    const sensor_msgs::msg::PointCloud2 & input_pointcloud_msg, const tf2_ros::Buffer & tf_buffer);
			
 
				+
			
 
				+protected:
			
 
				+  std::unique_ptr<PointCloudDensification> pd_ptr_{nullptr};
			
 
				+
			
 
				+  CenterPointConfig config_;
			
 
				+  std::array<float, 6> range_;
			
 
				+  std::array<int, 3> grid_size_;
			
 
				+  std::array<float, 3> recip_voxel_size_;
			
 
				+};
			
 
				+
			
 
				+class VoxelGenerator : public VoxelGeneratorTemplate
			
 
				+{
			
 
				+public:
			
 
				+  using VoxelGeneratorTemplate::VoxelGeneratorTemplate;
			
 
				+
			
 
				+  std::size_t pointsToVoxels(
			
 
				+    std::vector<float> & voxels, std::vector<int> & coordinates,
			
 
				+    std::vector<float> & num_points_per_voxel) override;
			
 
				+};
			
 
				+
			
 
				+}  // namespace centerpoint
			
 
				+
			
 
				+#endif  // LIDAR_CENTERPOINT__PREPROCESS__VOXEL_GENERATOR_HPP_
			
--- a/src1/perception/perception_lidar_centerpoint/include/lidar_centerpoint/ros_utils.hpp
+++ b/src1/perception/perception_lidar_centerpoint/include/lidar_centerpoint/ros_utils.hpp
@@ -0,0 +1,44 @@
 
				+// Copyright 2022 TIER IV, Inc.
			
 
				+//
			
 
				+// Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+// you may not use this file except in compliance with the License.
			
 
				+// You may obtain a copy of the License at
			
 
				+//
			
 
				+//     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+//
			
 
				+// Unless required by applicable law or agreed to in writing, software
			
 
				+// distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+// See the License for the specific language governing permissions and
			
 
				+// limitations under the License.
			
 
				+
			
 
				+#ifndef LIDAR_CENTERPOINT__ROS_UTILS_HPP_
			
 
				+#define LIDAR_CENTERPOINT__ROS_UTILS_HPP_
			
 
				+
			
 
				+// ros packages cannot be included from cuda.
			
 
				+
			
 
				+#include <lidar_centerpoint/utils.hpp>
			
 
				+
			
 
				+#include <autoware_auto_perception_msgs/msg/detected_object_kinematics.hpp>
			
 
				+#include <autoware_auto_perception_msgs/msg/detected_objects.hpp>
			
 
				+#include <autoware_auto_perception_msgs/msg/object_classification.hpp>
			
 
				+#include <autoware_auto_perception_msgs/msg/shape.hpp>
			
 
				+
			
 
				+#include <string>
			
 
				+#include <vector>
			
 
				+
			
 
				+namespace centerpoint
			
 
				+{
			
 
				+
			
 
				+void box3DToDetectedObject(
			
 
				+  const Box3D & box3d, const std::vector<std::string> & class_names,
			
 
				+  const bool rename_car_to_truck_and_bus, const bool has_twist,
			
 
				+  autoware_auto_perception_msgs::msg::DetectedObject & obj);
			
 
				+
			
 
				+uint8_t getSemanticType(const std::string & class_name);
			
 
				+
			
 
				+bool isCarLikeVehicleLabel(const uint8_t label);
			
 
				+
			
 
				+}  // namespace centerpoint
			
 
				+
			
 
				+#endif  // LIDAR_CENTERPOINT__ROS_UTILS_HPP_
			
--- a/src1/perception/perception_lidar_centerpoint/include/lidar_centerpoint/utils.hpp
+++ b/src1/perception/perception_lidar_centerpoint/include/lidar_centerpoint/utils.hpp
@@ -0,0 +1,42 @@
 
				+// Copyright 2022 TIER IV, Inc.
			
 
				+//
			
 
				+// Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+// you may not use this file except in compliance with the License.
			
 
				+// You may obtain a copy of the License at
			
 
				+//
			
 
				+//     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+//
			
 
				+// Unless required by applicable law or agreed to in writing, software
			
 
				+// distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+// See the License for the specific language governing permissions and
			
 
				+// limitations under the License.
			
 
				+
			
 
				+#ifndef LIDAR_CENTERPOINT__UTILS_HPP_
			
 
				+#define LIDAR_CENTERPOINT__UTILS_HPP_
			
 
				+
			
 
				+#include <cstddef>
			
 
				+
			
 
				+namespace centerpoint
			
 
				+{
			
 
				+struct Box3D
			
 
				+{
			
 
				+  // initializer not allowed for __shared__ variable
			
 
				+  int label;
			
 
				+  float score;
			
 
				+  float x;
			
 
				+  float y;
			
 
				+  float z;
			
 
				+  float length;
			
 
				+  float width;
			
 
				+  float height;
			
 
				+  float yaw;
			
 
				+  float vel_x;
			
 
				+  float vel_y;
			
 
				+};
			
 
				+
			
 
				+std::size_t divup(const std::size_t a, const std::size_t b);
			
 
				+
			
 
				+}  // namespace centerpoint
			
 
				+
			
 
				+#endif  // LIDAR_CENTERPOINT__UTILS_HPP_
			
--- a/src1/perception/perception_lidar_centerpoint/main.cpp
+++ b/src1/perception/perception_lidar_centerpoint/main.cpp
@@ -0,0 +1,8 @@
 
				+#include <QCoreApplication>
			
 
				+
			
 
				+int main(int argc, char *argv[])
			
 
				+{
			
 
				+    QCoreApplication a(argc, argv);
			
 
				+
			
 
				+    return a.exec();
			
 
				+}
			
--- a/src1/perception/perception_lidar_centerpoint/network/network_trt.cpp
+++ b/src1/perception/perception_lidar_centerpoint/network/network_trt.cpp
@@ -0,0 +1,75 @@
 
				+// Copyright 2021 TIER IV, Inc.
			
 
				+//
			
 
				+// Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+// you may not use this file except in compliance with the License.
			
 
				+// You may obtain a copy of the License at
			
 
				+//
			
 
				+//     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+//
			
 
				+// Unless required by applicable law or agreed to in writing, software
			
 
				+// distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+// See the License for the specific language governing permissions and
			
 
				+// limitations under the License.
			
 
				+
			
 
				+#include "lidar_centerpoint/network/network_trt.hpp"
			
 
				+
			
 
				+namespace centerpoint
			
 
				+{
			
 
				+bool VoxelEncoderTRT::setProfile(
			
 
				+  nvinfer1::IBuilder & builder, nvinfer1::INetworkDefinition & network,
			
 
				+  nvinfer1::IBuilderConfig & config)
			
 
				+{
			
 
				+  auto profile = builder.createOptimizationProfile();
			
 
				+  auto in_name = network.getInput(0)->getName();
			
 
				+  auto in_dims = nvinfer1::Dims3(
			
 
				+    config_.max_voxel_size_, config_.max_point_in_voxel_size_, config_.encoder_in_feature_size_);
			
 
				+  profile->setDimensions(in_name, nvinfer1::OptProfileSelector::kMIN, in_dims);
			
 
				+  profile->setDimensions(in_name, nvinfer1::OptProfileSelector::kOPT, in_dims);
			
 
				+  profile->setDimensions(in_name, nvinfer1::OptProfileSelector::kMAX, in_dims);
			
 
				+
			
 
				+  auto out_name = network.getOutput(0)->getName();
			
 
				+  auto out_dims = nvinfer1::Dims2(config_.max_voxel_size_, config_.encoder_out_feature_size_);
			
 
				+  profile->setDimensions(out_name, nvinfer1::OptProfileSelector::kMIN, out_dims);
			
 
				+  profile->setDimensions(out_name, nvinfer1::OptProfileSelector::kOPT, out_dims);
			
 
				+  profile->setDimensions(out_name, nvinfer1::OptProfileSelector::kMAX, out_dims);
			
 
				+  config.addOptimizationProfile(profile);
			
 
				+
			
 
				+  return true;
			
 
				+}
			
 
				+
			
 
				+HeadTRT::HeadTRT(
			
 
				+  const std::vector<std::size_t> & out_channel_sizes, const CenterPointConfig & config,
			
 
				+  const bool verbose)
			
 
				+: TensorRTWrapper(config, verbose), out_channel_sizes_(out_channel_sizes)
			
 
				+{
			
 
				+}
			
 
				+
			
 
				+bool HeadTRT::setProfile(
			
 
				+  nvinfer1::IBuilder & builder, nvinfer1::INetworkDefinition & network,
			
 
				+  nvinfer1::IBuilderConfig & config)
			
 
				+{
			
 
				+  auto profile = builder.createOptimizationProfile();
			
 
				+  auto in_name = network.getInput(0)->getName();
			
 
				+  auto in_dims = nvinfer1::Dims4(
			
 
				+    config_.batch_size_, config_.encoder_out_feature_size_, config_.grid_size_y_,
			
 
				+    config_.grid_size_x_);
			
 
				+  profile->setDimensions(in_name, nvinfer1::OptProfileSelector::kMIN, in_dims);
			
 
				+  profile->setDimensions(in_name, nvinfer1::OptProfileSelector::kOPT, in_dims);
			
 
				+  profile->setDimensions(in_name, nvinfer1::OptProfileSelector::kMAX, in_dims);
			
 
				+
			
 
				+  for (std::size_t ci = 0; ci < out_channel_sizes_.size(); ci++) {
			
 
				+    auto out_name = network.getOutput(ci)->getName();
			
 
				+    auto out_dims = nvinfer1::Dims4(
			
 
				+      config_.batch_size_, out_channel_sizes_[ci], config_.down_grid_size_y_,
			
 
				+      config_.down_grid_size_x_);
			
 
				+    profile->setDimensions(out_name, nvinfer1::OptProfileSelector::kMIN, out_dims);
			
 
				+    profile->setDimensions(out_name, nvinfer1::OptProfileSelector::kOPT, out_dims);
			
 
				+    profile->setDimensions(out_name, nvinfer1::OptProfileSelector::kMAX, out_dims);
			
 
				+  }
			
 
				+  config.addOptimizationProfile(profile);
			
 
				+
			
 
				+  return true;
			
 
				+}
			
 
				+
			
 
				+}  // namespace centerpoint
			
--- a/src1/perception/perception_lidar_centerpoint/network/scatter_kernel.cu
+++ b/src1/perception/perception_lidar_centerpoint/network/scatter_kernel.cu
@@ -0,0 +1,67 @@
 
				+// Copyright 2022 TIER IV, Inc.
			
 
				+//
			
 
				+// Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+// you may not use this file except in compliance with the License.
			
 
				+// You may obtain a copy of the License at
			
 
				+//
			
 
				+//     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+//
			
 
				+// Unless required by applicable law or agreed to in writing, software
			
 
				+// distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+// See the License for the specific language governing permissions and
			
 
				+// limitations under the License.
			
 
				+
			
 
				+#include "lidar_centerpoint/network/scatter_kernel.hpp"
			
 
				+
			
 
				+#include <lidar_centerpoint/utils.hpp>
			
 
				+
			
 
				+namespace
			
 
				+{
			
 
				+const std::size_t THREADS_PER_BLOCK = 32;
			
 
				+}  // namespace
			
 
				+
			
 
				+namespace centerpoint
			
 
				+{
			
 
				+__global__ void scatterFeatures_kernel(
			
 
				+  const float * pillar_features, const int * coords, const std::size_t num_pillars,
			
 
				+  const std::size_t pillar_feature_size, const std::size_t grid_size_x,
			
 
				+  const std::size_t grid_size_y, float * scattered_features)
			
 
				+{
			
 
				+  // pillar_features: shape of (max_num_pillars, pillar_feature_size)
			
 
				+  // coords: shape of (max_num_pillars, 3)
			
 
				+  // scattered_features: shape of (num_pillars, grid_size_y, grid_size_x)
			
 
				+  const auto pillar_i = blockIdx.x * THREADS_PER_BLOCK + threadIdx.x;
			
 
				+  const auto feature_i = blockIdx.y * THREADS_PER_BLOCK + threadIdx.y;
			
 
				+
			
 
				+  if (pillar_i >= num_pillars || feature_i >= pillar_feature_size) {
			
 
				+    return;
			
 
				+  }
			
 
				+
			
 
				+  const int3 coord = ((int3 *)coords)[pillar_i];  // zyx
			
 
				+  if (coord.x < 0) {
			
 
				+    return;
			
 
				+  }
			
 
				+
			
 
				+  const auto feature = pillar_features[pillar_feature_size * pillar_i + feature_i];
			
 
				+  scattered_features[grid_size_y * grid_size_x * feature_i + grid_size_x * coord.y + coord.z] =
			
 
				+    feature;
			
 
				+}
			
 
				+
			
 
				+cudaError_t scatterFeatures_launch(
			
 
				+  const float * pillar_features, const int * coords, const std::size_t num_pillars,
			
 
				+  const std::size_t max_voxel_size, const std::size_t encoder_out_feature_size,
			
 
				+  const std::size_t grid_size_x, const std::size_t grid_size_y, float * scattered_features,
			
 
				+  cudaStream_t stream)
			
 
				+{
			
 
				+  dim3 blocks(
			
 
				+    divup(max_voxel_size, THREADS_PER_BLOCK), divup(encoder_out_feature_size, THREADS_PER_BLOCK));
			
 
				+  dim3 threads(THREADS_PER_BLOCK, THREADS_PER_BLOCK);
			
 
				+  scatterFeatures_kernel<<<blocks, threads, 0, stream>>>(
			
 
				+    pillar_features, coords, num_pillars, encoder_out_feature_size, grid_size_x, grid_size_y,
			
 
				+    scattered_features);
			
 
				+
			
 
				+  return cudaGetLastError();
			
 
				+}
			
 
				+
			
 
				+}  // namespace centerpoint
			
--- a/src1/perception/perception_lidar_centerpoint/network/tensorrt_wrapper.cpp
+++ b/src1/perception/perception_lidar_centerpoint/network/tensorrt_wrapper.cpp
@@ -0,0 +1,154 @@
 
				+// Copyright 2021 TIER IV, Inc.
			
 
				+//
			
 
				+// Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+// you may not use this file except in compliance with the License.
			
 
				+// You may obtain a copy of the License at
			
 
				+//
			
 
				+//     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+//
			
 
				+// Unless required by applicable law or agreed to in writing, software
			
 
				+// distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+// See the License for the specific language governing permissions and
			
 
				+// limitations under the License.
			
 
				+
			
 
				+#include "lidar_centerpoint/network/tensorrt_wrapper.hpp"
			
 
				+
			
 
				+#include <NvOnnxParser.h>
			
 
				+
			
 
				+#include <fstream>
			
 
				+#include <memory>
			
 
				+#include <string>
			
 
				+
			
 
				+namespace centerpoint
			
 
				+{
			
 
				+TensorRTWrapper::TensorRTWrapper(const CenterPointConfig & config, const bool verbose)
			
 
				+: config_(config), logger_(Logger(verbose))
			
 
				+{
			
 
				+}
			
 
				+
			
 
				+bool TensorRTWrapper::init(
			
 
				+  const std::string & onnx_path, const std::string & engine_path, const std::string & precision)
			
 
				+{
			
 
				+  runtime_ = unique_ptr<nvinfer1::IRuntime>(nvinfer1::createInferRuntime(logger_));
			
 
				+  if (!runtime_) {
			
 
				+    std::cout << "Fail to create runtime" << std::endl;
			
 
				+    return false;
			
 
				+  }
			
 
				+
			
 
				+  bool success;
			
 
				+  std::ifstream engine_file(engine_path);
			
 
				+  if (engine_file.is_open()) {
			
 
				+    success = loadEngine(engine_path);
			
 
				+  } else {
			
 
				+    success = parseONNX(onnx_path, engine_path, precision);
			
 
				+  }
			
 
				+  success &= createContext();
			
 
				+
			
 
				+  return success;
			
 
				+}
			
 
				+
			
 
				+bool TensorRTWrapper::createContext()
			
 
				+{
			
 
				+  if (!engine_) {
			
 
				+    std::cout << "Fail to create context: Engine isn't created" << std::endl;
			
 
				+    return false;
			
 
				+  }
			
 
				+
			
 
				+  context_ = unique_ptr<nvinfer1::IExecutionContext>(engine_->createExecutionContext());
			
 
				+  if (!context_) {
			
 
				+    std::cout << "Fail to create context" << std::endl;
			
 
				+    return false;
			
 
				+  }
			
 
				+
			
 
				+  return true;
			
 
				+}
			
 
				+
			
 
				+bool TensorRTWrapper::parseONNX(
			
 
				+  const std::string & onnx_path, const std::string & engine_path, const std::string & precision,
			
 
				+  const size_t workspace_size)
			
 
				+{
			
 
				+  auto builder = unique_ptr<nvinfer1::IBuilder>(nvinfer1::createInferBuilder(logger_));
			
 
				+  if (!builder) {
			
 
				+    std::cout << "Fail to create builder" << std::endl;
			
 
				+    return false;
			
 
				+  }
			
 
				+
			
 
				+  auto config = unique_ptr<nvinfer1::IBuilderConfig>(builder->createBuilderConfig());
			
 
				+  if (!config) {
			
 
				+    std::cout << "Fail to create config" << std::endl;
			
 
				+    return false;
			
 
				+  }
			
 
				+  config->setMaxWorkspaceSize(workspace_size);
			
 
				+  if (precision == "fp16") {
			
 
				+    if (builder->platformHasFastFp16()) {
			
 
				+      std::cout << "use TensorRT FP16 Inference" << std::endl;
			
 
				+      config->setFlag(nvinfer1::BuilderFlag::kFP16);
			
 
				+    } else {
			
 
				+      std::cout << "TensorRT FP16 Inference isn't supported in this environment" << std::endl;
			
 
				+    }
			
 
				+  }
			
 
				+
			
 
				+  const auto flag =
			
 
				+    1U << static_cast<uint32_t>(nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
			
 
				+  auto network = unique_ptr<nvinfer1::INetworkDefinition>(builder->createNetworkV2(flag));
			
 
				+  if (!network) {
			
 
				+    std::cout << "Fail to create network" << std::endl;
			
 
				+    return false;
			
 
				+  }
			
 
				+
			
 
				+  auto parser = unique_ptr<nvonnxparser::IParser>(nvonnxparser::createParser(*network, logger_));
			
 
				+  parser->parseFromFile(onnx_path.c_str(), static_cast<int>(nvinfer1::ILogger::Severity::kERROR));
			
 
				+
			
 
				+  if (!setProfile(*builder, *network, *config)) {
			
 
				+    std::cout << "Fail to set profile" << std::endl;
			
 
				+    return false;
			
 
				+  }
			
 
				+
			
 
				+  std::cout << "Applying optimizations and building TRT CUDA engine (" << onnx_path << ") ..."
			
 
				+            << std::endl;
			
 
				+  plan_ = unique_ptr<nvinfer1::IHostMemory>(builder->buildSerializedNetwork(*network, *config));
			
 
				+  if (!plan_) {
			
 
				+    std::cout << "Fail to create serialized network" << std::endl;
			
 
				+    return false;
			
 
				+  }
			
 
				+  engine_ = unique_ptr<nvinfer1::ICudaEngine>(
			
 
				+    runtime_->deserializeCudaEngine(plan_->data(), plan_->size()));
			
 
				+  if (!engine_) {
			
 
				+    std::cout << "Fail to create engine" << std::endl;
			
 
				+    return false;
			
 
				+  }
			
 
				+
			
 
				+  return saveEngine(engine_path);
			
 
				+}
			
 
				+
			
 
				+bool TensorRTWrapper::saveEngine(const std::string & engine_path)
			
 
				+{
			
 
				+  std::cout << "Writing to " << engine_path << std::endl;
			
 
				+  std::ofstream file(engine_path, std::ios::out | std::ios::binary);
			
 
				+  file.write(reinterpret_cast<const char *>(plan_->data()), plan_->size());
			
 
				+  return true;
			
 
				+}
			
 
				+
			
 
				+bool TensorRTWrapper::loadEngine(const std::string & engine_path)
			
 
				+{
			
 
				+  std::ifstream file(engine_path, std::ios::in | std::ios::binary);
			
 
				+  file.seekg(0, std::ifstream::end);
			
 
				+  const size_t size = file.tellg();
			
 
				+  file.seekg(0, std::ifstream::beg);
			
 
				+
			
 
				+  std::unique_ptr<char[]> buffer{new char[size]};
			
 
				+  file.read(buffer.get(), size);
			
 
				+  file.close();
			
 
				+
			
 
				+  if (!runtime_) {
			
 
				+    std::cout << "Fail to load engine: Runtime isn't created" << std::endl;
			
 
				+    return false;
			
 
				+  }
			
 
				+
			
 
				+  std::cout << "Loading from " << engine_path << std::endl;
			
 
				+  engine_ = unique_ptr<nvinfer1::ICudaEngine>(runtime_->deserializeCudaEngine(buffer.get(), size));
			
 
				+  return true;
			
 
				+}
			
 
				+
			
 
				+}  // namespace centerpoint
			
--- a/src1/perception/perception_lidar_centerpoint/perception_lidar_centerpoint.pro
+++ b/src1/perception/perception_lidar_centerpoint/perception_lidar_centerpoint.pro
@@ -0,0 +1,158 @@
 
				+QT -= gui
			
 
				+
			
 
				+CONFIG += c++17 console
			
 
				+CONFIG -= app_bundle
			
 
				+
			
 
				+# The following define makes your compiler emit warnings if you use
			
 
				+# any feature of Qt which as been marked deprecated (the exact warnings
			
 
				+# depend on your compiler). Please consult the documentation of the
			
 
				+# deprecated API in order to know how to port your code away from it.
			
 
				+DEFINES += QT_DEPRECATED_WARNINGS
			
 
				+
			
 
				+# You can also make your code fail to compile if you use deprecated APIs.
			
 
				+# In order to do so, uncomment the following line.
			
 
				+# You can also select to disable deprecated APIs only up to a certain version of Qt.
			
 
				+#DEFINES += QT_DISABLE_DEPRECATED_BEFORE=0x060000    # disables all the APIs deprecated before Qt 6.0.0
			
 
				+
			
 
				+SOURCES += main.cpp \
			
 
				+    centerpoint_trt.cpp \
			
 
				+    network/network_trt.cpp \
			
 
				+    network/tensorrt_wrapper.cpp \
			
 
				+    preprocess/pointcloud_densification.cpp \
			
 
				+    preprocess/voxel_generator.cpp
			
 
				+
			
 
				+INCLUDEPATH += $$PWD/include
			
 
				+INCLUDEPATH += /opt/ros/melodic/include
			
 
				+
			
 
				+DISTFILES += \
			
 
				+    network/scatter_kernel.cu \
			
 
				+    postprocess/circle_nms_kernel.cu \
			
 
				+    postprocess/postprocess_kernel.cu \
			
 
				+    preprocess/preprocess_kernel.cu
			
 
				+
			
 
				+CUDA_SOURCES += \
			
 
				+    network/scatter_kernel.cu \
			
 
				+    postprocess/circle_nms_kernel.cu \
			
 
				+    postprocess/postprocess_kernel.cu \
			
 
				+    preprocess/preprocess_kernel.cu
			
 
				+
			
 
				+
			
 
				+CUDA_SDK = "/usr/local/cuda/"   # cudaSDK路径
			
 
				+
			
 
				+CUDA_DIR = "/usr/local/cuda/"            # CUDA tookit路径
			
 
				+
			
 
				+SYSTEM_NAME = linux         # 自己系统环境 'Win32', 'x64', or 'Win64'
			
 
				+
			
 
				+SYSTEM_TYPE = 64           #操作系统位数 '32' or '64',
			
 
				+
			
 
				+CUDA_ARCH = sm_72         # cuda架构, for example 'compute_10', 'compute_11', 'sm_10'
			
 
				+
			
 
				+NVCC_OPTIONS = --use_fast_math --compiler-options "-fPIC"
			
 
				+
			
 
				+
			
 
				+# include paths
			
 
				+
			
 
				+INCLUDEPATH += $$CUDA_DIR/include
			
 
				+#INCLUDEPATH += /usr/local/cuda-10.0/targets/aarch64-linux/include/crt
			
 
				+
			
 
				+# library directories
			
 
				+
			
 
				+QMAKE_LIBDIR += $$CUDA_DIR/lib/
			
 
				+
			
 
				+CUDA_OBJECTS_DIR = ./
			
 
				+
			
 
				+# The following library conflicts with something in Cuda
			
 
				+
			
 
				+#QMAKE_LFLAGS_RELEASE = /NODEFAULTLIB:msvcrt.lib
			
 
				+
			
 
				+#QMAKE_LFLAGS_DEBUG   = /NODEFAULTLIB:msvcrtd.lib
			
 
				+
			
 
				+# Add the necessary libraries
			
 
				+
			
 
				+CUDA_LIBS =  cudart cufft
			
 
				+
			
 
				+# The following makes sure all path names (which often include spaces) are put between quotation marks
			
 
				+
			
 
				+CUDA_INC = $$join(INCLUDEPATH,'" -I"','-I"','"')
			
 
				+
			
 
				+NVCC_LIBS = $$join(CUDA_LIBS,' -l','-l', '')
			
 
				+
			
 
				+#LIBS += $$join(CUDA_LIBS,'.so ', '', '.so')
			
 
				+
			
 
				+# Configuration of the Cuda compiler
			
 
				+
			
 
				+CONFIG(debug, debug|release) {
			
 
				+
			
 
				+    # Debug mode
			
 
				+
			
 
				+    cuda_d.input = CUDA_SOURCES
			
 
				+
			
 
				+    cuda_d.output = $$CUDA_OBJECTS_DIR/${QMAKE_FILE_BASE}_cuda.o
			
 
				+
			
 
				+    cuda_d.commands = $$CUDA_DIR/bin/nvcc -D_DEBUG $$NVCC_OPTIONS $$CUDA_INC $$NVCC_LIBS --machine $$SYSTEM_TYPE -arch=$$CUDA_ARCH -c -o ${QMAKE_FILE_OUT} ${QMAKE_FILE_NAME}
			
 
				+
			
 
				+    cuda_d.dependency_type = TYPE_C
			
 
				+
			
 
				+    QMAKE_EXTRA_COMPILERS += cuda_d
			
 
				+
			
 
				+}
			
 
				+
			
 
				+else {
			
 
				+
			
 
				+    # Release mode
			
 
				+
			
 
				+    cuda.input = CUDA_SOURCES
			
 
				+
			
 
				+    cuda.output = $$CUDA_OBJECTS_DIR/${QMAKE_FILE_BASE}_cuda.o
			
 
				+
			
 
				+    cuda.commands = $$CUDA_DIR/bin/nvcc $$NVCC_OPTIONS $$CUDA_INC $$NVCC_LIBS --machine $$SYSTEM_TYPE -arch=$$CUDA_ARCH -O3 -c -o ${QMAKE_FILE_OUT} ${QMAKE_FILE_NAME}
			
 
				+
			
 
				+    cuda.dependency_type = TYPE_C
			
 
				+
			
 
				+    QMAKE_EXTRA_COMPILERS += cuda
			
 
				+
			
 
				+}
			
 
				+
			
 
				+
			
 
				+LIBS += -L/usr/local/cuda-10.2/targets/aarch64-linux/lib
			
 
				+
			
 
				+LIBS += -lcudart -lcufft -lyaml-cpp
			
 
				+
			
 
				+#LIBS += -L/home/adc/soft/cudnn-10.2-linux-x64-v7.6.5.32/cuda/lib64 -lcudnn
			
 
				+
			
 
				+LIBS +=  -lmyelin -lnvinfer -lnvonnxparser -lnvcaffe_parser
			
 
				+
			
 
				+#LIBS += -L/home/nvidia/git/libtorch_gpu-1.6.0-linux-aarch64/lib -ltorch_cuda  -ltorch -lc10 -ltorch_cpu
			
 
				+
			
 
				+unix:INCLUDEPATH += /usr/include/eigen3
			
 
				+unix:INCLUDEPATH += /usr/include/pcl-1.7
			
 
				+unix:INCLUDEPATH += /usr/include/pcl-1.8
			
 
				+
			
 
				+!include(../../../include/common.pri ) {
			
 
				+    error( "Couldn't find the common.pri file!" )
			
 
				+}
			
 
				+
			
 
				+!include(../../../include/ivprotobuf.pri ) {
			
 
				+    error( "Couldn't find the ivprotobuf.pri file!" )
			
 
				+}
			
 
				+
			
 
				+LIBS += -lboost_system
			
 
				+
			
 
				+unix:LIBS +=  -lpcl_common\
			
 
				+        -lpcl_features\
			
 
				+        -lpcl_filters\
			
 
				+        -lpcl_io\
			
 
				+        -lpcl_io_ply\
			
 
				+        -lpcl_kdtree\
			
 
				+        -lpcl_keypoints\
			
 
				+        -lpcl_octree\
			
 
				+        -lpcl_outofcore\
			
 
				+        -lpcl_people\
			
 
				+        -lpcl_recognition\
			
 
				+        -lpcl_registration\
			
 
				+        -lpcl_sample_consensus\
			
 
				+        -lpcl_search\
			
 
				+        -lpcl_segmentation\
			
 
				+        -lpcl_surface\
			
 
				+        -lpcl_tracking\
			
 
				+        -lpcl_visualization
			
--- a/src1/perception/perception_lidar_centerpoint/postprocess/circle_nms_kernel.cu
+++ b/src1/perception/perception_lidar_centerpoint/postprocess/circle_nms_kernel.cu
@@ -0,0 +1,144 @@
 
				+// Copyright 2022 TIER IV, Inc.
			
 
				+//
			
 
				+// Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+// you may not use this file except in compliance with the License.
			
 
				+// You may obtain a copy of the License at
			
 
				+//
			
 
				+//     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+//
			
 
				+// Unless required by applicable law or agreed to in writing, software
			
 
				+// distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+// See the License for the specific language governing permissions and
			
 
				+// limitations under the License.
			
 
				+
			
 
				+// Modified from
			
 
				+// https://github.com/open-mmlab/OpenPCDet/blob/master/pcdet/ops/iou3d_nms/src/iou3d_nms_kernel.cu
			
 
				+
			
 
				+/*
			
 
				+3D IoU Calculation and Rotated NMS(modified from 2D NMS written by others)
			
 
				+Written by Shaoshuai Shi
			
 
				+All Rights Reserved 2019-2020.
			
 
				+*/
			
 
				+
			
 
				+#include "lidar_centerpoint/postprocess/circle_nms_kernel.hpp"
			
 
				+
			
 
				+#include <lidar_centerpoint/cuda_utils.hpp>
			
 
				+#include <lidar_centerpoint/utils.hpp>
			
 
				+
			
 
				+#include <thrust/host_vector.h>
			
 
				+
			
 
				+namespace
			
 
				+{
			
 
				+const std::size_t THREADS_PER_BLOCK_NMS = 16;
			
 
				+}  // namespace
			
 
				+
			
 
				+namespace centerpoint
			
 
				+{
			
 
				+
			
 
				+__device__ inline float dist2dPow(const Box3D * a, const Box3D * b)
			
 
				+{
			
 
				+  return powf(a->x - b->x, 2) + powf(a->y - b->y, 2);
			
 
				+}
			
 
				+
			
 
				+__global__ void circleNMS_Kernel(
			
 
				+  const Box3D * boxes, const std::size_t num_boxes3d, const std::size_t col_blocks,
			
 
				+  const float dist2d_pow_threshold, std::uint64_t * mask)
			
 
				+{
			
 
				+  // params: boxes (N,)
			
 
				+  // params: mask (N, divup(N/THREADS_PER_BLOCK_NMS))
			
 
				+
			
 
				+  const auto row_start = blockIdx.y;
			
 
				+  const auto col_start = blockIdx.x;
			
 
				+
			
 
				+  if (row_start > col_start) return;
			
 
				+
			
 
				+  const std::size_t row_size =
			
 
				+    fminf(num_boxes3d - row_start * THREADS_PER_BLOCK_NMS, THREADS_PER_BLOCK_NMS);
			
 
				+  const std::size_t col_size =
			
 
				+    fminf(num_boxes3d - col_start * THREADS_PER_BLOCK_NMS, THREADS_PER_BLOCK_NMS);
			
 
				+
			
 
				+  __shared__ Box3D block_boxes[THREADS_PER_BLOCK_NMS];
			
 
				+
			
 
				+  if (threadIdx.x < col_size) {
			
 
				+    block_boxes[threadIdx.x] = boxes[THREADS_PER_BLOCK_NMS * col_start + threadIdx.x];
			
 
				+  }
			
 
				+  __syncthreads();
			
 
				+
			
 
				+  if (threadIdx.x < row_size) {
			
 
				+    const std::size_t cur_box_idx = THREADS_PER_BLOCK_NMS * row_start + threadIdx.x;
			
 
				+    const Box3D * cur_box = boxes + cur_box_idx;
			
 
				+
			
 
				+    std::uint64_t t = 0;
			
 
				+    std::size_t start = 0;
			
 
				+    if (row_start == col_start) {
			
 
				+      start = threadIdx.x + 1;
			
 
				+    }
			
 
				+    for (std::size_t i = start; i < col_size; i++) {
			
 
				+      if (dist2dPow(cur_box, block_boxes + i) < dist2d_pow_threshold) {
			
 
				+        t |= 1ULL << i;
			
 
				+      }
			
 
				+    }
			
 
				+    mask[cur_box_idx * col_blocks + col_start] = t;
			
 
				+  }
			
 
				+}
			
 
				+
			
 
				+cudaError_t circleNMS_launch(
			
 
				+  const thrust::device_vector<Box3D> & boxes3d, const std::size_t num_boxes3d,
			
 
				+  std::size_t col_blocks, const float distance_threshold,
			
 
				+  thrust::device_vector<std::uint64_t> & mask, cudaStream_t stream)
			
 
				+{
			
 
				+  const float dist2d_pow_thres = powf(distance_threshold, 2);
			
 
				+
			
 
				+  dim3 blocks(col_blocks, col_blocks);
			
 
				+  dim3 threads(THREADS_PER_BLOCK_NMS);
			
 
				+  circleNMS_Kernel<<<blocks, threads, 0, stream>>>(
			
 
				+    thrust::raw_pointer_cast(boxes3d.data()), num_boxes3d, col_blocks, dist2d_pow_thres,
			
 
				+    thrust::raw_pointer_cast(mask.data()));
			
 
				+
			
 
				+  return cudaGetLastError();
			
 
				+}
			
 
				+
			
 
				+std::size_t circleNMS(
			
 
				+  thrust::device_vector<Box3D> & boxes3d, const float distance_threshold,
			
 
				+  thrust::device_vector<bool> & keep_mask, cudaStream_t stream)
			
 
				+{
			
 
				+  const auto num_boxes3d = boxes3d.size();
			
 
				+  const auto col_blocks = divup(num_boxes3d, THREADS_PER_BLOCK_NMS);
			
 
				+  thrust::device_vector<std::uint64_t> mask_d(num_boxes3d * col_blocks);
			
 
				+
			
 
				+  CHECK_CUDA_ERROR(
			
 
				+    circleNMS_launch(boxes3d, num_boxes3d, col_blocks, distance_threshold, mask_d, stream));
			
 
				+
			
 
				+  // memcpy device to host
			
 
				+  thrust::host_vector<std::uint64_t> mask_h(mask_d.size());
			
 
				+  thrust::copy(mask_d.begin(), mask_d.end(), mask_h.begin());
			
 
				+  CHECK_CUDA_ERROR(cudaStreamSynchronize(stream));
			
 
				+
			
 
				+  // generate keep_mask
			
 
				+  std::vector<std::uint64_t> remv_h(col_blocks);
			
 
				+  thrust::host_vector<bool> keep_mask_h(keep_mask.size());
			
 
				+  std::size_t num_to_keep = 0;
			
 
				+  for (std::size_t i = 0; i < num_boxes3d; i++) {
			
 
				+    auto nblock = i / THREADS_PER_BLOCK_NMS;
			
 
				+    auto inblock = i % THREADS_PER_BLOCK_NMS;
			
 
				+
			
 
				+    if (!(remv_h[nblock] & (1ULL << inblock))) {
			
 
				+      keep_mask_h[i] = true;
			
 
				+      num_to_keep++;
			
 
				+      std::uint64_t * p = &mask_h[0] + i * col_blocks;
			
 
				+      for (std::size_t j = nblock; j < col_blocks; j++) {
			
 
				+        remv_h[j] |= p[j];
			
 
				+      }
			
 
				+    } else {
			
 
				+      keep_mask_h[i] = false;
			
 
				+    }
			
 
				+  }
			
 
				+
			
 
				+  // memcpy host to device
			
 
				+  keep_mask = keep_mask_h;
			
 
				+
			
 
				+  return num_to_keep;
			
 
				+}
			
 
				+
			
 
				+}  // namespace centerpoint
			
--- a/src1/perception/perception_lidar_centerpoint/postprocess/postprocess_kernel.cu
+++ b/src1/perception/perception_lidar_centerpoint/postprocess/postprocess_kernel.cu
@@ -0,0 +1,160 @@
 
				+// Copyright 2022 TIER IV, Inc.
			
 
				+//
			
 
				+// Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+// you may not use this file except in compliance with the License.
			
 
				+// You may obtain a copy of the License at
			
 
				+//
			
 
				+//     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+//
			
 
				+// Unless required by applicable law or agreed to in writing, software
			
 
				+// distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+// See the License for the specific language governing permissions and
			
 
				+// limitations under the License.
			
 
				+
			
 
				+#include "lidar_centerpoint/postprocess/circle_nms_kernel.hpp"
			
 
				+
			
 
				+#include <lidar_centerpoint/postprocess/postprocess_kernel.hpp>
			
 
				+
			
 
				+#include <thrust/count.h>
			
 
				+#include <thrust/sort.h>
			
 
				+
			
 
				+namespace
			
 
				+{
			
 
				+const std::size_t THREADS_PER_BLOCK = 32;
			
 
				+}  // namespace
			
 
				+
			
 
				+namespace centerpoint
			
 
				+{
			
 
				+
			
 
				+struct is_score_greater
			
 
				+{
			
 
				+  is_score_greater(float t) : t_(t) {}
			
 
				+
			
 
				+  __device__ bool operator()(const Box3D & b) { return b.score > t_; }
			
 
				+
			
 
				+private:
			
 
				+  float t_{0.0};
			
 
				+};
			
 
				+
			
 
				+struct is_kept
			
 
				+{
			
 
				+  __device__ bool operator()(const bool keep) { return keep; }
			
 
				+};
			
 
				+
			
 
				+struct score_greater
			
 
				+{
			
 
				+  __device__ bool operator()(const Box3D & lb, const Box3D & rb) { return lb.score > rb.score; }
			
 
				+};
			
 
				+
			
 
				+__device__ inline float sigmoid(float x) { return 1.0f / expf(-x); }
			
 
				+
			
 
				+__global__ void generateBoxes3D_kernel(
			
 
				+  const float * out_heatmap, const float * out_offset, const float * out_z, const float * out_dim,
			
 
				+  const float * out_rot, const float * out_vel, const float voxel_size_x, const float voxel_size_y,
			
 
				+  const float range_min_x, const float range_min_y, const std::size_t down_grid_size_x,
			
 
				+  const std::size_t down_grid_size_y, const std::size_t downsample_factor, const int class_size,
			
 
				+  Box3D * det_boxes3d)
			
 
				+{
			
 
				+  // generate boxes3d from the outputs of the network.
			
 
				+  // shape of out_*: (N, DOWN_GRID_SIZE_Y, DOWN_GRID_SIZE_X)
			
 
				+  // heatmap: N = class_size, offset: N = 2, z: N = 1, dim: N = 3, rot: N = 2, vel: N = 2
			
 
				+  const auto yi = blockIdx.x * THREADS_PER_BLOCK + threadIdx.x;
			
 
				+  const auto xi = blockIdx.y * THREADS_PER_BLOCK + threadIdx.y;
			
 
				+  const auto idx = down_grid_size_x * yi + xi;
			
 
				+  const auto down_grid_size = down_grid_size_y * down_grid_size_x;
			
 
				+
			
 
				+  if (yi >= down_grid_size_y || xi >= down_grid_size_x) {
			
 
				+    return;
			
 
				+  }
			
 
				+
			
 
				+  int label = -1;
			
 
				+  float max_score = -1;
			
 
				+  for (int ci = 0; ci < class_size; ci++) {
			
 
				+    float score = sigmoid(out_heatmap[down_grid_size * ci + idx]);
			
 
				+    if (score > max_score) {
			
 
				+      label = ci;
			
 
				+      max_score = score;
			
 
				+    }
			
 
				+  }
			
 
				+
			
 
				+  const float offset_x = out_offset[down_grid_size * 0 + idx];
			
 
				+  const float offset_y = out_offset[down_grid_size * 1 + idx];
			
 
				+  const float x = voxel_size_x * downsample_factor * (xi + offset_x) + range_min_x;
			
 
				+  const float y = voxel_size_y * downsample_factor * (yi + offset_y) + range_min_y;
			
 
				+  const float z = out_z[idx];
			
 
				+  const float w = out_dim[down_grid_size * 0 + idx];
			
 
				+  const float l = out_dim[down_grid_size * 1 + idx];
			
 
				+  const float h = out_dim[down_grid_size * 2 + idx];
			
 
				+  const float yaw_sin = out_rot[down_grid_size * 0 + idx];
			
 
				+  const float yaw_cos = out_rot[down_grid_size * 1 + idx];
			
 
				+  const float vel_x = out_vel[down_grid_size * 0 + idx];
			
 
				+  const float vel_y = out_vel[down_grid_size * 1 + idx];
			
 
				+
			
 
				+  det_boxes3d[idx].label = label;
			
 
				+  det_boxes3d[idx].score = max_score;
			
 
				+  det_boxes3d[idx].x = x;
			
 
				+  det_boxes3d[idx].y = y;
			
 
				+  det_boxes3d[idx].z = z;
			
 
				+  det_boxes3d[idx].length = expf(l);
			
 
				+  det_boxes3d[idx].width = expf(w);
			
 
				+  det_boxes3d[idx].height = expf(h);
			
 
				+  det_boxes3d[idx].yaw = atan2f(yaw_sin, yaw_cos);
			
 
				+  det_boxes3d[idx].vel_x = vel_x;
			
 
				+  det_boxes3d[idx].vel_y = vel_y;
			
 
				+}
			
 
				+
			
 
				+PostProcessCUDA::PostProcessCUDA(const CenterPointConfig & config) : config_(config)
			
 
				+{
			
 
				+  const auto num_raw_boxes3d = config.down_grid_size_y_ * config.down_grid_size_x_;
			
 
				+  boxes3d_d_ = thrust::device_vector<Box3D>(num_raw_boxes3d);
			
 
				+}
			
 
				+
			
 
				+cudaError_t PostProcessCUDA::generateDetectedBoxes3D_launch(
			
 
				+  const float * out_heatmap, const float * out_offset, const float * out_z, const float * out_dim,
			
 
				+  const float * out_rot, const float * out_vel, std::vector<Box3D> & det_boxes3d,
			
 
				+  cudaStream_t stream)
			
 
				+{
			
 
				+  dim3 blocks(
			
 
				+    divup(config_.down_grid_size_y_, THREADS_PER_BLOCK),
			
 
				+    divup(config_.down_grid_size_x_, THREADS_PER_BLOCK));
			
 
				+  dim3 threads(THREADS_PER_BLOCK, THREADS_PER_BLOCK);
			
 
				+  generateBoxes3D_kernel<<<blocks, threads, 0, stream>>>(
			
 
				+    out_heatmap, out_offset, out_z, out_dim, out_rot, out_vel, config_.voxel_size_x_,
			
 
				+    config_.voxel_size_y_, config_.range_min_x_, config_.range_min_y_, config_.down_grid_size_x_,
			
 
				+    config_.down_grid_size_y_, config_.downsample_factor_, config_.class_size_,
			
 
				+    thrust::raw_pointer_cast(boxes3d_d_.data()));
			
 
				+
			
 
				+  // suppress by socre
			
 
				+  const auto num_det_boxes3d = thrust::count_if(
			
 
				+    thrust::device, boxes3d_d_.begin(), boxes3d_d_.end(),
			
 
				+    is_score_greater(config_.score_threshold_));
			
 
				+  if (num_det_boxes3d == 0) {
			
 
				+    return cudaGetLastError();
			
 
				+  }
			
 
				+  thrust::device_vector<Box3D> det_boxes3d_d(num_det_boxes3d);
			
 
				+  thrust::copy_if(
			
 
				+    thrust::device, boxes3d_d_.begin(), boxes3d_d_.end(), det_boxes3d_d.begin(),
			
 
				+    is_score_greater(config_.score_threshold_));
			
 
				+
			
 
				+  // sort by score
			
 
				+  thrust::sort(det_boxes3d_d.begin(), det_boxes3d_d.end(), score_greater());
			
 
				+
			
 
				+  // supress by NMS
			
 
				+  thrust::device_vector<bool> final_keep_mask_d(num_det_boxes3d);
			
 
				+  const auto num_final_det_boxes3d =
			
 
				+    circleNMS(det_boxes3d_d, config_.circle_nms_dist_threshold_, final_keep_mask_d, stream);
			
 
				+
			
 
				+  thrust::device_vector<Box3D> final_det_boxes3d_d(num_final_det_boxes3d);
			
 
				+  thrust::copy_if(
			
 
				+    thrust::device, det_boxes3d_d.begin(), det_boxes3d_d.end(), final_keep_mask_d.begin(),
			
 
				+    final_det_boxes3d_d.begin(), is_kept());
			
 
				+
			
 
				+  // memcpy device to host
			
 
				+  det_boxes3d.resize(num_final_det_boxes3d);
			
 
				+  thrust::copy(final_det_boxes3d_d.begin(), final_det_boxes3d_d.end(), det_boxes3d.begin());
			
 
				+
			
 
				+  return cudaGetLastError();
			
 
				+}
			
 
				+
			
 
				+}  // namespace centerpoint
			
--- a/src1/perception/perception_lidar_centerpoint/preprocess/pointcloud_densification.cpp
+++ b/src1/perception/perception_lidar_centerpoint/preprocess/pointcloud_densification.cpp
@@ -0,0 +1,97 @@
 
				+// Copyright 2021 TIER IV, Inc.
			
 
				+//
			
 
				+// Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+// you may not use this file except in compliance with the License.
			
 
				+// You may obtain a copy of the License at
			
 
				+//
			
 
				+//     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+//
			
 
				+// Unless required by applicable law or agreed to in writing, software
			
 
				+// distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+// See the License for the specific language governing permissions and
			
 
				+// limitations under the License.
			
 
				+
			
 
				+#include "lidar_centerpoint/preprocess/pointcloud_densification.hpp"
			
 
				+
			
 
				+#include <pcl_ros/transforms.hpp>
			
 
				+
			
 
				+#include <boost/optional.hpp>
			
 
				+
			
 
				+#include <pcl_conversions/pcl_conversions.h>
			
 
				+#ifdef ROS_DISTRO_GALACTIC
			
 
				+#include <tf2_eigen/tf2_eigen.h>
			
 
				+#else
			
 
				+#include <tf2_eigen/tf2_eigen.hpp>
			
 
				+#endif
			
 
				+
			
 
				+#include <string>
			
 
				+#include <utility>
			
 
				+
			
 
				+namespace
			
 
				+{
			
 
				+boost::optional<geometry_msgs::msg::Transform> getTransform(
			
 
				+  const tf2_ros::Buffer & tf_buffer, const std::string & target_frame_id,
			
 
				+  const std::string & source_frame_id, const rclcpp::Time & time)
			
 
				+{
			
 
				+  try {
			
 
				+    geometry_msgs::msg::TransformStamped transform_stamped;
			
 
				+    transform_stamped = tf_buffer.lookupTransform(
			
 
				+      target_frame_id, source_frame_id, time, rclcpp::Duration::from_seconds(0.5));
			
 
				+    return transform_stamped.transform;
			
 
				+  } catch (tf2::TransformException & ex) {
			
 
				+    RCLCPP_WARN_STREAM(rclcpp::get_logger("lidar_centerpoint"), ex.what());
			
 
				+    return boost::none;
			
 
				+  }
			
 
				+}
			
 
				+
			
 
				+Eigen::Affine3f transformToEigen(const geometry_msgs::msg::Transform & t)
			
 
				+{
			
 
				+  Eigen::Affine3f a;
			
 
				+  a.matrix() = tf2::transformToEigen(t).matrix().cast<float>();
			
 
				+  return a;
			
 
				+}
			
 
				+
			
 
				+}  // namespace
			
 
				+
			
 
				+namespace centerpoint
			
 
				+{
			
 
				+PointCloudDensification::PointCloudDensification(const DensificationParam & param) : param_(param)
			
 
				+{
			
 
				+}
			
 
				+
			
 
				+bool PointCloudDensification::enqueuePointCloud(
			
 
				+  const sensor_msgs::msg::PointCloud2 & pointcloud_msg, const tf2_ros::Buffer & tf_buffer)
			
 
				+{
			
 
				+  const auto header = pointcloud_msg.header;
			
 
				+
			
 
				+  auto transform_world2current =
			
 
				+    getTransform(tf_buffer, header.frame_id, param_.world_frame_id(), header.stamp);
			
 
				+  if (!transform_world2current) {
			
 
				+    return false;
			
 
				+  }
			
 
				+  auto affine_world2current = transformToEigen(transform_world2current.get());
			
 
				+
			
 
				+  enqueue(pointcloud_msg, affine_world2current);
			
 
				+  dequeue();
			
 
				+
			
 
				+  return true;
			
 
				+}
			
 
				+
			
 
				+void PointCloudDensification::enqueue(
			
 
				+  const sensor_msgs::msg::PointCloud2 & msg, const Eigen::Affine3f & affine_world2current)
			
 
				+{
			
 
				+  affine_world2current_ = affine_world2current;
			
 
				+  current_timestamp_ = rclcpp::Time(msg.header.stamp).seconds();
			
 
				+  PointCloudWithTransform pointcloud = {msg, affine_world2current.inverse()};
			
 
				+  pointcloud_cache_.push_front(pointcloud);
			
 
				+}
			
 
				+
			
 
				+void PointCloudDensification::dequeue()
			
 
				+{
			
 
				+  if (pointcloud_cache_.size() > param_.pointcloud_cache_size()) {
			
 
				+    pointcloud_cache_.pop_back();
			
 
				+  }
			
 
				+}
			
 
				+
			
 
				+}  // namespace centerpoint
			
--- a/src1/perception/perception_lidar_centerpoint/preprocess/preprocess_kernel.cu
+++ b/src1/perception/perception_lidar_centerpoint/preprocess/preprocess_kernel.cu
@@ -0,0 +1,159 @@
 
				+// Copyright 2022 TIER IV, Inc.
			
 
				+//
			
 
				+// Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+// you may not use this file except in compliance with the License.
			
 
				+// You may obtain a copy of the License at
			
 
				+//
			
 
				+//     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+//
			
 
				+// Unless required by applicable law or agreed to in writing, software
			
 
				+// distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+// See the License for the specific language governing permissions and
			
 
				+// limitations under the License.
			
 
				+/*
			
 
				+ * SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
			
 
				+ * SPDX-License-Identifier: Apache-2.0
			
 
				+ *
			
 
				+ * Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+ * you may not use this file except in compliance with the License.
			
 
				+ * You may obtain a copy of the License at
			
 
				+ *
			
 
				+ * http://www.apache.org/licenses/LICENSE-2.0
			
 
				+ *
			
 
				+ * Unless required by applicable law or agreed to in writing, software
			
 
				+ * distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+ * See the License for the specific language governing permissions and
			
 
				+ * limitations under the License.
			
 
				+ */
			
 
				+
			
 
				+#include "lidar_centerpoint/preprocess/preprocess_kernel.hpp"
			
 
				+
			
 
				+#include <lidar_centerpoint/utils.hpp>
			
 
				+
			
 
				+namespace
			
 
				+{
			
 
				+const std::size_t MAX_POINT_IN_VOXEL_SIZE = 32;  // the same as max_point_in_voxel_size_ in config
			
 
				+const std::size_t WARPS_PER_BLOCK = 4;
			
 
				+const std::size_t ENCODER_IN_FEATURE_SIZE = 9;  // the same as encoder_in_feature_size_ in config
			
 
				+}  // namespace
			
 
				+
			
 
				+namespace centerpoint
			
 
				+{
			
 
				+__global__ void generateFeatures_kernel(
			
 
				+  const float * voxel_features, const float * voxel_num_points, const int * coords,
			
 
				+  const std::size_t num_voxels, const float voxel_x, const float voxel_y, const float voxel_z,
			
 
				+  const float range_min_x, const float range_min_y, const float range_min_z, float * features)
			
 
				+{
			
 
				+  // voxel_features (float): (max_voxel_size, max_point_in_voxel_size, point_feature_size)
			
 
				+  // voxel_num_points (int): (max_voxel_size)
			
 
				+  // coords (int): (max_voxel_size, point_dim_size)
			
 
				+  int pillar_idx = blockIdx.x * WARPS_PER_BLOCK + threadIdx.x / MAX_POINT_IN_VOXEL_SIZE;
			
 
				+  int point_idx = threadIdx.x % MAX_POINT_IN_VOXEL_SIZE;
			
 
				+  int pillar_idx_inBlock = threadIdx.x / MAX_POINT_IN_VOXEL_SIZE;  // max_point_in_voxel_size
			
 
				+
			
 
				+  if (pillar_idx >= num_voxels) return;
			
 
				+
			
 
				+  // load src
			
 
				+  __shared__ float4 pillarSM[WARPS_PER_BLOCK][MAX_POINT_IN_VOXEL_SIZE];
			
 
				+  __shared__ float3 pillarSumSM[WARPS_PER_BLOCK];
			
 
				+  __shared__ int3 cordsSM[WARPS_PER_BLOCK];
			
 
				+  __shared__ int pointsNumSM[WARPS_PER_BLOCK];
			
 
				+  __shared__ float pillarOutSM[WARPS_PER_BLOCK][MAX_POINT_IN_VOXEL_SIZE][ENCODER_IN_FEATURE_SIZE];
			
 
				+
			
 
				+  if (threadIdx.x < WARPS_PER_BLOCK) {
			
 
				+    pointsNumSM[threadIdx.x] = voxel_num_points[blockIdx.x * WARPS_PER_BLOCK + threadIdx.x];
			
 
				+    cordsSM[threadIdx.x] = ((int3 *)coords)[blockIdx.x * WARPS_PER_BLOCK + threadIdx.x];
			
 
				+    pillarSumSM[threadIdx.x] = {0, 0, 0};
			
 
				+  }
			
 
				+
			
 
				+  pillarSM[pillar_idx_inBlock][point_idx] =
			
 
				+    ((float4 *)voxel_features)[pillar_idx * MAX_POINT_IN_VOXEL_SIZE + point_idx];
			
 
				+  __syncthreads();
			
 
				+
			
 
				+  // calculate sm in a pillar
			
 
				+  if (point_idx < pointsNumSM[pillar_idx_inBlock]) {
			
 
				+    atomicAdd(&(pillarSumSM[pillar_idx_inBlock].x), pillarSM[pillar_idx_inBlock][point_idx].x);
			
 
				+    atomicAdd(&(pillarSumSM[pillar_idx_inBlock].y), pillarSM[pillar_idx_inBlock][point_idx].y);
			
 
				+    atomicAdd(&(pillarSumSM[pillar_idx_inBlock].z), pillarSM[pillar_idx_inBlock][point_idx].z);
			
 
				+  }
			
 
				+  __syncthreads();
			
 
				+
			
 
				+  // feature-mean
			
 
				+  float3 mean;
			
 
				+  float validPoints = pointsNumSM[pillar_idx_inBlock];
			
 
				+  mean.x = pillarSumSM[pillar_idx_inBlock].x / validPoints;
			
 
				+  mean.y = pillarSumSM[pillar_idx_inBlock].y / validPoints;
			
 
				+  mean.z = pillarSumSM[pillar_idx_inBlock].z / validPoints;
			
 
				+
			
 
				+  mean.x = pillarSM[pillar_idx_inBlock][point_idx].x - mean.x;
			
 
				+  mean.y = pillarSM[pillar_idx_inBlock][point_idx].y - mean.y;
			
 
				+  mean.z = pillarSM[pillar_idx_inBlock][point_idx].z - mean.z;
			
 
				+
			
 
				+  // calculate offset
			
 
				+  float x_offset = voxel_x / 2 + cordsSM[pillar_idx_inBlock].z * voxel_x + range_min_x;
			
 
				+  float y_offset = voxel_y / 2 + cordsSM[pillar_idx_inBlock].y * voxel_y + range_min_y;
			
 
				+  float z_offset = voxel_z / 2 + cordsSM[pillar_idx_inBlock].x * voxel_z + range_min_z;
			
 
				+
			
 
				+  // feature-offset
			
 
				+  float3 center;
			
 
				+  center.x = pillarSM[pillar_idx_inBlock][point_idx].x - x_offset;
			
 
				+  center.y = pillarSM[pillar_idx_inBlock][point_idx].y - y_offset;
			
 
				+  center.z = pillarSM[pillar_idx_inBlock][point_idx].z - z_offset;
			
 
				+
			
 
				+  // store output
			
 
				+  if (point_idx < pointsNumSM[pillar_idx_inBlock]) {
			
 
				+    pillarOutSM[pillar_idx_inBlock][point_idx][0] = pillarSM[pillar_idx_inBlock][point_idx].x;
			
 
				+    pillarOutSM[pillar_idx_inBlock][point_idx][1] = pillarSM[pillar_idx_inBlock][point_idx].y;
			
 
				+    pillarOutSM[pillar_idx_inBlock][point_idx][2] = pillarSM[pillar_idx_inBlock][point_idx].z;
			
 
				+    pillarOutSM[pillar_idx_inBlock][point_idx][3] = pillarSM[pillar_idx_inBlock][point_idx].w;
			
 
				+
			
 
				+    pillarOutSM[pillar_idx_inBlock][point_idx][4] = mean.x;
			
 
				+    pillarOutSM[pillar_idx_inBlock][point_idx][5] = mean.y;
			
 
				+    pillarOutSM[pillar_idx_inBlock][point_idx][6] = mean.z;
			
 
				+
			
 
				+    pillarOutSM[pillar_idx_inBlock][point_idx][7] = center.x;
			
 
				+    pillarOutSM[pillar_idx_inBlock][point_idx][8] = center.y;
			
 
				+
			
 
				+  } else {
			
 
				+    pillarOutSM[pillar_idx_inBlock][point_idx][0] = 0;
			
 
				+    pillarOutSM[pillar_idx_inBlock][point_idx][1] = 0;
			
 
				+    pillarOutSM[pillar_idx_inBlock][point_idx][2] = 0;
			
 
				+    pillarOutSM[pillar_idx_inBlock][point_idx][3] = 0;
			
 
				+
			
 
				+    pillarOutSM[pillar_idx_inBlock][point_idx][4] = 0;
			
 
				+    pillarOutSM[pillar_idx_inBlock][point_idx][5] = 0;
			
 
				+    pillarOutSM[pillar_idx_inBlock][point_idx][6] = 0;
			
 
				+
			
 
				+    pillarOutSM[pillar_idx_inBlock][point_idx][7] = 0;
			
 
				+    pillarOutSM[pillar_idx_inBlock][point_idx][8] = 0;
			
 
				+  }
			
 
				+
			
 
				+  __syncthreads();
			
 
				+
			
 
				+  for (int i = 0; i < ENCODER_IN_FEATURE_SIZE; i++) {
			
 
				+    int outputSMId = pillar_idx_inBlock * MAX_POINT_IN_VOXEL_SIZE * ENCODER_IN_FEATURE_SIZE +
			
 
				+                     i * MAX_POINT_IN_VOXEL_SIZE + point_idx;
			
 
				+    int outputId = pillar_idx * MAX_POINT_IN_VOXEL_SIZE * ENCODER_IN_FEATURE_SIZE +
			
 
				+                   i * MAX_POINT_IN_VOXEL_SIZE + point_idx;
			
 
				+    features[outputId] = ((float *)pillarOutSM)[outputSMId];
			
 
				+  }
			
 
				+}
			
 
				+
			
 
				+cudaError_t generateFeatures_launch(
			
 
				+  const float * voxel_features, const float * voxel_num_points, const int * coords,
			
 
				+  const std::size_t num_voxels, const std::size_t max_voxel_size, const float voxel_size_x,
			
 
				+  const float voxel_size_y, const float voxel_size_z, const float range_min_x,
			
 
				+  const float range_min_y, const float range_min_z, float * features, cudaStream_t stream)
			
 
				+{
			
 
				+  dim3 blocks(divup(max_voxel_size, WARPS_PER_BLOCK));
			
 
				+  dim3 threads(WARPS_PER_BLOCK * MAX_POINT_IN_VOXEL_SIZE);
			
 
				+  generateFeatures_kernel<<<blocks, threads, 0, stream>>>(
			
 
				+    voxel_features, voxel_num_points, coords, num_voxels, voxel_size_x, voxel_size_y, voxel_size_z,
			
 
				+    range_min_x, range_min_y, range_min_z, features);
			
 
				+
			
 
				+  return cudaGetLastError();
			
 
				+}
			
 
				+
			
 
				+}  // namespace centerpoint
			
--- a/src1/perception/perception_lidar_centerpoint/preprocess/voxel_generator.cpp
+++ b/src1/perception/perception_lidar_centerpoint/preprocess/voxel_generator.cpp
@@ -0,0 +1,130 @@
 
				+// Copyright 2021 TIER IV, Inc.
			
 
				+//
			
 
				+// Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+// you may not use this file except in compliance with the License.
			
 
				+// You may obtain a copy of the License at
			
 
				+//
			
 
				+//     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+//
			
 
				+// Unless required by applicable law or agreed to in writing, software
			
 
				+// distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+// See the License for the specific language governing permissions and
			
 
				+// limitations under the License.
			
 
				+
			
 
				+#include "lidar_centerpoint/preprocess/voxel_generator.hpp"
			
 
				+
			
 
				+#include <sensor_msgs/point_cloud2_iterator.hpp>
			
 
				+
			
 
				+namespace centerpoint
			
 
				+{
			
 
				+VoxelGeneratorTemplate::VoxelGeneratorTemplate(
			
 
				+  const DensificationParam & param, const CenterPointConfig & config)
			
 
				+: config_(config)
			
 
				+{
			
 
				+  pd_ptr_ = std::make_unique<PointCloudDensification>(param);
			
 
				+  range_[0] = config.range_min_x_;
			
 
				+  range_[1] = config.range_min_y_;
			
 
				+  range_[2] = config.range_min_z_;
			
 
				+  range_[3] = config.range_max_x_;
			
 
				+  range_[4] = config.range_max_y_;
			
 
				+  range_[5] = config.range_max_z_;
			
 
				+  grid_size_[0] = config.grid_size_x_;
			
 
				+  grid_size_[1] = config.grid_size_y_;
			
 
				+  grid_size_[2] = config.grid_size_z_;
			
 
				+  recip_voxel_size_[0] = 1 / config.voxel_size_x_;
			
 
				+  recip_voxel_size_[1] = 1 / config.voxel_size_y_;
			
 
				+  recip_voxel_size_[2] = 1 / config.voxel_size_z_;
			
 
				+}
			
 
				+
			
 
				+bool VoxelGeneratorTemplate::enqueuePointCloud(
			
 
				+  const sensor_msgs::msg::PointCloud2 & input_pointcloud_msg, const tf2_ros::Buffer & tf_buffer)
			
 
				+{
			
 
				+  return pd_ptr_->enqueuePointCloud(input_pointcloud_msg, tf_buffer);
			
 
				+}
			
 
				+
			
 
				+std::size_t VoxelGenerator::pointsToVoxels(
			
 
				+  std::vector<float> & voxels, std::vector<int> & coordinates,
			
 
				+  std::vector<float> & num_points_per_voxel)
			
 
				+{
			
 
				+  // voxels (float): (max_voxel_size * max_point_in_voxel_size * point_feature_size)
			
 
				+  // coordinates (int): (max_voxel_size * point_dim_size)
			
 
				+  // num_points_per_voxel (float): (max_voxel_size)
			
 
				+
			
 
				+  const std::size_t grid_size = config_.grid_size_z_ * config_.grid_size_y_ * config_.grid_size_x_;
			
 
				+  std::vector<int> coord_to_voxel_idx(grid_size, -1);
			
 
				+
			
 
				+  std::size_t voxel_cnt = 0;  // @return
			
 
				+  std::vector<float> point;
			
 
				+  point.resize(config_.point_feature_size_);
			
 
				+  std::vector<float> coord_zyx;
			
 
				+  coord_zyx.resize(config_.point_dim_size_);
			
 
				+  bool out_of_range;
			
 
				+  std::size_t point_cnt;
			
 
				+  int c, coord_idx, voxel_idx;
			
 
				+  Eigen::Vector3f point_current, point_past;
			
 
				+
			
 
				+  for (auto pc_cache_iter = pd_ptr_->getPointCloudCacheIter(); !pd_ptr_->isCacheEnd(pc_cache_iter);
			
 
				+       pc_cache_iter++) {
			
 
				+    auto pc_msg = pc_cache_iter->pointcloud_msg;
			
 
				+    auto affine_past2current =
			
 
				+      pd_ptr_->getAffineWorldToCurrent() * pc_cache_iter->affine_past2world;
			
 
				+    float timelag = static_cast<float>(
			
 
				+      pd_ptr_->getCurrentTimestamp() - rclcpp::Time(pc_msg.header.stamp).seconds());
			
 
				+
			
 
				+    for (sensor_msgs::PointCloud2ConstIterator<float> x_iter(pc_msg, "x"), y_iter(pc_msg, "y"),
			
 
				+         z_iter(pc_msg, "z");
			
 
				+         x_iter != x_iter.end(); ++x_iter, ++y_iter, ++z_iter) {
			
 
				+      point_past << *x_iter, *y_iter, *z_iter;
			
 
				+      point_current = affine_past2current * point_past;
			
 
				+
			
 
				+      point[0] = point_current.x();
			
 
				+      point[1] = point_current.y();
			
 
				+      point[2] = point_current.z();
			
 
				+      point[3] = timelag;
			
 
				+
			
 
				+      out_of_range = false;
			
 
				+      for (std::size_t di = 0; di < config_.point_dim_size_; di++) {
			
 
				+        c = static_cast<int>((point[di] - range_[di]) * recip_voxel_size_[di]);
			
 
				+        if (c < 0 || c >= grid_size_[di]) {
			
 
				+          out_of_range = true;
			
 
				+          break;
			
 
				+        }
			
 
				+        coord_zyx[config_.point_dim_size_ - di - 1] = c;
			
 
				+      }
			
 
				+      if (out_of_range) {
			
 
				+        continue;
			
 
				+      }
			
 
				+
			
 
				+      coord_idx = coord_zyx[0] * config_.grid_size_y_ * config_.grid_size_x_ +
			
 
				+                  coord_zyx[1] * config_.grid_size_x_ + coord_zyx[2];
			
 
				+      voxel_idx = coord_to_voxel_idx[coord_idx];
			
 
				+      if (voxel_idx == -1) {
			
 
				+        voxel_idx = voxel_cnt;
			
 
				+        if (voxel_cnt >= config_.max_voxel_size_) {
			
 
				+          continue;
			
 
				+        }
			
 
				+
			
 
				+        voxel_cnt++;
			
 
				+        coord_to_voxel_idx[coord_idx] = voxel_idx;
			
 
				+        for (std::size_t di = 0; di < config_.point_dim_size_; di++) {
			
 
				+          coordinates[voxel_idx * config_.point_dim_size_ + di] = coord_zyx[di];
			
 
				+        }
			
 
				+      }
			
 
				+
			
 
				+      point_cnt = num_points_per_voxel[voxel_idx];
			
 
				+      if (point_cnt < config_.max_point_in_voxel_size_) {
			
 
				+        for (std::size_t fi = 0; fi < config_.point_feature_size_; fi++) {
			
 
				+          voxels
			
 
				+            [voxel_idx * config_.max_point_in_voxel_size_ * config_.point_feature_size_ +
			
 
				+             point_cnt * config_.point_feature_size_ + fi] = point[fi];
			
 
				+        }
			
 
				+        num_points_per_voxel[voxel_idx]++;
			
 
				+      }
			
 
				+    }
			
 
				+  }
			
 
				+
			
 
				+  return voxel_cnt;
			
 
				+}
			
 
				+
			
 
				+}  // namespace centerpoint
		`@@ -0,0 +1 @@`
		`+from autowrare, but not compile ok in agx xvier.`