Browse Source

Merge branch 'master' of http://10.16.0.29:3000/ADPilot/modularization

chenxiaowei 2 years ago
parent
commit
a7dbd3c58c
49 changed files with 4241 additions and 71 deletions
  1. 2 2
      sh/automake/autogen.sh
  2. 1 1
      sh/automake/automake_agx.sh
  3. 73 0
      src/detection/detection_lidar_centerpoint/.gitignore
  4. 161 0
      src/detection/detection_lidar_centerpoint/detection_lidar_centerpoint.pro
  5. 124 0
      src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/centerpoint_config.hpp
  6. 115 0
      src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/centerpoint_trt.hpp
  7. 120 0
      src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/cuda_utils.hpp
  8. 47 0
      src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/detection_class_remapper.hpp
  9. 53 0
      src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/network/network_trt.hpp
  10. 31 0
      src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/network/scatter_kernel.hpp
  11. 68 0
      src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/network/tensorrt_wrapper.hpp
  12. 70 0
      src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/node.hpp
  13. 32 0
      src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/postprocess/circle_nms_kernel.hpp
  14. 81 0
      src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/postprocess/non_maximum_suppression.hpp
  15. 47 0
      src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/postprocess/postprocess_kernel.hpp
  16. 100 0
      src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/preprocess/pointcloud_densification.hpp
  17. 31 0
      src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/preprocess/preprocess_kernel.hpp
  18. 65 0
      src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/preprocess/voxel_generator.hpp
  19. 43 0
      src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/ros_utils.hpp
  20. 63 0
      src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/single_inference_node.hpp
  21. 43 0
      src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/utils.hpp
  22. 258 0
      src/detection/detection_lidar_centerpoint/lib/centerpoint_trt.cpp
  23. 71 0
      src/detection/detection_lidar_centerpoint/lib/detection_class_remapper.cpp
  24. 74 0
      src/detection/detection_lidar_centerpoint/lib/network/network_trt.cpp
  25. 68 0
      src/detection/detection_lidar_centerpoint/lib/network/scatter_kernel.cu
  26. 161 0
      src/detection/detection_lidar_centerpoint/lib/network/tensorrt_wrapper.cpp
  27. 145 0
      src/detection/detection_lidar_centerpoint/lib/postprocess/circle_nms_kernel.cu
  28. 104 0
      src/detection/detection_lidar_centerpoint/lib/postprocess/non_maximum_suppression.cpp
  29. 166 0
      src/detection/detection_lidar_centerpoint/lib/postprocess/postprocess_kernel.cu
  30. 118 0
      src/detection/detection_lidar_centerpoint/lib/preprocess/pointcloud_densification.cpp
  31. 160 0
      src/detection/detection_lidar_centerpoint/lib/preprocess/preprocess_kernel.cu
  32. 224 0
      src/detection/detection_lidar_centerpoint/lib/preprocess/voxel_generator.cpp
  33. 95 0
      src/detection/detection_lidar_centerpoint/lib/ros_utils.cpp
  34. 34 0
      src/detection/detection_lidar_centerpoint/lib/utils.cpp
  35. 59 0
      src/detection/detection_lidar_centerpoint/main.cpp
  36. 223 0
      src/detection/detection_lidar_centerpoint/tensorrt_common/tensorrt_common.cpp
  37. 142 0
      src/detection/detection_lidar_centerpoint/tensorrt_common/tensorrt_common.hpp
  38. 194 0
      src/detection/detection_lidar_cnn_segmentation/cluster2d.cpp
  39. 16 0
      src/detection/detection_lidar_cnn_segmentation/cluster2d.h
  40. 189 2
      src/detection/detection_lidar_cnn_segmentation/cnn_segmentation.cpp
  41. 62 0
      src/detection/detection_lidar_cnn_segmentation/cnn_segmentation.h
  42. 11 0
      src/detection/detection_lidar_cnn_segmentation/detection_lidar_cnn_segmentation.xml
  43. 4 4
      src/detection/detection_lidar_cnn_segmentation/feature_generator.cpp
  44. 1 1
      src/detection/detection_lidar_cnn_segmentation/feature_generator.h
  45. 190 35
      src/detection/detection_lidar_cnn_segmentation/main.cpp
  46. 3 3
      src/detection/detection_lidar_cnn_segmentation_trt/src/detector.cpp
  47. 47 18
      src/driver/driver_map_xodrload/driver_map_xodrload.xml
  48. 51 5
      src/driver/driver_map_xodrload/main.cpp
  49. 1 0
      src/tool/IVSysMan/IVSysManWithoutWin.pro

+ 2 - 2
sh/automake/autogen.sh

@@ -242,8 +242,8 @@ driver_map_xodrload
 #driver_radio_p900
 driver_ntrip_client
 #driver_odomtogpsimu
-#driver_rpc_client
-#driver_rpc_server
+driver_rpc_client
+driver_rpc_server
 #driver_vbox_gaohong
 driver_ota_client
 #driver_grpc_client

+ 1 - 1
sh/automake/automake_agx.sh

@@ -34,7 +34,7 @@ echo "code version: "$GITVERSIONCODE
 
 
 month=`date +%Y.%m`
-urllink="http://111.33.136.149:3000/adc_pilot/modularization_pro/src/master/agx"
+urllink="http://10.15.0.89:3000/adc_pilot/modularization_pro/src/master/agx"
 
 
 if [ "$GITVERSIONCODE" =  "$VERSIONCODE" ]; then

+ 73 - 0
src/detection/detection_lidar_centerpoint/.gitignore

@@ -0,0 +1,73 @@
+# This file is used to ignore files which are generated
+# ----------------------------------------------------------------------------
+
+*~
+*.autosave
+*.a
+*.core
+*.moc
+*.o
+*.obj
+*.orig
+*.rej
+*.so
+*.so.*
+*_pch.h.cpp
+*_resource.rc
+*.qm
+.#*
+*.*#
+core
+!core/
+tags
+.DS_Store
+.directory
+*.debug
+Makefile*
+*.prl
+*.app
+moc_*.cpp
+ui_*.h
+qrc_*.cpp
+Thumbs.db
+*.res
+*.rc
+/.qmake.cache
+/.qmake.stash
+
+# qtcreator generated files
+*.pro.user*
+
+# xemacs temporary files
+*.flc
+
+# Vim temporary files
+.*.swp
+
+# Visual Studio generated files
+*.ib_pdb_index
+*.idb
+*.ilk
+*.pdb
+*.sln
+*.suo
+*.vcproj
+*vcproj.*.*.user
+*.ncb
+*.sdf
+*.opensdf
+*.vcxproj
+*vcxproj.*
+
+# MinGW generated files
+*.Debug
+*.Release
+
+# Python byte code
+*.pyc
+
+# Binaries
+# --------
+*.dll
+*.exe
+

+ 161 - 0
src/detection/detection_lidar_centerpoint/detection_lidar_centerpoint.pro

@@ -0,0 +1,161 @@
+QT -= gui
+
+CONFIG += c++14 #console
+CONFIG -= app_bundle
+
+# The following define makes your compiler emit warnings if you use
+# any Qt feature that has been marked deprecated (the exact warnings
+# depend on your compiler). Please consult the documentation of the
+# deprecated API in order to know how to port your code away from it.
+DEFINES += QT_DEPRECATED_WARNINGS
+
+# You can also make your code fail to compile if it uses deprecated APIs.
+# In order to do so, uncomment the following line.
+# You can also select to disable deprecated APIs only up to a certain version of Qt.
+#DEFINES += QT_DISABLE_DEPRECATED_BEFORE=0x060000    # disables all the APIs deprecated before Qt 6.0.0
+
+SOURCES += \
+        lib/centerpoint_trt.cpp \
+        lib/network/network_trt.cpp \
+        lib/network/tensorrt_wrapper.cpp \
+        lib/preprocess/pointcloud_densification.cpp \
+        lib/preprocess/voxel_generator.cpp \
+        lib/utils.cpp \
+        main.cpp \
+        tensorrt_common/tensorrt_common.cpp
+
+# Default rules for deployment.
+qnx: target.path = /tmp/$${TARGET}/bin
+else: unix:!android: target.path = /opt/$${TARGET}/bin
+!isEmpty(target.path): INSTALLS += target
+
+DISTFILES += \
+    lib/network/scatter_kernel.cu \
+    lib/postprocess/circle_nms_kernel.cu \
+    lib/postprocess/postprocess_kernel.cu \
+    lib/preprocess/preprocess_kernel.cu
+
+
+INCLUDEPATH += /usr/include/pcl-1.7
+INCLUDEPATH += /usr/include/pcl-1.8
+INCLUDEPATH += /usr/include/eigen3
+INCLUDEPATH += /usr/include/opencv4
+
+#INCLUDEPATH += /opt/ros/kinetic/include/opencv-3.3.1-dev
+
+INCLUDEPATH += /usr/include/pcl-1.10
+
+
+INCLUDEPATH += /usr/local/cuda-11.4/targets/aarch64-linux/include
+
+LIBS += -L/usr/local/cuda-11.4/targets/aarch64-linux/lib  # -lcublas
+
+INCLUDEPATH += /usr/local/cuda-10.2/targets/aarch64-linux/include
+
+
+INCLUDEPATH += $$PWD/include
+
+INCLUDEPATH +=$$PWD/perception_utils/include
+
+LIBS += -lrt -ldl -lnvinfer -lcudnn  -lcudart -lnvparsers -lnvonnxparser -lnvinfer_plugin -lstdc++fs
+
+
+CUDA_SOURCES += \
+    lib/network/scatter_kernel.cu \
+    lib/postprocess/circle_nms_kernel.cu \
+    lib/postprocess/postprocess_kernel.cu \
+    lib/preprocess/preprocess_kernel.cu
+
+LIBS += -L"/usr/local/lib" \
+        -L"/usr/local/cuda/lib64" \
+        -lcudart \
+        -lcufft
+
+CUDA_SDK = "/usr/local/cuda-11.4/"   # cudaSDK路径
+
+CUDA_DIR = "/usr/local/cuda-11.4/"            # CUDA tookit路径
+
+SYSTEM_NAME = linux         # 自己系统环境 'Win32', 'x64', or 'Win64'
+
+SYSTEM_TYPE = 64           #操作系统位数 '32' or '64',
+
+CUDA_ARCH = sm_72          # cuda架构, for example 'compute_10', 'compute_11', 'sm_10'
+
+NVCC_OPTIONS = --use_fast_math --compiler-options "-fPIC"
+
+
+# include paths
+
+INCLUDEPATH += $$CUDA_DIR/include
+#INCLUDEPATH += /usr/local/cuda-10.0/targets/aarch64-linux/include/crt
+
+# library directories
+
+QMAKE_LIBDIR += $$CUDA_DIR/lib/
+
+CUDA_OBJECTS_DIR = ./
+
+# The following library conflicts with something in Cuda
+
+#QMAKE_LFLAGS_RELEASE = /NODEFAULTLIB:msvcrt.lib
+
+#QMAKE_LFLAGS_DEBUG   = /NODEFAULTLIB:msvcrtd.lib
+
+# Add the necessary libraries
+
+CUDA_LIBS =  cudart cufft
+
+# The following makes sure all path names (which often include spaces) are put between quotation marks
+
+CUDA_INC = $$join(INCLUDEPATH,'" -I"','-I"','"')
+
+NVCC_LIBS = $$join(CUDA_LIBS,' -l','-l', '')
+
+#LIBS += $$join(CUDA_LIBS,'.so ', '', '.so')
+
+# Configuration of the Cuda compiler
+
+CONFIG(debug, debug|release) {
+
+    # Debug mode
+
+    cuda_d.input = CUDA_SOURCES
+
+    cuda_d.output = $$CUDA_OBJECTS_DIR/${QMAKE_FILE_BASE}_cuda.o
+
+    cuda_d.commands = $$CUDA_DIR/bin/nvcc -D_DEBUG $$NVCC_OPTIONS $$CUDA_INC $$NVCC_LIBS --machine $$SYSTEM_TYPE -arch=$$CUDA_ARCH -c -o ${QMAKE_FILE_OUT} ${QMAKE_FILE_NAME}
+
+    cuda_d.dependency_type = TYPE_C
+
+    QMAKE_EXTRA_COMPILERS += cuda_d
+
+}
+
+else {
+
+    # Release mode
+
+    cuda.input = CUDA_SOURCES
+
+    cuda.output = $$CUDA_OBJECTS_DIR/${QMAKE_FILE_BASE}_cuda.o
+
+    cuda.commands = $$CUDA_DIR/bin/nvcc $$NVCC_OPTIONS $$CUDA_INC $$NVCC_LIBS --machine $$SYSTEM_TYPE -arch=$$CUDA_ARCH -O3 -c -o ${QMAKE_FILE_OUT} ${QMAKE_FILE_NAME}
+
+    cuda.dependency_type = TYPE_C
+
+    QMAKE_EXTRA_COMPILERS += cuda
+
+}
+
+HEADERS += \
+    include/lidar_centerpoint/centerpoint_trt.hpp \
+    include/lidar_centerpoint/network/network_trt.hpp \
+    include/lidar_centerpoint/network/scatter_kernel.hpp \
+    include/lidar_centerpoint/network/tensorrt_wrapper.hpp \
+    include/lidar_centerpoint/postprocess/circle_nms_kernel.hpp \
+    include/lidar_centerpoint/postprocess/non_maximum_suppression.hpp \
+    include/lidar_centerpoint/postprocess/postprocess_kernel.hpp \
+    include/lidar_centerpoint/preprocess/pointcloud_densification.hpp \
+    include/lidar_centerpoint/preprocess/preprocess_kernel.hpp \
+    include/lidar_centerpoint/preprocess/voxel_generator.hpp
+

+ 124 - 0
src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/centerpoint_config.hpp

@@ -0,0 +1,124 @@
+// Copyright 2021 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef LIDAR_CENTERPOINT__CENTERPOINT_CONFIG_HPP_
+#define LIDAR_CENTERPOINT__CENTERPOINT_CONFIG_HPP_
+
+#include <cstddef>
+#include <vector>
+
+namespace centerpoint
+{
+class CenterPointConfig
+{
+public:
+  explicit CenterPointConfig(
+    const std::size_t class_size, const float point_feature_size, const std::size_t max_voxel_size,
+    const std::vector<double> & point_cloud_range, const std::vector<double> & voxel_size,
+    const std::size_t downsample_factor, const std::size_t encoder_in_feature_size,
+    const float score_threshold, const float circle_nms_dist_threshold,
+    const std::vector<double> yaw_norm_thresholds)
+  {
+    class_size_ = class_size;
+    point_feature_size_ = point_feature_size;
+    max_voxel_size_ = max_voxel_size;
+    if (point_cloud_range.size() == 6) {
+      range_min_x_ = static_cast<float>(point_cloud_range[0]);
+      range_min_y_ = static_cast<float>(point_cloud_range[1]);
+      range_min_z_ = static_cast<float>(point_cloud_range[2]);
+      range_max_x_ = static_cast<float>(point_cloud_range[3]);
+      range_max_y_ = static_cast<float>(point_cloud_range[4]);
+      range_max_z_ = static_cast<float>(point_cloud_range[5]);
+    }
+    if (voxel_size.size() == 3) {
+      voxel_size_x_ = static_cast<float>(voxel_size[0]);
+      voxel_size_y_ = static_cast<float>(voxel_size[1]);
+      voxel_size_z_ = static_cast<float>(voxel_size[2]);
+    }
+    downsample_factor_ = downsample_factor;
+    encoder_in_feature_size_ = encoder_in_feature_size;
+
+    if (score_threshold > 0 && score_threshold < 1) {
+      score_threshold_ = score_threshold;
+    }
+
+    if (circle_nms_dist_threshold > 0) {
+      circle_nms_dist_threshold_ = circle_nms_dist_threshold;
+    }
+
+    yaw_norm_thresholds_ =
+      std::vector<float>(yaw_norm_thresholds.begin(), yaw_norm_thresholds.end());
+
+    for (auto & yaw_norm_threshold : yaw_norm_thresholds_) {
+      yaw_norm_threshold =
+        (yaw_norm_threshold >= 0.f && yaw_norm_threshold < 1.f) ? yaw_norm_threshold : 0.f;
+    }
+
+    grid_size_x_ = static_cast<std::size_t>((range_max_x_ - range_min_x_) / voxel_size_x_);
+    grid_size_y_ = static_cast<std::size_t>((range_max_y_ - range_min_y_) / voxel_size_y_);
+    grid_size_z_ = static_cast<std::size_t>((range_max_z_ - range_min_z_) / voxel_size_z_);
+    offset_x_ = range_min_x_ + voxel_size_x_ / 2;
+    offset_y_ = range_min_y_ + voxel_size_y_ / 2;
+    offset_z_ = range_min_z_ + voxel_size_z_ / 2;
+    down_grid_size_x_ = grid_size_x_ / downsample_factor_;
+    down_grid_size_y_ = grid_size_y_ / downsample_factor_;
+  };
+
+  // input params
+  std::size_t class_size_{3};
+  const std::size_t point_dim_size_{3};  // x, y and z
+  std::size_t point_feature_size_{4};    // x, y, z and time-lag
+  std::size_t max_point_in_voxel_size_{32};
+  std::size_t max_voxel_size_{40000};
+  float range_min_x_{-89.6f};
+  float range_min_y_{-89.6f};
+  float range_min_z_{-3.0f};
+  float range_max_x_{89.6f};
+  float range_max_y_{89.6f};
+  float range_max_z_{5.0f};
+  float voxel_size_x_{0.32f};
+  float voxel_size_y_{0.32f};
+  float voxel_size_z_{8.0f};
+
+  // network params
+  const std::size_t batch_size_{1};
+  std::size_t downsample_factor_{2};
+  std::size_t encoder_in_feature_size_{9};
+  const std::size_t encoder_out_feature_size_{32};
+  const std::size_t head_out_size_{6};
+  const std::size_t head_out_offset_size_{2};
+  const std::size_t head_out_z_size_{1};
+  const std::size_t head_out_dim_size_{3};
+  const std::size_t head_out_rot_size_{2};
+  const std::size_t head_out_vel_size_{2};
+
+  // post-process params
+  float score_threshold_{0.35f};
+  float circle_nms_dist_threshold_{1.5f};
+  std::vector<float> yaw_norm_thresholds_{};
+
+  // calculated params
+  std::size_t grid_size_x_ = (range_max_x_ - range_min_x_) / voxel_size_x_;
+  std::size_t grid_size_y_ = (range_max_y_ - range_min_y_) / voxel_size_y_;
+  std::size_t grid_size_z_ = (range_max_z_ - range_min_z_) / voxel_size_z_;
+  float offset_x_ = range_min_x_ + voxel_size_x_ / 2;
+  float offset_y_ = range_min_y_ + voxel_size_y_ / 2;
+  float offset_z_ = range_min_z_ + voxel_size_z_ / 2;
+  std::size_t down_grid_size_x_ = grid_size_x_ / downsample_factor_;
+  std::size_t down_grid_size_y_ = grid_size_y_ / downsample_factor_;
+};
+
+}  // namespace centerpoint
+
+#endif  // LIDAR_CENTERPOINT__CENTERPOINT_CONFIG_HPP_

+ 115 - 0
src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/centerpoint_trt.hpp

@@ -0,0 +1,115 @@
+// Copyright 2021 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef LIDAR_CENTERPOINT__CENTERPOINT_TRT_HPP_
+#define LIDAR_CENTERPOINT__CENTERPOINT_TRT_HPP_
+
+#include <lidar_centerpoint/cuda_utils.hpp>
+#include <lidar_centerpoint/network/network_trt.hpp>
+#include <lidar_centerpoint/postprocess/postprocess_kernel.hpp>
+#include <lidar_centerpoint/preprocess/voxel_generator.hpp>
+
+//#include <sensor_msgs/msg/point_cloud2.hpp>
+
+#include <pcl/point_cloud.h>
+#include <pcl/point_types.h>
+
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+namespace centerpoint
+{
+class NetworkParam
+{
+public:
+  NetworkParam(std::string onnx_path, std::string engine_path, std::string trt_precision)
+  : onnx_path_(std::move(onnx_path)),
+    engine_path_(std::move(engine_path)),
+    trt_precision_(std::move(trt_precision))
+  {
+  }
+
+  std::string onnx_path() const { return onnx_path_; }
+  std::string engine_path() const { return engine_path_; }
+  std::string trt_precision() const { return trt_precision_; }
+
+private:
+  std::string onnx_path_;
+  std::string engine_path_;
+  std::string trt_precision_;
+};
+
+class CenterPointTRT
+{
+public:
+  explicit CenterPointTRT(
+    const NetworkParam & encoder_param, const NetworkParam & head_param,
+    const DensificationParam & densification_param, const CenterPointConfig & config);
+
+  ~CenterPointTRT();
+
+//  bool detect(
+//    const sensor_msgs::msg::PointCloud2 & input_pointcloud_msg, const tf2_ros::Buffer & tf_buffer,
+//    std::vector<Box3D> & det_boxes3d);
+
+  bool detect(
+    const pcl::PointCloud<pcl::PointXYZI>::Ptr & pc_ptr,
+    std::vector<Box3D> & det_boxes3d);
+
+protected:
+  void initPtr();
+
+//  virtual bool preprocess(
+//    const sensor_msgs::msg::PointCloud2 & input_pointcloud_msg, const tf2_ros::Buffer & tf_buffer);
+
+  virtual bool preprocess(
+    const pcl::PointCloud<pcl::PointXYZI>::Ptr & pc_ptr);
+
+  void inference();
+
+  void postProcess(std::vector<Box3D> & det_boxes3d);
+
+  std::unique_ptr<VoxelGeneratorTemplate> vg_ptr_{nullptr};
+  std::unique_ptr<VoxelEncoderTRT> encoder_trt_ptr_{nullptr};
+  std::unique_ptr<HeadTRT> head_trt_ptr_{nullptr};
+  std::unique_ptr<PostProcessCUDA> post_proc_ptr_{nullptr};
+  cudaStream_t stream_{nullptr};
+
+  std::size_t class_size_{0};
+  CenterPointConfig config_;
+  std::size_t num_voxels_{0};
+  std::size_t encoder_in_feature_size_{0};
+  std::size_t spatial_features_size_{0};
+  std::vector<float> voxels_;
+  std::vector<int> coordinates_;
+  std::vector<float> num_points_per_voxel_;
+  cuda::unique_ptr<float[]> voxels_d_{nullptr};
+  cuda::unique_ptr<int[]> coordinates_d_{nullptr};
+  cuda::unique_ptr<float[]> num_points_per_voxel_d_{nullptr};
+  cuda::unique_ptr<float[]> encoder_in_features_d_{nullptr};
+  cuda::unique_ptr<float[]> pillar_features_d_{nullptr};
+  cuda::unique_ptr<float[]> spatial_features_d_{nullptr};
+  cuda::unique_ptr<float[]> head_out_heatmap_d_{nullptr};
+  cuda::unique_ptr<float[]> head_out_offset_d_{nullptr};
+  cuda::unique_ptr<float[]> head_out_z_d_{nullptr};
+  cuda::unique_ptr<float[]> head_out_dim_d_{nullptr};
+  cuda::unique_ptr<float[]> head_out_rot_d_{nullptr};
+  cuda::unique_ptr<float[]> head_out_vel_d_{nullptr};
+};
+
+}  // namespace centerpoint
+
+#endif  // LIDAR_CENTERPOINT__CENTERPOINT_TRT_HPP_

+ 120 - 0
src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/cuda_utils.hpp

@@ -0,0 +1,120 @@
+// Copyright 2020 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+/*
+ * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * This code is licensed under CC0 1.0 Universal (Public Domain).
+ * You can use this without any limitation.
+ * https://creativecommons.org/publicdomain/zero/1.0/deed.en
+ */
+
+#ifndef LIDAR_CENTERPOINT__CUDA_UTILS_HPP_
+#define LIDAR_CENTERPOINT__CUDA_UTILS_HPP_
+
+#include <cuda_runtime_api.h>
+
+#include <memory>
+#include <sstream>
+#include <stdexcept>
+#include <type_traits>
+
+#define CHECK_CUDA_ERROR(e) (cuda::check_error(e, __FILE__, __LINE__))
+
+namespace cuda
+{
+inline void check_error(const ::cudaError_t e, const char * f, int n)
+{
+  if (e != ::cudaSuccess) {
+    std::stringstream s;
+    s << ::cudaGetErrorName(e) << " (" << e << ")@" << f << "#L" << n << ": "
+      << ::cudaGetErrorString(e);
+    throw std::runtime_error{s.str()};
+  }
+}
+
+struct deleter
+{
+  void operator()(void * p) const { CHECK_CUDA_ERROR(::cudaFree(p)); }
+};
+
+template <typename T>
+using unique_ptr = std::unique_ptr<T, deleter>;
+
+template <typename T>
+typename std::enable_if<std::is_array<T>::value, cuda::unique_ptr<T>>::type make_unique(
+  const std::size_t n)
+{
+  using U = typename std::remove_extent<T>::type;
+  U * p;
+  CHECK_CUDA_ERROR(::cudaMalloc(reinterpret_cast<void **>(&p), sizeof(U) * n));
+  return cuda::unique_ptr<T>{p};
+}
+
+template <typename T>
+cuda::unique_ptr<T> make_unique()
+{
+  T * p;
+  CHECK_CUDA_ERROR(::cudaMalloc(reinterpret_cast<void **>(&p), sizeof(T)));
+  return cuda::unique_ptr<T>{p};
+}
+
+constexpr size_t CUDA_ALIGN = 256;
+
+template <typename T>
+inline size_t get_size_aligned(size_t num_elem)
+{
+  size_t size = num_elem * sizeof(T);
+  size_t extra_align = 0;
+  if (size % CUDA_ALIGN != 0) {
+    extra_align = CUDA_ALIGN - size % CUDA_ALIGN;
+  }
+  return size + extra_align;
+}
+
+template <typename T>
+inline T * get_next_ptr(size_t num_elem, void *& workspace, size_t & workspace_size)
+{
+  size_t size = get_size_aligned<T>(num_elem);
+  if (size > workspace_size) {
+    throw std::runtime_error("Workspace is too small!");
+  }
+  workspace_size -= size;
+  T * ptr = reinterpret_cast<T *>(workspace);
+  workspace = reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(workspace) + size);
+  return ptr;
+}
+
+}  // namespace cuda
+
+#endif  // LIDAR_CENTERPOINT__CUDA_UTILS_HPP_

+ 47 - 0
src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/detection_class_remapper.hpp

@@ -0,0 +1,47 @@
+// Copyright 2022 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef LIDAR_CENTERPOINT__DETECTION_CLASS_REMAPPER_HPP_
+#define LIDAR_CENTERPOINT__DETECTION_CLASS_REMAPPER_HPP_
+
+#include <Eigen/Core>
+
+//#include <autoware_auto_perception_msgs/msg/detected_object_kinematics.hpp>
+//#include <autoware_auto_perception_msgs/msg/detected_objects.hpp>
+//#include <autoware_auto_perception_msgs/msg/object_classification.hpp>
+//#include <autoware_auto_perception_msgs/msg/shape.hpp>
+
+#include <vector>
+
+namespace centerpoint
+{
+
+class DetectionClassRemapper
+{
+public:
+  void setParameters(
+    const std::vector<int64_t> & allow_remapping_by_area_matrix,
+    const std::vector<double> & min_area_matrix, const std::vector<double> & max_area_matrix);
+  void mapClasses(autoware_auto_perception_msgs::msg::DetectedObjects & msg);
+
+protected:
+  Eigen::Matrix<bool, Eigen::Dynamic, Eigen::Dynamic> allow_remapping_by_area_matrix_;
+  Eigen::MatrixXd min_area_matrix_;
+  Eigen::MatrixXd max_area_matrix_;
+  int num_labels_;
+};
+
+}  // namespace centerpoint
+
+#endif  // LIDAR_CENTERPOINT__DETECTION_CLASS_REMAPPER_HPP_

+ 53 - 0
src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/network/network_trt.hpp

@@ -0,0 +1,53 @@
+// Copyright 2021 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef LIDAR_CENTERPOINT__NETWORK__NETWORK_TRT_HPP_
+#define LIDAR_CENTERPOINT__NETWORK__NETWORK_TRT_HPP_
+
+#include <lidar_centerpoint/centerpoint_config.hpp>
+#include <lidar_centerpoint/network/tensorrt_wrapper.hpp>
+
+#include <vector>
+
+namespace centerpoint
+{
+class VoxelEncoderTRT : public TensorRTWrapper
+{
+public:
+  using TensorRTWrapper::TensorRTWrapper;
+
+protected:
+  bool setProfile(
+    nvinfer1::IBuilder & builder, nvinfer1::INetworkDefinition & network,
+    nvinfer1::IBuilderConfig & config) override;
+};
+
+class HeadTRT : public TensorRTWrapper
+{
+public:
+  using TensorRTWrapper::TensorRTWrapper;
+
+  HeadTRT(const std::vector<std::size_t> & out_channel_sizes, const CenterPointConfig & config);
+
+protected:
+  bool setProfile(
+    nvinfer1::IBuilder & builder, nvinfer1::INetworkDefinition & network,
+    nvinfer1::IBuilderConfig & config) override;
+
+  std::vector<std::size_t> out_channel_sizes_;
+};
+
+}  // namespace centerpoint
+
+#endif  // LIDAR_CENTERPOINT__NETWORK__NETWORK_TRT_HPP_

+ 31 - 0
src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/network/scatter_kernel.hpp

@@ -0,0 +1,31 @@
+// Copyright 2022 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef LIDAR_CENTERPOINT__NETWORK__SCATTER_KERNEL_HPP_
+#define LIDAR_CENTERPOINT__NETWORK__SCATTER_KERNEL_HPP_
+
+#include <cuda.h>
+#include <cuda_runtime_api.h>
+
+namespace centerpoint
+{
+cudaError_t scatterFeatures_launch(
+  const float * pillar_features, const int * coords, const std::size_t num_pillars,
+  const std::size_t max_voxel_size, const std::size_t encoder_out_feature_size,
+  const std::size_t grid_size_x, const std::size_t grid_size_y, float * scattered_features,
+  cudaStream_t stream);
+
+}  // namespace centerpoint
+
+#endif  // LIDAR_CENTERPOINT__NETWORK__SCATTER_KERNEL_HPP_

+ 68 - 0
src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/network/tensorrt_wrapper.hpp

@@ -0,0 +1,68 @@
+// Copyright 2021 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef LIDAR_CENTERPOINT__NETWORK__TENSORRT_WRAPPER_HPP_
+#define LIDAR_CENTERPOINT__NETWORK__TENSORRT_WRAPPER_HPP_
+
+#include <lidar_centerpoint/centerpoint_config.hpp>
+#include <tensorrt_common/tensorrt_common.hpp>
+
+#include <NvInfer.h>
+
+#include <iostream>
+#include <memory>
+#include <string>
+
+namespace centerpoint
+{
+
+class TensorRTWrapper
+{
+public:
+  explicit TensorRTWrapper(const CenterPointConfig & config);
+
+  ~TensorRTWrapper();
+
+  bool init(
+    const std::string & onnx_path, const std::string & engine_path, const std::string & precision);
+
+  tensorrt_common::TrtUniquePtr<nvinfer1::IExecutionContext> context_{nullptr};
+
+protected:
+  virtual bool setProfile(
+    nvinfer1::IBuilder & builder, nvinfer1::INetworkDefinition & network,
+    nvinfer1::IBuilderConfig & config) = 0;
+
+  CenterPointConfig config_;
+  tensorrt_common::Logger logger_;
+
+private:
+  bool parseONNX(
+    const std::string & onnx_path, const std::string & engine_path, const std::string & precision,
+    size_t workspace_size = (1ULL << 30));
+
+  bool saveEngine(const std::string & engine_path);
+
+  bool loadEngine(const std::string & engine_path);
+
+  bool createContext();
+
+  tensorrt_common::TrtUniquePtr<nvinfer1::IRuntime> runtime_{nullptr};
+  tensorrt_common::TrtUniquePtr<nvinfer1::IHostMemory> plan_{nullptr};
+  tensorrt_common::TrtUniquePtr<nvinfer1::ICudaEngine> engine_{nullptr};
+};
+
+}  // namespace centerpoint
+
+#endif  // LIDAR_CENTERPOINT__NETWORK__TENSORRT_WRAPPER_HPP_

+ 70 - 0
src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/node.hpp

@@ -0,0 +1,70 @@
+// Copyright 2021 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef LIDAR_CENTERPOINT__NODE_HPP_
+#define LIDAR_CENTERPOINT__NODE_HPP_
+
+#include "lidar_centerpoint/postprocess/non_maximum_suppression.hpp"
+
+#include <lidar_centerpoint/centerpoint_trt.hpp>
+#include <lidar_centerpoint/detection_class_remapper.hpp>
+#include <rclcpp/rclcpp.hpp>
+#include <tier4_autoware_utils/ros/debug_publisher.hpp>
+#include <tier4_autoware_utils/system/stop_watch.hpp>
+
+#include <autoware_auto_perception_msgs/msg/detected_object_kinematics.hpp>
+#include <autoware_auto_perception_msgs/msg/detected_objects.hpp>
+#include <autoware_auto_perception_msgs/msg/object_classification.hpp>
+#include <autoware_auto_perception_msgs/msg/shape.hpp>
+#include <sensor_msgs/msg/point_cloud2.hpp>
+
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace centerpoint
+{
+
/// ROS 2 node: subscribes to a pointcloud topic, runs CenterPoint TensorRT
/// inference and publishes DetectedObjects.
class LidarCenterPointNode : public rclcpp::Node
{
public:
  explicit LidarCenterPointNode(const rclcpp::NodeOptions & node_options);

private:
  /// Callback invoked for each incoming pointcloud message.
  void pointCloudCallback(const sensor_msgs::msg::PointCloud2::ConstSharedPtr input_pointcloud_msg);

  // NOTE(review): tf2_ros types are used but no tf2_ros header is included
  // in this file -- presumably pulled in transitively; confirm.
  tf2_ros::Buffer tf_buffer_;
  tf2_ros::TransformListener tf_listener_{tf_buffer_};

  rclcpp::Subscription<sensor_msgs::msg::PointCloud2>::SharedPtr pointcloud_sub_;
  rclcpp::Publisher<autoware_auto_perception_msgs::msg::DetectedObjects>::SharedPtr objects_pub_;

  float score_threshold_{0.0};            // minimum score for a detection to be kept
  std::vector<std::string> class_names_;  // class index -> name mapping
  bool has_twist_{false};                 // whether detections carry velocity

  NonMaximumSuppression iou_bev_nms_;     // BEV-IoU duplicate suppression
  DetectionClassRemapper detection_class_remapper_;

  std::unique_ptr<CenterPointTRT> detector_ptr_{nullptr};

  // debugger
  std::unique_ptr<tier4_autoware_utils::StopWatch<std::chrono::milliseconds>> stop_watch_ptr_{
    nullptr};
  std::unique_ptr<tier4_autoware_utils::DebugPublisher> debug_publisher_ptr_{nullptr};
};
+
+}  // namespace centerpoint
+
+#endif  // LIDAR_CENTERPOINT__NODE_HPP_

+ 32 - 0
src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/postprocess/circle_nms_kernel.hpp

@@ -0,0 +1,32 @@
+// Copyright 2022 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef LIDAR_CENTERPOINT__POSTPROCESS__CIRCLE_NMS_KERNEL_HPP_
+#define LIDAR_CENTERPOINT__POSTPROCESS__CIRCLE_NMS_KERNEL_HPP_
+
+#include <lidar_centerpoint/utils.hpp>
+
+#include <thrust/device_vector.h>
+
+namespace centerpoint
+{
// Non-maximum suppression (NMS) uses the distance on the xy plane instead of
// intersection over union (IoU) to suppress overlapped objects.
/// @param boxes3d candidate boxes in device memory.
/// @param distance_threshold xy distance below which boxes suppress each other.
/// @param[out] keep_mask per-box survival flag (device memory).
/// @return presumably the number of boxes kept -- confirm in the kernel impl.
std::size_t circleNMS(
  thrust::device_vector<Box3D> & boxes3d, const float distance_threshold,
  thrust::device_vector<bool> & keep_mask, cudaStream_t stream);
+
+}  // namespace centerpoint
+
+#endif  // LIDAR_CENTERPOINT__POSTPROCESS__CIRCLE_NMS_KERNEL_HPP_

+ 81 - 0
src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/postprocess/non_maximum_suppression.hpp

@@ -0,0 +1,81 @@
+// Copyright 2022 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef LIDAR_CENTERPOINT__POSTPROCESS__NON_MAXIMUM_SUPPRESSION_HPP_
+#define LIDAR_CENTERPOINT__POSTPROCESS__NON_MAXIMUM_SUPPRESSION_HPP_
+
+//#include "lidar_centerpoint/ros_utils.hpp"
+
+#include <Eigen/Eigen>
+
+//#include "autoware_auto_perception_msgs/msg/detected_object.hpp"
+
+#include <string>
+#include <vector>
+
+namespace centerpoint
+{
+using autoware_auto_perception_msgs::msg::DetectedObject;
+
// TODO(yukke42): now only support IoU_BEV
/// Suppression criterion used by NonMaximumSuppression.
enum class NMS_TYPE {
  IoU_BEV  // IoU computed on the bird's-eye-view footprint
  // IoU_3D
  // Distance_2D
  // Distance_3D
};
+
/// Tuning parameters for NonMaximumSuppression::setParameters().
struct NMSParams
{
  NMS_TYPE nms_type_{};
  std::vector<std::string> target_class_names_{};  // classes NMS applies to
  double search_distance_2d_{};  // only pairs within this xy distance are compared
  double iou_threshold_{};       // IoU above this suppresses the weaker box -- confirm in impl
  // double distance_threshold_{};
};
+
+std::vector<bool> classNamesToBooleanMask(const std::vector<std::string> & class_names)
+{
+  std::vector<bool> mask;
+  constexpr std::size_t num_object_classification = 8;
+  mask.resize(num_object_classification);
+  for (const auto & class_name : class_names) {
+    const auto semantic_type = getSemanticType(class_name);
+    mask.at(semantic_type) = true;
+  }
+
+  return mask;
+}
+
/// Non-maximum suppression over DetectedObject lists.
/// Currently only the IoU_BEV criterion is supported (see NMS_TYPE).
class NonMaximumSuppression
{
public:
  void setParameters(const NMSParams &);

  /// Returns the input objects with suppressed duplicates removed.
  std::vector<DetectedObject> apply(const std::vector<DetectedObject> &);

private:
  /// True if this semantic label participates in NMS.
  bool isTargetLabel(const std::uint8_t);

  /// True if the pair should be compared -- presumably both target classes
  /// and within search_distance_2d_; confirm in the implementation.
  bool isTargetPairObject(const DetectedObject &, const DetectedObject &);

  Eigen::MatrixXd generateIoUMatrix(const std::vector<DetectedObject> &);

  NMSParams params_{};
  std::vector<bool> target_class_mask_{};  // indexed by semantic label
};
+
+}  // namespace centerpoint
+
+#endif  // LIDAR_CENTERPOINT__POSTPROCESS__NON_MAXIMUM_SUPPRESSION_HPP_

+ 47 - 0
src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/postprocess/postprocess_kernel.hpp

@@ -0,0 +1,47 @@
+// Copyright 2022 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef LIDAR_CENTERPOINT__POSTPROCESS__POSTPROCESS_KERNEL_HPP_
+#define LIDAR_CENTERPOINT__POSTPROCESS__POSTPROCESS_KERNEL_HPP_
+
+#include <lidar_centerpoint/centerpoint_config.hpp>
+#include <lidar_centerpoint/utils.hpp>
+
+#include <cuda.h>
+#include <cuda_runtime_api.h>
+#include <thrust/device_vector.h>
+
+#include <vector>
+
+namespace centerpoint
+{
/// CUDA post-processing: decodes the head network's output maps into Box3D
/// detections.
class PostProcessCUDA
{
public:
  explicit PostProcessCUDA(const CenterPointConfig & config);

  /// Decodes the heatmap/offset/z/dim/rot/vel maps (device pointers) into
  /// det_boxes3d on the given stream.
  /// @return the CUDA error status of the launch.
  cudaError_t generateDetectedBoxes3D_launch(
    const float * out_heatmap, const float * out_offset, const float * out_z, const float * out_dim,
    const float * out_rot, const float * out_vel, std::vector<Box3D> & det_boxes3d,
    cudaStream_t stream);

private:
  CenterPointConfig config_;
  thrust::device_vector<Box3D> boxes3d_d_;              // device-side scratch for decoded boxes
  thrust::device_vector<float> yaw_norm_thresholds_d_;  // thresholds on yaw-vector norm -- see config
};
+
+}  // namespace centerpoint
+
+#endif  // LIDAR_CENTERPOINT__POSTPROCESS__POSTPROCESS_KERNEL_HPP_

+ 100 - 0
src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/preprocess/pointcloud_densification.hpp

@@ -0,0 +1,100 @@
+// Copyright 2021 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef LIDAR_CENTERPOINT__PREPROCESS__POINTCLOUD_DENSIFICATION_HPP_
+#define LIDAR_CENTERPOINT__PREPROCESS__POINTCLOUD_DENSIFICATION_HPP_
+
+//#include <tf2_ros/buffer.h>
+//#include <tf2_ros/transform_listener.h>
+#ifdef ROS_DISTRO_GALACTIC
+#include <tf2_sensor_msgs/tf2_sensor_msgs.h>
+#else
+//#include <tf2_sensor_msgs/tf2_sensor_msgs.hpp>
+#endif
+
+#include <pcl/point_cloud.h>
+#include <pcl/point_types.h>
+
+#include <list>
+#include <string>
+#include <utility>
+
+namespace centerpoint
+{
/// Parameters for multi-frame pointcloud densification.
class DensificationParam
{
public:
  /// @param world_frame_id frame in which past clouds are accumulated.
  /// @param num_past_frames number of past frames to keep; the cache size is
  ///        num_past_frames + 1 (the current frame).
  // Takes the string by value so std::move below actually moves; the previous
  // `const std::string &` signature made std::move a silent copy
  // (clang-tidy: performance-move-const-arg). Call sites are unaffected.
  DensificationParam(std::string world_frame_id, const unsigned int num_past_frames)
  : world_frame_id_(std::move(world_frame_id)),
    pointcloud_cache_size_(num_past_frames + /*current frame*/ 1)
  {
  }

  std::string world_frame_id() const { return world_frame_id_; }
  unsigned int pointcloud_cache_size() const { return pointcloud_cache_size_; }

private:
  std::string world_frame_id_;
  unsigned int pointcloud_cache_size_{1};
};
+
/// One cached pointcloud frame.
/// NOTE(review): despite the name, this port stores only the cloud -- the
/// original Autoware struct (kept commented out below) also carried the
/// past-to-world transform. Confirm the transform is genuinely unneeded here.
struct PointCloudWithTransform
{
  pcl::PointCloud<pcl::PointXYZI>::Ptr pc_ptr;  // shared, non-owning of frame metadata
};
+
+
+//struct PointCloudWithTransform
+//{
+//  sensor_msgs::msg::PointCloud2 pointcloud_msg;
+//  Eigen::Affine3f affine_past2world;
+//};
+
/// Caches up to param_.pointcloud_cache_size() recent pointclouds so that a
/// densified (multi-frame) cloud can be fed to the detector.
class PointCloudDensification
{
public:
  explicit PointCloudDensification(const DensificationParam & param);

  /// Adds a cloud to the cache via enqueue(); older entries are evicted by
  /// dequeue() -- see the .cpp for the exact policy.
   bool enqueuePointCloud(
      const pcl::PointCloud<pcl::PointXYZI>::Ptr & pc_ptr);
//  bool enqueuePointCloud(
//    const sensor_msgs::msg::PointCloud2 & input_pointcloud_msg, const tf2_ros::Buffer & tf_buffer);

  double getCurrentTimestamp() const { return current_timestamp_; }
  // NOTE(review): in this pcl-based port no visible code updates
  // affine_world2current_; verify it is maintained in the .cpp.
  Eigen::Affine3f getAffineWorldToCurrent() const { return affine_world2current_; }
  /// Iterator over the cached clouds; ordering depends on enqueue() --
  /// TODO(review): confirm newest-first vs oldest-first.
  std::list<PointCloudWithTransform>::iterator getPointCloudCacheIter()
  {
    return pointcloud_cache_.begin();
  }
  bool isCacheEnd(std::list<PointCloudWithTransform>::iterator iter)
  {
    return iter == pointcloud_cache_.end();
  }
  unsigned int pointcloud_cache_size() const { return param_.pointcloud_cache_size(); }

private:
//  void enqueue(const sensor_msgs::msg::PointCloud2 & msg, const Eigen::Affine3f & affine);
  void enqueue(const pcl::PointCloud<pcl::PointXYZI>::Ptr & pc_ptr);  // push newest frame
  void dequeue();  // drop oldest frame

  DensificationParam param_;
  double current_timestamp_{0.0};
  Eigen::Affine3f affine_world2current_;
  std::list<PointCloudWithTransform> pointcloud_cache_;
};
+
+}  // namespace centerpoint
+
+#endif  // LIDAR_CENTERPOINT__PREPROCESS__POINTCLOUD_DENSIFICATION_HPP_

+ 31 - 0
src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/preprocess/preprocess_kernel.hpp

@@ -0,0 +1,31 @@
+// Copyright 2022 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef LIDAR_CENTERPOINT__PREPROCESS__PREPROCESS_KERNEL_HPP_
+#define LIDAR_CENTERPOINT__PREPROCESS__PREPROCESS_KERNEL_HPP_
+
+#include <cuda.h>
+#include <cuda_runtime_api.h>
+
+namespace centerpoint
+{
/// Launches the kernel that builds encoder input features from the raw voxel
/// buffers (voxel features, per-voxel point counts and grid coordinates).
/// NOTE(review): this header uses std::size_t but does not include <cstddef>;
/// verify it arrives transitively (e.g. via the CUDA headers).
cudaError_t generateFeatures_launch(
  const float * voxel_features, const float * voxel_num_points, const int * coords,
  const std::size_t num_voxels, const std::size_t max_voxel_size, const float voxel_size_x,
  const float voxel_size_y, const float voxel_size_z, const float range_min_x,
  const float range_min_y, const float range_min_z, float * features, cudaStream_t stream);
+
+}  // namespace centerpoint
+
+#endif  // LIDAR_CENTERPOINT__PREPROCESS__PREPROCESS_KERNEL_HPP_

+ 65 - 0
src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/preprocess/voxel_generator.hpp

@@ -0,0 +1,65 @@
+// Copyright 2021 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef LIDAR_CENTERPOINT__PREPROCESS__VOXEL_GENERATOR_HPP_
+#define LIDAR_CENTERPOINT__PREPROCESS__VOXEL_GENERATOR_HPP_
+
+#include <lidar_centerpoint/centerpoint_config.hpp>
+#include <lidar_centerpoint/preprocess/pointcloud_densification.hpp>
+
+//#include <sensor_msgs/msg/point_cloud2.hpp>
+
+#include <memory>
+#include <vector>
+
+namespace centerpoint
+{
+class VoxelGeneratorTemplate
+{
+public:
+  explicit VoxelGeneratorTemplate(
+    const DensificationParam & param, const CenterPointConfig & config);
+
+  virtual std::size_t pointsToVoxels(
+    std::vector<float> & voxels, std::vector<int> & coordinates,
+    std::vector<float> & num_points_per_voxel) = 0;
+
+    bool enqueuePointCloud(
+      const pcl::PointCloud<pcl::PointXYZI>::Ptr);
+
+//  bool enqueuePointCloud(
+//    const sensor_msgs::msg::PointCloud2 & input_pointcloud_msg, const tf2_ros::Buffer & tf_buffer);
+
+protected:
+  std::unique_ptr<PointCloudDensification> pd_ptr_{nullptr};
+
+  CenterPointConfig config_;
+  std::array<float, 6> range_;
+  std::array<int, 3> grid_size_;
+  std::array<float, 3> recip_voxel_size_;
+};
+
/// Concrete voxel generator implementing pointsToVoxels().
class VoxelGenerator : public VoxelGeneratorTemplate
{
public:
  // Inherit the (param, config) constructor unchanged.
  using VoxelGeneratorTemplate::VoxelGeneratorTemplate;

  std::size_t pointsToVoxels(
    std::vector<float> & voxels, std::vector<int> & coordinates,
    std::vector<float> & num_points_per_voxel) override;
};
+
+}  // namespace centerpoint
+
+#endif  // LIDAR_CENTERPOINT__PREPROCESS__VOXEL_GENERATOR_HPP_

+ 43 - 0
src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/ros_utils.hpp

@@ -0,0 +1,43 @@
+// Copyright 2022 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef LIDAR_CENTERPOINT__ROS_UTILS_HPP_
+#define LIDAR_CENTERPOINT__ROS_UTILS_HPP_
+
+// ros packages cannot be included from cuda.
+
+#include <lidar_centerpoint/utils.hpp>
+
+#include <autoware_auto_perception_msgs/msg/detected_object_kinematics.hpp>
+#include <autoware_auto_perception_msgs/msg/detected_objects.hpp>
+#include <autoware_auto_perception_msgs/msg/object_classification.hpp>
+#include <autoware_auto_perception_msgs/msg/shape.hpp>
+
+#include <string>
+#include <vector>
+
+namespace centerpoint
+{
+
/// Converts one decoded Box3D into an Autoware DetectedObject message.
/// @param has_twist presumably controls whether velocity is written into the
///        kinematics -- confirm against the implementation.
void box3DToDetectedObject(
  const Box3D & box3d, const std::vector<std::string> & class_names, const bool has_twist,
  autoware_auto_perception_msgs::msg::DetectedObject & obj);

/// Maps a class-name string to its ObjectClassification label value.
uint8_t getSemanticType(const std::string & class_name);

/// True for car-like vehicle labels -- see the implementation for the exact set.
bool isCarLikeVehicleLabel(const uint8_t label);
+
+}  // namespace centerpoint
+
+#endif  // LIDAR_CENTERPOINT__ROS_UTILS_HPP_

+ 63 - 0
src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/single_inference_node.hpp

@@ -0,0 +1,63 @@
+// Copyright 2022 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef LIDAR_CENTERPOINT__SINGLE_INFERENCE_NODE_HPP_
+#define LIDAR_CENTERPOINT__SINGLE_INFERENCE_NODE_HPP_
+
+#include <Eigen/Core>
+#include <lidar_centerpoint/centerpoint_trt.hpp>
+#include <lidar_centerpoint/detection_class_remapper.hpp>
+#include <rclcpp/rclcpp.hpp>
+
+#include <autoware_auto_perception_msgs/msg/detected_object_kinematics.hpp>
+#include <autoware_auto_perception_msgs/msg/detected_objects.hpp>
+#include <autoware_auto_perception_msgs/msg/object_classification.hpp>
+#include <autoware_auto_perception_msgs/msg/shape.hpp>
+
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace centerpoint
+{
+
/// Offline single-shot node: runs CenterPoint once on a PCD file and dumps
/// the detections (see detect() / dumpDetectionsAsMesh()).
class SingleInferenceLidarCenterPointNode : public rclcpp::Node
{
public:
  explicit SingleInferenceLidarCenterPointNode(const rclcpp::NodeOptions & node_options);

private:
  /// Runs inference on pcd_path and writes results to detections_path.
  void detect(const std::string & pcd_path, const std::string & detections_path);
  /// Corner vertices of a shape, transformed by the given pose.
  std::vector<Eigen::Vector3d> getVertices(
    const autoware_auto_perception_msgs::msg::Shape & shape, const Eigen::Affine3d & pose) const;
  /// Writes the detections to output_path as a mesh (format defined in impl).
  void dumpDetectionsAsMesh(
    const autoware_auto_perception_msgs::msg::DetectedObjects & objects_msg,
    const std::string & output_path) const;

  // NOTE(review): tf2_ros types used without a visible tf2_ros include --
  // presumably transitive; confirm.
  tf2_ros::Buffer tf_buffer_;
  tf2_ros::TransformListener tf_listener_{tf_buffer_};

  float score_threshold_{0.0};  // minimum score for a detection to be kept
  std::vector<std::string> class_names_;
  bool rename_car_to_truck_and_bus_{false};
  bool has_twist_{false};

  DetectionClassRemapper detection_class_remapper_;

  std::unique_ptr<CenterPointTRT> detector_ptr_{nullptr};
};
+
+}  // namespace centerpoint
+
+#endif  // LIDAR_CENTERPOINT__SINGLE_INFERENCE_NODE_HPP_

+ 43 - 0
src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/utils.hpp

@@ -0,0 +1,43 @@
+// Copyright 2022 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef LIDAR_CENTERPOINT__UTILS_HPP_
+#define LIDAR_CENTERPOINT__UTILS_HPP_
+
+#include <cstddef>
+
+namespace centerpoint
+{
/// Plain-old-data 3D detection box, shared between host code and CUDA kernels
/// (hence no member initializers -- see comment below).
struct Box3D
{
  // initializer not allowed for __shared__ variable
  int label;     // class index (see class_names mapping)
  float score;   // detection confidence
  float x;       // box center -- frame/units defined by the input cloud; confirm
  float y;
  float z;
  float length;
  float width;
  float height;
  float yaw;
  float vel_x;   // velocity, only meaningful when the model predicts twist
  float vel_y;
};
+
// cspell: ignore divup
/// Integer ceiling division: smallest k such that k * b >= a (i.e. ceil(a/b)).
std::size_t divup(const std::size_t a, const std::size_t b);
+
+}  // namespace centerpoint
+
+#endif  // LIDAR_CENTERPOINT__UTILS_HPP_

+ 258 - 0
src/detection/detection_lidar_centerpoint/lib/centerpoint_trt.cpp

@@ -0,0 +1,258 @@
+// Copyright 2021 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "lidar_centerpoint/centerpoint_trt.hpp"
+
+#include <lidar_centerpoint/centerpoint_config.hpp>
+#include <lidar_centerpoint/network/scatter_kernel.hpp>
+#include <lidar_centerpoint/preprocess/preprocess_kernel.hpp>
+//#include <tier4_autoware_utils/math/constants.hpp>
+
+#include <iostream>
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace centerpoint
+{
+CenterPointTRT::CenterPointTRT(
+  const NetworkParam & encoder_param, const NetworkParam & head_param,
+  const DensificationParam & densification_param, const CenterPointConfig & config)
+: config_(config)
+{
+  vg_ptr_ = std::make_unique<VoxelGenerator>(densification_param, config_);
+  post_proc_ptr_ = std::make_unique<PostProcessCUDA>(config_);
+
+  // encoder
+  encoder_trt_ptr_ = std::make_unique<VoxelEncoderTRT>(config_);
+  encoder_trt_ptr_->init(
+    encoder_param.onnx_path(), encoder_param.engine_path(), encoder_param.trt_precision());
+  encoder_trt_ptr_->context_->setBindingDimensions(
+    0,
+    nvinfer1::Dims3(
+      config_.max_voxel_size_, config_.max_point_in_voxel_size_, config_.encoder_in_feature_size_));
+
+  // head
+  std::vector<std::size_t> out_channel_sizes = {
+    config_.class_size_,        config_.head_out_offset_size_, config_.head_out_z_size_,
+    config_.head_out_dim_size_, config_.head_out_rot_size_,    config_.head_out_vel_size_};
+  head_trt_ptr_ = std::make_unique<HeadTRT>(out_channel_sizes, config_);
+  head_trt_ptr_->init(head_param.onnx_path(), head_param.engine_path(), head_param.trt_precision());
+  head_trt_ptr_->context_->setBindingDimensions(
+    0, nvinfer1::Dims4(
+         config_.batch_size_, config_.encoder_out_feature_size_, config_.grid_size_y_,
+         config_.grid_size_x_));
+
+  initPtr();
+
+  cudaStreamCreate(&stream_);
+}
+
+CenterPointTRT::~CenterPointTRT()
+{
+  if (stream_) {
+    cudaStreamSynchronize(stream_);
+    cudaStreamDestroy(stream_);
+  }
+}
+
/// Sizes and allocates every host and device buffer used by the pipeline,
/// derived from the model configuration.
void CenterPointTRT::initPtr()
{
  // Element counts (not bytes) for each buffer.
  const auto voxels_size =
    config_.max_voxel_size_ * config_.max_point_in_voxel_size_ * config_.point_feature_size_;
  const auto coordinates_size = config_.max_voxel_size_ * config_.point_dim_size_;
  encoder_in_feature_size_ =
    config_.max_voxel_size_ * config_.max_point_in_voxel_size_ * config_.encoder_in_feature_size_;
  const auto pillar_features_size = config_.max_voxel_size_ * config_.encoder_out_feature_size_;
  spatial_features_size_ =
    config_.grid_size_x_ * config_.grid_size_y_ * config_.encoder_out_feature_size_;
  // Head outputs are laid out on the downsampled grid.
  const auto grid_xy_size = config_.down_grid_size_x_ * config_.down_grid_size_y_;

  // host (filled by the voxel generator, then copied to the device in preprocess())
  voxels_.resize(voxels_size);
  coordinates_.resize(coordinates_size);
  num_points_per_voxel_.resize(config_.max_voxel_size_);

  // device (network inputs/outputs; cuda::make_unique wraps cudaMalloc -- see cuda_utils.hpp)
  voxels_d_ = cuda::make_unique<float[]>(voxels_size);
  coordinates_d_ = cuda::make_unique<int[]>(coordinates_size);
  num_points_per_voxel_d_ = cuda::make_unique<float[]>(config_.max_voxel_size_);
  encoder_in_features_d_ = cuda::make_unique<float[]>(encoder_in_feature_size_);
  pillar_features_d_ = cuda::make_unique<float[]>(pillar_features_size);
  spatial_features_d_ = cuda::make_unique<float[]>(spatial_features_size_);
  head_out_heatmap_d_ = cuda::make_unique<float[]>(grid_xy_size * config_.class_size_);
  head_out_offset_d_ = cuda::make_unique<float[]>(grid_xy_size * config_.head_out_offset_size_);
  head_out_z_d_ = cuda::make_unique<float[]>(grid_xy_size * config_.head_out_z_size_);
  head_out_dim_d_ = cuda::make_unique<float[]>(grid_xy_size * config_.head_out_dim_size_);
  head_out_rot_d_ = cuda::make_unique<float[]>(grid_xy_size * config_.head_out_rot_size_);
  head_out_vel_d_ = cuda::make_unique<float[]>(grid_xy_size * config_.head_out_vel_size_);
}
+
+bool CenterPointTRT::detect(
+  const pcl::PointCloud<pcl::PointXYZI>::Ptr & pc_ptr,
+  std::vector<Box3D> & det_boxes3d)
+{
+    std::fill(voxels_.begin(), voxels_.end(), 0);
+    std::fill(coordinates_.begin(), coordinates_.end(), -1);
+    std::fill(num_points_per_voxel_.begin(), num_points_per_voxel_.end(), 0);
+    CHECK_CUDA_ERROR(cudaMemsetAsync(
+      encoder_in_features_d_.get(), 0, encoder_in_feature_size_ * sizeof(float), stream_));
+    CHECK_CUDA_ERROR(
+      cudaMemsetAsync(spatial_features_d_.get(), 0, spatial_features_size_ * sizeof(float), stream_));
+
+    if (!preprocess(pc_ptr)) {
+        std::cout<<"Fail to preprocess and skip to detect."<<std::endl;
+//      RCLCPP_WARN_STREAM(
+//        rclcpp::get_logger("lidar_centerpoint"), "Fail to preprocess and skip to detect.");
+      return false;
+    }
+
+    inference();
+
+    postProcess(det_boxes3d);
+
+    return true;
+}
+
+//bool CenterPointTRT::detect(
+//  const sensor_msgs::msg::PointCloud2 & input_pointcloud_msg, const tf2_ros::Buffer & tf_buffer,
+//  std::vector<Box3D> & det_boxes3d)
+//{
+//  std::fill(voxels_.begin(), voxels_.end(), 0);
+//  std::fill(coordinates_.begin(), coordinates_.end(), -1);
+//  std::fill(num_points_per_voxel_.begin(), num_points_per_voxel_.end(), 0);
+//  CHECK_CUDA_ERROR(cudaMemsetAsync(
+//    encoder_in_features_d_.get(), 0, encoder_in_feature_size_ * sizeof(float), stream_));
+//  CHECK_CUDA_ERROR(
+//    cudaMemsetAsync(spatial_features_d_.get(), 0, spatial_features_size_ * sizeof(float), stream_));
+
+//  if (!preprocess(input_pointcloud_msg, tf_buffer)) {
+//    RCLCPP_WARN_STREAM(
+//      rclcpp::get_logger("lidar_centerpoint"), "Fail to preprocess and skip to detect.");
+//    return false;
+//  }
+
+//  inference();
+
+//  postProcess(det_boxes3d);
+
+//  return true;
+//}
+
+bool CenterPointTRT::preprocess(
+    const pcl::PointCloud<pcl::PointXYZI>::Ptr & pc_ptr)
+{
+    bool is_success = vg_ptr_->enqueuePointCloud(pc_ptr);
+    if (!is_success) {
+      return false;
+    }
+    num_voxels_ = vg_ptr_->pointsToVoxels(voxels_, coordinates_, num_points_per_voxel_);
+    if (num_voxels_ == 0) {
+      return false;
+    }
+
+    const auto voxels_size =
+      num_voxels_ * config_.max_point_in_voxel_size_ * config_.point_feature_size_;
+    const auto coordinates_size = num_voxels_ * config_.point_dim_size_;
+    // memcpy from host to device (not copy empty voxels)
+    CHECK_CUDA_ERROR(cudaMemcpyAsync(
+      voxels_d_.get(), voxels_.data(), voxels_size * sizeof(float), cudaMemcpyHostToDevice));
+    CHECK_CUDA_ERROR(cudaMemcpyAsync(
+      coordinates_d_.get(), coordinates_.data(), coordinates_size * sizeof(int),
+      cudaMemcpyHostToDevice));
+    CHECK_CUDA_ERROR(cudaMemcpyAsync(
+      num_points_per_voxel_d_.get(), num_points_per_voxel_.data(), num_voxels_ * sizeof(float),
+      cudaMemcpyHostToDevice));
+    CHECK_CUDA_ERROR(cudaStreamSynchronize(stream_));
+
+    CHECK_CUDA_ERROR(generateFeatures_launch(
+      voxels_d_.get(), num_points_per_voxel_d_.get(), coordinates_d_.get(), num_voxels_,
+      config_.max_voxel_size_, config_.voxel_size_x_, config_.voxel_size_y_, config_.voxel_size_z_,
+      config_.range_min_x_, config_.range_min_y_, config_.range_min_z_, encoder_in_features_d_.get(),
+      stream_));
+
+    return true;
+}
+
+//bool CenterPointTRT::preprocess(
+//  const sensor_msgs::msg::PointCloud2 & input_pointcloud_msg, const tf2_ros::Buffer & tf_buffer)
+//{
+//  bool is_success = vg_ptr_->enqueuePointCloud(input_pointcloud_msg, tf_buffer);
+//  if (!is_success) {
+//    return false;
+//  }
+//  num_voxels_ = vg_ptr_->pointsToVoxels(voxels_, coordinates_, num_points_per_voxel_);
+//  if (num_voxels_ == 0) {
+//    return false;
+//  }
+
+//  const auto voxels_size =
+//    num_voxels_ * config_.max_point_in_voxel_size_ * config_.point_feature_size_;
+//  const auto coordinates_size = num_voxels_ * config_.point_dim_size_;
+//  // memcpy from host to device (not copy empty voxels)
+//  CHECK_CUDA_ERROR(cudaMemcpyAsync(
+//    voxels_d_.get(), voxels_.data(), voxels_size * sizeof(float), cudaMemcpyHostToDevice));
+//  CHECK_CUDA_ERROR(cudaMemcpyAsync(
+//    coordinates_d_.get(), coordinates_.data(), coordinates_size * sizeof(int),
+//    cudaMemcpyHostToDevice));
+//  CHECK_CUDA_ERROR(cudaMemcpyAsync(
+//    num_points_per_voxel_d_.get(), num_points_per_voxel_.data(), num_voxels_ * sizeof(float),
+//    cudaMemcpyHostToDevice));
+//  CHECK_CUDA_ERROR(cudaStreamSynchronize(stream_));
+
+//  CHECK_CUDA_ERROR(generateFeatures_launch(
+//    voxels_d_.get(), num_points_per_voxel_d_.get(), coordinates_d_.get(), num_voxels_,
+//    config_.max_voxel_size_, config_.voxel_size_x_, config_.voxel_size_y_, config_.voxel_size_z_,
+//    config_.range_min_x_, config_.range_min_y_, config_.range_min_z_, encoder_in_features_d_.get(),
+//    stream_));
+
+//  return true;
+//}
+
/// Runs both TensorRT networks plus the scatter kernel, all on stream_.
/// @throws std::runtime_error when either execution context is missing.
void CenterPointTRT::inference()
{
  if (!encoder_trt_ptr_->context_ || !head_trt_ptr_->context_) {
    throw std::runtime_error("Failed to create tensorrt context.");
  }

  // pillar encoder network: voxel features -> per-pillar features
  std::vector<void *> encoder_buffers{encoder_in_features_d_.get(), pillar_features_d_.get()};
  encoder_trt_ptr_->context_->enqueueV2(encoder_buffers.data(), stream_, nullptr);

  // scatter: place pillar features back onto the 2D BEV grid
  CHECK_CUDA_ERROR(scatterFeatures_launch(
    pillar_features_d_.get(), coordinates_d_.get(), num_voxels_, config_.max_voxel_size_,
    config_.encoder_out_feature_size_, config_.grid_size_x_, config_.grid_size_y_,
    spatial_features_d_.get(), stream_));

  // head network: buffer order assumed to match the model's binding order --
  // confirm against the ONNX export if the model changes.
  std::vector<void *> head_buffers = {spatial_features_d_.get(), head_out_heatmap_d_.get(),
                                      head_out_offset_d_.get(),  head_out_z_d_.get(),
                                      head_out_dim_d_.get(),     head_out_rot_d_.get(),
                                      head_out_vel_d_.get()};
  head_trt_ptr_->context_->enqueueV2(head_buffers.data(), stream_, nullptr);
}
+
+void CenterPointTRT::postProcess(std::vector<Box3D> & det_boxes3d)
+{
+  CHECK_CUDA_ERROR(post_proc_ptr_->generateDetectedBoxes3D_launch(
+    head_out_heatmap_d_.get(), head_out_offset_d_.get(), head_out_z_d_.get(), head_out_dim_d_.get(),
+    head_out_rot_d_.get(), head_out_vel_d_.get(), det_boxes3d, stream_));
+  if (det_boxes3d.size() == 0) {
+      std::cout<<"No detected boxes"<<std::endl;
+//    RCLCPP_WARN_STREAM(rclcpp::get_logger("lidar_centerpoint"), "No detected boxes.");
+  }
+}
+
+}  // namespace centerpoint

+ 71 - 0
src/detection/detection_lidar_centerpoint/lib/detection_class_remapper.cpp

@@ -0,0 +1,71 @@
+// Copyright 2022 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <lidar_centerpoint/detection_class_remapper.hpp>
+
+namespace centerpoint
+{
+
+void DetectionClassRemapper::setParameters(
+  const std::vector<int64_t> & allow_remapping_by_area_matrix,
+  const std::vector<double> & min_area_matrix, const std::vector<double> & max_area_matrix)
+{
+  // Loads the three (num_labels x num_labels) remapping matrices, which arrive as
+  // flat row-major vectors. All three must describe the same square matrix size.
+  assert(allow_remapping_by_area_matrix.size() == min_area_matrix.size());
+  assert(allow_remapping_by_area_matrix.size() == max_area_matrix.size());
+  assert(
+    std::pow(static_cast<int>(std::sqrt(min_area_matrix.size())), 2) == min_area_matrix.size());
+
+  num_labels_ = static_cast<int>(std::sqrt(min_area_matrix.size()));
+
+  // The flat vectors are row-major, so map then transpose to get correct element order.
+  Eigen::Map<const Eigen::Matrix<int64_t, Eigen::Dynamic, Eigen::Dynamic>>
+    allow_remapping_by_area_matrix_tmp(
+      allow_remapping_by_area_matrix.data(), num_labels_, num_labels_);
+  allow_remapping_by_area_matrix_ = allow_remapping_by_area_matrix_tmp.transpose()
+                                      .cast<bool>();  // Eigen is column major by default
+
+  Eigen::Map<const Eigen::MatrixXd> min_area_matrix_tmp(
+    min_area_matrix.data(), num_labels_, num_labels_);
+  min_area_matrix_ = min_area_matrix_tmp.transpose();  // Eigen is column major by default
+
+  Eigen::Map<const Eigen::MatrixXd> max_area_matrix_tmp(
+    max_area_matrix.data(), num_labels_, num_labels_);
+  max_area_matrix_ = max_area_matrix_tmp.transpose();  // Eigen is column major by default
+
+  // Replace non-finite thresholds (e.g. NaN/inf from config) so comparisons in
+  // mapClasses() are always well-defined.
+  min_area_matrix_ = min_area_matrix_.unaryExpr(
+    [](double v) { return std::isfinite(v) ? v : std::numeric_limits<double>::max(); });
+  max_area_matrix_ = max_area_matrix_.unaryExpr(
+    [](double v) { return std::isfinite(v) ? v : std::numeric_limits<double>::max(); });
+}
+
+// Re-assigns object class labels based on the bird's-eye-view footprint area:
+// a label is replaced by the first candidate class whose remap is allowed and
+// whose [min, max] area range contains the object's area.
+void DetectionClassRemapper::mapClasses(autoware_auto_perception_msgs::msg::DetectedObjects & msg)
+{
+  for (auto & object : msg.objects) {
+    // BEV footprint area (length x width) in m^2.
+    const float bev_area = object.shape.dimensions.x * object.shape.dimensions.y;
+
+    for (auto & classification : object.classification) {
+      auto & label = classification.label;
+
+      for (int i = 0; i < num_labels_; ++i) {
+        if (
+          allow_remapping_by_area_matrix_(label, i) && bev_area >= min_area_matrix_(label, i) &&
+          bev_area <= max_area_matrix_(label, i)) {
+          label = i;
+          // First matching candidate wins; stop scanning for this classification.
+          break;
+        }
+      }
+    }
+  }
+}
+
+}  // namespace centerpoint

+ 74 - 0
src/detection/detection_lidar_centerpoint/lib/network/network_trt.cpp

@@ -0,0 +1,74 @@
+// Copyright 2021 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "lidar_centerpoint/network/network_trt.hpp"
+
+namespace centerpoint
+{
+bool VoxelEncoderTRT::setProfile(
+  nvinfer1::IBuilder & builder, nvinfer1::INetworkDefinition & network,
+  nvinfer1::IBuilderConfig & config)
+{
+  // Register a static optimization profile (min == opt == max) for the pillar
+  // encoder's single input and single output tensor.
+  auto profile = builder.createOptimizationProfile();
+
+  const auto set_static_dims = [&profile](const char * tensor_name, const auto & dims) {
+    profile->setDimensions(tensor_name, nvinfer1::OptProfileSelector::kMIN, dims);
+    profile->setDimensions(tensor_name, nvinfer1::OptProfileSelector::kOPT, dims);
+    profile->setDimensions(tensor_name, nvinfer1::OptProfileSelector::kMAX, dims);
+  };
+
+  // input: (max_voxels, max_points_per_voxel, encoder input features)
+  set_static_dims(
+    network.getInput(0)->getName(),
+    nvinfer1::Dims3(
+      config_.max_voxel_size_, config_.max_point_in_voxel_size_,
+      config_.encoder_in_feature_size_));
+
+  // output: (max_voxels, encoder output features)
+  set_static_dims(
+    network.getOutput(0)->getName(),
+    nvinfer1::Dims2(config_.max_voxel_size_, config_.encoder_out_feature_size_));
+
+  config.addOptimizationProfile(profile);
+
+  return true;
+}
+
+// Stores the per-head output channel counts (heatmap/offset/z/dim/rot/vel);
+// the base class keeps the shared CenterPoint configuration.
+HeadTRT::HeadTRT(
+  const std::vector<std::size_t> & out_channel_sizes, const CenterPointConfig & config)
+: TensorRTWrapper(config), out_channel_sizes_(out_channel_sizes)
+{
+}
+
+// Registers a static optimization profile (min == opt == max) for the head
+// network: one 4-D input (the scattered BEV feature map) and one 4-D output
+// per entry of out_channel_sizes_.
+bool HeadTRT::setProfile(
+  nvinfer1::IBuilder & builder, nvinfer1::INetworkDefinition & network,
+  nvinfer1::IBuilderConfig & config)
+{
+  auto profile = builder.createOptimizationProfile();
+  auto in_name = network.getInput(0)->getName();
+  // input: (batch, encoder output features, grid_y, grid_x)
+  auto in_dims = nvinfer1::Dims4(
+    config_.batch_size_, config_.encoder_out_feature_size_, config_.grid_size_y_,
+    config_.grid_size_x_);
+  profile->setDimensions(in_name, nvinfer1::OptProfileSelector::kMIN, in_dims);
+  profile->setDimensions(in_name, nvinfer1::OptProfileSelector::kOPT, in_dims);
+  profile->setDimensions(in_name, nvinfer1::OptProfileSelector::kMAX, in_dims);
+
+  // one output per head, each on the downsampled grid
+  for (std::size_t ci = 0; ci < out_channel_sizes_.size(); ci++) {
+    auto out_name = network.getOutput(ci)->getName();
+    auto out_dims = nvinfer1::Dims4(
+      config_.batch_size_, out_channel_sizes_[ci], config_.down_grid_size_y_,
+      config_.down_grid_size_x_);
+    profile->setDimensions(out_name, nvinfer1::OptProfileSelector::kMIN, out_dims);
+    profile->setDimensions(out_name, nvinfer1::OptProfileSelector::kOPT, out_dims);
+    profile->setDimensions(out_name, nvinfer1::OptProfileSelector::kMAX, out_dims);
+  }
+  config.addOptimizationProfile(profile);
+
+  return true;
+}
+
+}  // namespace centerpoint

+ 68 - 0
src/detection/detection_lidar_centerpoint/lib/network/scatter_kernel.cu

@@ -0,0 +1,68 @@
+// Copyright 2022 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "lidar_centerpoint/network/scatter_kernel.hpp"
+
+#include <lidar_centerpoint/utils.hpp>
+
+namespace
+{
+const std::size_t THREADS_PER_BLOCK = 32;
+}  // namespace
+
+namespace centerpoint
+{
+// Scatters per-pillar encoder features back onto the dense BEV grid.
+// One thread handles one (pillar, feature) pair.
+__global__ void scatterFeatures_kernel(
+  const float * pillar_features, const int * coords, const std::size_t num_pillars,
+  const std::size_t pillar_feature_size, const std::size_t grid_size_x,
+  const std::size_t grid_size_y, float * scattered_features)
+{
+  // pillar_features: shape of (max_num_pillars, pillar_feature_size)
+  // coords: shape of (max_num_pillars, 3)
+  // scattered_features: shape of (pillar_feature_size, grid_size_y, grid_size_x)
+  //   (the write index below is feature-major, not pillar-major)
+  const auto pillar_i = blockIdx.x * THREADS_PER_BLOCK + threadIdx.x;
+  const auto feature_i = blockIdx.y * THREADS_PER_BLOCK + threadIdx.y;
+
+  if (pillar_i >= num_pillars || feature_i >= pillar_feature_size) {
+    return;
+  }
+
+  // coords store (z, y, x) grid indices per pillar, so coord.x is the z index
+  // and coord.z is the x index.
+  const int3 coord = ((int3 *)coords)[pillar_i];  // zyx
+  if (coord.x < 0) {
+    // negative index marks an unused pillar slot — presumably set during
+    // voxelization; TODO confirm against the voxel generator
+    return;
+  }
+
+  const auto feature = pillar_features[pillar_feature_size * pillar_i + feature_i];
+  scattered_features[grid_size_y * grid_size_x * feature_i + grid_size_x * coord.y + coord.z] =
+    feature;
+}
+
+// cspell: ignore divup
+cudaError_t scatterFeatures_launch(
+  const float * pillar_features, const int * coords, const std::size_t num_pillars,
+  const std::size_t max_voxel_size, const std::size_t encoder_out_feature_size,
+  const std::size_t grid_size_x, const std::size_t grid_size_y, float * scattered_features,
+  cudaStream_t stream)
+{
+  dim3 blocks(
+    divup(max_voxel_size, THREADS_PER_BLOCK), divup(encoder_out_feature_size, THREADS_PER_BLOCK));
+  dim3 threads(THREADS_PER_BLOCK, THREADS_PER_BLOCK);
+  scatterFeatures_kernel<<<blocks, threads, 0, stream>>>(
+    pillar_features, coords, num_pillars, encoder_out_feature_size, grid_size_x, grid_size_y,
+    scattered_features);
+
+  return cudaGetLastError();
+}
+
+}  // namespace centerpoint

+ 161 - 0
src/detection/detection_lidar_centerpoint/lib/network/tensorrt_wrapper.cpp

@@ -0,0 +1,161 @@
+// Copyright 2021 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "lidar_centerpoint/network/tensorrt_wrapper.hpp"
+
+#include <NvOnnxParser.h>
+
+#include <fstream>
+#include <memory>
+#include <string>
+
+namespace centerpoint
+{
+// Stores the model configuration only; TensorRT objects are created in init().
+TensorRTWrapper::TensorRTWrapper(const CenterPointConfig & config) : config_(config) {}
+
+TensorRTWrapper::~TensorRTWrapper()
+{
+  // Release TensorRT objects explicitly, destroying the execution context first.
+  context_.reset();
+  runtime_.reset();
+  plan_.reset();
+  engine_.reset();
+}
+
+// Creates the TensorRT runtime, then either deserializes a cached engine from
+// engine_path (if the file exists) or builds one from the ONNX model, and
+// finally creates the execution context. Returns true only if every step succeeded.
+bool TensorRTWrapper::init(
+  const std::string & onnx_path, const std::string & engine_path, const std::string & precision)
+{
+  runtime_ =
+    tensorrt_common::TrtUniquePtr<nvinfer1::IRuntime>(nvinfer1::createInferRuntime(logger_));
+  if (!runtime_) {
+    std::cout << "Fail to create runtime" << std::endl;
+    return false;
+  }
+
+  bool success;
+  // The ifstream is only used to probe whether a cached engine file exists.
+  std::ifstream engine_file(engine_path);
+  if (engine_file.is_open()) {
+    success = loadEngine(engine_path);
+  } else {
+    success = parseONNX(onnx_path, engine_path, precision);
+  }
+  success &= createContext();
+
+  return success;
+}
+
+bool TensorRTWrapper::createContext()
+{
+  // An execution context can only be created once an engine exists.
+  if (!engine_) {
+    std::cout << "Fail to create context: Engine isn't created" << std::endl;
+    return false;
+  }
+
+  context_ =
+    tensorrt_common::TrtUniquePtr<nvinfer1::IExecutionContext>(engine_->createExecutionContext());
+  if (context_) {
+    return true;
+  }
+
+  std::cout << "Fail to create context" << std::endl;
+  return false;
+}
+
+// Builds a TensorRT engine from an ONNX model and caches the serialized plan
+// at engine_path. Returns false on any builder/parser/serialization failure.
+bool TensorRTWrapper::parseONNX(
+  const std::string & onnx_path, const std::string & engine_path, const std::string & precision,
+  const size_t workspace_size)
+{
+  auto builder =
+    tensorrt_common::TrtUniquePtr<nvinfer1::IBuilder>(nvinfer1::createInferBuilder(logger_));
+  if (!builder) {
+    std::cout << "Fail to create builder" << std::endl;
+    return false;
+  }
+
+  auto config =
+    tensorrt_common::TrtUniquePtr<nvinfer1::IBuilderConfig>(builder->createBuilderConfig());
+  if (!config) {
+    std::cout << "Fail to create config" << std::endl;
+    return false;
+  }
+  // setMaxWorkspaceSize() was deprecated in favor of setMemoryPoolLimit() in TensorRT 8.4.
+  // Fix: the guard previously used the undefined macro NV_TENSOR_PATCH (typo for
+  // NV_TENSORRT_PATCH), which the preprocessor silently evaluated as 0.
+#if (NV_TENSORRT_MAJOR * 1000) + (NV_TENSORRT_MINOR * 100) + NV_TENSORRT_PATCH >= 8400
+  config->setMemoryPoolLimit(nvinfer1::MemoryPoolType::kWORKSPACE, workspace_size);
+#else
+  config->setMaxWorkspaceSize(workspace_size);
+#endif
+  if (precision == "fp16") {
+    if (builder->platformHasFastFp16()) {
+      std::cout << "use TensorRT FP16 Inference" << std::endl;
+      config->setFlag(nvinfer1::BuilderFlag::kFP16);
+    } else {
+      std::cout << "TensorRT FP16 Inference isn't supported in this environment" << std::endl;
+    }
+  }
+
+  const auto flag =
+    1U << static_cast<uint32_t>(nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
+  auto network =
+    tensorrt_common::TrtUniquePtr<nvinfer1::INetworkDefinition>(builder->createNetworkV2(flag));
+  if (!network) {
+    std::cout << "Fail to create network" << std::endl;
+    return false;
+  }
+
+  auto parser = tensorrt_common::TrtUniquePtr<nvonnxparser::IParser>(
+    nvonnxparser::createParser(*network, logger_));
+  // Fix: the parse result was previously ignored, so a corrupt/missing ONNX file
+  // only surfaced later as an opaque build failure.
+  if (!parser->parseFromFile(
+        onnx_path.c_str(), static_cast<int>(nvinfer1::ILogger::Severity::kERROR))) {
+    std::cout << "Fail to parse onnx file: " << onnx_path << std::endl;
+    return false;
+  }
+
+  if (!setProfile(*builder, *network, *config)) {
+    std::cout << "Fail to set profile" << std::endl;
+    return false;
+  }
+
+  std::cout << "Applying optimizations and building TRT CUDA engine (" << onnx_path << ") ..."
+            << std::endl;
+  plan_ = tensorrt_common::TrtUniquePtr<nvinfer1::IHostMemory>(
+    builder->buildSerializedNetwork(*network, *config));
+  if (!plan_) {
+    std::cout << "Fail to create serialized network" << std::endl;
+    return false;
+  }
+  engine_ = tensorrt_common::TrtUniquePtr<nvinfer1::ICudaEngine>(
+    runtime_->deserializeCudaEngine(plan_->data(), plan_->size()));
+  if (!engine_) {
+    std::cout << "Fail to create engine" << std::endl;
+    return false;
+  }
+
+  return saveEngine(engine_path);
+}
+
+// Serializes the built plan to disk so later runs can skip the ONNX build.
+// Fix: the original unconditionally returned true, hiding open/write failures
+// (e.g. unwritable directory), and would dereference a null plan_.
+bool TensorRTWrapper::saveEngine(const std::string & engine_path)
+{
+  if (!plan_) {
+    std::cout << "Fail to save engine: no serialized plan" << std::endl;
+    return false;
+  }
+  std::cout << "Writing to " << engine_path << std::endl;
+  std::ofstream file(engine_path, std::ios::out | std::ios::binary);
+  if (!file) {
+    std::cout << "Fail to open " << engine_path << " for writing" << std::endl;
+    return false;
+  }
+  file.write(reinterpret_cast<const char *>(plan_->data()), plan_->size());
+  return file.good();
+}
+
+// Deserializes a cached engine from disk into engine_.
+// Fix: the original unconditionally returned true even when the file could not
+// be opened or deserialization failed, and opened the binary file in text mode.
+bool TensorRTWrapper::loadEngine(const std::string & engine_path)
+{
+  std::ifstream engine_file(engine_path, std::ios::in | std::ios::binary);
+  if (!engine_file) {
+    std::cout << "Fail to open " << engine_path << std::endl;
+    return false;
+  }
+  std::stringstream engine_buffer;
+  engine_buffer << engine_file.rdbuf();
+  std::string engine_str = engine_buffer.str();
+  engine_ = tensorrt_common::TrtUniquePtr<nvinfer1::ICudaEngine>(runtime_->deserializeCudaEngine(
+    reinterpret_cast<const void *>(engine_str.data()), engine_str.size()));
+  if (!engine_) {
+    std::cout << "Fail to deserialize engine from " << engine_path << std::endl;
+    return false;
+  }
+  std::cout << "Loaded engine from " << engine_path << std::endl;
+  return true;
+}
+
+}  // namespace centerpoint

+ 145 - 0
src/detection/detection_lidar_centerpoint/lib/postprocess/circle_nms_kernel.cu

@@ -0,0 +1,145 @@
+// Copyright 2022 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Modified from
+// https://github.com/open-mmlab/OpenPCDet/blob/master/pcdet/ops/iou3d_nms/src/iou3d_nms_kernel.cu
+
+/*
+3D IoU Calculation and Rotated NMS(modified from 2D NMS written by others)
+Written by Shaoshuai Shi
+All Rights Reserved 2019-2020.
+*/
+
+#include "lidar_centerpoint/postprocess/circle_nms_kernel.hpp"
+
+#include <lidar_centerpoint/cuda_utils.hpp>
+#include <lidar_centerpoint/utils.hpp>
+
+#include <thrust/host_vector.h>
+
+namespace
+{
+const std::size_t THREADS_PER_BLOCK_NMS = 16;
+}  // namespace
+
+namespace centerpoint
+{
+
+// Squared 2D (x, y) center distance between two boxes.
+// Fix: replaced powf(v, 2) with plain multiplication — powf is a general
+// transcendental and far slower than a multiply in this hot NMS kernel.
+__device__ inline float dist2dPow(const Box3D * a, const Box3D * b)
+{
+  const float dx = a->x - b->x;
+  const float dy = a->y - b->y;
+  return dx * dx + dy * dy;
+}
+
+// cspell: ignore divup
+__global__ void circleNMS_Kernel(
+  const Box3D * boxes, const std::size_t num_boxes3d, const std::size_t col_blocks,
+  const float dist2d_pow_threshold, std::uint64_t * mask)
+{
+  // params: boxes (N,)
+  // params: mask (N, divup(N/THREADS_PER_BLOCK_NMS))
+
+  const auto row_start = blockIdx.y;
+  const auto col_start = blockIdx.x;
+
+  if (row_start > col_start) return;
+
+  const std::size_t row_size =
+    fminf(num_boxes3d - row_start * THREADS_PER_BLOCK_NMS, THREADS_PER_BLOCK_NMS);
+  const std::size_t col_size =
+    fminf(num_boxes3d - col_start * THREADS_PER_BLOCK_NMS, THREADS_PER_BLOCK_NMS);
+
+  __shared__ Box3D block_boxes[THREADS_PER_BLOCK_NMS];
+
+  if (threadIdx.x < col_size) {
+    block_boxes[threadIdx.x] = boxes[THREADS_PER_BLOCK_NMS * col_start + threadIdx.x];
+  }
+  __syncthreads();
+
+  if (threadIdx.x < row_size) {
+    const std::size_t cur_box_idx = THREADS_PER_BLOCK_NMS * row_start + threadIdx.x;
+    const Box3D * cur_box = boxes + cur_box_idx;
+
+    std::uint64_t t = 0;
+    std::size_t start = 0;
+    if (row_start == col_start) {
+      start = threadIdx.x + 1;
+    }
+    for (std::size_t i = start; i < col_size; i++) {
+      if (dist2dPow(cur_box, block_boxes + i) < dist2d_pow_threshold) {
+        t |= 1ULL << i;
+      }
+    }
+    mask[cur_box_idx * col_blocks + col_start] = t;
+  }
+}
+
+// Host-side launcher for circleNMS_Kernel over a (col_blocks x col_blocks)
+// grid of 16-box tiles. The distance threshold is squared once here so the
+// kernel compares squared distances without a sqrt.
+cudaError_t circleNMS_launch(
+  const thrust::device_vector<Box3D> & boxes3d, const std::size_t num_boxes3d,
+  std::size_t col_blocks, const float distance_threshold,
+  thrust::device_vector<std::uint64_t> & mask, cudaStream_t stream)
+{
+  const float dist2d_pow_thres = powf(distance_threshold, 2);
+
+  dim3 blocks(col_blocks, col_blocks);
+  dim3 threads(THREADS_PER_BLOCK_NMS);
+  circleNMS_Kernel<<<blocks, threads, 0, stream>>>(
+    thrust::raw_pointer_cast(boxes3d.data()), num_boxes3d, col_blocks, dist2d_pow_thres,
+    thrust::raw_pointer_cast(mask.data()));
+
+  return cudaGetLastError();
+}
+
+// Greedy circle NMS: boxes3d is assumed sorted by descending score (see the
+// caller in postprocess_kernel.cu); a box is kept unless an earlier kept box
+// lies within distance_threshold. Fills keep_mask and returns the kept count.
+std::size_t circleNMS(
+  thrust::device_vector<Box3D> & boxes3d, const float distance_threshold,
+  thrust::device_vector<bool> & keep_mask, cudaStream_t stream)
+{
+  const auto num_boxes3d = boxes3d.size();
+  const auto col_blocks = divup(num_boxes3d, THREADS_PER_BLOCK_NMS);
+  thrust::device_vector<std::uint64_t> mask_d(num_boxes3d * col_blocks);
+
+  CHECK_CUDA_ERROR(
+    circleNMS_launch(boxes3d, num_boxes3d, col_blocks, distance_threshold, mask_d, stream));
+
+  // memcpy device to host
+  thrust::host_vector<std::uint64_t> mask_h(mask_d.size());
+  thrust::copy(mask_d.begin(), mask_d.end(), mask_h.begin());
+  CHECK_CUDA_ERROR(cudaStreamSynchronize(stream));
+
+  // generate keep_mask
+  // remv_h accumulates, per 64-box block, the bits of boxes suppressed so far
+  // (zero-initialized by the vector constructor).
+  std::vector<std::uint64_t> remv_h(col_blocks);
+  thrust::host_vector<bool> keep_mask_h(keep_mask.size());
+  std::size_t num_to_keep = 0;
+  for (std::size_t i = 0; i < num_boxes3d; i++) {
+    auto nblock = i / THREADS_PER_BLOCK_NMS;
+    auto inblock = i % THREADS_PER_BLOCK_NMS;
+
+    if (!(remv_h[nblock] & (1ULL << inblock))) {
+      // Box i survives; mark every box it suppresses.
+      keep_mask_h[i] = true;
+      num_to_keep++;
+      std::uint64_t * p = &mask_h[0] + i * col_blocks;
+      for (std::size_t j = nblock; j < col_blocks; j++) {
+        remv_h[j] |= p[j];
+      }
+    } else {
+      keep_mask_h[i] = false;
+    }
+  }
+
+  // memcpy host to device
+  keep_mask = keep_mask_h;
+
+  return num_to_keep;
+}
+
+}  // namespace centerpoint

+ 104 - 0
src/detection/detection_lidar_centerpoint/lib/postprocess/non_maximum_suppression.cpp

@@ -0,0 +1,104 @@
+// Copyright 2022 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "lidar_centerpoint/postprocess/non_maximum_suppression.hpp"
+
+#include "perception_utils/geometry.hpp"
+#include "perception_utils/perception_utils.hpp"
+//#include "tier4_autoware_utils/tier4_autoware_utils.hpp"
+
+namespace centerpoint
+{
+
+// Validates and stores the NMS parameters and precomputes the boolean
+// per-class mask of target classes.
+void NonMaximumSuppression::setParameters(const NMSParams & params)
+{
+  assert(params.search_distance_2d_ >= 0.0);
+  assert(params.iou_threshold_ >= 0.0 && params.iou_threshold_ <= 1.0);
+
+  params_ = params;
+  target_class_mask_ = classNamesToBooleanMask(params.target_class_names_);
+}
+
+// A label is an NMS target iff it is within range of the mask and flagged true;
+// out-of-range labels are never targets.
+bool NonMaximumSuppression::isTargetLabel(const uint8_t label)
+{
+  return label < target_class_mask_.size() && target_class_mask_.at(label);
+}
+
+// A pair is considered for suppression when both objects carry target labels,
+// or when the two objects are within search_distance_2d_ of each other.
+bool NonMaximumSuppression::isTargetPairObject(
+  const DetectedObject & object1, const DetectedObject & object2)
+{
+  const auto label1 = perception_utils::getHighestProbLabel(object1.classification);
+  const auto label2 = perception_utils::getHighestProbLabel(object2.classification);
+
+  if (isTargetLabel(label1) && isTargetLabel(label2)) {
+    return true;
+  }
+
+  // NOTE(review): calcSquaredDistance2d comes from tier4_autoware_utils, whose
+  // include at the top of this file is commented out — confirm it is still
+  // pulled in transitively (e.g. via perception_utils).
+  const auto search_sqr_dist_2d = params_.search_distance_2d_ * params_.search_distance_2d_;
+  const auto sqr_dist_2d = tier4_autoware_utils::calcSquaredDistance2d(
+    perception_utils::getPose(object1), perception_utils::getPose(object2));
+  return sqr_dist_2d <= search_sqr_dist_2d;
+}
+
+// Builds a lower-triangular matrix of BEV IoU values between candidate pairs.
+// Entry (target, source) is only filled for source < target, so each object is
+// compared against the objects before it in the input order.
+Eigen::MatrixXd NonMaximumSuppression::generateIoUMatrix(
+  const std::vector<DetectedObject> & input_objects)
+{
+  // NOTE: row = target objects to be suppressed, col = source objects to be compared
+  Eigen::MatrixXd triangular_matrix =
+    Eigen::MatrixXd::Zero(input_objects.size(), input_objects.size());
+  for (std::size_t target_i = 0; target_i < input_objects.size(); ++target_i) {
+    for (std::size_t source_i = 0; source_i < target_i; ++source_i) {
+      const auto & target_obj = input_objects.at(target_i);
+      const auto & source_obj = input_objects.at(source_i);
+      if (!isTargetPairObject(target_obj, source_obj)) {
+        continue;
+      }
+
+      if (params_.nms_type_ == NMS_TYPE::IoU_BEV) {
+        const double iou = perception_utils::get2dIoU(target_obj, source_obj);
+        triangular_matrix(target_i, source_i) = iou;
+        // NOTE: If the target object has any objects with iou > iou_threshold, it
+        // will be suppressed regardless of later results.
+        if (iou > params_.iou_threshold_) {
+          break;
+        }
+      }
+    }
+  }
+
+  return triangular_matrix;
+}
+
+// Suppresses objects whose maximum IoU against any earlier object exceeds the
+// threshold; all other objects pass through in their original order.
+std::vector<DetectedObject> NonMaximumSuppression::apply(
+  const std::vector<DetectedObject> & input_objects)
+{
+  Eigen::MatrixXd iou_matrix = generateIoUMatrix(input_objects);
+
+  std::vector<DetectedObject> output_objects;
+  output_objects.reserve(input_objects.size());
+  for (std::size_t i = 0; i < input_objects.size(); ++i) {
+    // Max IoU of object i against all earlier objects (row i of the triangular matrix).
+    const auto value = iou_matrix.row(i).maxCoeff();
+    if (params_.nms_type_ == NMS_TYPE::IoU_BEV) {
+      if (value <= params_.iou_threshold_) {
+        output_objects.emplace_back(input_objects.at(i));
+      }
+    }
+  }
+
+  return output_objects;
+}
+}  // namespace centerpoint

+ 166 - 0
src/detection/detection_lidar_centerpoint/lib/postprocess/postprocess_kernel.cu

@@ -0,0 +1,166 @@
+// Copyright 2022 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "lidar_centerpoint/postprocess/circle_nms_kernel.hpp"
+
+#include <lidar_centerpoint/postprocess/postprocess_kernel.hpp>
+
+#include <thrust/count.h>
+#include <thrust/device_vector.h>
+#include <thrust/sort.h>
+
+namespace
+{
+const std::size_t THREADS_PER_BLOCK = 32;
+}  // namespace
+
+namespace centerpoint
+{
+
+// Device predicate: true for boxes whose score exceeds the fixed threshold t_.
+// Used with thrust::count_if / copy_if for score filtering.
+struct is_score_greater
+{
+  is_score_greater(float t) : t_(t) {}
+
+  __device__ bool operator()(const Box3D & b) { return b.score > t_; }
+
+private:
+  float t_{0.0};
+};
+
+// Device predicate: identity on the NMS keep-mask, for thrust::copy_if stencils.
+struct is_kept
+{
+  __device__ bool operator()(const bool keep) { return keep; }
+};
+
+// Device comparator ordering boxes by descending score, for thrust::sort.
+struct score_greater
+{
+  __device__ bool operator()(const Box3D & lb, const Box3D & rb) { return lb.score > rb.score; }
+};
+
+// Logistic function, evaluated on device; maps heatmap logits to (0, 1) scores.
+__device__ inline float sigmoid(float x) { return 1.0f / (1.0f + expf(-x)); }
+
+// Decodes the head outputs into one Box3D per downsampled BEV cell.
+// One thread handles one (yi, xi) cell; out-of-grid threads exit early.
+__global__ void generateBoxes3D_kernel(
+  const float * out_heatmap, const float * out_offset, const float * out_z, const float * out_dim,
+  const float * out_rot, const float * out_vel, const float voxel_size_x, const float voxel_size_y,
+  const float range_min_x, const float range_min_y, const std::size_t down_grid_size_x,
+  const std::size_t down_grid_size_y, const std::size_t downsample_factor, const int class_size,
+  const float * yaw_norm_thresholds, Box3D * det_boxes3d)
+{
+  // generate boxes3d from the outputs of the network.
+  // shape of out_*: (N, DOWN_GRID_SIZE_Y, DOWN_GRID_SIZE_X)
+  // heatmap: N = class_size, offset: N = 2, z: N = 1, dim: N = 3, rot: N = 2, vel: N = 2
+  const auto yi = blockIdx.x * THREADS_PER_BLOCK + threadIdx.x;
+  const auto xi = blockIdx.y * THREADS_PER_BLOCK + threadIdx.y;
+  const auto idx = down_grid_size_x * yi + xi;
+  const auto down_grid_size = down_grid_size_y * down_grid_size_x;
+
+  if (yi >= down_grid_size_y || xi >= down_grid_size_x) {
+    return;
+  }
+
+  // argmax over classes on the sigmoid-activated heatmap at this cell
+  int label = -1;
+  float max_score = -1;
+  for (int ci = 0; ci < class_size; ci++) {
+    float score = sigmoid(out_heatmap[down_grid_size * ci + idx]);
+    if (score > max_score) {
+      label = ci;
+      max_score = score;
+    }
+  }
+
+  // center = (cell index + predicted sub-cell offset) scaled back to meters
+  const float offset_x = out_offset[down_grid_size * 0 + idx];
+  const float offset_y = out_offset[down_grid_size * 1 + idx];
+  const float x = voxel_size_x * downsample_factor * (xi + offset_x) + range_min_x;
+  const float y = voxel_size_y * downsample_factor * (yi + offset_y) + range_min_y;
+  const float z = out_z[idx];
+  const float w = out_dim[down_grid_size * 0 + idx];
+  const float l = out_dim[down_grid_size * 1 + idx];
+  const float h = out_dim[down_grid_size * 2 + idx];
+  // heading is predicted as (sin, cos); its norm gauges heading confidence
+  const float yaw_sin = out_rot[down_grid_size * 0 + idx];
+  const float yaw_cos = out_rot[down_grid_size * 1 + idx];
+  const float yaw_norm = sqrtf(yaw_sin * yaw_sin + yaw_cos * yaw_cos);
+  const float vel_x = out_vel[down_grid_size * 0 + idx];
+  const float vel_y = out_vel[down_grid_size * 1 + idx];
+
+  det_boxes3d[idx].label = label;
+  // boxes with an unreliable heading are suppressed by zeroing their score
+  det_boxes3d[idx].score = yaw_norm >= yaw_norm_thresholds[label] ? max_score : 0.f;
+  det_boxes3d[idx].x = x;
+  det_boxes3d[idx].y = y;
+  det_boxes3d[idx].z = z;
+  // dimensions are predicted in log space
+  det_boxes3d[idx].length = expf(l);
+  det_boxes3d[idx].width = expf(w);
+  det_boxes3d[idx].height = expf(h);
+  det_boxes3d[idx].yaw = atan2f(yaw_sin, yaw_cos);
+  det_boxes3d[idx].vel_x = vel_x;
+  det_boxes3d[idx].vel_y = vel_y;
+}
+
+// Pre-allocates device storage: one raw box per downsampled BEV cell, plus the
+// per-class yaw-norm thresholds copied once to the device.
+PostProcessCUDA::PostProcessCUDA(const CenterPointConfig & config) : config_(config)
+{
+  const auto num_raw_boxes3d = config.down_grid_size_y_ * config.down_grid_size_x_;
+  boxes3d_d_ = thrust::device_vector<Box3D>(num_raw_boxes3d);
+  yaw_norm_thresholds_d_ = thrust::device_vector<float>(
+    config_.yaw_norm_thresholds_.begin(), config_.yaw_norm_thresholds_.end());
+}
+
+// cspell: ignore divup
+// cspell: ignore divup
+// Full post-processing pipeline on device: decode boxes for every cell, drop
+// low-score boxes, sort by score, apply circle NMS, then copy survivors to host.
+cudaError_t PostProcessCUDA::generateDetectedBoxes3D_launch(
+  const float * out_heatmap, const float * out_offset, const float * out_z, const float * out_dim,
+  const float * out_rot, const float * out_vel, std::vector<Box3D> & det_boxes3d,
+  cudaStream_t stream)
+{
+  dim3 blocks(
+    divup(config_.down_grid_size_y_, THREADS_PER_BLOCK),
+    divup(config_.down_grid_size_x_, THREADS_PER_BLOCK));
+  dim3 threads(THREADS_PER_BLOCK, THREADS_PER_BLOCK);
+  generateBoxes3D_kernel<<<blocks, threads, 0, stream>>>(
+    out_heatmap, out_offset, out_z, out_dim, out_rot, out_vel, config_.voxel_size_x_,
+    config_.voxel_size_y_, config_.range_min_x_, config_.range_min_y_, config_.down_grid_size_x_,
+    config_.down_grid_size_y_, config_.downsample_factor_, config_.class_size_,
+    thrust::raw_pointer_cast(yaw_norm_thresholds_d_.data()),
+    thrust::raw_pointer_cast(boxes3d_d_.data()));
+
+  // suppress by score
+  const auto num_det_boxes3d = thrust::count_if(
+    thrust::device, boxes3d_d_.begin(), boxes3d_d_.end(),
+    is_score_greater(config_.score_threshold_));
+  if (num_det_boxes3d == 0) {
+    // nothing above threshold; det_boxes3d is left untouched by design
+    return cudaGetLastError();
+  }
+  thrust::device_vector<Box3D> det_boxes3d_d(num_det_boxes3d);
+  thrust::copy_if(
+    thrust::device, boxes3d_d_.begin(), boxes3d_d_.end(), det_boxes3d_d.begin(),
+    is_score_greater(config_.score_threshold_));
+
+  // sort by score (descending), as required by the greedy circle NMS below
+  thrust::sort(det_boxes3d_d.begin(), det_boxes3d_d.end(), score_greater());
+
+  // suppress by NMS
+  thrust::device_vector<bool> final_keep_mask_d(num_det_boxes3d);
+  const auto num_final_det_boxes3d =
+    circleNMS(det_boxes3d_d, config_.circle_nms_dist_threshold_, final_keep_mask_d, stream);
+
+  thrust::device_vector<Box3D> final_det_boxes3d_d(num_final_det_boxes3d);
+  thrust::copy_if(
+    thrust::device, det_boxes3d_d.begin(), det_boxes3d_d.end(), final_keep_mask_d.begin(),
+    final_det_boxes3d_d.begin(), is_kept());
+
+  // memcpy device to host
+  det_boxes3d.resize(num_final_det_boxes3d);
+  thrust::copy(final_det_boxes3d_d.begin(), final_det_boxes3d_d.end(), det_boxes3d.begin());
+
+  return cudaGetLastError();
+}
+
+}  // namespace centerpoint

+ 118 - 0
src/detection/detection_lidar_centerpoint/lib/preprocess/pointcloud_densification.cpp

@@ -0,0 +1,118 @@
+// Copyright 2021 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "lidar_centerpoint/preprocess/pointcloud_densification.hpp"
+
+//#include <pcl_ros/transforms.hpp>
+
+#include <boost/optional.hpp>
+
+//#include <pcl_conversions/pcl_conversions.h>
+#ifdef ROS_DISTRO_GALACTIC
+#include <tf2_eigen/tf2_eigen.h>
+#else
+//#include <tf2_eigen/tf2_eigen.hpp>
+#endif
+
+#include <string>
+#include <utility>
+
+namespace
+{
+//boost::optional<geometry_msgs::msg::Transform> getTransform(
+//  const tf2_ros::Buffer & tf_buffer, const std::string & target_frame_id,
+//  const std::string & source_frame_id, const rclcpp::Time & time)
+//{
+//  try {
+//    geometry_msgs::msg::TransformStamped transform_stamped;
+//    transform_stamped = tf_buffer.lookupTransform(
+//      target_frame_id, source_frame_id, time, rclcpp::Duration::from_seconds(0.5));
+//    return transform_stamped.transform;
+//  } catch (tf2::TransformException & ex) {
+//    RCLCPP_WARN_STREAM(rclcpp::get_logger("lidar_centerpoint"), ex.what());
+//    return boost::none;
+//  }
+//}
+
+//Eigen::Affine3f transformToEigen(const geometry_msgs::msg::Transform & t)
+//{
+//  Eigen::Affine3f a;
+//  a.matrix() = tf2::transformToEigen(t).matrix().cast<float>();
+//  return a;
+//}
+
+}  // namespace
+
+namespace centerpoint
+{
+// Stores the densification parameters (notably the pointcloud cache size).
+PointCloudDensification::PointCloudDensification(const DensificationParam & param) : param_(param)
+{
+}
+
+
+// Pushes a new point cloud into the cache and evicts the oldest entry if the
+// cache exceeds its configured size. Always returns true in this PCL-based
+// variant (the ROS/tf variant below could fail on a missing transform).
+bool PointCloudDensification::enqueuePointCloud(
+   const pcl::PointCloud<pcl::PointXYZI>::Ptr & pc_ptr)
+{
+    enqueue(pc_ptr);
+    dequeue();
+
+    return true;
+}
+
+//bool PointCloudDensification::enqueuePointCloud(
+//  const sensor_msgs::msg::PointCloud2 & pointcloud_msg, const tf2_ros::Buffer & tf_buffer)
+//{
+//  const auto header = pointcloud_msg.header;
+
+//  if (param_.pointcloud_cache_size() > 1) {
+//    auto transform_world2current =
+//      getTransform(tf_buffer, header.frame_id, param_.world_frame_id(), header.stamp);
+//    if (!transform_world2current) {
+//      return false;
+//    }
+//    auto affine_world2current = transformToEigen(transform_world2current.get());
+
+//    enqueue(pointcloud_msg, affine_world2current);
+//  } else {
+//    enqueue(pointcloud_msg, Eigen::Affine3f::Identity());
+//  }
+
+//  dequeue();
+
+//  return true;
+//}
+
+
+// Adds the cloud to the front of the cache (newest first). Unlike the ROS
+// variant below, no world->sensor transform is stored with it.
+void PointCloudDensification::enqueue(const pcl::PointCloud<pcl::PointXYZI>::Ptr & pc_ptr)
+{
+    PointCloudWithTransform pointcloud = {pc_ptr};
+    pointcloud_cache_.push_front(pointcloud);
+}
+//void PointCloudDensification::enqueue(
+//  const sensor_msgs::msg::PointCloud2 & msg, const Eigen::Affine3f & affine_world2current)
+//{
+//  affine_world2current_ = affine_world2current;
+//  current_timestamp_ = rclcpp::Time(msg.header.stamp).seconds();
+//  PointCloudWithTransform pointcloud = {msg, affine_world2current.inverse()};
+//  pointcloud_cache_.push_front(pointcloud);
+//}
+
+// Evicts the oldest cached cloud when the cache grows past its limit. Since
+// enqueue() adds one element at a time, removing a single element is enough.
+void PointCloudDensification::dequeue()
+{
+  if (pointcloud_cache_.size() > param_.pointcloud_cache_size()) {
+    pointcloud_cache_.pop_back();
+  }
+}
+
+}  // namespace centerpoint

+ 160 - 0
src/detection/detection_lidar_centerpoint/lib/preprocess/preprocess_kernel.cu

@@ -0,0 +1,160 @@
+// Copyright 2022 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "lidar_centerpoint/preprocess/preprocess_kernel.hpp"
+
+#include <lidar_centerpoint/utils.hpp>
+
+namespace
+{
+// Compile-time kernel tile parameters; `constexpr` (rather than `const`)
+// makes the compile-time requirement explicit -- these sizes are used as
+// __shared__ array extents in the kernel below and must match the config.
+constexpr std::size_t MAX_POINT_IN_VOXEL_SIZE = 32;  // the same as max_point_in_voxel_size_ in config
+constexpr std::size_t WARPS_PER_BLOCK = 4;  // pillars handled per thread block (one warp each)
+constexpr std::size_t ENCODER_IN_FEATURE_SIZE = 9;  // the same as encoder_in_feature_size_ in config
+}  // namespace
+
+namespace centerpoint
+{
+// Build the per-pillar encoder input features (PointPillars-style).
+// Thread layout: MAX_POINT_IN_VOXEL_SIZE (32) consecutive threads -- one
+// warp -- serve one pillar; each block serves WARPS_PER_BLOCK pillars.
+__global__ void generateFeatures_kernel(
+  const float * voxel_features, const float * voxel_num_points, const int * coords,
+  const std::size_t num_voxels, const float voxel_x, const float voxel_y, const float voxel_z,
+  const float range_min_x, const float range_min_y, const float range_min_z, float * features)
+{
+  // voxel_features (float): (max_voxel_size, max_point_in_voxel_size, point_feature_size)
+  // voxel_num_points (float): (max_voxel_size) -- truncated to int when cached below
+  // coords (int): (max_voxel_size, point_dim_size), ordered (z, y, x)
+  // features (float): max_voxel_size blocks of
+  //   MAX_POINT_IN_VOXEL_SIZE * ENCODER_IN_FEATURE_SIZE floats (flat copy of
+  //   pillarOutSM -- see the output loop at the bottom)
+  int pillar_idx = blockIdx.x * WARPS_PER_BLOCK + threadIdx.x / MAX_POINT_IN_VOXEL_SIZE;
+  int point_idx = threadIdx.x % MAX_POINT_IN_VOXEL_SIZE;
+  int pillar_idx_inBlock = threadIdx.x / MAX_POINT_IN_VOXEL_SIZE;  // max_point_in_voxel_size
+
+  // The grid is sized by capacity (max_voxel_size, see the launch wrapper);
+  // threads belonging to unfilled pillars exit immediately.
+  if (pillar_idx >= num_voxels) return;
+
+  // load src
+  __shared__ float4 pillarSM[WARPS_PER_BLOCK][MAX_POINT_IN_VOXEL_SIZE];
+  __shared__ float3 pillarSumSM[WARPS_PER_BLOCK];
+  __shared__ int3 cordsSM[WARPS_PER_BLOCK];
+  __shared__ int pointsNumSM[WARPS_PER_BLOCK];
+  __shared__ float pillarOutSM[WARPS_PER_BLOCK][MAX_POINT_IN_VOXEL_SIZE][ENCODER_IN_FEATURE_SIZE];
+
+  // First WARPS_PER_BLOCK threads cache per-pillar metadata. The float point
+  // count is implicitly truncated to int here.
+  // NOTE(review): in the last block these loads can index pillars >= num_voxels;
+  // the values are never consumed (those pillars' threads returned above), but
+  // this assumes the buffers are always allocated to max_voxel_size -- confirm.
+  if (threadIdx.x < WARPS_PER_BLOCK) {
+    pointsNumSM[threadIdx.x] = voxel_num_points[blockIdx.x * WARPS_PER_BLOCK + threadIdx.x];
+    cordsSM[threadIdx.x] = ((int3 *)coords)[blockIdx.x * WARPS_PER_BLOCK + threadIdx.x];
+    pillarSumSM[threadIdx.x] = {0, 0, 0};
+  }
+
+  // Each thread caches its point slot as a float4 (assumes point_feature_size == 4).
+  pillarSM[pillar_idx_inBlock][point_idx] =
+    ((float4 *)voxel_features)[pillar_idx * MAX_POINT_IN_VOXEL_SIZE + point_idx];
+  __syncthreads();
+
+  // Accumulate x/y/z over the valid points of the pillar (shared-memory atomics).
+  if (point_idx < pointsNumSM[pillar_idx_inBlock]) {
+    atomicAdd(&(pillarSumSM[pillar_idx_inBlock].x), pillarSM[pillar_idx_inBlock][point_idx].x);
+    atomicAdd(&(pillarSumSM[pillar_idx_inBlock].y), pillarSM[pillar_idx_inBlock][point_idx].y);
+    atomicAdd(&(pillarSumSM[pillar_idx_inBlock].z), pillarSM[pillar_idx_inBlock][point_idx].z);
+  }
+  __syncthreads();
+
+  // feature-mean: `mean` ends up as this point's offset from the pillar
+  // centroid. Computed by every thread; for empty slots validPoints may be 0
+  // (inf/nan), but those lanes write zeros in the else branch below.
+  float3 mean;
+  float validPoints = pointsNumSM[pillar_idx_inBlock];
+  mean.x = pillarSumSM[pillar_idx_inBlock].x / validPoints;
+  mean.y = pillarSumSM[pillar_idx_inBlock].y / validPoints;
+  mean.z = pillarSumSM[pillar_idx_inBlock].z / validPoints;
+
+  mean.x = pillarSM[pillar_idx_inBlock][point_idx].x - mean.x;
+  mean.y = pillarSM[pillar_idx_inBlock][point_idx].y - mean.y;
+  mean.z = pillarSM[pillar_idx_inBlock][point_idx].z - mean.z;
+
+  // calculate offset: world-space center of this pillar's voxel cell.
+  // cordsSM is (z, y, x), hence .z pairs with voxel_x and .x with voxel_z.
+  float x_offset = voxel_x / 2 + cordsSM[pillar_idx_inBlock].z * voxel_x + range_min_x;
+  float y_offset = voxel_y / 2 + cordsSM[pillar_idx_inBlock].y * voxel_y + range_min_y;
+  float z_offset = voxel_z / 2 + cordsSM[pillar_idx_inBlock].x * voxel_z + range_min_z;
+
+  // feature-offset: point position relative to the voxel center.
+  float3 center;
+  center.x = pillarSM[pillar_idx_inBlock][point_idx].x - x_offset;
+  center.y = pillarSM[pillar_idx_inBlock][point_idx].y - y_offset;
+  center.z = pillarSM[pillar_idx_inBlock][point_idx].z - z_offset;
+
+  // store output: 9 channels = raw (x, y, z, w) + centroid offset (3) +
+  // voxel-center offset x/y (2). Invalid point slots are zero-filled.
+  if (point_idx < pointsNumSM[pillar_idx_inBlock]) {
+    pillarOutSM[pillar_idx_inBlock][point_idx][0] = pillarSM[pillar_idx_inBlock][point_idx].x;
+    pillarOutSM[pillar_idx_inBlock][point_idx][1] = pillarSM[pillar_idx_inBlock][point_idx].y;
+    pillarOutSM[pillar_idx_inBlock][point_idx][2] = pillarSM[pillar_idx_inBlock][point_idx].z;
+    pillarOutSM[pillar_idx_inBlock][point_idx][3] = pillarSM[pillar_idx_inBlock][point_idx].w;
+
+    pillarOutSM[pillar_idx_inBlock][point_idx][4] = mean.x;
+    pillarOutSM[pillar_idx_inBlock][point_idx][5] = mean.y;
+    pillarOutSM[pillar_idx_inBlock][point_idx][6] = mean.z;
+
+    pillarOutSM[pillar_idx_inBlock][point_idx][7] = center.x;
+    pillarOutSM[pillar_idx_inBlock][point_idx][8] = center.y;
+
+  } else {
+    pillarOutSM[pillar_idx_inBlock][point_idx][0] = 0;
+    pillarOutSM[pillar_idx_inBlock][point_idx][1] = 0;
+    pillarOutSM[pillar_idx_inBlock][point_idx][2] = 0;
+    pillarOutSM[pillar_idx_inBlock][point_idx][3] = 0;
+
+    pillarOutSM[pillar_idx_inBlock][point_idx][4] = 0;
+    pillarOutSM[pillar_idx_inBlock][point_idx][5] = 0;
+    pillarOutSM[pillar_idx_inBlock][point_idx][6] = 0;
+
+    pillarOutSM[pillar_idx_inBlock][point_idx][7] = 0;
+    pillarOutSM[pillar_idx_inBlock][point_idx][8] = 0;
+  }
+
+  __syncthreads();
+
+  // Flat copy of this pillar's shared output buffer into global memory: the
+  // intra-pillar linear index (i * MAX_POINT_IN_VOXEL_SIZE + point_idx) is
+  // identical on both sides, so the global layout mirrors pillarOutSM; each
+  // thread moves ENCODER_IN_FEATURE_SIZE strided elements.
+  for (int i = 0; i < ENCODER_IN_FEATURE_SIZE; i++) {
+    int outputSMId = pillar_idx_inBlock * MAX_POINT_IN_VOXEL_SIZE * ENCODER_IN_FEATURE_SIZE +
+                     i * MAX_POINT_IN_VOXEL_SIZE + point_idx;
+    int outputId = pillar_idx * MAX_POINT_IN_VOXEL_SIZE * ENCODER_IN_FEATURE_SIZE +
+                   i * MAX_POINT_IN_VOXEL_SIZE + point_idx;
+    features[outputId] = ((float *)pillarOutSM)[outputSMId];
+  }
+}
+
+// cspell: ignore divup
+cudaError_t generateFeatures_launch(
+  const float * voxel_features, const float * voxel_num_points, const int * coords,
+  const std::size_t num_voxels, const std::size_t max_voxel_size, const float voxel_size_x,
+  const float voxel_size_y, const float voxel_size_z, const float range_min_x,
+  const float range_min_y, const float range_min_z, float * features, cudaStream_t stream)
+{
+  dim3 blocks(divup(max_voxel_size, WARPS_PER_BLOCK));
+  dim3 threads(WARPS_PER_BLOCK * MAX_POINT_IN_VOXEL_SIZE);
+  generateFeatures_kernel<<<blocks, threads, 0, stream>>>(
+    voxel_features, voxel_num_points, coords, num_voxels, voxel_size_x, voxel_size_y, voxel_size_z,
+    range_min_x, range_min_y, range_min_z, features);
+
+  return cudaGetLastError();
+}
+
+}  // namespace centerpoint

+ 224 - 0
src/detection/detection_lidar_centerpoint/lib/preprocess/voxel_generator.cpp

@@ -0,0 +1,224 @@
+// Copyright 2021 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "lidar_centerpoint/preprocess/voxel_generator.hpp"
+
+//#include <sensor_msgs/point_cloud2_iterator.hpp>
+
+#include <memory>
+
+#include <Eigen/Eigen>
+
+namespace centerpoint
+{
+// Cache geometry constants from the config for fast per-point voxelization:
+//  - range_[0..2] / range_[3..5]: min / max detection range per axis (x, y, z)
+//  - grid_size_: voxel-grid extent per axis
+//  - recip_voxel_size_: 1 / voxel edge length, so voxelization multiplies
+//    instead of dividing per point
+VoxelGeneratorTemplate::VoxelGeneratorTemplate(
+  const DensificationParam & param, const CenterPointConfig & config)
+: config_(config)
+{
+  pd_ptr_ = std::make_unique<PointCloudDensification>(param);
+  range_[0] = config.range_min_x_;
+  range_[1] = config.range_min_y_;
+  range_[2] = config.range_min_z_;
+  range_[3] = config.range_max_x_;
+  range_[4] = config.range_max_y_;
+  range_[5] = config.range_max_z_;
+  grid_size_[0] = config.grid_size_x_;
+  grid_size_[1] = config.grid_size_y_;
+  grid_size_[2] = config.grid_size_z_;
+  recip_voxel_size_[0] = 1 / config.voxel_size_x_;
+  recip_voxel_size_[1] = 1 / config.voxel_size_y_;
+  recip_voxel_size_[2] = 1 / config.voxel_size_z_;
+}
+
+// Forward the cloud to the densification cache; returns its success flag.
+bool VoxelGeneratorTemplate::enqueuePointCloud(
+  const pcl::PointCloud<pcl::PointXYZI>::Ptr pc_ptr)
+{
+    return pd_ptr_->enqueuePointCloud(pc_ptr);
+}
+
+//bool VoxelGeneratorTemplate::enqueuePointCloud(
+//  const sensor_msgs::msg::PointCloud2 & input_pointcloud_msg, const tf2_ros::Buffer & tf_buffer)
+//{
+//  return pd_ptr_->enqueuePointCloud(input_pointcloud_msg, tf_buffer);
+//}
+
+
+// Convert every cached point cloud into flattened pillar/voxel tensors.
+//
+// Output layout (caller-allocated, zero-initialized):
+//   voxels               (max_voxel_size * max_point_in_voxel_size * point_feature_size)
+//   coordinates          (max_voxel_size * point_dim_size), stored as (z, y, x)
+//   num_points_per_voxel (max_voxel_size)
+// Returns the number of voxels actually filled.
+//
+// Changes vs. the original: removed unused locals (Eigen vectors, outer-scope
+// loop variables) and switched to a range-for over points instead of indexed
+// access via .at(); the voxelization logic is unchanged.
+std::size_t VoxelGenerator::pointsToVoxels(
+  std::vector<float> & voxels, std::vector<int> & coordinates,
+  std::vector<float> & num_points_per_voxel)
+{
+  // Dense grid-cell index -> allocated voxel slot; -1 marks an empty cell.
+  const std::size_t grid_size = config_.grid_size_z_ * config_.grid_size_y_ * config_.grid_size_x_;
+  std::vector<int> coord_to_voxel_idx(grid_size, -1);
+
+  std::size_t voxel_cnt = 0;  // @return
+  std::vector<float> point(config_.point_feature_size_);
+  std::vector<float> coord_zyx(config_.point_dim_size_);
+
+  for (auto pc_cache_iter = pd_ptr_->getPointCloudCacheIter(); !pd_ptr_->isCacheEnd(pc_cache_iter);
+       pc_cache_iter++) {
+    const pcl::PointCloud<pcl::PointXYZI>::Ptr & pc_ptr = pc_cache_iter->pc_ptr;
+
+    for (const auto & src : pc_ptr->points) {
+      point[0] = src.x;
+      point[1] = src.y;
+      point[2] = src.z;
+      // Time-lag feature: always 0 here because this overload does not
+      // densify across timestamps (compare the commented-out ROS variant).
+      point[3] = 0;
+
+      // Quantize each dimension to a grid coordinate; skip the point if any
+      // dimension falls outside the configured range.
+      bool out_of_range = false;
+      for (std::size_t di = 0; di < config_.point_dim_size_; di++) {
+        const int c = static_cast<int>((point[di] - range_[di]) * recip_voxel_size_[di]);
+        if (c < 0 || c >= grid_size_[di]) {
+          out_of_range = true;
+          break;
+        }
+        coord_zyx[config_.point_dim_size_ - di - 1] = c;  // reversed to (z, y, x)
+      }
+      if (out_of_range) {
+        continue;
+      }
+
+      const int coord_idx = coord_zyx[0] * config_.grid_size_y_ * config_.grid_size_x_ +
+                            coord_zyx[1] * config_.grid_size_x_ + coord_zyx[2];
+      int voxel_idx = coord_to_voxel_idx[coord_idx];
+      if (voxel_idx == -1) {
+        // First point in this cell: allocate a new voxel slot unless the
+        // voxel budget is exhausted (then the point is dropped).
+        if (voxel_cnt >= config_.max_voxel_size_) {
+          continue;
+        }
+        voxel_idx = voxel_cnt;
+        voxel_cnt++;
+        coord_to_voxel_idx[coord_idx] = voxel_idx;
+        for (std::size_t di = 0; di < config_.point_dim_size_; di++) {
+          coordinates[voxel_idx * config_.point_dim_size_ + di] = coord_zyx[di];
+        }
+      }
+
+      // Append the point's features unless the voxel is already full.
+      const std::size_t point_cnt = num_points_per_voxel[voxel_idx];
+      if (point_cnt < config_.max_point_in_voxel_size_) {
+        for (std::size_t fi = 0; fi < config_.point_feature_size_; fi++) {
+          voxels
+            [voxel_idx * config_.max_point_in_voxel_size_ * config_.point_feature_size_ +
+             point_cnt * config_.point_feature_size_ + fi] = point[fi];
+        }
+        num_points_per_voxel[voxel_idx]++;
+      }
+    }
+  }
+
+  return voxel_cnt;
+}
+
+
+//std::size_t VoxelGenerator::pointsToVoxels(
+//  std::vector<float> & voxels, std::vector<int> & coordinates,
+//  std::vector<float> & num_points_per_voxel)
+//{
+//  // voxels (float): (max_voxel_size * max_point_in_voxel_size * point_feature_size)
+//  // coordinates (int): (max_voxel_size * point_dim_size)
+//  // num_points_per_voxel (float): (max_voxel_size)
+
+//  const std::size_t grid_size = config_.grid_size_z_ * config_.grid_size_y_ * config_.grid_size_x_;
+//  std::vector<int> coord_to_voxel_idx(grid_size, -1);
+
+//  std::size_t voxel_cnt = 0;  // @return
+//  std::vector<float> point;
+//  point.resize(config_.point_feature_size_);
+//  std::vector<float> coord_zyx;
+//  coord_zyx.resize(config_.point_dim_size_);
+//  bool out_of_range;
+//  std::size_t point_cnt;
+//  int c, coord_idx, voxel_idx;
+//  Eigen::Vector3f point_current, point_past;
+
+//  for (auto pc_cache_iter = pd_ptr_->getPointCloudCacheIter(); !pd_ptr_->isCacheEnd(pc_cache_iter);
+//       pc_cache_iter++) {
+//    auto pc_msg = pc_cache_iter->pointcloud_msg;
+//    auto affine_past2current =
+//      pd_ptr_->pointcloud_cache_size() > 1
+//        ? pd_ptr_->getAffineWorldToCurrent() * pc_cache_iter->affine_past2world
+//        : Eigen::Affine3f::Identity();
+//    float time_lag = static_cast<float>(
+//      pd_ptr_->getCurrentTimestamp() - rclcpp::Time(pc_msg.header.stamp).seconds());
+
+//    for (sensor_msgs::PointCloud2ConstIterator<float> x_iter(pc_msg, "x"), y_iter(pc_msg, "y"),
+//         z_iter(pc_msg, "z");
+//         x_iter != x_iter.end(); ++x_iter, ++y_iter, ++z_iter) {
+//      point_past << *x_iter, *y_iter, *z_iter;
+//      point_current = affine_past2current * point_past;
+
+//      point[0] = point_current.x();
+//      point[1] = point_current.y();
+//      point[2] = point_current.z();
+//      point[3] = time_lag;
+
+//      out_of_range = false;
+//      for (std::size_t di = 0; di < config_.point_dim_size_; di++) {
+//        c = static_cast<int>((point[di] - range_[di]) * recip_voxel_size_[di]);
+//        if (c < 0 || c >= grid_size_[di]) {
+//          out_of_range = true;
+//          break;
+//        }
+//        coord_zyx[config_.point_dim_size_ - di - 1] = c;
+//      }
+//      if (out_of_range) {
+//        continue;
+//      }
+
+//      coord_idx = coord_zyx[0] * config_.grid_size_y_ * config_.grid_size_x_ +
+//                  coord_zyx[1] * config_.grid_size_x_ + coord_zyx[2];
+//      voxel_idx = coord_to_voxel_idx[coord_idx];
+//      if (voxel_idx == -1) {
+//        voxel_idx = voxel_cnt;
+//        if (voxel_cnt >= config_.max_voxel_size_) {
+//          continue;
+//        }
+
+//        voxel_cnt++;
+//        coord_to_voxel_idx[coord_idx] = voxel_idx;
+//        for (std::size_t di = 0; di < config_.point_dim_size_; di++) {
+//          coordinates[voxel_idx * config_.point_dim_size_ + di] = coord_zyx[di];
+//        }
+//      }
+
+//      point_cnt = num_points_per_voxel[voxel_idx];
+//      if (point_cnt < config_.max_point_in_voxel_size_) {
+//        for (std::size_t fi = 0; fi < config_.point_feature_size_; fi++) {
+//          voxels
+//            [voxel_idx * config_.max_point_in_voxel_size_ * config_.point_feature_size_ +
+//             point_cnt * config_.point_feature_size_ + fi] = point[fi];
+//        }
+//        num_points_per_voxel[voxel_idx]++;
+//      }
+//    }
+//  }
+
+//  return voxel_cnt;
+//}
+
+}  // namespace centerpoint

+ 95 - 0
src/detection/detection_lidar_centerpoint/lib/ros_utils.cpp

@@ -0,0 +1,95 @@
+// Copyright 2022 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "lidar_centerpoint/ros_utils.hpp"
+
+#include "perception_utils/perception_utils.hpp"
+#include "tier4_autoware_utils/geometry/geometry.hpp"
+#include "tier4_autoware_utils/math/constants.hpp"
+
+namespace centerpoint
+{
+
+using Label = autoware_auto_perception_msgs::msg::ObjectClassification;
+
+// Convert one raw network detection (Box3D) into an autoware DetectedObject:
+// classification, pose, bounding-box shape and (optionally) twist.
+void box3DToDetectedObject(
+  const Box3D & box3d, const std::vector<std::string> & class_names, const bool has_twist,
+  autoware_auto_perception_msgs::msg::DetectedObject & obj)
+{
+  // TODO(yukke42): the value of classification confidence of DNN, not probability.
+  obj.existence_probability = box3d.score;
+
+  // classification: map the class index through class_names; out-of-range
+  // indices fall back to UNKNOWN with a warning.
+  autoware_auto_perception_msgs::msg::ObjectClassification classification;
+  classification.probability = 1.0f;
+  if (box3d.label >= 0 && static_cast<size_t>(box3d.label) < class_names.size()) {
+    classification.label = getSemanticType(class_names[box3d.label]);
+  } else {
+    classification.label = Label::UNKNOWN;
+    RCLCPP_WARN_STREAM(
+      rclcpp::get_logger("lidar_centerpoint"), "Unexpected label: UNKNOWN is set.");
+  }
+
+  // Car-like classes: mark the orientation sign as unknown.
+  if (perception_utils::isCarLikeVehicle(classification.label)) {
+    obj.kinematics.orientation_availability =
+      autoware_auto_perception_msgs::msg::DetectedObjectKinematics::SIGN_UNKNOWN;
+  }
+
+  obj.classification.emplace_back(classification);
+
+  // pose and shape
+  // mmdet3d yaw format to ros yaw format
+  float yaw = -box3d.yaw - tier4_autoware_utils::pi / 2;
+  obj.kinematics.pose_with_covariance.pose.position =
+    tier4_autoware_utils::createPoint(box3d.x, box3d.y, box3d.z);
+  obj.kinematics.pose_with_covariance.pose.orientation =
+    tier4_autoware_utils::createQuaternionFromYaw(yaw);
+  obj.shape.type = autoware_auto_perception_msgs::msg::Shape::BOUNDING_BOX;
+  obj.shape.dimensions =
+    tier4_autoware_utils::createTranslation(box3d.length, box3d.width, box3d.height);
+
+  // twist: linear.x is the speed magnitude.
+  // NOTE(review): angular.z = 2 * (velocity direction - yaw) -- the factor 2
+  // looks like a turn-rate convention inherited from upstream; confirm.
+  if (has_twist) {
+    float vel_x = box3d.vel_x;
+    float vel_y = box3d.vel_y;
+    geometry_msgs::msg::Twist twist;
+    twist.linear.x = std::sqrt(std::pow(vel_x, 2) + std::pow(vel_y, 2));
+    twist.angular.z = 2 * (std::atan2(vel_y, vel_x) - yaw);
+    obj.kinematics.twist_with_covariance.twist = twist;
+    obj.kinematics.has_twist = has_twist;
+  }
+}
+
+// Map a model class-name string (as listed in class_names) to the autoware
+// ObjectClassification label. Note "MOTORBIKE" maps to Label::MOTORCYCLE;
+// any unrecognized name maps to UNKNOWN.
+uint8_t getSemanticType(const std::string & class_name)
+{
+  if (class_name == "CAR") {
+    return Label::CAR;
+  } else if (class_name == "TRUCK") {
+    return Label::TRUCK;
+  } else if (class_name == "BUS") {
+    return Label::BUS;
+  } else if (class_name == "TRAILER") {
+    return Label::TRAILER;
+  } else if (class_name == "BICYCLE") {
+    return Label::BICYCLE;
+  } else if (class_name == "MOTORBIKE") {
+    return Label::MOTORCYCLE;
+  } else if (class_name == "PEDESTRIAN") {
+    return Label::PEDESTRIAN;
+  } else {
+    return Label::UNKNOWN;
+  }
+}
+
+}  // namespace centerpoint

+ 34 - 0
src/detection/detection_lidar_centerpoint/lib/utils.cpp

@@ -0,0 +1,34 @@
+// Copyright 2022 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "lidar_centerpoint/utils.hpp"
+
+#include <stdexcept>
+
+namespace centerpoint
+{
+// cspell: ignore divup
+// cspell: ignore divup
+// Ceiling division: smallest k with k * b >= a, i.e. ceil(a / b).
+// NOTE(review): a == 0 is rejected even though ceil(0 / b) == 0 is well
+// defined -- presumably a deliberate guard against empty work sizes; confirm
+// callers never pass 0.
+std::size_t divup(const std::size_t a, const std::size_t b)
+{
+  if (a == 0) {
+    throw std::runtime_error("A dividend of divup isn't positive.");
+  }
+  if (b == 0) {
+    // b == 0 would be a division by zero below.
+    throw std::runtime_error("A divisor of divup isn't positive.");
+  }
+
+  return (a + b - 1) / b;
+}
+
+}  // namespace centerpoint

+ 59 - 0
src/detection/detection_lidar_centerpoint/main.cpp

@@ -0,0 +1,59 @@
+#include <QCoreApplication>
+
+
+
+#include <lidar_centerpoint/centerpoint_config.hpp>
+#include <lidar_centerpoint/centerpoint_trt.hpp>
+#include <lidar_centerpoint/preprocess/pointcloud_densification.hpp>
+
+using namespace centerpoint;
+
+std::unique_ptr<CenterPointTRT> detector_ptr_{nullptr};
+
+// Build the global CenterPoint TensorRT detector with hard-coded deployment
+// parameters (model paths, thresholds). In the ROS version these came from
+// node parameters -- see the commented-out declare_parameter calls.
+void init()
+{
+    const float score_threshold = 0.35;
+    const float circle_nms_dist_threshold =1.5;
+  //    static_cast<float>(this->declare_parameter<double>("circle_nms_dist_threshold"));
+    std::vector<double> yaw_norm_thresholds ;
+    const std::string densification_world_frame_id = "map";
+    const int densification_num_past_frames = 1;
+    const std::string trt_precision = "fp16";
+    const std::string encoder_onnx_path = "/home/nvidia/models/pts_voxel_encoder_centerpoint.onnx";//this->declare_parameter<std::string>("encoder_onnx_path");
+    const std::string encoder_engine_path ="/home/nvidia/models/pts_voxel_encoder_centerpoint.eng";//this->declare_parameter<std::string>("encoder_engine_path");
+    const std::string head_onnx_path = "/home/nvidia/models/pts_backbone_neck_head_centerpoint.onnx";//this->declare_parameter<std::string>("head_onnx_path");
+    const std::string head_engine_path ="/home/nvidia/models/pts_backbone_neck_head_centerpoint.eng" ;//this->declare_parameter<std::string>("head_engine_path");
+    const std::size_t point_feature_size =4;
+    const std::size_t max_voxel_size =40000;
+    // NOTE(review): yaw_norm_thresholds, point_cloud_range and voxel_size are
+    // passed to CenterPointConfig empty -- presumably the config falls back to
+    // defaults; confirm, otherwise the grid/range setup will be ill-formed.
+    std::vector<double> point_cloud_range ;
+    std::vector<double>voxel_size ;
+    const std::size_t downsample_factor =2;
+    const std::size_t encoder_in_feature_size = 9;
+    // NOTE(review): the three remapper matrices below are declared but never
+    // used -- DetectionClassRemapper is not wired up in this entry point.
+    std::vector<int64_t> allow_remapping_by_area_matrix;
+    std::vector<double> min_area_matrix ;
+    std::vector<double> max_area_matrix ;
+
+
+    NetworkParam encoder_param(encoder_onnx_path, encoder_engine_path, trt_precision);
+    NetworkParam head_param(head_onnx_path, head_engine_path, trt_precision);
+    DensificationParam densification_param(
+      densification_world_frame_id, densification_num_past_frames);
+
+    // class_size = 3; builds (or loads cached) TensorRT engines on startup.
+    CenterPointConfig config(
+      3, point_feature_size, max_voxel_size, point_cloud_range, voxel_size,
+      downsample_factor, encoder_in_feature_size, score_threshold, circle_nms_dist_threshold,
+      yaw_norm_thresholds);
+    detector_ptr_ =
+      std::make_unique<CenterPointTRT>(encoder_param, head_param, densification_param, config);
+
+    std::cout<<"init  complete."<<std::endl;
+
+}
+
+// Entry point: construct the detector once, then run the Qt event loop.
+// No work is scheduled from this file; inputs are wired up elsewhere.
+int main(int argc, char *argv[])
+{
+    QCoreApplication a(argc, argv);
+    init();
+
+    return a.exec();
+}

+ 223 - 0
src/detection/detection_lidar_centerpoint/tensorrt_common/tensorrt_common.cpp

@@ -0,0 +1,223 @@
+// Copyright 2020 Tier IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <tensorrt_common/tensorrt_common.hpp>
+
+#include <NvInferPlugin.h>
+#include <dlfcn.h>
+
+#include <fstream>
+#include <functional>
+#include <memory>
+#include <string>
+#include <utility>
+
+namespace tensorrt_common
+{
+
+// Construct the TensorRT helper: remember the model path, precision, batch
+// configuration and workspace limit; eagerly dlopen any plugin libraries;
+// create the inference runtime and register the standard nvinfer plugins.
+// Engine/context creation is deferred to setup().
+TrtCommon::TrtCommon(
+  const std::string & model_path, const std::string & precision,
+  std::unique_ptr<nvinfer1::IInt8EntropyCalibrator2> calibrator,
+  const tensorrt_common::BatchConfig & batch_config, const size_t max_workspace_size,
+  const std::vector<std::string> & plugin_paths)
+: model_file_path_(model_path),
+  calibrator_(std::move(calibrator)),
+  precision_(precision),
+  batch_config_(batch_config),
+  max_workspace_size_(max_workspace_size)
+{
+  for (const auto & plugin_path : plugin_paths) {
+    int32_t flags{RTLD_LAZY};
+// cspell: ignore asan
+#if ENABLE_ASAN
+    // https://github.com/google/sanitizers/issues/89
+    // asan doesn't handle module unloading correctly and there are no plans on doing
+    // so. In order to get proper stack traces, don't delete the shared library on
+    // close so that asan can resolve the symbols correctly.
+    flags |= RTLD_NODELETE;
+#endif  // ENABLE_ASAN
+    // The handle is not stored or dlclose'd, so the plugin stays resident for
+    // the process lifetime; a failed load is only logged, not fatal.
+    void * handle = dlopen(plugin_path.c_str(), flags);
+    if (!handle) {
+      logger_.log(nvinfer1::ILogger::Severity::kERROR, "Could not load plugin library");
+    }
+  }
+  runtime_ = TrtUniquePtr<nvinfer1::IRuntime>(nvinfer1::createInferRuntime(logger_));
+  initLibNvInferPlugins(&logger_, "");
+}
+
+// Prepare the engine and execution context from model_file_path_:
+//  - ".engine": deserialize directly;
+//  - ".onnx":  reuse a cached ".engine" next to it, otherwise build and cache;
+//  - anything else (or a missing file): mark not initialized and bail out.
+// On success is_initialized_ is set; callers must check isInitialized().
+void TrtCommon::setup()
+{
+  if (!fs::exists(model_file_path_)) {
+    is_initialized_ = false;
+    return;
+  }
+  if (model_file_path_.extension() == ".engine") {
+    // NOTE(review): loadEngine's return value is ignored here; if
+    // deserialization fails, engine_ stays null and createExecutionContext
+    // below dereferences a null pointer -- confirm upstream behavior.
+    loadEngine(model_file_path_);
+  } else if (model_file_path_.extension() == ".onnx") {
+    fs::path cache_engine_path{model_file_path_};
+    cache_engine_path.replace_extension("engine");
+    if (fs::exists(cache_engine_path)) {
+      loadEngine(cache_engine_path);
+    } else {
+      logger_.log(nvinfer1::ILogger::Severity::kINFO, "Start build engine");
+      buildEngineFromOnnx(model_file_path_, cache_engine_path);
+      logger_.log(nvinfer1::ILogger::Severity::kINFO, "End build engine");
+    }
+  } else {
+    is_initialized_ = false;
+    return;
+  }
+
+  context_ = TrtUniquePtr<nvinfer1::IExecutionContext>(engine_->createExecutionContext());
+  if (!context_) {
+    logger_.log(nvinfer1::ILogger::Severity::kERROR, "Fail to create context");
+    is_initialized_ = false;
+    return;
+  }
+
+  is_initialized_ = true;
+}
+
+// Deserialize a prebuilt TensorRT engine from disk into engine_.
+//
+// Reads the whole file into memory and hands it to the runtime. Returns
+// false if the file cannot be opened or deserialization yields no engine.
+// Fixes vs. the original: the file is now opened in binary mode (text mode
+// corrupts serialized engine data on some platforms), and failures are
+// reported instead of unconditionally returning true.
+bool TrtCommon::loadEngine(const std::string & engine_file_path)
+{
+  std::ifstream engine_file(engine_file_path, std::ios::binary);
+  if (!engine_file) {
+    logger_.log(nvinfer1::ILogger::Severity::kERROR, "Fail to open engine file");
+    return false;
+  }
+  std::stringstream engine_buffer;
+  engine_buffer << engine_file.rdbuf();
+  std::string engine_str = engine_buffer.str();
+  engine_ = TrtUniquePtr<nvinfer1::ICudaEngine>(runtime_->deserializeCudaEngine(
+    reinterpret_cast<const void *>(engine_str.data()), engine_str.size()));
+  if (!engine_) {
+    logger_.log(nvinfer1::ILogger::Severity::kERROR, "Fail to deserialize engine");
+    return false;
+  }
+  return true;
+}
+
+// Build a TensorRT engine from an ONNX model and cache the serialized plan
+// at output_engine_file_path.
+//
+// Precision "fp16" or "int8" enables the FP16 flag; INT8 additionally
+// requires a calibrator. The optimization profile is derived from the first
+// network input, assumed 4-D (N, C, H, W), with N swept over batch_config_
+// (min/opt/max). Returns false on any builder, parser, serialization or
+// file-write failure.
+// Fix vs. the original: the TensorRT version test below used the undefined
+// macro NV_TENSOR_PATCH, which silently evaluates to 0 inside #if; it now
+// uses NV_TENSORRT_PATCH from NvInferVersion.h.
+bool TrtCommon::buildEngineFromOnnx(
+  const std::string & onnx_file_path, const std::string & output_engine_file_path)
+{
+  auto builder = TrtUniquePtr<nvinfer1::IBuilder>(nvinfer1::createInferBuilder(logger_));
+  if (!builder) {
+    logger_.log(nvinfer1::ILogger::Severity::kERROR, "Fail to create builder");
+    return false;
+  }
+
+  // Networks are created with an explicit batch dimension.
+  const auto explicitBatch =
+    1U << static_cast<uint32_t>(nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
+
+  auto network =
+    TrtUniquePtr<nvinfer1::INetworkDefinition>(builder->createNetworkV2(explicitBatch));
+  if (!network) {
+    logger_.log(nvinfer1::ILogger::Severity::kERROR, "Fail to create network");
+    return false;
+  }
+
+  auto config = TrtUniquePtr<nvinfer1::IBuilderConfig>(builder->createBuilderConfig());
+  if (!config) {
+    logger_.log(nvinfer1::ILogger::Severity::kERROR, "Fail to create builder config");
+    return false;
+  }
+
+  if (precision_ == "fp16" || precision_ == "int8") {
+    config->setFlag(nvinfer1::BuilderFlag::kFP16);
+  }
+  // TensorRT >= 8.4.0 deprecated setMaxWorkspaceSize in favor of memory pools.
+#if (NV_TENSORRT_MAJOR * 1000) + (NV_TENSORRT_MINOR * 100) + NV_TENSORRT_PATCH >= 8400
+  config->setMemoryPoolLimit(nvinfer1::MemoryPoolType::kWORKSPACE, max_workspace_size_);
+#else
+  config->setMaxWorkspaceSize(max_workspace_size_);
+#endif
+
+  // Parse the ONNX file; the parser logs its own diagnostics at kERROR.
+  auto parser = TrtUniquePtr<nvonnxparser::IParser>(nvonnxparser::createParser(*network, logger_));
+  if (!parser->parseFromFile(
+        onnx_file_path.c_str(), static_cast<int>(nvinfer1::ILogger::Severity::kERROR))) {
+    return false;
+  }
+
+  // NOTE(review): assumes the first input is 4-D (N, C, H, W); models with a
+  // different input rank would read garbage dims here -- confirm for the
+  // CenterPoint ONNX graphs.
+  const auto input = network->getInput(0);
+  const auto input_dims = input->getDimensions();
+  const auto input_channel = input_dims.d[1];
+  const auto input_height = input_dims.d[2];
+  const auto input_width = input_dims.d[3];
+
+  // Sweep only the batch dimension across min/opt/max.
+  auto profile = builder->createOptimizationProfile();
+  profile->setDimensions(
+    network->getInput(0)->getName(), nvinfer1::OptProfileSelector::kMIN,
+    nvinfer1::Dims4{batch_config_.at(0), input_channel, input_height, input_width});
+  profile->setDimensions(
+    network->getInput(0)->getName(), nvinfer1::OptProfileSelector::kOPT,
+    nvinfer1::Dims4{batch_config_.at(1), input_channel, input_height, input_width});
+  profile->setDimensions(
+    network->getInput(0)->getName(), nvinfer1::OptProfileSelector::kMAX,
+    nvinfer1::Dims4{batch_config_.at(2), input_channel, input_height, input_width});
+  config->addOptimizationProfile(profile);
+
+  if (precision_ == "int8" && calibrator_) {
+    config->setFlag(nvinfer1::BuilderFlag::kINT8);
+    config->setInt8Calibrator(calibrator_.get());
+  }
+
+  // TENSORRT_VERSION_MAJOR is expected to come from the build system.
+#if TENSORRT_VERSION_MAJOR >= 8
+  // TRT 8+: serialize first, then deserialize into an engine.
+  auto plan =
+    TrtUniquePtr<nvinfer1::IHostMemory>(builder->buildSerializedNetwork(*network, *config));
+  if (!plan) {
+    logger_.log(nvinfer1::ILogger::Severity::kERROR, "Fail to create host memory");
+    return false;
+  }
+  engine_ = TrtUniquePtr<nvinfer1::ICudaEngine>(
+    runtime_->deserializeCudaEngine(plan->data(), plan->size()));
+#else
+  engine_ = TrtUniquePtr<nvinfer1::ICudaEngine>(builder->buildEngineWithConfig(*network, *config));
+#endif
+
+  if (!engine_) {
+    logger_.log(nvinfer1::ILogger::Severity::kERROR, "Fail to create engine");
+    return false;
+  }
+
+  // Cache the serialized plan so the next startup can skip the build.
+#if TENSORRT_VERSION_MAJOR < 8
+  auto data = TrtUniquePtr<nvinfer1::IHostMemory>(engine_->serialize());
+#endif
+  std::ofstream file;
+  file.open(output_engine_file_path, std::ios::binary | std::ios::out);
+  if (!file.is_open()) {
+    return false;
+  }
+#if TENSORRT_VERSION_MAJOR < 8
+  file.write(reinterpret_cast<const char *>(data->data()), data->size());
+#else
+  file.write(reinterpret_cast<const char *>(plan->data()), plan->size());
+#endif
+
+  file.close();
+
+  return true;
+}
+
+// True once setup() produced a usable engine and execution context.
+bool TrtCommon::isInitialized() { return is_initialized_; }
+
+// Dimensions of the binding at `index` as resolved by the execution context
+// (reflects any dynamic dimensions set via setBindingDimensions).
+nvinfer1::Dims TrtCommon::getBindingDimensions(const int32_t index) const
+{
+  return context_->getBindingDimensions(index);
+}
+
+// Total number of input + output bindings of the loaded engine.
+int32_t TrtCommon::getNbBindings() { return engine_->getNbBindings(); }
+
+// Set concrete dimensions for a dynamic input binding; returns false if the
+// context rejects the dimensions.
+bool TrtCommon::setBindingDimensions(const int32_t index, const nvinfer1::Dims & dimensions) const
+{
+  return context_->setBindingDimensions(index, dimensions);
+}
+
+// Enqueue asynchronous inference on `stream`. `input_consumed` (optional)
+// is signalled once the input buffers may be reused.
+bool TrtCommon::enqueueV2(void ** bindings, cudaStream_t stream, cudaEvent_t * input_consumed)
+{
+  return context_->enqueueV2(bindings, stream, input_consumed);
+}
+
+}  // namespace tensorrt_common

+ 142 - 0
src/detection/detection_lidar_centerpoint/tensorrt_common/tensorrt_common.hpp

@@ -0,0 +1,142 @@
+// Copyright 2022 Tier IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef TENSORRT_COMMON__TENSORRT_COMMON_HPP_
+#define TENSORRT_COMMON__TENSORRT_COMMON_HPP_
+
+//#include <rclcpp/rclcpp.hpp>
+
+#include <NvInfer.h>
+#include <NvOnnxParser.h>
+
+// Select std::filesystem when available (MSVC, or GCC >= 7).
+// BUGFIX: the original condition tested __GNUC_MAJOR__, which is not a macro
+// GCC predefines (the major-version macro is __GNUC__).  An undefined
+// identifier evaluates to 0 inside #if, so GCC builds always fell back to
+// <experimental/filesystem> regardless of compiler version.
+#if (defined(_MSC_VER) or (defined(__GNUC__) and (7 <= __GNUC__)))
+#include <filesystem>
+namespace fs = ::std::filesystem;
+#else
+#include <experimental/filesystem>
+namespace fs = ::std::experimental::filesystem;
+#endif
+
+#include <array>  // std::array, used by the BatchConfig alias below
+#include <memory>
+#include <sstream>
+#include <string>
+#include <vector>
+
+#include <iostream>
+
+namespace tensorrt_common
+{
+// Adapter that satisfies TensorRT's nvinfer1::ILogger interface.  Messages at
+// or below `reportable_severity_` are printed to stdout; the original ROS2
+// logging calls are kept commented out for reference.
+class Logger : public nvinfer1::ILogger  // NOLINT
+{
+public:
+  // Default threshold is kINFO (the delegating ctor overrides the kWARNING
+  // default member initializer below).
+  Logger() : Logger(Severity::kINFO) {}
+
+  explicit Logger(Severity severity) : reportable_severity_(severity) {}
+
+  // Called by TensorRT for every diagnostic message.
+  void log(Severity severity, const char * msg) noexcept override
+  {
+    // suppress messages with severity enum value greater than the reportable
+    if (severity > reportable_severity_) {
+      return;
+    }
+
+    // NOTE: with RCLCPP disabled every branch currently does the same thing
+    // (print to stdout); the switch is kept so per-severity sinks can be
+    // restored easily.
+    switch (severity) {
+      case Severity::kINTERNAL_ERROR:
+ //       RCLCPP_ERROR_STREAM(logger_, msg);
+        std::cout<<msg<<std::endl;
+        break;
+      case Severity::kERROR:
+    //    RCLCPP_ERROR_STREAM(logger_, msg);
+        std::cout<<msg<<std::endl;
+        break;
+      case Severity::kWARNING:
+    //    RCLCPP_WARN_STREAM(logger_, msg);
+        std::cout<<msg<<std::endl;
+        break;
+      case Severity::kINFO:
+  //      RCLCPP_INFO_STREAM(logger_, msg);
+        std::cout<<msg<<std::endl;
+        break;
+      default:
+  //      RCLCPP_INFO_STREAM(logger_, msg);
+        std::cout<<msg<<std::endl;
+        break;
+    }
+  }
+
+  // Maximum severity that is reported (messages above this value are dropped).
+  Severity reportable_severity_{Severity::kWARNING};
+ // rclcpp::Logger logger_{rclcpp::get_logger("tensorrt_common")};
+
+};
+
+// Deleter for TensorRT objects: TensorRT 8 deprecated/removed the destroy()
+// member, so objects are released with plain `delete`; older versions require
+// calling destroy() instead.
+template <typename T>
+struct InferDeleter  // NOLINT
+{
+  void operator()(T * obj) const
+  {
+    if (obj) {
+#if TENSORRT_VERSION_MAJOR >= 8
+      delete obj;
+#else
+      obj->destroy();
+#endif
+    }
+  }
+};
+
+// unique_ptr specialization that releases TensorRT objects correctly.
+template <typename T>
+using TrtUniquePtr = std::unique_ptr<T, InferDeleter<T>>;
+
+// Three batch sizes; presumably {min, opt, max} for the optimization
+// profile — TODO confirm against the TrtCommon implementation.
+using BatchConfig = std::array<int32_t, 3>;
+
+// Thin RAII wrapper around a TensorRT runtime/engine/execution-context triple.
+// Loads a serialized engine or builds one from ONNX, then exposes the binding
+// and enqueue API needed for inference.
+class TrtCommon  // NOLINT
+{
+public:
+  // @param model_path          path to the .onnx (or engine) model file
+  // @param precision           "fp32", "fp16" or "int8"
+  // @param calibrator          INT8 entropy calibrator (only used with "int8")
+  // @param batch_config        three batch sizes (see BatchConfig)
+  // @param max_workspace_size  builder workspace budget in bytes (default 16 MiB)
+  // @param plugin_paths        shared libraries with custom TRT plugins
+  TrtCommon(
+    const std::string & model_path, const std::string & precision,
+    std::unique_ptr<nvinfer1::IInt8EntropyCalibrator2> calibrator = nullptr,
+    const BatchConfig & batch_config = {1, 1, 1}, const size_t max_workspace_size = (16 << 20),
+    const std::vector<std::string> & plugin_paths = {});
+
+  // Deserializes a prebuilt engine from disk.
+  bool loadEngine(const std::string & engine_file_path);
+  // Builds an engine from an ONNX model and serializes it to disk.
+  bool buildEngineFromOnnx(
+    const std::string & onnx_file_path, const std::string & output_engine_file_path);
+  // Prepares runtime/engine/context; sets is_initialized_ on success.
+  void setup();
+
+  bool isInitialized();
+
+  nvinfer1::Dims getBindingDimensions(const int32_t index) const;
+  int32_t getNbBindings();
+  bool setBindingDimensions(const int32_t index, const nvinfer1::Dims & dimensions) const;
+  bool enqueueV2(void ** bindings, cudaStream_t stream, cudaEvent_t * input_consumed);
+
+private:
+  Logger logger_;
+  fs::path model_file_path_;
+  TrtUniquePtr<nvinfer1::IRuntime> runtime_;
+  TrtUniquePtr<nvinfer1::ICudaEngine> engine_;
+  TrtUniquePtr<nvinfer1::IExecutionContext> context_;
+  std::unique_ptr<nvinfer1::IInt8EntropyCalibrator2> calibrator_;
+
+  nvinfer1::Dims input_dims_;
+  nvinfer1::Dims output_dims_;
+  std::string precision_;
+  BatchConfig batch_config_;
+  size_t max_workspace_size_;
+  bool is_initialized_{false};
+};
+
+}  // namespace tensorrt_common
+
+#endif  // TENSORRT_COMMON__TENSORRT_COMMON_HPP_

+ 194 - 0
src/detection/detection_lidar_cnn_segmentation/cluster2d.cpp

@@ -44,6 +44,186 @@ void Cluster2D::traverse(Node *x)
     }
 }
 
+// Raw-pointer overload of cluster(): groups BEV grid cells into obstacle
+// candidates using the network's objectness map (category_pt_data) and
+// center-offset maps (instance_pt_{x,y}_data), via union-find.  Unlike the
+// caffe::Blob overload, this one takes plain float buffers so it can run on a
+// worker thread after the caller has copied the network outputs out.
+// Results are left in obstacles_ / id_img_ / point2grid_ for filter(),
+// classify() and getObjectVector().
+void Cluster2D::cluster(const float * category_pt_data,
+             const float *instance_pt_x_data,
+             const float *instance_pt_y_data,
+             const pcl::PointCloud<pcl::PointXYZI>::Ptr &pc_ptr,
+             const pcl::PointIndices &valid_indices,
+             float objectness_thresh, bool use_all_grids_for_clustering)
+{
+    pc_ptr_ = pc_ptr;
+    std::vector<std::vector<Node>> nodes(rows_,
+                                         std::vector<Node>(cols_, Node()));
+
+    // map points into grids
+    size_t tot_point_num = pc_ptr_->size();
+    valid_indices_in_pc_ = &(valid_indices.indices);
+    CHECK_LE(valid_indices_in_pc_->size(), tot_point_num);
+    point2grid_.assign(valid_indices_in_pc_->size(), -1);
+
+    // Phase 1: bin each valid point into a grid cell and count hits.
+    int64 time1 = std::chrono::system_clock::now().time_since_epoch().count();
+    for (size_t i = 0; i < valid_indices_in_pc_->size(); ++i)
+    {
+        int point_id = valid_indices_in_pc_->at(i);
+        CHECK_GE(point_id, 0);
+        CHECK_LT(point_id, static_cast<int>(tot_point_num));
+        const auto &point = pc_ptr_->points[point_id];
+        // * the coordinates of x and y have been exchanged in feature generation
+        // step,
+        // so we swap them back here.
+        int pos_x = F2I(point.y, range_, inv_res_x_);  // col
+        int pos_y = F2I(point.x, range_, inv_res_y_);  // row
+        if (IsValidRowCol(pos_y, pos_x))
+        {
+            // get grid index and count point number for corresponding node
+            point2grid_[i] = RowCol2Grid(pos_y, pos_x);
+            nodes[pos_y][pos_x].point_num++;
+        }
+    }
+
+    // Phase 2: for every cell, decide objectness and link it toward the
+    // instance-center cell predicted by the offset maps (clamped in-bounds).
+    int64 time2 = std::chrono::system_clock::now().time_since_epoch().count();
+    // construct graph with center offset prediction and objectness
+    for (int row = 0; row < rows_; ++row)
+    {
+        for (int col = 0; col < cols_; ++col)
+        {
+            int grid = RowCol2Grid(row, col);
+            Node *node = &nodes[row][col];
+            DisjointSetMakeSet(node);
+            node->is_object =
+                    (use_all_grids_for_clustering || nodes[row][col].point_num > 0) &&
+                    (*(category_pt_data + grid) >= objectness_thresh);
+            int center_row = std::round(row + instance_pt_x_data[grid] * scale_);
+            int center_col = std::round(col + instance_pt_y_data[grid] * scale_);
+            center_row = std::min(std::max(center_row, 0), rows_ - 1);
+            center_col = std::min(std::max(center_col, 0), cols_ - 1);
+            node->center_node = &nodes[center_row][center_col];
+        }
+    }
+
+    // Phase 3: follow center links to union each object cell with its root.
+    int64 time3 = std::chrono::system_clock::now().time_since_epoch().count();
+    // traverse nodes
+    for (int row = 0; row < rows_; ++row)
+    {
+        for (int col = 0; col < cols_; ++col)
+        {
+            Node *node = &nodes[row][col];
+            if (node->is_object && node->traversed == 0)
+            {
+                traverse(node);
+            }
+        }
+    }
+
+    // Phase 4: merge center cells that are 4-neighbours of each other so that
+    // adjacent instance centers form a single obstacle.
+    int64 time4 = std::chrono::system_clock::now().time_since_epoch().count();
+    for (int row = 0; row < rows_; ++row)
+    {
+        for (int col = 0; col < cols_; ++col)
+        {
+            Node *node = &nodes[row][col];
+            if (!node->is_center)
+            {
+                continue;
+            }
+            for (int row2 = row - 1; row2 <= row + 1; ++row2)
+            {
+                for (int col2 = col - 1; col2 <= col + 1; ++col2)
+                {
+                    if ((row2 == row || col2 == col) && IsValidRowCol(row2, col2))
+                    {
+                        Node *node2 = &nodes[row2][col2];
+                        if (node2->is_center)
+                        {
+                            DisjointSetUnion(node, node2);
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    // Phase 5: assign one obstacle id per union-find root and record every
+    // member cell in id_img_ / the obstacle's grid list.
+    int64 time5 = std::chrono::system_clock::now().time_since_epoch().count();
+    int count_obstacles = 0;
+    obstacles_.clear();
+    id_img_.assign(grids_, -1);
+    for (int row = 0; row < rows_; ++row)
+    {
+        for (int col = 0; col < cols_; ++col)
+        {
+            Node *node = &nodes[row][col];
+            if (!node->is_object)
+            {
+                continue;
+            }
+            Node *root = DisjointSetFind(node);
+            if (root->obstacle_id < 0)
+            {
+                root->obstacle_id = count_obstacles++;
+                CHECK_EQ(static_cast<int>(obstacles_.size()), count_obstacles - 1);
+                obstacles_.push_back(Obstacle());
+            }
+            int grid = RowCol2Grid(row, col);
+            CHECK_GE(root->obstacle_id, 0);
+            id_img_[grid] = root->obstacle_id;
+            obstacles_[root->obstacle_id].grids.push_back(grid);
+        }
+    }
+    CHECK_EQ(static_cast<size_t>(count_obstacles), obstacles_.size());
+    int64 time6 = std::chrono::system_clock::now().time_since_epoch().count();
+
+//    std::cout<<" t1: "<<(time2 - time1)/1000<<" t2: "<<(time3 - time2)/1000<<" t3: "<<(time4 - time3)/1000
+//            <<" t4: "<<(time5 - time4)/1000<<" t5: "<<(time6 - time5)/1000<<std::endl;
+}
+
+// Raw-pointer overload of filter(): averages the per-cell confidence and
+// height maps over each obstacle's member grids, storing the means in
+// obs->score / obs->height, and resets each obstacle's point cloud buffer.
+void Cluster2D::filter(const float *confidence_pt_data,
+            const float *height_pt_data)
+{
+    for (size_t obstacle_id = 0; obstacle_id < obstacles_.size();
+         obstacle_id++)
+    {
+        Obstacle *obs = &obstacles_[obstacle_id];
+        CHECK_GT(obs->grids.size(), 0);
+        double score = 0.0;
+        double height = 0.0;
+        for (int grid : obs->grids)
+        {
+            score += static_cast<double>(confidence_pt_data[grid]);
+            height += static_cast<double>(height_pt_data[grid]);
+        }
+        // Mean confidence and mean predicted height across the obstacle.
+        obs->score = score / static_cast<double>(obs->grids.size());
+        obs->height = height / static_cast<double>(obs->grids.size());
+        obs->cloud_ptr.reset(new pcl::PointCloud<pcl::PointXYZI>);
+    }
+}
+
+// Raw-pointer overload of classify(): accumulates the per-class score maps
+// (laid out channel-major: class k at offset k * grids_) over each obstacle's
+// grids, normalizes to per-class means, and picks the argmax as meta_type.
+void Cluster2D::classify(const float *classify_pt_data,int num_classes )
+{
+    CHECK_EQ(num_classes, MAX_META_TYPE);
+    for (size_t obs_id = 0; obs_id < obstacles_.size(); obs_id++)
+    {
+        Obstacle *obs = &obstacles_[obs_id];
+        for (size_t grid_id = 0; grid_id < obs->grids.size(); grid_id++)
+        {
+            int grid = obs->grids[grid_id];
+            for (int k = 0; k < num_classes; k++)
+            {
+                obs->meta_type_probs[k] += classify_pt_data[k * grids_ + grid];
+            }
+        }
+        int meta_type_id = 0;
+        for (int k = 0; k < num_classes; k++)
+        {
+            obs->meta_type_probs[k] /= obs->grids.size();
+            if (obs->meta_type_probs[k] > obs->meta_type_probs[meta_type_id])
+            {
+                meta_type_id = k;
+            }
+        }
+        obs->meta_type = static_cast<MetaType>(meta_type_id);
+    }
+}
+
+#include <chrono>
 void Cluster2D::cluster(const caffe::Blob<float> &category_pt_blob,
                         const caffe::Blob<float> &instance_pt_blob,
                         const pcl::PointCloud<pcl::PointXYZI>::Ptr &pc_ptr,
@@ -55,6 +235,7 @@ void Cluster2D::cluster(const caffe::Blob<float> &category_pt_blob,
     const float *instance_pt_y_data =
             instance_pt_blob.cpu_data() + instance_pt_blob.offset(0, 1);
 
+
     pc_ptr_ = pc_ptr;
     std::vector<std::vector<Node>> nodes(rows_,
                                          std::vector<Node>(cols_, Node()));
@@ -65,6 +246,7 @@ void Cluster2D::cluster(const caffe::Blob<float> &category_pt_blob,
     CHECK_LE(valid_indices_in_pc_->size(), tot_point_num);
     point2grid_.assign(valid_indices_in_pc_->size(), -1);
 
+    int64 time1 = std::chrono::system_clock::now().time_since_epoch().count();
     for (size_t i = 0; i < valid_indices_in_pc_->size(); ++i)
     {
         int point_id = valid_indices_in_pc_->at(i);
@@ -84,6 +266,7 @@ void Cluster2D::cluster(const caffe::Blob<float> &category_pt_blob,
         }
     }
 
+    int64 time2 = std::chrono::system_clock::now().time_since_epoch().count();
     // construct graph with center offset prediction and objectness
     for (int row = 0; row < rows_; ++row)
     {
@@ -103,6 +286,7 @@ void Cluster2D::cluster(const caffe::Blob<float> &category_pt_blob,
         }
     }
 
+    int64 time3 = std::chrono::system_clock::now().time_since_epoch().count();
     // traverse nodes
     for (int row = 0; row < rows_; ++row)
     {
@@ -115,6 +299,8 @@ void Cluster2D::cluster(const caffe::Blob<float> &category_pt_blob,
             }
         }
     }
+
+    int64 time4 = std::chrono::system_clock::now().time_since_epoch().count();
     for (int row = 0; row < rows_; ++row)
     {
         for (int col = 0; col < cols_; ++col)
@@ -141,6 +327,7 @@ void Cluster2D::cluster(const caffe::Blob<float> &category_pt_blob,
         }
     }
 
+    int64 time5 = std::chrono::system_clock::now().time_since_epoch().count();
     int count_obstacles = 0;
     obstacles_.clear();
     id_img_.assign(grids_, -1);
@@ -167,6 +354,10 @@ void Cluster2D::cluster(const caffe::Blob<float> &category_pt_blob,
         }
     }
     CHECK_EQ(static_cast<size_t>(count_obstacles), obstacles_.size());
+    int64 time6 = std::chrono::system_clock::now().time_since_epoch().count();
+
+    std::cout<<" t1: "<<(time2 - time1)/1000<<" t2: "<<(time3 - time2)/1000<<" t3: "<<(time4 - time3)/1000
+            <<" t4: "<<(time5 - time4)/1000<<" t5: "<<(time6 - time5)/1000<<std::endl;
 }
 
 void Cluster2D::filter(const caffe::Blob<float> &confidence_pt_blob,
@@ -194,7 +385,10 @@ void Cluster2D::filter(const caffe::Blob<float> &confidence_pt_blob,
 
 void Cluster2D::classify(const caffe::Blob<float> &classify_pt_blob)
 {
+//    int64_t time1 = std::chrono::system_clock::now().time_since_epoch().count()/1000000;
     const float *classify_pt_data = classify_pt_blob.cpu_data();
+//    int64_t time2 = std::chrono::system_clock::now().time_since_epoch().count()/1000000;
+//    std::cout<<"cpu data use: "<<(time2-time1)<<std::endl;
     int num_classes = classify_pt_blob.channels();
     CHECK_EQ(num_classes, MAX_META_TYPE);
     for (size_t obs_id = 0; obs_id < obstacles_.size(); obs_id++)

+ 16 - 0
src/detection/detection_lidar_cnn_segmentation/cluster2d.h

@@ -93,6 +93,10 @@ public:
                const pcl::PointIndices &valid_indices,
                float objectness_thresh, bool use_all_grids_for_clustering);
 
+
+
+
+
   void filter(const caffe::Blob<float> &confidence_pt_blob,
               const caffe::Blob<float> &height_pt_blob);
 
@@ -102,6 +106,18 @@ public:
                       const float height_thresh,
                       const int min_pts_num,std::vector<Obstacle> & objvec);
 
+ void cluster(const float * category_pt_data,
+              const float *instance_pt_x_data,
+              const float *instance_pt_y_data,
+              const pcl::PointCloud<pcl::PointXYZI>::Ptr &pc_ptr,
+              const pcl::PointIndices &valid_indices,
+              float objectness_thresh, bool use_all_grids_for_clustering);
+
+ void filter(const float *confidence_pt_data,
+             const float *height_pt_data);
+
+ void classify(const float *classify_pt_data,int num_classes );
+
 //  void getObjects(const float confidence_thresh,
 //                  const float height_thresh,
 //                  const int min_pts_num,

+ 189 - 2
src/detection/detection_lidar_cnn_segmentation/cnn_segmentation.cpp

@@ -1,7 +1,14 @@
 #include "cnn_segmentation.h"
 
+
+
 CNNSegmentation::CNNSegmentation()
 {
+
+    // Initialize the producer/consumer handshake flags for the clustering
+    // worker threads started later in init().
+    minferbuffer.mbNew = false;
+    mbThreadRun = true;
+    mcnnres.mbNew = false;
+
 }
 
 
@@ -80,15 +87,99 @@ bool CNNSegmentation::init(std::string proto_file,std::string weight_file,double
      }
 
      feature_generator_.reset(new FeatureGenerator());
-     if (!feature_generator_->init(feature_blob_.get(), use_constant_feature_))
+     if (!feature_generator_->init(feature_blob_.get(), use_constant_feature_,range_,width_,height_))
      {
   //     ROS_ERROR("[%s] Fail to Initialize feature generator for CNNSegmentation", __APP_NAME__);
        return false;
      }
 
+
+ int i;
+ for(i=0;i<3;i++)
+ {
+     std::thread * pt = new std::thread(&CNNSegmentation::threadCluster,this);
+     mpthreadv.push_back(pt);
+ }
+
   return true;
 }
 
+// Worker-thread body: polls minferbuffer for a fresh set of network outputs
+// (published by the async segment() overload), runs cluster/filter/classify
+// on a thread-local Cluster2D, and publishes the resulting obstacle vector
+// into mcnnres for GetRes().  Several instances run concurrently (init()
+// starts three); each consumes at most one pending buffer per iteration.
+// NOTE(review): loop exits when mbThreadRun goes false, but the threads
+// created in init() are never joined or deleted — confirm shutdown handling.
+void CNNSegmentation::threadCluster()
+{
+    std::shared_ptr<Cluster2D> xcluster2d;
+
+    // Per-thread Cluster2D instance so workers don't share mutable state.
+    xcluster2d.reset(new Cluster2D());
+    if (!xcluster2d->init(width_,height_,range_))
+//    if (!cluster2d_->init(512,512,60))
+    {
+        std::cout<<" CNNSegmentation::threadCluster "<<" cluster2d init fail."<<std::endl;
+ //     ROS_ERROR("[%s] Fail to Initialize cluster2d for CNNSegmentation", __APP_NAME__);
+      return ;
+    }
+    float objectness_thresh = 0.5;
+    bool use_all_grids_for_clustering = true;
+    float confidence_thresh = score_threshold_;
+    float height_thresh = 3.5;// 0.5;//3.5;//0.5;
+    int min_pts_num = 3;
+    while(mbThreadRun)
+    {
+        // Try to claim the pending buffer under the lock; copy it out so the
+        // heavy clustering work runs without holding the mutex.
+        bool bNew = false;
+        iv::Cluster2DData xCluster2DData;
+        mmutexbuffer.lock();
+        if(minferbuffer.mbNew)
+        {
+            xCluster2DData = minferbuffer.mData;
+            minferbuffer.mbNew = false;
+            bNew = true;
+        }
+        mmutexbuffer.unlock();
+        if(bNew == false)
+        {
+            std::this_thread::sleep_for(std::chrono::milliseconds(1));
+            continue;
+        }
+
+        xcluster2d->cluster(xCluster2DData.mcategory_pt_data_ptr.get(),
+                             xCluster2DData.minstance_pt_x_data_ptr.get(),
+                             xCluster2DData.minstance_pt_y_data_ptr.get(),
+                             xCluster2DData.mpointcloud,
+                             *xCluster2DData.mvalid_idx_ptr, objectness_thresh,
+                             use_all_grids_for_clustering);
+        xcluster2d->filter(xCluster2DData.mconfidence_pt_data_ptr.get(),xCluster2DData.mheight_pt_data_ptr.get());
+        xcluster2d->classify(xCluster2DData.mclassify_pt_data_ptr.get(),xCluster2DData.mnum_classes);
+
+        int64 nnow = std::chrono::system_clock::now().time_since_epoch().count()/1000000;
+        std::cout<<"time "<<xCluster2DData.mPointCloudUpdateTime<<" infer use: "<<(xCluster2DData.mInferUpdateTime - xCluster2DData.mPointCloudUpdateTime)
+                <<" cluster use: "<<(nnow - xCluster2DData.mInferUpdateTime)<<" total use:"<<(nnow - xCluster2DData.mPointCloudUpdateTime)<<std::endl;
+//        xcluster2d_->filter(*confidence_pt_blob_, *height_pt_blob_);
+//        xcluster2d_->classify(*class_pt_blob_);
+
+        std::vector<Obstacle> objvec;
+        xcluster2d->getObjectVector(confidence_thresh, height_thresh, min_pts_num,objvec);
+
+        // Publish the result; a newer result simply overwrites an unread one.
+        mmutexcnnres.lock();
+        mcnnres.mobjvec = objvec;
+        mcnnres.mcluster2ddata = xCluster2DData;
+        mcnnres.mbNew = true;
+        mmutexcnnres.unlock();
+
+    }
+}
+
+// Fetches the latest clustering result published by threadCluster().
+// @param xres  filled with the result when one is available.
+// @return 1 if a fresh result was copied out, 0 otherwise; the "new" flag is
+//         cleared so each result is consumed at most once.
+int CNNSegmentation::GetRes(iv::cnnres & xres)
+{
+    int nrtn = 0;
+    mmutexcnnres.lock();
+    if(mcnnres.mbNew)
+    {
+        xres = mcnnres;
+        mcnnres.mbNew = false;
+        nrtn = 1;
+    }
+    mmutexcnnres.unlock();
+    return nrtn;
+}
+
 bool CNNSegmentation::segment(const pcl::PointCloud<pcl::PointXYZI>::Ptr &pc_ptr,
                               const pcl::PointIndices &valid_idx,
                               std::vector<Obstacle> & objvec)
@@ -110,8 +201,13 @@ bool CNNSegmentation::segment(const pcl::PointCloud<pcl::PointXYZI>::Ptr &pc_ptr
 
     int64_t time3 = std::chrono::system_clock::now().time_since_epoch().count();
 
+
+
     float objectness_thresh = 0.5;
     bool use_all_grids_for_clustering = true;
+
+//    boost::shared_ptr<caffe::Blob<float>> xinstance_pt_blob = boost::shared_ptr<caffe::Blob<float>>(new caffe::Blob<float>());
+//    xinstance_pt_blob->CopyFrom(*instance_pt_blob_);
     cluster2d_->cluster(*category_pt_blob_, *instance_pt_blob_, pc_ptr,
                         valid_idx, objectness_thresh,
                         use_all_grids_for_clustering);
@@ -120,10 +216,13 @@ bool CNNSegmentation::segment(const pcl::PointCloud<pcl::PointXYZI>::Ptr &pc_ptr
     float confidence_thresh = score_threshold_;
     float height_thresh = 3.5;// 0.5;//3.5;//0.5;
     int min_pts_num = 3;
+    int64_t time4 = std::chrono::system_clock::now().time_since_epoch().count();
 
     cluster2d_->getObjectVector(confidence_thresh, height_thresh, min_pts_num,objvec);
 
-    int64_t time4 = std::chrono::system_clock::now().time_since_epoch().count();
+
+
+
 
 
     std::cout<<"infer time: "<<(time3-time2)/1000000<<" generate: "<<(time2-time1)/1000000<<" cluster: "<<(time4-time3)/1000000<<std::endl;
@@ -137,6 +236,94 @@ bool CNNSegmentation::segment(const pcl::PointCloud<pcl::PointXYZI>::Ptr &pc_ptr
 
 }
 
+
+
+// Asynchronous variant of segment(): runs feature generation and the caffe
+// forward pass on the caller's thread, copies the network output blobs into
+// heap buffers, and hands them to the clustering worker threads
+// (threadCluster) via minferbuffer instead of clustering inline.  Results are
+// retrieved later through GetRes().
+// @param pc_ptr         input point cloud (forwarded to the worker as-is).
+// @param valid_idx_ptr  indices of points eligible for clustering.
+// @return always true once the outputs have been queued.
+bool CNNSegmentation::segment(const pcl::PointCloud<pcl::PointXYZI>::Ptr &pc_ptr,
+                              pcl::PointIndicesPtr & valid_idx_ptr)
+{
+
+    int64 npctime = std::chrono::system_clock::now().time_since_epoch().count()/1000000;
+    if(use_gpu_)
+    {
+        caffe::Caffe::SetDevice(gpu_device_id_);
+        caffe::Caffe::set_mode(caffe::Caffe::GPU);
+    }
+
+    int64_t time1 = std::chrono::system_clock::now().time_since_epoch().count();
+
+    feature_generator_->generate(pc_ptr);
+
+    int64_t time2 = std::chrono::system_clock::now().time_since_epoch().count();
+
+    caffe_net_->Forward();
+
+    int64_t time3 = std::chrono::system_clock::now().time_since_epoch().count();
+
+    iv::Cluster2DData xCluster2DData;
+
+    // BUGFIX: these buffers are created with new[], so the shared_ptr must be
+    // given an array deleter; std::shared_ptr<float>(new float[n]) destroys
+    // the storage with scalar delete, which is undefined behaviour.
+    int nSize = width_*height_;
+    xCluster2DData.mcategory_pt_data_ptr = std::shared_ptr<float>(new float[nSize], std::default_delete<float[]>());
+    xCluster2DData.minstance_pt_x_data_ptr = std::shared_ptr<float>(new float[nSize], std::default_delete<float[]>());
+    xCluster2DData.minstance_pt_y_data_ptr = std::shared_ptr<float>(new float[nSize], std::default_delete<float[]>());
+    xCluster2DData.mconfidence_pt_data_ptr = std::shared_ptr<float>(new float[nSize], std::default_delete<float[]>());
+    xCluster2DData.mheight_pt_data_ptr = std::shared_ptr<float>(new float[nSize], std::default_delete<float[]>());
+    xCluster2DData.mnum_classes = class_pt_blob_->channels();
+    xCluster2DData.mclassify_pt_data_ptr = std::shared_ptr<float>(new float[nSize * xCluster2DData.mnum_classes], std::default_delete<float[]>());
+
+    int64_t time4 = std::chrono::system_clock::now().time_since_epoch().count();
+
+    // Snapshot every output blob so the worker thread can read them after the
+    // next Forward() overwrites the blobs.  cpu_data() also syncs GPU->CPU.
+    memcpy(xCluster2DData.mcategory_pt_data_ptr.get(),category_pt_blob_->cpu_data(),nSize * sizeof(float) );
+    memcpy(xCluster2DData.minstance_pt_x_data_ptr.get(),instance_pt_blob_->cpu_data(),nSize* sizeof(float));
+    memcpy(xCluster2DData.minstance_pt_y_data_ptr.get(),instance_pt_blob_->cpu_data()+instance_pt_blob_->offset(0, 1),nSize* sizeof(float));
+    memcpy(xCluster2DData.mconfidence_pt_data_ptr.get(),confidence_pt_blob_->cpu_data(),nSize* sizeof(float));
+    memcpy(xCluster2DData.mheight_pt_data_ptr.get(),height_pt_blob_->cpu_data(),nSize* sizeof(float));
+    memcpy(xCluster2DData.mclassify_pt_data_ptr.get(),class_pt_blob_->cpu_data(),nSize*xCluster2DData.mnum_classes* sizeof(float));
+    xCluster2DData.mInferUpdateTime = std::chrono::system_clock::now().time_since_epoch().count()/1000000;
+    xCluster2DData.mpointcloud = pc_ptr;
+    xCluster2DData.mvalid_idx_ptr = valid_idx_ptr;
+    xCluster2DData.mPointCloudUpdateTime = npctime;
+
+    int64_t time5 = std::chrono::system_clock::now().time_since_epoch().count();
+
+    // Publish for the worker threads; an unconsumed older buffer is simply
+    // replaced by the newer one.
+    mmutexbuffer.lock();
+    minferbuffer.mData = xCluster2DData;
+    minferbuffer.mbNew = true;
+    mmutexbuffer.unlock();
+
+    std::cout<<" feature use: "<<(time2-time1)/1000000<<" infer use: "<<(time3 - time2)/1000000
+            <<" copy use:"<<(time5-time4)/1000000<<std::endl;
+
+    return true;
+
+}
+
+
+
+
 //bool CNNSegmentation::segment(const pcl::PointCloud<pcl::PointXYZI>::Ptr &pc_ptr,
 //                              const pcl::PointIndices &valid_idx,
 //                              rockauto_msgs::DetectedObjectArray &objects)

+ 62 - 0
src/detection/detection_lidar_cnn_segmentation/cnn_segmentation.h

@@ -26,6 +26,9 @@
 
 #define __APP_NAME__ "lidar_cnn_seg_detect"
 
+
+#include <thread>
+
 struct CellStat
 {
   CellStat() : point_num(0), valid_point_num(0)
@@ -36,6 +39,47 @@ struct CellStat
   int valid_point_num;
 };
 
+
+namespace iv {
+// Snapshot of one inference's network outputs plus the source cloud, handed
+// from CNNSegmentation::segment() to the clustering worker threads.  All
+// float buffers are width_*height_ grids (classify buffer: num_classes grids,
+// channel-major).  NOTE(review): the shared_ptr<float> members hold new[]
+// arrays, so they must be constructed with an array deleter.
+struct Cluster2DData
+{
+    std::shared_ptr<float>  mcategory_pt_data_ptr;
+    std::shared_ptr<float>  minstance_pt_x_data_ptr;
+    std::shared_ptr<float>  minstance_pt_y_data_ptr;
+    std::shared_ptr<float>  mconfidence_pt_data_ptr;
+    std::shared_ptr<float>  mheight_pt_data_ptr;
+    std::shared_ptr<float>  mclassify_pt_data_ptr;
+    int mnum_classes;
+    int64 mPointCloudUpdateTime; //ms since epoch
+    int64 mInferUpdateTime; //ms since epoch
+    pcl::PointCloud<pcl::PointXYZI>::Ptr mpointcloud;
+    pcl::PointIndicesPtr mvalid_idx_ptr;
+
+};
+
+}
+
+namespace iv {
+// Single-slot mailbox for an incoming point cloud (mbNew marks "unread").
+struct pcbuffer
+{
+    pcl::PointCloud<pcl::PointXYZI>::Ptr mpointcloud;
+    bool mbNew;
+    int64 mPointCloudUpdateTime; //ms since epoch
+};
+// Single-slot mailbox carrying inference outputs to the cluster workers.
+struct inferresbuffer
+{
+    Cluster2DData mData;
+    bool mbNew;
+};
+
+// Clustering result published by threadCluster() and read via GetRes().
+struct cnnres
+{
+    std::vector<Obstacle> mobjvec;
+    Cluster2DData mcluster2ddata;
+    bool mbNew;
+};
+}
+
 class CNNSegmentation
 {
 public:
@@ -78,6 +122,20 @@ private:
   // bird-view raw feature generator
   std::shared_ptr<FeatureGenerator> feature_generator_;
 
+private:
+  void threadCluster();
+
+  iv::inferresbuffer minferbuffer;
+  std::mutex mmutexbuffer;
+
+  iv::cnnres mcnnres;
+  std::mutex mmutexcnnres;
+
+  bool mbThreadRun;
+
+  std::vector<std::thread *> mpthreadv;
+
+
 
 public:
   bool init(  std::string proto_file,std::string weight_file,double rangemax,double score,
@@ -86,6 +144,10 @@ public:
   bool segment(const pcl::PointCloud<pcl::PointXYZI>::Ptr &pc_ptr,
                const pcl::PointIndices &valid_idx,
                std::vector<Obstacle> & objvec);
+  bool segment(const pcl::PointCloud<pcl::PointXYZI>::Ptr &pc_ptr,
+               pcl::PointIndicesPtr & valid_idx_ptr);
+
+  int GetRes(iv::cnnres & xres);
 
 
 //  bool segment(const pcl::PointCloud<pcl::PointXYZI>::Ptr &pc_ptr,

+ 11 - 0
src/detection/detection_lidar_cnn_segmentation/detection_lidar_cnn_segmentation.xml

@@ -0,0 +1,11 @@
+<xml>	
+	<node name="detection_ndt_matching_gpu_multi.xml">
+		<param name="prototxt" value="/home/nvidia/models/lidar/model.prototxt" />
+		<param name="caffemodel" value="/home/nvidia/models/lidar/model.caffemodel" />
+		<param name="input" value="lidarpc_center" />
+		<param name="output" value="lidar_track" />
+		<param name="range" value="30.0" />
+		<param name="width" value="512" />
+		<param name="height" value="512" />
+	</node>
+</xml>

+ 4 - 4
src/detection/detection_lidar_cnn_segmentation/feature_generator.cpp

@@ -1,14 +1,14 @@
 #include "feature_generator.h"
 
 
-bool FeatureGenerator::init(caffe::Blob<float>* out_blob, bool use_constant_feature)
+bool FeatureGenerator::init(caffe::Blob<float>* out_blob, bool use_constant_feature,double range,int width,int height)
 {
   out_blob_ = out_blob;
 
   // raw feature parameters
-  range_ = 40;
-  width_ = 512;
-  height_ = 512;
+  range_ = range;
+  width_ = width;
+  height_ = height;
   min_height_ = -5.0;
   max_height_ = 5.0;
   CHECK_EQ(width_, height_)

+ 1 - 1
src/detection/detection_lidar_cnn_segmentation/feature_generator.h

@@ -16,7 +16,7 @@ public:
   FeatureGenerator(){}
   ~FeatureGenerator(){}
 
-  bool init(caffe::Blob<float>* out_blob, bool use_constant_feature);
+  bool init(caffe::Blob<float>* out_blob, bool use_constant_feature,double range,int width,int height);
   void generate(
       const pcl::PointCloud<pcl::PointXYZI>::Ptr& pc_ptr);
 private:

+ 190 - 35
src/detection/detection_lidar_cnn_segmentation/main.cpp

@@ -19,6 +19,12 @@
 //#include "alog.h"
 
 CNNSegmentation gcnn;
+CNNSegmentation gcnnv[10];
+
+
+
+iv::pcbuffer gpcbuffer;
+std::mutex gmutexpcbuffer;
 
 void * gpa;
 void * gpdetect;
@@ -30,6 +36,8 @@ std::thread * gpthread;
 std::string gstrinput;
 std::string gstroutput;
 
+bool gbUseMultiCNN = false;
+
 static iv::lidar::PointXYZ pcltoprotopoint(pcl::PointXYZ x)
 {
     iv::lidar::PointXYZ lx;
@@ -245,48 +253,117 @@ static void GetLidarObj(std::vector<Obstacle> objvec,iv::lidar::objectarray & li
 void DectectOnePCD(const pcl::PointCloud<pcl::PointXYZI>::Ptr &pc_ptr)
 {
     std::cout<<"pcd size is "<<pc_ptr->size()<<std::endl;
-    pcl::PointIndices valid_idx;
-    auto &indices = valid_idx.indices;
-    indices.resize(pc_ptr->size());
-    std::iota(indices.begin(), indices.end(), 0);
 
-    std::vector<Obstacle> objvec;
 
-    gcnn.segment(pc_ptr, valid_idx, objvec);
-    givlog->verbose("obj size is %d", objvec.size());
-    std::cout<<"obj size is "<<objvec.size()<<std::endl;
+    bool bUseMutliCluster = true;
 
-//    std::vector<iv::lidar::lidarobject> lidarobjvec;
-    iv::lidar::objectarray lidarobjvec;
-    GetLidarObj(objvec,lidarobjvec);
+    if(bUseMutliCluster == false)
+    {
+        pcl::PointIndices valid_idx;
+        auto &indices = valid_idx.indices;
+        indices.resize(pc_ptr->size());
+        std::iota(indices.begin(), indices.end(), 0);
+        std::vector<Obstacle> objvec;
+        gcnn.segment(pc_ptr, valid_idx, objvec);
+        givlog->verbose("obj size is %d", objvec.size());
+        std::cout<<"obj size is "<<objvec.size()<<std::endl;
+
+        //    std::vector<iv::lidar::lidarobject> lidarobjvec;
+        iv::lidar::objectarray lidarobjvec;
+        GetLidarObj(objvec,lidarobjvec);
+
+        double timex = pc_ptr->header.stamp;
+        timex = timex/1000.0;
+        lidarobjvec.set_timestamp(pc_ptr->header.stamp);
+
+        int ntlen;
+        std::string out = lidarobjvec.SerializeAsString();
+        //   char * strout = lidarobjtostr(lidarobjvec,ntlen);
+        iv::modulecomm::ModuleSendMsg(gpdetect,out.data(),out.length());
+        givlog->verbose("lenth is %d",out.length());
+        //  delete strout;
+
+        //    int i;
+        //    for(i=0;i<objvec.size();i++)
+        //    {
+        //        Obstacle x;
+        //        x = objvec.at(i);
+
+        //        std::cout<<"obj "<<i<<" x is "<<x.GetTypeString()<<std::endl;
+
+        //        iv::lidar::lidarobject y;
+        //        y = lidarobjvec.at(i);
+        //        std::cout<<"     "<<" x is"<<y.position.x<<" y is "<<y.position.y<<" len x is "<<y.dimensions.x<<" len y is "<<y.dimensions.y<<std::endl;
+        //        std::cout<<"     "<<" type is "<<y.mnType<<std::endl;
+        //    }
 
-    double timex = pc_ptr->header.stamp;
-    timex = timex/1000.0;
-    lidarobjvec.set_timestamp(pc_ptr->header.stamp);
+    }
+    else
+    {
+        pcl::PointIndicesPtr valid_idx_ptr = boost::shared_ptr<pcl::PointIndices>(new pcl::PointIndices());
+        auto &indices = valid_idx_ptr->indices;
+        indices.resize(pc_ptr->size());
+        std::iota(indices.begin(), indices.end(), 0);
+        gcnn.segment(pc_ptr, valid_idx_ptr);
+    }
+}
 
-    int ntlen;
-    std::string out = lidarobjvec.SerializeAsString();
- //   char * strout = lidarobjtostr(lidarobjvec,ntlen);
-    iv::modulecomm::ModuleSendMsg(gpdetect,out.data(),out.length());
-    givlog->verbose("lenth is %d",out.length());
-  //  delete strout;
 
-//    int i;
-//    for(i=0;i<objvec.size();i++)
-//    {
-//        Obstacle x;
-//        x = objvec.at(i);
+void threadgetres()
+{
+    while(1)
+    {
+        iv::cnnres xcnnres;
+        int nrtn = gcnn.GetRes(xcnnres);
+        if(nrtn == 1)
+        {
+            iv::lidar::objectarray lidarobjvec;
+            GetLidarObj(xcnnres.mobjvec,lidarobjvec);
+            std::string out = lidarobjvec.SerializeAsString();
+            //   char * strout = lidarobjtostr(lidarobjvec,ntlen);
+            iv::modulecomm::ModuleSendMsg(gpdetect,out.data(),out.length());
+
+            int64 now =  std::chrono::system_clock::now().time_since_epoch().count()/1000000;
+            std::cout<<" pc time : "<<xcnnres.mcluster2ddata.mPointCloudUpdateTime<<" cnn use: "<<(now - xcnnres.mcluster2ddata.mPointCloudUpdateTime)
+                    <<" obj size: "<<xcnnres.mobjvec.size()<<std::endl;
+        }
+        else
+        {
+            std::this_thread::sleep_for(std::chrono::milliseconds(1));
+        }
+    }
+}
 
-//        std::cout<<"obj "<<i<<" x is "<<x.GetTypeString()<<std::endl;
+void DectectOnePCD(const pcl::PointCloud<pcl::PointXYZI>::Ptr &pc_ptr,CNNSegmentation * pcnn)
+{
+    std::cout<<"pcd size is "<<pc_ptr->size()<<std::endl;
 
-//        iv::lidar::lidarobject y;
-//        y = lidarobjvec.at(i);
-//        std::cout<<"     "<<" x is"<<y.position.x<<" y is "<<y.position.y<<" len x is "<<y.dimensions.x<<" len y is "<<y.dimensions.y<<std::endl;
-//        std::cout<<"     "<<" type is "<<y.mnType<<std::endl;
-//    }
-}
 
 
+        pcl::PointIndices valid_idx;
+        auto &indices = valid_idx.indices;
+        indices.resize(pc_ptr->size());
+        std::iota(indices.begin(), indices.end(), 0);
+        std::vector<Obstacle> objvec;
+        pcnn->segment(pc_ptr, valid_idx, objvec);
+        givlog->verbose("obj size is %d", objvec.size());
+        std::cout<<"obj size is "<<objvec.size()<<std::endl;
+
+        //    std::vector<iv::lidar::lidarobject> lidarobjvec;
+        iv::lidar::objectarray lidarobjvec;
+        GetLidarObj(objvec,lidarobjvec);
+
+        double timex = pc_ptr->header.stamp;
+        timex = timex/1000.0;
+        lidarobjvec.set_timestamp(pc_ptr->header.stamp);
+
+        int ntlen;
+        std::string out = lidarobjvec.SerializeAsString();
+        //   char * strout = lidarobjtostr(lidarobjvec,ntlen);
+        iv::modulecomm::ModuleSendMsg(gpdetect,out.data(),out.length());
+        givlog->verbose("lenth is %d",out.length());
+}
+
 int gnothavedatatime = 0;
 
 void ListenPointCloud(const char * strdata,const unsigned int nSize,const unsigned int index,const QDateTime * dt,const char * strmemname)
@@ -341,12 +418,62 @@ void ListenPointCloud(const char * strdata,const unsigned int nSize,const unsign
 
 //    std::cout<<"pcl time is "<<xTime.elapsed()<<std::endl;
 
-    DectectOnePCD(point_cloud);
-    std::cout<<"time is "<<(QDateTime::currentMSecsSinceEpoch() % 1000)<<" "<<xTime.elapsed()<<std::endl;
+    bool bUseMultiCNN = gbUseMultiCNN;
+    if(bUseMultiCNN)
+    {
+        gmutexpcbuffer.lock();
+        gpcbuffer.mbNew = true;
+        gpcbuffer.mpointcloud = point_cloud;
+        gpcbuffer.mPointCloudUpdateTime = std::chrono::system_clock::now().time_since_epoch().count()/1000000;
+        gmutexpcbuffer.unlock();
+    }
+    else
+    {
+        DectectOnePCD(point_cloud);
+    }
+
+//    std::cout<<"time is "<<(QDateTime::currentMSecsSinceEpoch() % 1000)<<" "<<xTime.elapsed()<<std::endl;
     gfault->SetFaultState(0, 0, "ok");
 
 }
 
+void threadcnn(std::string protofile,std::string weightfile,double range,int width,int height)
+{
+    CNNSegmentation cnn;
+    cnn.init(protofile,weightfile,60.0,0.6,1024,1024,false,true,0);
+
+    pcl::PointCloud<pcl::PointXYZI>::Ptr point_cloud;
+
+    while(1)
+    {
+        bool bNew = false;
+        int64 npctime;
+        gmutexpcbuffer.lock();
+        if(gpcbuffer.mbNew)
+        {
+            gpcbuffer.mbNew = false;
+            bNew = true;
+            point_cloud = gpcbuffer.mpointcloud;
+            npctime = gpcbuffer.mPointCloudUpdateTime;
+        }
+        gmutexpcbuffer.unlock();
+
+        if(bNew == false)
+        {
+            std::this_thread::sleep_for(std::chrono::milliseconds(1));
+            continue;
+        }
+
+
+        DectectOnePCD(point_cloud,&cnn);
+        std::cout<<"pc time: is "<<npctime<<" res use time: "<<(std::chrono::system_clock::now().time_since_epoch().count()/1000000 - npctime)<<std::endl;
+
+
+    }
+
+
+}
+
 bool gbstate = true;
 void statethread()
 {
@@ -449,9 +576,37 @@ int main(int argc, char *argv[])
     gstrinput = xparam.GetParam("input","lidar_pc");
     gstroutput = xparam.GetParam("output","lidar_cnn_dectect");
 
+    double range,width,height;
+    range = xparam.GetParam("range",60.0);
+    width = xparam.GetParam("width",1024);
+    height = xparam.GetParam("height",1024);
+    int ncnn = xparam.GetParam("ncnn",2);
+
 //    std::string protofile = "/home/yuchuli/qt/bq/models/lidar/model.prototxt";
 //    std::string weightfile = "/home/yuchuli/qt/bq/models/lidar/model.caffemodel";
-    gcnn.init(protofile,weightfile,60.0,0.6,512,512,false,true,0);
+    gcnn.init(protofile,weightfile,range,0.6,width,height,false,true,0);
+
+//    int i;
+//    for(i=0;i<3;i++)
+//    {
+//        gcnnv[i].init(protofile,weightfile,60.0,0.6,1024,1024,false,true,0);
+
+//    }
+
+    std::thread * pthreadv[10];
+      if(gbUseMultiCNN)
+      {
+          int i;
+          if(ncnn>10)ncnn = 10;
+          for(i=0;i<ncnn;i++)
+          {
+              pthreadv[i] = new std::thread(threadcnn,protofile,weightfile,range,width,height);
+          }
+      }
+
+
+      std::thread * pthreadsend = new std::thread(threadgetres);
+
 
 
 

+ 3 - 3
src/detection/detection_lidar_cnn_segmentation_trt/src/detector.cpp

@@ -30,9 +30,9 @@ LidarApolloInstanceSegmentation::LidarApolloInstanceSegmentation()
   std::string prototxt_file;
   std::string caffemodel_file;
   score_threshold_ = 0.4;//node_->declare_parameter("score_threshold", 0.8);
-  range = 60;//node_->declare_parameter("range", 60);
-  width = 640;// 640;//node_->declare_parameter("width", 640);
-  height = 640; //640;//node_->declare_parameter("height", 640);
+  range = 30;//node_->declare_parameter("range", 60);
+  width = 512;// 640;//node_->declare_parameter("width", 640);
+  height = 512; //640;//node_->declare_parameter("height", 640);
   engine_file =  "deploy.engine";//node_->declare_parameter("engine_file", "vls-128.engine");
   prototxt_file = "hdl-64.prototxt";//node_->declare_parameter("prototxt_file", "vls-128.prototxt");
   caffemodel_file = "deploy.caffemodel";// node_->declare_parameter("caffemodel_file", "vls-128.caffemodel");

+ 47 - 18
src/driver/driver_map_xodrload/driver_map_xodrload.xml

@@ -2451,21 +2451,42 @@ static void AddSignalMark(Road * pRoad, int nlane,std::vector<PlanPoint> & xvect
         Signal * pSig = pRoad->GetSignal(i);
         int nfromlane = -100;
         int ntolane = 100;
-        signal_laneValidity * pSig_laneValidity = pSig->GetlaneValidity();
-        if(pSig_laneValidity != 0)
-        {
-            nfromlane = pSig_laneValidity->GetfromLane();
-            ntolane = pSig_laneValidity->GettoLane();
-        }
-        if((nlane < 0)&&(nfromlane >= 0))
-        {
-            continue;
-        }
-        if((nlane > 0)&&(ntolane<=0))
+
+        if(pSig->GetlaneValidityCount()>0)
         {
-            continue;
+            bool bValid = false;
+            unsigned int j;
+            std::vector<signal_laneValidity> * pvectorlanevalid = pSig->GetlaneValidityVector();
+            unsigned int nsize = static_cast<unsigned int>(pvectorlanevalid->size());
+            for(j=0;j<nsize;j++)
+            {
+                signal_laneValidity * pSig_laneValidity = &pvectorlanevalid->at(j);
+                nfromlane = pSig_laneValidity->GetfromLane();
+                ntolane = pSig_laneValidity->GettoLane();
+                if((nlane >= nfromlane)&&(nlane<=ntolane))
+                {
+                    bValid = true;
+                    break;
+                }
+            }
+            if(bValid == false)continue;
         }
 
+//        signal_laneValidity * pSig_laneValidity = pSig->GetlaneValidity();
+//        if(pSig_laneValidity != 0)
+//        {
+//            nfromlane = pSig_laneValidity->GetfromLane();
+//            ntolane = pSig_laneValidity->GettoLane();
+//        }
+//        if((nlane < 0)&&(nfromlane >= 0))
+//        {
+//            continue;
+//        }
+//        if((nlane > 0)&&(ntolane<=0))
+//        {
+//            continue;
+//        }
+
         double s = pSig->Gets();
         double fpos = s/pRoad->GetRoadLength();
         if(nlane > 0)fpos = 1.0 -fpos;
@@ -2779,18 +2800,20 @@ void ChangeLane(std::vector<PlanPoint> & xvectorPP)
         else
         {
             int k;
-            for(k=i;k<(nsize-1);k++)
+            for(k=i;k<=(nsize-1);k++)
             {
-                if((xvectorPP[k].mfSecx == xvectorPP[k].x)&&(xvectorPP[k].mfSecy == xvectorPP[k].y))
+                if((fabs(xvectorPP[k].mfSecx - xvectorPP[k].x)<0.001)&&(fabs(xvectorPP[k].mfSecy - xvectorPP[k].y)<0.001))
                 {
                     break;
                 }
             }
+            if(k>(nsize-1))k=nsize-1;
             int nnum = k-i;
             int nchangepoint = 300;
             double froadlen = sqrt(pow(xvectorPP[k].x - xvectorPP[i].x,2)
                                    +pow(xvectorPP[k].y - xvectorPP[i].y,2));
             const double fMAXCHANGE = 50;
+
             if(froadlen<fMAXCHANGE)
             {
                 nchangepoint = nnum;
@@ -2817,23 +2840,29 @@ void ChangeLane(std::vector<PlanPoint> & xvectorPP)
                 xvectorPP[j].y = xvectorPP[j].mfSecy;
             }
             nnum = nend - nstart;
-            for(j=nstart;j<nend;j++)
+            int nlast = k-1;
+            if(k==(nsize-1))nlast = k;
+            for(j=nstart;j<=nlast;j++)
             {
                 double fdis = sqrt(pow(xvectorPP[j].x - xvectorPP[j].mfSecx,2)
                                    +pow(xvectorPP[j].y - xvectorPP[j].mfSecy,2));
-                double foff = fdis *(j-nstart)/nnum;
+                double foff = 0;
+                if(j<nend)
+                    foff = fdis *(j-nstart)/nnum;
+                else
+                    foff = fdis;
                 if(xvectorPP[j].nlrchange == 1)
                 {
 //                    std::cout<<"foff is "<<foff<<std::endl;
                     xvectorPP[j].x = xvectorPP[j].mfSecx + foff *cos(xvectorPP[j].hdg + M_PI/2.0);
                     xvectorPP[j].y = xvectorPP[j].mfSecy + foff *sin(xvectorPP[j].hdg + M_PI/2.0);
-                    xvectorPP[j].mfDisToRoadLeft = xvectorPP[j].mfDisToRoadLeft + fdis - foff;
+                    xvectorPP[j].mfDisToRoadLeft = xvectorPP[j].mfDisToRoadLeft - foff; //+ fdis
                 }
                 else
                 {
                     xvectorPP[j].x = xvectorPP[j].mfSecx + foff *cos(xvectorPP[j].hdg - M_PI/2.0);
                     xvectorPP[j].y = xvectorPP[j].mfSecy + foff *sin(xvectorPP[j].hdg - M_PI/2.0);
-                    xvectorPP[j].mfDisToRoadLeft = xvectorPP[j].mfDisToRoadLeft  - fdis +foff;
+                    xvectorPP[j].mfDisToRoadLeft = xvectorPP[j].mfDisToRoadLeft  +foff;//- fdis
                 }
             }
             i =k;

+ 51 - 5
src/driver/driver_map_xodrload/main.cpp

@@ -860,6 +860,14 @@ void SetPlan(xodrobj xo)
     xfile_1.setFileName(strpath_1);
     xfile_1.open(QIODevice::ReadWrite);
 
+    double fpointdis = 0.1;
+    if(nSize > 2)
+    {
+        fpointdis = sqrt(pow(xPlan[0].x - xPlan[1].x,2)+pow(xPlan[0].y - xPlan[1].y,2));
+    }
+
+    if(fpointdis < 0.1)fpointdis = 0.1;
+
     for(i=0;i<nSize;i++)
     {
         iv::map::mappoint * pmappoint =  xtrace.add_point();
@@ -907,15 +915,27 @@ void SetPlan(xodrobj xo)
         iv::GPSData data(new iv::GPS_INS);
         data->roadMode = 0;
 
- //       std::cout<<"i:"<<i<<" lr:"<<xPlan[i].nlrchange<<std::endl;
-        if(xPlan[i].nlrchange == 1)
+        static int nChangeCount = 0;
+        static int nlrmode = -1;
+
+        if(nChangeCount == 0)
         {
-            data->roadMode = 14;
+
         }
-        if(xPlan[i].nlrchange == 2)
+        else
         {
-            data->roadMode = 15;
+            data->roadMode = nlrmode;
+            nChangeCount--;
         }
+ //       std::cout<<"i:"<<i<<" lr:"<<xPlan[i].nlrchange<<std::endl;
+//        if(xPlan[i].nlrchange == 1)
+//        {
+//            data->roadMode = 14;
+//        }
+//        if(xPlan[i].nlrchange == 2)
+//        {
+//            data->roadMode = 15;
+//        }
 
         if(i>50)
         {
@@ -934,6 +954,32 @@ void SetPlan(xodrobj xo)
                     }
                     if(j <=0)break;
                 }
+                int nChangeCount = 0;
+                for(j=i;j<(nSize-1);j++)
+                {
+                    if((xPlan[j].nlrchange != 0)&&(xPlan[j+1].nlrchange != 0))
+                    {
+                        nChangeCount++;
+                    }
+                }
+                if(nChangeCount < static_cast<int>(10.0/fpointdis))   //Fast Change road.
+                {
+                    nChangeCount = nChangeCount + static_cast<int>(3.0/fpointdis);
+                }
+                else
+                {
+                    nChangeCount =  static_cast<int>(5.0/fpointdis);
+                }
+
+                    if(xPlan[i].nlrchange == 1)
+                    {
+                        nlrmode = 14;
+                    }
+                    if(xPlan[i].nlrchange == 2)
+                    {
+                        nlrmode = 15;
+                    }
+
             }
         }
 

+ 1 - 0
src/tool/IVSysMan/IVSysManWithoutWin.pro

@@ -52,6 +52,7 @@ contains(QMAKE_HOST.os, Windows){
 SOURCES += \
     ../../include/msgtype/switch.pb.cc \
         mainwithoutwin.cpp \
+    procmemstat.cpp \
     sysman.cpp \
     progmon.cpp \
     cpumem.cpp \