Browse Source

Merge branch 'master' of http://10.16.0.29:3000/ADPilot/modularization

chenxiaowei 2 years ago
parent
commit
a7dbd3c58c
49 changed files with 4241 additions and 71 deletions
  1. 2 2
      sh/automake/autogen.sh
  2. 1 1
      sh/automake/automake_agx.sh
  3. 73 0
      src/detection/detection_lidar_centerpoint/.gitignore
  4. 161 0
      src/detection/detection_lidar_centerpoint/detection_lidar_centerpoint.pro
  5. 124 0
      src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/centerpoint_config.hpp
  6. 115 0
      src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/centerpoint_trt.hpp
  7. 120 0
      src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/cuda_utils.hpp
  8. 47 0
      src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/detection_class_remapper.hpp
  9. 53 0
      src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/network/network_trt.hpp
  10. 31 0
      src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/network/scatter_kernel.hpp
  11. 68 0
      src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/network/tensorrt_wrapper.hpp
  12. 70 0
      src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/node.hpp
  13. 32 0
      src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/postprocess/circle_nms_kernel.hpp
  14. 81 0
      src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/postprocess/non_maximum_suppression.hpp
  15. 47 0
      src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/postprocess/postprocess_kernel.hpp
  16. 100 0
      src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/preprocess/pointcloud_densification.hpp
  17. 31 0
      src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/preprocess/preprocess_kernel.hpp
  18. 65 0
      src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/preprocess/voxel_generator.hpp
  19. 43 0
      src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/ros_utils.hpp
  20. 63 0
      src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/single_inference_node.hpp
  21. 43 0
      src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/utils.hpp
  22. 258 0
      src/detection/detection_lidar_centerpoint/lib/centerpoint_trt.cpp
  23. 71 0
      src/detection/detection_lidar_centerpoint/lib/detection_class_remapper.cpp
  24. 74 0
      src/detection/detection_lidar_centerpoint/lib/network/network_trt.cpp
  25. 68 0
      src/detection/detection_lidar_centerpoint/lib/network/scatter_kernel.cu
  26. 161 0
      src/detection/detection_lidar_centerpoint/lib/network/tensorrt_wrapper.cpp
  27. 145 0
      src/detection/detection_lidar_centerpoint/lib/postprocess/circle_nms_kernel.cu
  28. 104 0
      src/detection/detection_lidar_centerpoint/lib/postprocess/non_maximum_suppression.cpp
  29. 166 0
      src/detection/detection_lidar_centerpoint/lib/postprocess/postprocess_kernel.cu
  30. 118 0
      src/detection/detection_lidar_centerpoint/lib/preprocess/pointcloud_densification.cpp
  31. 160 0
      src/detection/detection_lidar_centerpoint/lib/preprocess/preprocess_kernel.cu
  32. 224 0
      src/detection/detection_lidar_centerpoint/lib/preprocess/voxel_generator.cpp
  33. 95 0
      src/detection/detection_lidar_centerpoint/lib/ros_utils.cpp
  34. 34 0
      src/detection/detection_lidar_centerpoint/lib/utils.cpp
  35. 59 0
      src/detection/detection_lidar_centerpoint/main.cpp
  36. 223 0
      src/detection/detection_lidar_centerpoint/tensorrt_common/tensorrt_common.cpp
  37. 142 0
      src/detection/detection_lidar_centerpoint/tensorrt_common/tensorrt_common.hpp
  38. 194 0
      src/detection/detection_lidar_cnn_segmentation/cluster2d.cpp
  39. 16 0
      src/detection/detection_lidar_cnn_segmentation/cluster2d.h
  40. 189 2
      src/detection/detection_lidar_cnn_segmentation/cnn_segmentation.cpp
  41. 62 0
      src/detection/detection_lidar_cnn_segmentation/cnn_segmentation.h
  42. 11 0
      src/detection/detection_lidar_cnn_segmentation/detection_lidar_cnn_segmentation.xml
  43. 4 4
      src/detection/detection_lidar_cnn_segmentation/feature_generator.cpp
  44. 1 1
      src/detection/detection_lidar_cnn_segmentation/feature_generator.h
  45. 190 35
      src/detection/detection_lidar_cnn_segmentation/main.cpp
  46. 3 3
      src/detection/detection_lidar_cnn_segmentation_trt/src/detector.cpp
  47. 47 18
      src/driver/driver_map_xodrload/driver_map_xodrload.xml
  48. 51 5
      src/driver/driver_map_xodrload/main.cpp
  49. 1 0
      src/tool/IVSysMan/IVSysManWithoutWin.pro

+ 2 - 2
sh/automake/autogen.sh

@@ -242,8 +242,8 @@ driver_map_xodrload
 #driver_radio_p900
 driver_ntrip_client
 #driver_odomtogpsimu
-#driver_rpc_client
-#driver_rpc_server
+driver_rpc_client
+driver_rpc_server
 #driver_vbox_gaohong
 driver_ota_client
 #driver_grpc_client

+ 1 - 1
sh/automake/automake_agx.sh

@@ -34,7 +34,7 @@ echo "code version: "$GITVERSIONCODE
 
 
 month=`date +%Y.%m`
-urllink="http://111.33.136.149:3000/adc_pilot/modularization_pro/src/master/agx"
+urllink="http://10.15.0.89:3000/adc_pilot/modularization_pro/src/master/agx"
 
 
 if [ "$GITVERSIONCODE" =  "$VERSIONCODE" ]; then

+ 73 - 0
src/detection/detection_lidar_centerpoint/.gitignore

@@ -0,0 +1,73 @@
+# This file is used to ignore files which are generated
+# ----------------------------------------------------------------------------
+
+*~
+*.autosave
+*.a
+*.core
+*.moc
+*.o
+*.obj
+*.orig
+*.rej
+*.so
+*.so.*
+*_pch.h.cpp
+*_resource.rc
+*.qm
+.#*
+*.*#
+core
+!core/
+tags
+.DS_Store
+.directory
+*.debug
+Makefile*
+*.prl
+*.app
+moc_*.cpp
+ui_*.h
+qrc_*.cpp
+Thumbs.db
+*.res
+*.rc
+/.qmake.cache
+/.qmake.stash
+
+# qtcreator generated files
+*.pro.user*
+
+# xemacs temporary files
+*.flc
+
+# Vim temporary files
+.*.swp
+
+# Visual Studio generated files
+*.ib_pdb_index
+*.idb
+*.ilk
+*.pdb
+*.sln
+*.suo
+*.vcproj
+*vcproj.*.*.user
+*.ncb
+*.sdf
+*.opensdf
+*.vcxproj
+*vcxproj.*
+
+# MinGW generated files
+*.Debug
+*.Release
+
+# Python byte code
+*.pyc
+
+# Binaries
+# --------
+*.dll
+*.exe
+

+ 161 - 0
src/detection/detection_lidar_centerpoint/detection_lidar_centerpoint.pro

@@ -0,0 +1,161 @@
+QT -= gui
+
+CONFIG += c++14 #console
+CONFIG -= app_bundle
+
+# The following define makes your compiler emit warnings if you use
+# any Qt feature that has been marked deprecated (the exact warnings
+# depend on your compiler). Please consult the documentation of the
+# deprecated API in order to know how to port your code away from it.
+DEFINES += QT_DEPRECATED_WARNINGS
+
+# You can also make your code fail to compile if it uses deprecated APIs.
+# In order to do so, uncomment the following line.
+# You can also select to disable deprecated APIs only up to a certain version of Qt.
+#DEFINES += QT_DISABLE_DEPRECATED_BEFORE=0x060000    # disables all the APIs deprecated before Qt 6.0.0
+
+SOURCES += \
+        lib/centerpoint_trt.cpp \
+        lib/network/network_trt.cpp \
+        lib/network/tensorrt_wrapper.cpp \
+        lib/preprocess/pointcloud_densification.cpp \
+        lib/preprocess/voxel_generator.cpp \
+        lib/utils.cpp \
+        main.cpp \
+        tensorrt_common/tensorrt_common.cpp
+
+# Default rules for deployment.
+qnx: target.path = /tmp/$${TARGET}/bin
+else: unix:!android: target.path = /opt/$${TARGET}/bin
+!isEmpty(target.path): INSTALLS += target
+
+DISTFILES += \
+    lib/network/scatter_kernel.cu \
+    lib/postprocess/circle_nms_kernel.cu \
+    lib/postprocess/postprocess_kernel.cu \
+    lib/preprocess/preprocess_kernel.cu
+
+
+INCLUDEPATH += /usr/include/pcl-1.7
+INCLUDEPATH += /usr/include/pcl-1.8
+INCLUDEPATH += /usr/include/eigen3
+INCLUDEPATH += /usr/include/opencv4
+
+#INCLUDEPATH += /opt/ros/kinetic/include/opencv-3.3.1-dev
+
+INCLUDEPATH += /usr/include/pcl-1.10
+
+
+INCLUDEPATH += /usr/local/cuda-11.4/targets/aarch64-linux/include
+
+LIBS += -L/usr/local/cuda-11.4/targets/aarch64-linux/lib  # -lcublas
+
+INCLUDEPATH += /usr/local/cuda-10.2/targets/aarch64-linux/include
+
+
+INCLUDEPATH += $$PWD/include
+
+INCLUDEPATH +=$$PWD/perception_utils/include
+
+LIBS += -lrt -ldl -lnvinfer -lcudnn  -lcudart -lnvparsers -lnvonnxparser -lnvinfer_plugin -lstdc++fs
+
+
+CUDA_SOURCES += \
+    lib/network/scatter_kernel.cu \
+    lib/postprocess/circle_nms_kernel.cu \
+    lib/postprocess/postprocess_kernel.cu \
+    lib/preprocess/preprocess_kernel.cu
+
+LIBS += -L"/usr/local/lib" \
+        -L"/usr/local/cuda/lib64" \
+        -lcudart \
+        -lcufft
+
+CUDA_SDK = "/usr/local/cuda-11.4/"   # cudaSDK路径
+
+CUDA_DIR = "/usr/local/cuda-11.4/"            # CUDA tookit路径
+
+SYSTEM_NAME = linux         # 自己系统环境 'Win32', 'x64', or 'Win64'
+
+SYSTEM_TYPE = 64           #操作系统位数 '32' or '64',
+
+CUDA_ARCH = sm_72          # cuda架构, for example 'compute_10', 'compute_11', 'sm_10'
+
+NVCC_OPTIONS = --use_fast_math --compiler-options "-fPIC"
+
+
+# include paths
+
+INCLUDEPATH += $$CUDA_DIR/include
+#INCLUDEPATH += /usr/local/cuda-10.0/targets/aarch64-linux/include/crt
+
+# library directories
+
+QMAKE_LIBDIR += $$CUDA_DIR/lib/
+
+CUDA_OBJECTS_DIR = ./
+
+# The following library conflicts with something in Cuda
+
+#QMAKE_LFLAGS_RELEASE = /NODEFAULTLIB:msvcrt.lib
+
+#QMAKE_LFLAGS_DEBUG   = /NODEFAULTLIB:msvcrtd.lib
+
+# Add the necessary libraries
+
+CUDA_LIBS =  cudart cufft
+
+# The following makes sure all path names (which often include spaces) are put between quotation marks
+
+CUDA_INC = $$join(INCLUDEPATH,'" -I"','-I"','"')
+
+NVCC_LIBS = $$join(CUDA_LIBS,' -l','-l', '')
+
+#LIBS += $$join(CUDA_LIBS,'.so ', '', '.so')
+
+# Configuration of the Cuda compiler
+
+CONFIG(debug, debug|release) {
+
+    # Debug mode
+
+    cuda_d.input = CUDA_SOURCES
+
+    cuda_d.output = $$CUDA_OBJECTS_DIR/${QMAKE_FILE_BASE}_cuda.o
+
+    cuda_d.commands = $$CUDA_DIR/bin/nvcc -D_DEBUG $$NVCC_OPTIONS $$CUDA_INC $$NVCC_LIBS --machine $$SYSTEM_TYPE -arch=$$CUDA_ARCH -c -o ${QMAKE_FILE_OUT} ${QMAKE_FILE_NAME}
+
+    cuda_d.dependency_type = TYPE_C
+
+    QMAKE_EXTRA_COMPILERS += cuda_d
+
+}
+
+else {
+
+    # Release mode
+
+    cuda.input = CUDA_SOURCES
+
+    cuda.output = $$CUDA_OBJECTS_DIR/${QMAKE_FILE_BASE}_cuda.o
+
+    cuda.commands = $$CUDA_DIR/bin/nvcc $$NVCC_OPTIONS $$CUDA_INC $$NVCC_LIBS --machine $$SYSTEM_TYPE -arch=$$CUDA_ARCH -O3 -c -o ${QMAKE_FILE_OUT} ${QMAKE_FILE_NAME}
+
+    cuda.dependency_type = TYPE_C
+
+    QMAKE_EXTRA_COMPILERS += cuda
+
+}
+
+HEADERS += \
+    include/lidar_centerpoint/centerpoint_trt.hpp \
+    include/lidar_centerpoint/network/network_trt.hpp \
+    include/lidar_centerpoint/network/scatter_kernel.hpp \
+    include/lidar_centerpoint/network/tensorrt_wrapper.hpp \
+    include/lidar_centerpoint/postprocess/circle_nms_kernel.hpp \
+    include/lidar_centerpoint/postprocess/non_maximum_suppression.hpp \
+    include/lidar_centerpoint/postprocess/postprocess_kernel.hpp \
+    include/lidar_centerpoint/preprocess/pointcloud_densification.hpp \
+    include/lidar_centerpoint/preprocess/preprocess_kernel.hpp \
+    include/lidar_centerpoint/preprocess/voxel_generator.hpp
+

+ 124 - 0
src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/centerpoint_config.hpp

@@ -0,0 +1,124 @@
+// Copyright 2021 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef LIDAR_CENTERPOINT__CENTERPOINT_CONFIG_HPP_
+#define LIDAR_CENTERPOINT__CENTERPOINT_CONFIG_HPP_
+
+#include <cstddef>
+#include <vector>
+
+namespace centerpoint
+{
+class CenterPointConfig
+{
+public:
+  explicit CenterPointConfig(
+    const std::size_t class_size, const float point_feature_size, const std::size_t max_voxel_size,
+    const std::vector<double> & point_cloud_range, const std::vector<double> & voxel_size,
+    const std::size_t downsample_factor, const std::size_t encoder_in_feature_size,
+    const float score_threshold, const float circle_nms_dist_threshold,
+    const std::vector<double> yaw_norm_thresholds)
+  {
+    class_size_ = class_size;
+    point_feature_size_ = point_feature_size;
+    max_voxel_size_ = max_voxel_size;
+    if (point_cloud_range.size() == 6) {
+      range_min_x_ = static_cast<float>(point_cloud_range[0]);
+      range_min_y_ = static_cast<float>(point_cloud_range[1]);
+      range_min_z_ = static_cast<float>(point_cloud_range[2]);
+      range_max_x_ = static_cast<float>(point_cloud_range[3]);
+      range_max_y_ = static_cast<float>(point_cloud_range[4]);
+      range_max_z_ = static_cast<float>(point_cloud_range[5]);
+    }
+    if (voxel_size.size() == 3) {
+      voxel_size_x_ = static_cast<float>(voxel_size[0]);
+      voxel_size_y_ = static_cast<float>(voxel_size[1]);
+      voxel_size_z_ = static_cast<float>(voxel_size[2]);
+    }
+    downsample_factor_ = downsample_factor;
+    encoder_in_feature_size_ = encoder_in_feature_size;
+
+    if (score_threshold > 0 && score_threshold < 1) {
+      score_threshold_ = score_threshold;
+    }
+
+    if (circle_nms_dist_threshold > 0) {
+      circle_nms_dist_threshold_ = circle_nms_dist_threshold;
+    }
+
+    yaw_norm_thresholds_ =
+      std::vector<float>(yaw_norm_thresholds.begin(), yaw_norm_thresholds.end());
+
+    for (auto & yaw_norm_threshold : yaw_norm_thresholds_) {
+      yaw_norm_threshold =
+        (yaw_norm_threshold >= 0.f && yaw_norm_threshold < 1.f) ? yaw_norm_threshold : 0.f;
+    }
+
+    grid_size_x_ = static_cast<std::size_t>((range_max_x_ - range_min_x_) / voxel_size_x_);
+    grid_size_y_ = static_cast<std::size_t>((range_max_y_ - range_min_y_) / voxel_size_y_);
+    grid_size_z_ = static_cast<std::size_t>((range_max_z_ - range_min_z_) / voxel_size_z_);
+    offset_x_ = range_min_x_ + voxel_size_x_ / 2;
+    offset_y_ = range_min_y_ + voxel_size_y_ / 2;
+    offset_z_ = range_min_z_ + voxel_size_z_ / 2;
+    down_grid_size_x_ = grid_size_x_ / downsample_factor_;
+    down_grid_size_y_ = grid_size_y_ / downsample_factor_;
+  };
+
+  // input params
+  std::size_t class_size_{3};
+  const std::size_t point_dim_size_{3};  // x, y and z
+  std::size_t point_feature_size_{4};    // x, y, z and time-lag
+  std::size_t max_point_in_voxel_size_{32};
+  std::size_t max_voxel_size_{40000};
+  float range_min_x_{-89.6f};
+  float range_min_y_{-89.6f};
+  float range_min_z_{-3.0f};
+  float range_max_x_{89.6f};
+  float range_max_y_{89.6f};
+  float range_max_z_{5.0f};
+  float voxel_size_x_{0.32f};
+  float voxel_size_y_{0.32f};
+  float voxel_size_z_{8.0f};
+
+  // network params
+  const std::size_t batch_size_{1};
+  std::size_t downsample_factor_{2};
+  std::size_t encoder_in_feature_size_{9};
+  const std::size_t encoder_out_feature_size_{32};
+  const std::size_t head_out_size_{6};
+  const std::size_t head_out_offset_size_{2};
+  const std::size_t head_out_z_size_{1};
+  const std::size_t head_out_dim_size_{3};
+  const std::size_t head_out_rot_size_{2};
+  const std::size_t head_out_vel_size_{2};
+
+  // post-process params
+  float score_threshold_{0.35f};
+  float circle_nms_dist_threshold_{1.5f};
+  std::vector<float> yaw_norm_thresholds_{};
+
+  // calculated params
+  std::size_t grid_size_x_ = (range_max_x_ - range_min_x_) / voxel_size_x_;
+  std::size_t grid_size_y_ = (range_max_y_ - range_min_y_) / voxel_size_y_;
+  std::size_t grid_size_z_ = (range_max_z_ - range_min_z_) / voxel_size_z_;
+  float offset_x_ = range_min_x_ + voxel_size_x_ / 2;
+  float offset_y_ = range_min_y_ + voxel_size_y_ / 2;
+  float offset_z_ = range_min_z_ + voxel_size_z_ / 2;
+  std::size_t down_grid_size_x_ = grid_size_x_ / downsample_factor_;
+  std::size_t down_grid_size_y_ = grid_size_y_ / downsample_factor_;
+};
+
+}  // namespace centerpoint
+
+#endif  // LIDAR_CENTERPOINT__CENTERPOINT_CONFIG_HPP_

+ 115 - 0
src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/centerpoint_trt.hpp

@@ -0,0 +1,115 @@
+// Copyright 2021 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef LIDAR_CENTERPOINT__CENTERPOINT_TRT_HPP_
+#define LIDAR_CENTERPOINT__CENTERPOINT_TRT_HPP_
+
+#include <lidar_centerpoint/cuda_utils.hpp>
+#include <lidar_centerpoint/network/network_trt.hpp>
+#include <lidar_centerpoint/postprocess/postprocess_kernel.hpp>
+#include <lidar_centerpoint/preprocess/voxel_generator.hpp>
+
+//#include <sensor_msgs/msg/point_cloud2.hpp>
+
+#include <pcl/point_cloud.h>
+#include <pcl/point_types.h>
+
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+namespace centerpoint
+{
+class NetworkParam
+{
+public:
+  NetworkParam(std::string onnx_path, std::string engine_path, std::string trt_precision)
+  : onnx_path_(std::move(onnx_path)),
+    engine_path_(std::move(engine_path)),
+    trt_precision_(std::move(trt_precision))
+  {
+  }
+
+  std::string onnx_path() const { return onnx_path_; }
+  std::string engine_path() const { return engine_path_; }
+  std::string trt_precision() const { return trt_precision_; }
+
+private:
+  std::string onnx_path_;
+  std::string engine_path_;
+  std::string trt_precision_;
+};
+
+class CenterPointTRT
+{
+public:
+  explicit CenterPointTRT(
+    const NetworkParam & encoder_param, const NetworkParam & head_param,
+    const DensificationParam & densification_param, const CenterPointConfig & config);
+
+  ~CenterPointTRT();
+
+//  bool detect(
+//    const sensor_msgs::msg::PointCloud2 & input_pointcloud_msg, const tf2_ros::Buffer & tf_buffer,
+//    std::vector<Box3D> & det_boxes3d);
+
+  bool detect(
+    const pcl::PointCloud<pcl::PointXYZI>::Ptr & pc_ptr,
+    std::vector<Box3D> & det_boxes3d);
+
+protected:
+  void initPtr();
+
+//  virtual bool preprocess(
+//    const sensor_msgs::msg::PointCloud2 & input_pointcloud_msg, const tf2_ros::Buffer & tf_buffer);
+
+  virtual bool preprocess(
+    const pcl::PointCloud<pcl::PointXYZI>::Ptr & pc_ptr);
+
+  void inference();
+
+  void postProcess(std::vector<Box3D> & det_boxes3d);
+
+  std::unique_ptr<VoxelGeneratorTemplate> vg_ptr_{nullptr};
+  std::unique_ptr<VoxelEncoderTRT> encoder_trt_ptr_{nullptr};
+  std::unique_ptr<HeadTRT> head_trt_ptr_{nullptr};
+  std::unique_ptr<PostProcessCUDA> post_proc_ptr_{nullptr};
+  cudaStream_t stream_{nullptr};
+
+  std::size_t class_size_{0};
+  CenterPointConfig config_;
+  std::size_t num_voxels_{0};
+  std::size_t encoder_in_feature_size_{0};
+  std::size_t spatial_features_size_{0};
+  std::vector<float> voxels_;
+  std::vector<int> coordinates_;
+  std::vector<float> num_points_per_voxel_;
+  cuda::unique_ptr<float[]> voxels_d_{nullptr};
+  cuda::unique_ptr<int[]> coordinates_d_{nullptr};
+  cuda::unique_ptr<float[]> num_points_per_voxel_d_{nullptr};
+  cuda::unique_ptr<float[]> encoder_in_features_d_{nullptr};
+  cuda::unique_ptr<float[]> pillar_features_d_{nullptr};
+  cuda::unique_ptr<float[]> spatial_features_d_{nullptr};
+  cuda::unique_ptr<float[]> head_out_heatmap_d_{nullptr};
+  cuda::unique_ptr<float[]> head_out_offset_d_{nullptr};
+  cuda::unique_ptr<float[]> head_out_z_d_{nullptr};
+  cuda::unique_ptr<float[]> head_out_dim_d_{nullptr};
+  cuda::unique_ptr<float[]> head_out_rot_d_{nullptr};
+  cuda::unique_ptr<float[]> head_out_vel_d_{nullptr};
+};
+
+}  // namespace centerpoint
+
+#endif  // LIDAR_CENTERPOINT__CENTERPOINT_TRT_HPP_

+ 120 - 0
src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/cuda_utils.hpp

@@ -0,0 +1,120 @@
+// Copyright 2020 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+/*
+ * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * This code is licensed under CC0 1.0 Universal (Public Domain).
+ * You can use this without any limitation.
+ * https://creativecommons.org/publicdomain/zero/1.0/deed.en
+ */
+
+#ifndef LIDAR_CENTERPOINT__CUDA_UTILS_HPP_
+#define LIDAR_CENTERPOINT__CUDA_UTILS_HPP_
+
+#include <cuda_runtime_api.h>
+
+#include <memory>
+#include <sstream>
+#include <stdexcept>
+#include <type_traits>
+
+#define CHECK_CUDA_ERROR(e) (cuda::check_error(e, __FILE__, __LINE__))
+
+namespace cuda
+{
+inline void check_error(const ::cudaError_t e, const char * f, int n)
+{
+  if (e != ::cudaSuccess) {
+    std::stringstream s;
+    s << ::cudaGetErrorName(e) << " (" << e << ")@" << f << "#L" << n << ": "
+      << ::cudaGetErrorString(e);
+    throw std::runtime_error{s.str()};
+  }
+}
+
+struct deleter
+{
+  void operator()(void * p) const { CHECK_CUDA_ERROR(::cudaFree(p)); }
+};
+
+template <typename T>
+using unique_ptr = std::unique_ptr<T, deleter>;
+
+template <typename T>
+typename std::enable_if<std::is_array<T>::value, cuda::unique_ptr<T>>::type make_unique(
+  const std::size_t n)
+{
+  using U = typename std::remove_extent<T>::type;
+  U * p;
+  CHECK_CUDA_ERROR(::cudaMalloc(reinterpret_cast<void **>(&p), sizeof(U) * n));
+  return cuda::unique_ptr<T>{p};
+}
+
+template <typename T>
+cuda::unique_ptr<T> make_unique()
+{
+  T * p;
+  CHECK_CUDA_ERROR(::cudaMalloc(reinterpret_cast<void **>(&p), sizeof(T)));
+  return cuda::unique_ptr<T>{p};
+}
+
+constexpr size_t CUDA_ALIGN = 256;
+
+template <typename T>
+inline size_t get_size_aligned(size_t num_elem)
+{
+  size_t size = num_elem * sizeof(T);
+  size_t extra_align = 0;
+  if (size % CUDA_ALIGN != 0) {
+    extra_align = CUDA_ALIGN - size % CUDA_ALIGN;
+  }
+  return size + extra_align;
+}
+
+template <typename T>
+inline T * get_next_ptr(size_t num_elem, void *& workspace, size_t & workspace_size)
+{
+  size_t size = get_size_aligned<T>(num_elem);
+  if (size > workspace_size) {
+    throw std::runtime_error("Workspace is too small!");
+  }
+  workspace_size -= size;
+  T * ptr = reinterpret_cast<T *>(workspace);
+  workspace = reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(workspace) + size);
+  return ptr;
+}
+
+}  // namespace cuda
+
+#endif  // LIDAR_CENTERPOINT__CUDA_UTILS_HPP_

+ 47 - 0
src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/detection_class_remapper.hpp

@@ -0,0 +1,47 @@
+// Copyright 2022 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef LIDAR_CENTERPOINT__DETECTION_CLASS_REMAPPER_HPP_
+#define LIDAR_CENTERPOINT__DETECTION_CLASS_REMAPPER_HPP_
+
+#include <Eigen/Core>
+
+//#include <autoware_auto_perception_msgs/msg/detected_object_kinematics.hpp>
+//#include <autoware_auto_perception_msgs/msg/detected_objects.hpp>
+//#include <autoware_auto_perception_msgs/msg/object_classification.hpp>
+//#include <autoware_auto_perception_msgs/msg/shape.hpp>
+
+#include <vector>
+
+namespace centerpoint
+{
+
+class DetectionClassRemapper
+{
+public:
+  void setParameters(
+    const std::vector<int64_t> & allow_remapping_by_area_matrix,
+    const std::vector<double> & min_area_matrix, const std::vector<double> & max_area_matrix);
+  void mapClasses(autoware_auto_perception_msgs::msg::DetectedObjects & msg);
+
+protected:
+  Eigen::Matrix<bool, Eigen::Dynamic, Eigen::Dynamic> allow_remapping_by_area_matrix_;
+  Eigen::MatrixXd min_area_matrix_;
+  Eigen::MatrixXd max_area_matrix_;
+  int num_labels_;
+};
+
+}  // namespace centerpoint
+
+#endif  // LIDAR_CENTERPOINT__DETECTION_CLASS_REMAPPER_HPP_

+ 53 - 0
src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/network/network_trt.hpp

@@ -0,0 +1,53 @@
+// Copyright 2021 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef LIDAR_CENTERPOINT__NETWORK__NETWORK_TRT_HPP_
+#define LIDAR_CENTERPOINT__NETWORK__NETWORK_TRT_HPP_
+
+#include <lidar_centerpoint/centerpoint_config.hpp>
+#include <lidar_centerpoint/network/tensorrt_wrapper.hpp>
+
+#include <vector>
+
+namespace centerpoint
+{
+class VoxelEncoderTRT : public TensorRTWrapper
+{
+public:
+  using TensorRTWrapper::TensorRTWrapper;
+
+protected:
+  bool setProfile(
+    nvinfer1::IBuilder & builder, nvinfer1::INetworkDefinition & network,
+    nvinfer1::IBuilderConfig & config) override;
+};
+
+class HeadTRT : public TensorRTWrapper
+{
+public:
+  using TensorRTWrapper::TensorRTWrapper;
+
+  HeadTRT(const std::vector<std::size_t> & out_channel_sizes, const CenterPointConfig & config);
+
+protected:
+  bool setProfile(
+    nvinfer1::IBuilder & builder, nvinfer1::INetworkDefinition & network,
+    nvinfer1::IBuilderConfig & config) override;
+
+  std::vector<std::size_t> out_channel_sizes_;
+};
+
+}  // namespace centerpoint
+
+#endif  // LIDAR_CENTERPOINT__NETWORK__NETWORK_TRT_HPP_

+ 31 - 0
src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/network/scatter_kernel.hpp

@@ -0,0 +1,31 @@
+// Copyright 2022 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef LIDAR_CENTERPOINT__NETWORK__SCATTER_KERNEL_HPP_
+#define LIDAR_CENTERPOINT__NETWORK__SCATTER_KERNEL_HPP_
+
+#include <cuda.h>
+#include <cuda_runtime_api.h>
+
+namespace centerpoint
+{
+cudaError_t scatterFeatures_launch(
+  const float * pillar_features, const int * coords, const std::size_t num_pillars,
+  const std::size_t max_voxel_size, const std::size_t encoder_out_feature_size,
+  const std::size_t grid_size_x, const std::size_t grid_size_y, float * scattered_features,
+  cudaStream_t stream);
+
+}  // namespace centerpoint
+
+#endif  // LIDAR_CENTERPOINT__NETWORK__SCATTER_KERNEL_HPP_

+ 68 - 0
src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/network/tensorrt_wrapper.hpp

@@ -0,0 +1,68 @@
+// Copyright 2021 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef LIDAR_CENTERPOINT__NETWORK__TENSORRT_WRAPPER_HPP_
+#define LIDAR_CENTERPOINT__NETWORK__TENSORRT_WRAPPER_HPP_
+
+#include <lidar_centerpoint/centerpoint_config.hpp>
+#include <tensorrt_common/tensorrt_common.hpp>
+
+#include <NvInfer.h>
+
+#include <iostream>
+#include <memory>
+#include <string>
+
+namespace centerpoint
+{
+
+class TensorRTWrapper
+{
+public:
+  explicit TensorRTWrapper(const CenterPointConfig & config);
+
+  ~TensorRTWrapper();
+
+  bool init(
+    const std::string & onnx_path, const std::string & engine_path, const std::string & precision);
+
+  tensorrt_common::TrtUniquePtr<nvinfer1::IExecutionContext> context_{nullptr};
+
+protected:
+  virtual bool setProfile(
+    nvinfer1::IBuilder & builder, nvinfer1::INetworkDefinition & network,
+    nvinfer1::IBuilderConfig & config) = 0;
+
+  CenterPointConfig config_;
+  tensorrt_common::Logger logger_;
+
+private:
+  bool parseONNX(
+    const std::string & onnx_path, const std::string & engine_path, const std::string & precision,
+    size_t workspace_size = (1ULL << 30));
+
+  bool saveEngine(const std::string & engine_path);
+
+  bool loadEngine(const std::string & engine_path);
+
+  bool createContext();
+
+  tensorrt_common::TrtUniquePtr<nvinfer1::IRuntime> runtime_{nullptr};
+  tensorrt_common::TrtUniquePtr<nvinfer1::IHostMemory> plan_{nullptr};
+  tensorrt_common::TrtUniquePtr<nvinfer1::ICudaEngine> engine_{nullptr};
+};
+
+}  // namespace centerpoint
+
+#endif  // LIDAR_CENTERPOINT__NETWORK__TENSORRT_WRAPPER_HPP_

+ 70 - 0
src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/node.hpp

@@ -0,0 +1,70 @@
+// Copyright 2021 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef LIDAR_CENTERPOINT__NODE_HPP_
+#define LIDAR_CENTERPOINT__NODE_HPP_
+
+#include "lidar_centerpoint/postprocess/non_maximum_suppression.hpp"
+
+#include <lidar_centerpoint/centerpoint_trt.hpp>
+#include <lidar_centerpoint/detection_class_remapper.hpp>
+#include <rclcpp/rclcpp.hpp>
+#include <tier4_autoware_utils/ros/debug_publisher.hpp>
+#include <tier4_autoware_utils/system/stop_watch.hpp>
+
+#include <autoware_auto_perception_msgs/msg/detected_object_kinematics.hpp>
+#include <autoware_auto_perception_msgs/msg/detected_objects.hpp>
+#include <autoware_auto_perception_msgs/msg/object_classification.hpp>
+#include <autoware_auto_perception_msgs/msg/shape.hpp>
+#include <sensor_msgs/msg/point_cloud2.hpp>
+
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace centerpoint
+{
+
/// ROS 2 node: subscribes to a pointcloud topic, runs CenterPoint TensorRT
/// inference and publishes DetectedObjects.
class LidarCenterPointNode : public rclcpp::Node
{
public:
  explicit LidarCenterPointNode(const rclcpp::NodeOptions & node_options);

private:
  /// Callback invoked for each incoming pointcloud message.
  void pointCloudCallback(const sensor_msgs::msg::PointCloud2::ConstSharedPtr input_pointcloud_msg);

  // NOTE(review): tf2_ros types are used but no tf2_ros header is included
  // in this file -- presumably pulled in transitively; confirm.
  tf2_ros::Buffer tf_buffer_;
  tf2_ros::TransformListener tf_listener_{tf_buffer_};

  rclcpp::Subscription<sensor_msgs::msg::PointCloud2>::SharedPtr pointcloud_sub_;
  rclcpp::Publisher<autoware_auto_perception_msgs::msg::DetectedObjects>::SharedPtr objects_pub_;

  float score_threshold_{0.0};            // minimum score for a detection to be kept
  std::vector<std::string> class_names_;  // class index -> name mapping
  bool has_twist_{false};                 // whether detections carry velocity

  NonMaximumSuppression iou_bev_nms_;     // BEV-IoU duplicate suppression
  DetectionClassRemapper detection_class_remapper_;

  std::unique_ptr<CenterPointTRT> detector_ptr_{nullptr};

  // debugger
  std::unique_ptr<tier4_autoware_utils::StopWatch<std::chrono::milliseconds>> stop_watch_ptr_{
    nullptr};
  std::unique_ptr<tier4_autoware_utils::DebugPublisher> debug_publisher_ptr_{nullptr};
};
+
+}  // namespace centerpoint
+
+#endif  // LIDAR_CENTERPOINT__NODE_HPP_

+ 32 - 0
src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/postprocess/circle_nms_kernel.hpp

@@ -0,0 +1,32 @@
+// Copyright 2022 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef LIDAR_CENTERPOINT__POSTPROCESS__CIRCLE_NMS_KERNEL_HPP_
+#define LIDAR_CENTERPOINT__POSTPROCESS__CIRCLE_NMS_KERNEL_HPP_
+
+#include <lidar_centerpoint/utils.hpp>
+
+#include <thrust/device_vector.h>
+
+namespace centerpoint
+{
// Non-maximum suppression (NMS) uses the distance on the xy plane instead of
// intersection over union (IoU) to suppress overlapped objects.
/// @param boxes3d candidate boxes in device memory.
/// @param distance_threshold xy distance below which boxes suppress each other.
/// @param[out] keep_mask per-box survival flag (device memory).
/// @return presumably the number of boxes kept -- confirm in the kernel impl.
std::size_t circleNMS(
  thrust::device_vector<Box3D> & boxes3d, const float distance_threshold,
  thrust::device_vector<bool> & keep_mask, cudaStream_t stream);
+
+}  // namespace centerpoint
+
+#endif  // LIDAR_CENTERPOINT__POSTPROCESS__CIRCLE_NMS_KERNEL_HPP_

+ 81 - 0
src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/postprocess/non_maximum_suppression.hpp

@@ -0,0 +1,81 @@
+// Copyright 2022 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef LIDAR_CENTERPOINT__POSTPROCESS__NON_MAXIMUM_SUPPRESSION_HPP_
+#define LIDAR_CENTERPOINT__POSTPROCESS__NON_MAXIMUM_SUPPRESSION_HPP_
+
+//#include "lidar_centerpoint/ros_utils.hpp"
+
+#include <Eigen/Eigen>
+
+//#include "autoware_auto_perception_msgs/msg/detected_object.hpp"
+
+#include <string>
+#include <vector>
+
+namespace centerpoint
+{
+using autoware_auto_perception_msgs::msg::DetectedObject;
+
// TODO(yukke42): now only support IoU_BEV
/// Suppression criterion used by NonMaximumSuppression.
enum class NMS_TYPE {
  IoU_BEV  // IoU computed on the bird's-eye-view footprint
  // IoU_3D
  // Distance_2D
  // Distance_3D
};
+
/// Tuning parameters for NonMaximumSuppression::setParameters().
struct NMSParams
{
  NMS_TYPE nms_type_{};
  std::vector<std::string> target_class_names_{};  // classes NMS applies to
  double search_distance_2d_{};  // only pairs within this xy distance are compared
  double iou_threshold_{};       // IoU above this suppresses the weaker box -- confirm in impl
  // double distance_threshold_{};
};
+
+std::vector<bool> classNamesToBooleanMask(const std::vector<std::string> & class_names)
+{
+  std::vector<bool> mask;
+  constexpr std::size_t num_object_classification = 8;
+  mask.resize(num_object_classification);
+  for (const auto & class_name : class_names) {
+    const auto semantic_type = getSemanticType(class_name);
+    mask.at(semantic_type) = true;
+  }
+
+  return mask;
+}
+
/// Non-maximum suppression over DetectedObject lists.
/// Currently only the IoU_BEV criterion is supported (see NMS_TYPE).
class NonMaximumSuppression
{
public:
  void setParameters(const NMSParams &);

  /// Returns the input objects with suppressed duplicates removed.
  std::vector<DetectedObject> apply(const std::vector<DetectedObject> &);

private:
  /// True if this semantic label participates in NMS.
  bool isTargetLabel(const std::uint8_t);

  /// True if the pair should be compared -- presumably both target classes
  /// and within search_distance_2d_; confirm in the implementation.
  bool isTargetPairObject(const DetectedObject &, const DetectedObject &);

  Eigen::MatrixXd generateIoUMatrix(const std::vector<DetectedObject> &);

  NMSParams params_{};
  std::vector<bool> target_class_mask_{};  // indexed by semantic label
};
+
+}  // namespace centerpoint
+
+#endif  // LIDAR_CENTERPOINT__POSTPROCESS__NON_MAXIMUM_SUPPRESSION_HPP_

+ 47 - 0
src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/postprocess/postprocess_kernel.hpp

@@ -0,0 +1,47 @@
+// Copyright 2022 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef LIDAR_CENTERPOINT__POSTPROCESS__POSTPROCESS_KERNEL_HPP_
+#define LIDAR_CENTERPOINT__POSTPROCESS__POSTPROCESS_KERNEL_HPP_
+
+#include <lidar_centerpoint/centerpoint_config.hpp>
+#include <lidar_centerpoint/utils.hpp>
+
+#include <cuda.h>
+#include <cuda_runtime_api.h>
+#include <thrust/device_vector.h>
+
+#include <vector>
+
+namespace centerpoint
+{
/// CUDA post-processing: decodes the head network's output maps into Box3D
/// detections.
class PostProcessCUDA
{
public:
  explicit PostProcessCUDA(const CenterPointConfig & config);

  /// Decodes the heatmap/offset/z/dim/rot/vel maps (device pointers) into
  /// det_boxes3d on the given stream.
  /// @return the CUDA error status of the launch.
  cudaError_t generateDetectedBoxes3D_launch(
    const float * out_heatmap, const float * out_offset, const float * out_z, const float * out_dim,
    const float * out_rot, const float * out_vel, std::vector<Box3D> & det_boxes3d,
    cudaStream_t stream);

private:
  CenterPointConfig config_;
  thrust::device_vector<Box3D> boxes3d_d_;              // device-side scratch for decoded boxes
  thrust::device_vector<float> yaw_norm_thresholds_d_;  // thresholds on yaw-vector norm -- see config
};
+
+}  // namespace centerpoint
+
+#endif  // LIDAR_CENTERPOINT__POSTPROCESS__POSTPROCESS_KERNEL_HPP_

+ 100 - 0
src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/preprocess/pointcloud_densification.hpp

@@ -0,0 +1,100 @@
+// Copyright 2021 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef LIDAR_CENTERPOINT__PREPROCESS__POINTCLOUD_DENSIFICATION_HPP_
+#define LIDAR_CENTERPOINT__PREPROCESS__POINTCLOUD_DENSIFICATION_HPP_
+
+//#include <tf2_ros/buffer.h>
+//#include <tf2_ros/transform_listener.h>
+#ifdef ROS_DISTRO_GALACTIC
+#include <tf2_sensor_msgs/tf2_sensor_msgs.h>
+#else
+//#include <tf2_sensor_msgs/tf2_sensor_msgs.hpp>
+#endif
+
+#include <pcl/point_cloud.h>
+#include <pcl/point_types.h>
+
+#include <list>
+#include <string>
+#include <utility>
+
+namespace centerpoint
+{
/// Parameters for multi-frame pointcloud densification.
class DensificationParam
{
public:
  /// @param world_frame_id frame in which past clouds are accumulated.
  /// @param num_past_frames number of past frames to keep; the cache size is
  ///        num_past_frames + 1 (the current frame).
  // Takes the string by value so std::move below actually moves; the previous
  // `const std::string &` signature made std::move a silent copy
  // (clang-tidy: performance-move-const-arg). Call sites are unaffected.
  DensificationParam(std::string world_frame_id, const unsigned int num_past_frames)
  : world_frame_id_(std::move(world_frame_id)),
    pointcloud_cache_size_(num_past_frames + /*current frame*/ 1)
  {
  }

  std::string world_frame_id() const { return world_frame_id_; }
  unsigned int pointcloud_cache_size() const { return pointcloud_cache_size_; }

private:
  std::string world_frame_id_;
  unsigned int pointcloud_cache_size_{1};
};
+
/// One cached pointcloud frame.
/// NOTE(review): despite the name, this port stores only the cloud -- the
/// original Autoware struct (kept commented out below) also carried the
/// past-to-world transform. Confirm the transform is genuinely unneeded here.
struct PointCloudWithTransform
{
  pcl::PointCloud<pcl::PointXYZI>::Ptr pc_ptr;  // shared, non-owning of frame metadata
};
+
+
+//struct PointCloudWithTransform
+//{
+//  sensor_msgs::msg::PointCloud2 pointcloud_msg;
+//  Eigen::Affine3f affine_past2world;
+//};
+
/// Caches up to param_.pointcloud_cache_size() recent pointclouds so that a
/// densified (multi-frame) cloud can be fed to the detector.
class PointCloudDensification
{
public:
  explicit PointCloudDensification(const DensificationParam & param);

  /// Adds a cloud to the cache via enqueue(); older entries are evicted by
  /// dequeue() -- see the .cpp for the exact policy.
   bool enqueuePointCloud(
      const pcl::PointCloud<pcl::PointXYZI>::Ptr & pc_ptr);
//  bool enqueuePointCloud(
//    const sensor_msgs::msg::PointCloud2 & input_pointcloud_msg, const tf2_ros::Buffer & tf_buffer);

  double getCurrentTimestamp() const { return current_timestamp_; }
  // NOTE(review): in this pcl-based port no visible code updates
  // affine_world2current_; verify it is maintained in the .cpp.
  Eigen::Affine3f getAffineWorldToCurrent() const { return affine_world2current_; }
  /// Iterator over the cached clouds; ordering depends on enqueue() --
  /// TODO(review): confirm newest-first vs oldest-first.
  std::list<PointCloudWithTransform>::iterator getPointCloudCacheIter()
  {
    return pointcloud_cache_.begin();
  }
  bool isCacheEnd(std::list<PointCloudWithTransform>::iterator iter)
  {
    return iter == pointcloud_cache_.end();
  }
  unsigned int pointcloud_cache_size() const { return param_.pointcloud_cache_size(); }

private:
//  void enqueue(const sensor_msgs::msg::PointCloud2 & msg, const Eigen::Affine3f & affine);
  void enqueue(const pcl::PointCloud<pcl::PointXYZI>::Ptr & pc_ptr);  // push newest frame
  void dequeue();  // drop oldest frame

  DensificationParam param_;
  double current_timestamp_{0.0};
  Eigen::Affine3f affine_world2current_;
  std::list<PointCloudWithTransform> pointcloud_cache_;
};
+
+}  // namespace centerpoint
+
+#endif  // LIDAR_CENTERPOINT__PREPROCESS__POINTCLOUD_DENSIFICATION_HPP_

+ 31 - 0
src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/preprocess/preprocess_kernel.hpp

@@ -0,0 +1,31 @@
+// Copyright 2022 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef LIDAR_CENTERPOINT__PREPROCESS__PREPROCESS_KERNEL_HPP_
+#define LIDAR_CENTERPOINT__PREPROCESS__PREPROCESS_KERNEL_HPP_
+
+#include <cuda.h>
+#include <cuda_runtime_api.h>
+
+namespace centerpoint
+{
/// Launches the kernel that builds encoder input features from the raw voxel
/// buffers (voxel features, per-voxel point counts and grid coordinates).
/// NOTE(review): this header uses std::size_t but does not include <cstddef>;
/// verify it arrives transitively (e.g. via the CUDA headers).
cudaError_t generateFeatures_launch(
  const float * voxel_features, const float * voxel_num_points, const int * coords,
  const std::size_t num_voxels, const std::size_t max_voxel_size, const float voxel_size_x,
  const float voxel_size_y, const float voxel_size_z, const float range_min_x,
  const float range_min_y, const float range_min_z, float * features, cudaStream_t stream);
+
+}  // namespace centerpoint
+
+#endif  // LIDAR_CENTERPOINT__PREPROCESS__PREPROCESS_KERNEL_HPP_

+ 65 - 0
src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/preprocess/voxel_generator.hpp

@@ -0,0 +1,65 @@
+// Copyright 2021 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef LIDAR_CENTERPOINT__PREPROCESS__VOXEL_GENERATOR_HPP_
+#define LIDAR_CENTERPOINT__PREPROCESS__VOXEL_GENERATOR_HPP_
+
+#include <lidar_centerpoint/centerpoint_config.hpp>
+#include <lidar_centerpoint/preprocess/pointcloud_densification.hpp>
+
+//#include <sensor_msgs/msg/point_cloud2.hpp>
+
+#include <memory>
+#include <vector>
+
+namespace centerpoint
+{
+class VoxelGeneratorTemplate
+{
+public:
+  explicit VoxelGeneratorTemplate(
+    const DensificationParam & param, const CenterPointConfig & config);
+
+  virtual std::size_t pointsToVoxels(
+    std::vector<float> & voxels, std::vector<int> & coordinates,
+    std::vector<float> & num_points_per_voxel) = 0;
+
+    bool enqueuePointCloud(
+      const pcl::PointCloud<pcl::PointXYZI>::Ptr);
+
+//  bool enqueuePointCloud(
+//    const sensor_msgs::msg::PointCloud2 & input_pointcloud_msg, const tf2_ros::Buffer & tf_buffer);
+
+protected:
+  std::unique_ptr<PointCloudDensification> pd_ptr_{nullptr};
+
+  CenterPointConfig config_;
+  std::array<float, 6> range_;
+  std::array<int, 3> grid_size_;
+  std::array<float, 3> recip_voxel_size_;
+};
+
/// Concrete voxel generator implementing pointsToVoxels().
class VoxelGenerator : public VoxelGeneratorTemplate
{
public:
  // Inherit the (param, config) constructor unchanged.
  using VoxelGeneratorTemplate::VoxelGeneratorTemplate;

  std::size_t pointsToVoxels(
    std::vector<float> & voxels, std::vector<int> & coordinates,
    std::vector<float> & num_points_per_voxel) override;
};
+
+}  // namespace centerpoint
+
+#endif  // LIDAR_CENTERPOINT__PREPROCESS__VOXEL_GENERATOR_HPP_

+ 43 - 0
src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/ros_utils.hpp

@@ -0,0 +1,43 @@
+// Copyright 2022 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef LIDAR_CENTERPOINT__ROS_UTILS_HPP_
+#define LIDAR_CENTERPOINT__ROS_UTILS_HPP_
+
+// ros packages cannot be included from cuda.
+
+#include <lidar_centerpoint/utils.hpp>
+
+#include <autoware_auto_perception_msgs/msg/detected_object_kinematics.hpp>
+#include <autoware_auto_perception_msgs/msg/detected_objects.hpp>
+#include <autoware_auto_perception_msgs/msg/object_classification.hpp>
+#include <autoware_auto_perception_msgs/msg/shape.hpp>
+
+#include <string>
+#include <vector>
+
+namespace centerpoint
+{
+
/// Converts one decoded Box3D into an Autoware DetectedObject message.
/// @param has_twist presumably controls whether velocity is written into the
///        kinematics -- confirm against the implementation.
void box3DToDetectedObject(
  const Box3D & box3d, const std::vector<std::string> & class_names, const bool has_twist,
  autoware_auto_perception_msgs::msg::DetectedObject & obj);

/// Maps a class-name string to its ObjectClassification label value.
uint8_t getSemanticType(const std::string & class_name);

/// True for car-like vehicle labels -- see the implementation for the exact set.
bool isCarLikeVehicleLabel(const uint8_t label);
+
+}  // namespace centerpoint
+
+#endif  // LIDAR_CENTERPOINT__ROS_UTILS_HPP_

+ 63 - 0
src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/single_inference_node.hpp

@@ -0,0 +1,63 @@
+// Copyright 2022 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef LIDAR_CENTERPOINT__SINGLE_INFERENCE_NODE_HPP_
+#define LIDAR_CENTERPOINT__SINGLE_INFERENCE_NODE_HPP_
+
+#include <Eigen/Core>
+#include <lidar_centerpoint/centerpoint_trt.hpp>
+#include <lidar_centerpoint/detection_class_remapper.hpp>
+#include <rclcpp/rclcpp.hpp>
+
+#include <autoware_auto_perception_msgs/msg/detected_object_kinematics.hpp>
+#include <autoware_auto_perception_msgs/msg/detected_objects.hpp>
+#include <autoware_auto_perception_msgs/msg/object_classification.hpp>
+#include <autoware_auto_perception_msgs/msg/shape.hpp>
+
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace centerpoint
+{
+
/// Offline single-shot node: runs CenterPoint once on a PCD file and dumps
/// the detections (see detect() / dumpDetectionsAsMesh()).
class SingleInferenceLidarCenterPointNode : public rclcpp::Node
{
public:
  explicit SingleInferenceLidarCenterPointNode(const rclcpp::NodeOptions & node_options);

private:
  /// Runs inference on pcd_path and writes results to detections_path.
  void detect(const std::string & pcd_path, const std::string & detections_path);
  /// Corner vertices of a shape, transformed by the given pose.
  std::vector<Eigen::Vector3d> getVertices(
    const autoware_auto_perception_msgs::msg::Shape & shape, const Eigen::Affine3d & pose) const;
  /// Writes the detections to output_path as a mesh (format defined in impl).
  void dumpDetectionsAsMesh(
    const autoware_auto_perception_msgs::msg::DetectedObjects & objects_msg,
    const std::string & output_path) const;

  // NOTE(review): tf2_ros types used without a visible tf2_ros include --
  // presumably transitive; confirm.
  tf2_ros::Buffer tf_buffer_;
  tf2_ros::TransformListener tf_listener_{tf_buffer_};

  float score_threshold_{0.0};  // minimum score for a detection to be kept
  std::vector<std::string> class_names_;
  bool rename_car_to_truck_and_bus_{false};
  bool has_twist_{false};

  DetectionClassRemapper detection_class_remapper_;

  std::unique_ptr<CenterPointTRT> detector_ptr_{nullptr};
};
+
+}  // namespace centerpoint
+
+#endif  // LIDAR_CENTERPOINT__SINGLE_INFERENCE_NODE_HPP_

+ 43 - 0
src/detection/detection_lidar_centerpoint/include/lidar_centerpoint/utils.hpp

@@ -0,0 +1,43 @@
+// Copyright 2022 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef LIDAR_CENTERPOINT__UTILS_HPP_
+#define LIDAR_CENTERPOINT__UTILS_HPP_
+
+#include <cstddef>
+
+namespace centerpoint
+{
/// Plain-old-data 3D detection box, shared between host code and CUDA kernels
/// (hence no member initializers -- see comment below).
struct Box3D
{
  // initializer not allowed for __shared__ variable
  int label;     // class index (see class_names mapping)
  float score;   // detection confidence
  float x;       // box center -- frame/units defined by the input cloud; confirm
  float y;
  float z;
  float length;
  float width;
  float height;
  float yaw;
  float vel_x;   // velocity, only meaningful when the model predicts twist
  float vel_y;
};
+
// cspell: ignore divup
/// Integer ceiling division: smallest k such that k * b >= a (i.e. ceil(a/b)).
std::size_t divup(const std::size_t a, const std::size_t b);
+
+}  // namespace centerpoint
+
+#endif  // LIDAR_CENTERPOINT__UTILS_HPP_

+ 258 - 0
src/detection/detection_lidar_centerpoint/lib/centerpoint_trt.cpp

@@ -0,0 +1,258 @@
+// Copyright 2021 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "lidar_centerpoint/centerpoint_trt.hpp"
+
+#include <lidar_centerpoint/centerpoint_config.hpp>
+#include <lidar_centerpoint/network/scatter_kernel.hpp>
+#include <lidar_centerpoint/preprocess/preprocess_kernel.hpp>
+//#include <tier4_autoware_utils/math/constants.hpp>
+
+#include <iostream>
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace centerpoint
+{
+CenterPointTRT::CenterPointTRT(
+  const NetworkParam & encoder_param, const NetworkParam & head_param,
+  const DensificationParam & densification_param, const CenterPointConfig & config)
+: config_(config)
+{
+  vg_ptr_ = std::make_unique<VoxelGenerator>(densification_param, config_);
+  post_proc_ptr_ = std::make_unique<PostProcessCUDA>(config_);
+
+  // encoder
+  encoder_trt_ptr_ = std::make_unique<VoxelEncoderTRT>(config_);
+  encoder_trt_ptr_->init(
+    encoder_param.onnx_path(), encoder_param.engine_path(), encoder_param.trt_precision());
+  encoder_trt_ptr_->context_->setBindingDimensions(
+    0,
+    nvinfer1::Dims3(
+      config_.max_voxel_size_, config_.max_point_in_voxel_size_, config_.encoder_in_feature_size_));
+
+  // head
+  std::vector<std::size_t> out_channel_sizes = {
+    config_.class_size_,        config_.head_out_offset_size_, config_.head_out_z_size_,
+    config_.head_out_dim_size_, config_.head_out_rot_size_,    config_.head_out_vel_size_};
+  head_trt_ptr_ = std::make_unique<HeadTRT>(out_channel_sizes, config_);
+  head_trt_ptr_->init(head_param.onnx_path(), head_param.engine_path(), head_param.trt_precision());
+  head_trt_ptr_->context_->setBindingDimensions(
+    0, nvinfer1::Dims4(
+         config_.batch_size_, config_.encoder_out_feature_size_, config_.grid_size_y_,
+         config_.grid_size_x_));
+
+  initPtr();
+
+  cudaStreamCreate(&stream_);
+}
+
+CenterPointTRT::~CenterPointTRT()
+{
+  if (stream_) {
+    cudaStreamSynchronize(stream_);
+    cudaStreamDestroy(stream_);
+  }
+}
+
/// Sizes and allocates every host and device buffer used by the pipeline,
/// derived from the model configuration.
void CenterPointTRT::initPtr()
{
  // Element counts (not bytes) for each buffer.
  const auto voxels_size =
    config_.max_voxel_size_ * config_.max_point_in_voxel_size_ * config_.point_feature_size_;
  const auto coordinates_size = config_.max_voxel_size_ * config_.point_dim_size_;
  encoder_in_feature_size_ =
    config_.max_voxel_size_ * config_.max_point_in_voxel_size_ * config_.encoder_in_feature_size_;
  const auto pillar_features_size = config_.max_voxel_size_ * config_.encoder_out_feature_size_;
  spatial_features_size_ =
    config_.grid_size_x_ * config_.grid_size_y_ * config_.encoder_out_feature_size_;
  // Head outputs are laid out on the downsampled grid.
  const auto grid_xy_size = config_.down_grid_size_x_ * config_.down_grid_size_y_;

  // host (filled by the voxel generator, then copied to the device in preprocess())
  voxels_.resize(voxels_size);
  coordinates_.resize(coordinates_size);
  num_points_per_voxel_.resize(config_.max_voxel_size_);

  // device (network inputs/outputs; cuda::make_unique wraps cudaMalloc -- see cuda_utils.hpp)
  voxels_d_ = cuda::make_unique<float[]>(voxels_size);
  coordinates_d_ = cuda::make_unique<int[]>(coordinates_size);
  num_points_per_voxel_d_ = cuda::make_unique<float[]>(config_.max_voxel_size_);
  encoder_in_features_d_ = cuda::make_unique<float[]>(encoder_in_feature_size_);
  pillar_features_d_ = cuda::make_unique<float[]>(pillar_features_size);
  spatial_features_d_ = cuda::make_unique<float[]>(spatial_features_size_);
  head_out_heatmap_d_ = cuda::make_unique<float[]>(grid_xy_size * config_.class_size_);
  head_out_offset_d_ = cuda::make_unique<float[]>(grid_xy_size * config_.head_out_offset_size_);
  head_out_z_d_ = cuda::make_unique<float[]>(grid_xy_size * config_.head_out_z_size_);
  head_out_dim_d_ = cuda::make_unique<float[]>(grid_xy_size * config_.head_out_dim_size_);
  head_out_rot_d_ = cuda::make_unique<float[]>(grid_xy_size * config_.head_out_rot_size_);
  head_out_vel_d_ = cuda::make_unique<float[]>(grid_xy_size * config_.head_out_vel_size_);
}
+
+bool CenterPointTRT::detect(
+  const pcl::PointCloud<pcl::PointXYZI>::Ptr & pc_ptr,
+  std::vector<Box3D> & det_boxes3d)
+{
+    std::fill(voxels_.begin(), voxels_.end(), 0);
+    std::fill(coordinates_.begin(), coordinates_.end(), -1);
+    std::fill(num_points_per_voxel_.begin(), num_points_per_voxel_.end(), 0);
+    CHECK_CUDA_ERROR(cudaMemsetAsync(
+      encoder_in_features_d_.get(), 0, encoder_in_feature_size_ * sizeof(float), stream_));
+    CHECK_CUDA_ERROR(
+      cudaMemsetAsync(spatial_features_d_.get(), 0, spatial_features_size_ * sizeof(float), stream_));
+
+    if (!preprocess(pc_ptr)) {
+        std::cout<<"Fail to preprocess and skip to detect."<<std::endl;
+//      RCLCPP_WARN_STREAM(
+//        rclcpp::get_logger("lidar_centerpoint"), "Fail to preprocess and skip to detect.");
+      return false;
+    }
+
+    inference();
+
+    postProcess(det_boxes3d);
+
+    return true;
+}
+
+//bool CenterPointTRT::detect(
+//  const sensor_msgs::msg::PointCloud2 & input_pointcloud_msg, const tf2_ros::Buffer & tf_buffer,
+//  std::vector<Box3D> & det_boxes3d)
+//{
+//  std::fill(voxels_.begin(), voxels_.end(), 0);
+//  std::fill(coordinates_.begin(), coordinates_.end(), -1);
+//  std::fill(num_points_per_voxel_.begin(), num_points_per_voxel_.end(), 0);
+//  CHECK_CUDA_ERROR(cudaMemsetAsync(
+//    encoder_in_features_d_.get(), 0, encoder_in_feature_size_ * sizeof(float), stream_));
+//  CHECK_CUDA_ERROR(
+//    cudaMemsetAsync(spatial_features_d_.get(), 0, spatial_features_size_ * sizeof(float), stream_));
+
+//  if (!preprocess(input_pointcloud_msg, tf_buffer)) {
+//    RCLCPP_WARN_STREAM(
+//      rclcpp::get_logger("lidar_centerpoint"), "Fail to preprocess and skip to detect.");
+//    return false;
+//  }
+
+//  inference();
+
+//  postProcess(det_boxes3d);
+
+//  return true;
+//}
+
+bool CenterPointTRT::preprocess(
+    const pcl::PointCloud<pcl::PointXYZI>::Ptr & pc_ptr)
+{
+    bool is_success = vg_ptr_->enqueuePointCloud(pc_ptr);
+    if (!is_success) {
+      return false;
+    }
+    num_voxels_ = vg_ptr_->pointsToVoxels(voxels_, coordinates_, num_points_per_voxel_);
+    if (num_voxels_ == 0) {
+      return false;
+    }
+
+    const auto voxels_size =
+      num_voxels_ * config_.max_point_in_voxel_size_ * config_.point_feature_size_;
+    const auto coordinates_size = num_voxels_ * config_.point_dim_size_;
+    // memcpy from host to device (not copy empty voxels)
+    CHECK_CUDA_ERROR(cudaMemcpyAsync(
+      voxels_d_.get(), voxels_.data(), voxels_size * sizeof(float), cudaMemcpyHostToDevice));
+    CHECK_CUDA_ERROR(cudaMemcpyAsync(
+      coordinates_d_.get(), coordinates_.data(), coordinates_size * sizeof(int),
+      cudaMemcpyHostToDevice));
+    CHECK_CUDA_ERROR(cudaMemcpyAsync(
+      num_points_per_voxel_d_.get(), num_points_per_voxel_.data(), num_voxels_ * sizeof(float),
+      cudaMemcpyHostToDevice));
+    CHECK_CUDA_ERROR(cudaStreamSynchronize(stream_));
+
+    CHECK_CUDA_ERROR(generateFeatures_launch(
+      voxels_d_.get(), num_points_per_voxel_d_.get(), coordinates_d_.get(), num_voxels_,
+      config_.max_voxel_size_, config_.voxel_size_x_, config_.voxel_size_y_, config_.voxel_size_z_,
+      config_.range_min_x_, config_.range_min_y_, config_.range_min_z_, encoder_in_features_d_.get(),
+      stream_));
+
+    return true;
+}
+
+//bool CenterPointTRT::preprocess(
+//  const sensor_msgs::msg::PointCloud2 & input_pointcloud_msg, const tf2_ros::Buffer & tf_buffer)
+//{
+//  bool is_success = vg_ptr_->enqueuePointCloud(input_pointcloud_msg, tf_buffer);
+//  if (!is_success) {
+//    return false;
+//  }
+//  num_voxels_ = vg_ptr_->pointsToVoxels(voxels_, coordinates_, num_points_per_voxel_);
+//  if (num_voxels_ == 0) {
+//    return false;
+//  }
+
+//  const auto voxels_size =
+//    num_voxels_ * config_.max_point_in_voxel_size_ * config_.point_feature_size_;
+//  const auto coordinates_size = num_voxels_ * config_.point_dim_size_;
+//  // memcpy from host to device (not copy empty voxels)
+//  CHECK_CUDA_ERROR(cudaMemcpyAsync(
+//    voxels_d_.get(), voxels_.data(), voxels_size * sizeof(float), cudaMemcpyHostToDevice));
+//  CHECK_CUDA_ERROR(cudaMemcpyAsync(
+//    coordinates_d_.get(), coordinates_.data(), coordinates_size * sizeof(int),
+//    cudaMemcpyHostToDevice));
+//  CHECK_CUDA_ERROR(cudaMemcpyAsync(
+//    num_points_per_voxel_d_.get(), num_points_per_voxel_.data(), num_voxels_ * sizeof(float),
+//    cudaMemcpyHostToDevice));
+//  CHECK_CUDA_ERROR(cudaStreamSynchronize(stream_));
+
+//  CHECK_CUDA_ERROR(generateFeatures_launch(
+//    voxels_d_.get(), num_points_per_voxel_d_.get(), coordinates_d_.get(), num_voxels_,
+//    config_.max_voxel_size_, config_.voxel_size_x_, config_.voxel_size_y_, config_.voxel_size_z_,
+//    config_.range_min_x_, config_.range_min_y_, config_.range_min_z_, encoder_in_features_d_.get(),
+//    stream_));
+
+//  return true;
+//}
+
/// Runs both TensorRT networks plus the scatter kernel, all on stream_.
/// @throws std::runtime_error when either execution context is missing.
void CenterPointTRT::inference()
{
  if (!encoder_trt_ptr_->context_ || !head_trt_ptr_->context_) {
    throw std::runtime_error("Failed to create tensorrt context.");
  }

  // pillar encoder network: voxel features -> per-pillar features
  std::vector<void *> encoder_buffers{encoder_in_features_d_.get(), pillar_features_d_.get()};
  encoder_trt_ptr_->context_->enqueueV2(encoder_buffers.data(), stream_, nullptr);

  // scatter: place pillar features back onto the 2D BEV grid
  CHECK_CUDA_ERROR(scatterFeatures_launch(
    pillar_features_d_.get(), coordinates_d_.get(), num_voxels_, config_.max_voxel_size_,
    config_.encoder_out_feature_size_, config_.grid_size_x_, config_.grid_size_y_,
    spatial_features_d_.get(), stream_));

  // head network: buffer order assumed to match the model's binding order --
  // confirm against the ONNX export if the model changes.
  std::vector<void *> head_buffers = {spatial_features_d_.get(), head_out_heatmap_d_.get(),
                                      head_out_offset_d_.get(),  head_out_z_d_.get(),
                                      head_out_dim_d_.get(),     head_out_rot_d_.get(),
                                      head_out_vel_d_.get()};
  head_trt_ptr_->context_->enqueueV2(head_buffers.data(), stream_, nullptr);
}
+
+void CenterPointTRT::postProcess(std::vector<Box3D> & det_boxes3d)
+{
+  CHECK_CUDA_ERROR(post_proc_ptr_->generateDetectedBoxes3D_launch(
+    head_out_heatmap_d_.get(), head_out_offset_d_.get(), head_out_z_d_.get(), head_out_dim_d_.get(),
+    head_out_rot_d_.get(), head_out_vel_d_.get(), det_boxes3d, stream_));
+  if (det_boxes3d.size() == 0) {
+      std::cout<<"No detected boxes"<<std::endl;
+//    RCLCPP_WARN_STREAM(rclcpp::get_logger("lidar_centerpoint"), "No detected boxes.");
+  }
+}
+
+}  // namespace centerpoint

+ 71 - 0
src/detection/detection_lidar_centerpoint/lib/detection_class_remapper.cpp

@@ -0,0 +1,71 @@
+// Copyright 2022 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <lidar_centerpoint/detection_class_remapper.hpp>
+
+namespace centerpoint
+{
+
+void DetectionClassRemapper::setParameters(
+  const std::vector<int64_t> & allow_remapping_by_area_matrix,
+  const std::vector<double> & min_area_matrix, const std::vector<double> & max_area_matrix)
+{
+  // Loads the three (num_labels x num_labels) remapping matrices, which arrive as
+  // flat row-major vectors. All three must describe the same square matrix size.
+  assert(allow_remapping_by_area_matrix.size() == min_area_matrix.size());
+  assert(allow_remapping_by_area_matrix.size() == max_area_matrix.size());
+  assert(
+    std::pow(static_cast<int>(std::sqrt(min_area_matrix.size())), 2) == min_area_matrix.size());
+
+  num_labels_ = static_cast<int>(std::sqrt(min_area_matrix.size()));
+
+  // The flat vectors are row-major, so map then transpose to get correct element order.
+  Eigen::Map<const Eigen::Matrix<int64_t, Eigen::Dynamic, Eigen::Dynamic>>
+    allow_remapping_by_area_matrix_tmp(
+      allow_remapping_by_area_matrix.data(), num_labels_, num_labels_);
+  allow_remapping_by_area_matrix_ = allow_remapping_by_area_matrix_tmp.transpose()
+                                      .cast<bool>();  // Eigen is column major by default
+
+  Eigen::Map<const Eigen::MatrixXd> min_area_matrix_tmp(
+    min_area_matrix.data(), num_labels_, num_labels_);
+  min_area_matrix_ = min_area_matrix_tmp.transpose();  // Eigen is column major by default
+
+  Eigen::Map<const Eigen::MatrixXd> max_area_matrix_tmp(
+    max_area_matrix.data(), num_labels_, num_labels_);
+  max_area_matrix_ = max_area_matrix_tmp.transpose();  // Eigen is column major by default
+
+  // Replace non-finite thresholds (e.g. NaN/inf from config) so comparisons in
+  // mapClasses() are always well-defined.
+  min_area_matrix_ = min_area_matrix_.unaryExpr(
+    [](double v) { return std::isfinite(v) ? v : std::numeric_limits<double>::max(); });
+  max_area_matrix_ = max_area_matrix_.unaryExpr(
+    [](double v) { return std::isfinite(v) ? v : std::numeric_limits<double>::max(); });
+}
+
+// Re-assigns object class labels based on the bird's-eye-view footprint area:
+// a label is replaced by the first candidate class whose remap is allowed and
+// whose [min, max] area range contains the object's area.
+void DetectionClassRemapper::mapClasses(autoware_auto_perception_msgs::msg::DetectedObjects & msg)
+{
+  for (auto & object : msg.objects) {
+    // BEV footprint area (length x width) in m^2.
+    const float bev_area = object.shape.dimensions.x * object.shape.dimensions.y;
+
+    for (auto & classification : object.classification) {
+      auto & label = classification.label;
+
+      for (int i = 0; i < num_labels_; ++i) {
+        if (
+          allow_remapping_by_area_matrix_(label, i) && bev_area >= min_area_matrix_(label, i) &&
+          bev_area <= max_area_matrix_(label, i)) {
+          label = i;
+          // First matching candidate wins; stop scanning for this classification.
+          break;
+        }
+      }
+    }
+  }
+}
+
+}  // namespace centerpoint

+ 74 - 0
src/detection/detection_lidar_centerpoint/lib/network/network_trt.cpp

@@ -0,0 +1,74 @@
+// Copyright 2021 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "lidar_centerpoint/network/network_trt.hpp"
+
+namespace centerpoint
+{
+bool VoxelEncoderTRT::setProfile(
+  nvinfer1::IBuilder & builder, nvinfer1::INetworkDefinition & network,
+  nvinfer1::IBuilderConfig & config)
+{
+  // Register a static optimization profile (min == opt == max) for the pillar
+  // encoder's single input and single output tensor.
+  auto profile = builder.createOptimizationProfile();
+
+  const auto set_static_dims = [&profile](const char * tensor_name, const auto & dims) {
+    profile->setDimensions(tensor_name, nvinfer1::OptProfileSelector::kMIN, dims);
+    profile->setDimensions(tensor_name, nvinfer1::OptProfileSelector::kOPT, dims);
+    profile->setDimensions(tensor_name, nvinfer1::OptProfileSelector::kMAX, dims);
+  };
+
+  // input: (max_voxels, max_points_per_voxel, encoder input features)
+  set_static_dims(
+    network.getInput(0)->getName(),
+    nvinfer1::Dims3(
+      config_.max_voxel_size_, config_.max_point_in_voxel_size_,
+      config_.encoder_in_feature_size_));
+
+  // output: (max_voxels, encoder output features)
+  set_static_dims(
+    network.getOutput(0)->getName(),
+    nvinfer1::Dims2(config_.max_voxel_size_, config_.encoder_out_feature_size_));
+
+  config.addOptimizationProfile(profile);
+
+  return true;
+}
+
+// Stores the per-head output channel counts (heatmap/offset/z/dim/rot/vel);
+// the base class keeps the shared CenterPoint configuration.
+HeadTRT::HeadTRT(
+  const std::vector<std::size_t> & out_channel_sizes, const CenterPointConfig & config)
+: TensorRTWrapper(config), out_channel_sizes_(out_channel_sizes)
+{
+}
+
+// Registers a static optimization profile (min == opt == max) for the head
+// network: one 4-D input (the scattered BEV feature map) and one 4-D output
+// per entry of out_channel_sizes_.
+bool HeadTRT::setProfile(
+  nvinfer1::IBuilder & builder, nvinfer1::INetworkDefinition & network,
+  nvinfer1::IBuilderConfig & config)
+{
+  auto profile = builder.createOptimizationProfile();
+  auto in_name = network.getInput(0)->getName();
+  // input: (batch, encoder output features, grid_y, grid_x)
+  auto in_dims = nvinfer1::Dims4(
+    config_.batch_size_, config_.encoder_out_feature_size_, config_.grid_size_y_,
+    config_.grid_size_x_);
+  profile->setDimensions(in_name, nvinfer1::OptProfileSelector::kMIN, in_dims);
+  profile->setDimensions(in_name, nvinfer1::OptProfileSelector::kOPT, in_dims);
+  profile->setDimensions(in_name, nvinfer1::OptProfileSelector::kMAX, in_dims);
+
+  // one output per head, each on the downsampled grid
+  for (std::size_t ci = 0; ci < out_channel_sizes_.size(); ci++) {
+    auto out_name = network.getOutput(ci)->getName();
+    auto out_dims = nvinfer1::Dims4(
+      config_.batch_size_, out_channel_sizes_[ci], config_.down_grid_size_y_,
+      config_.down_grid_size_x_);
+    profile->setDimensions(out_name, nvinfer1::OptProfileSelector::kMIN, out_dims);
+    profile->setDimensions(out_name, nvinfer1::OptProfileSelector::kOPT, out_dims);
+    profile->setDimensions(out_name, nvinfer1::OptProfileSelector::kMAX, out_dims);
+  }
+  config.addOptimizationProfile(profile);
+
+  return true;
+}
+
+}  // namespace centerpoint

+ 68 - 0
src/detection/detection_lidar_centerpoint/lib/network/scatter_kernel.cu

@@ -0,0 +1,68 @@
+// Copyright 2022 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "lidar_centerpoint/network/scatter_kernel.hpp"
+
+#include <lidar_centerpoint/utils.hpp>
+
+namespace
+{
+const std::size_t THREADS_PER_BLOCK = 32;
+}  // namespace
+
+namespace centerpoint
+{
+// Scatters per-pillar encoder features back onto the dense BEV grid.
+// One thread handles one (pillar, feature) pair.
+__global__ void scatterFeatures_kernel(
+  const float * pillar_features, const int * coords, const std::size_t num_pillars,
+  const std::size_t pillar_feature_size, const std::size_t grid_size_x,
+  const std::size_t grid_size_y, float * scattered_features)
+{
+  // pillar_features: shape of (max_num_pillars, pillar_feature_size)
+  // coords: shape of (max_num_pillars, 3)
+  // scattered_features: shape of (pillar_feature_size, grid_size_y, grid_size_x)
+  //   (the write index below is feature-major, not pillar-major)
+  const auto pillar_i = blockIdx.x * THREADS_PER_BLOCK + threadIdx.x;
+  const auto feature_i = blockIdx.y * THREADS_PER_BLOCK + threadIdx.y;
+
+  if (pillar_i >= num_pillars || feature_i >= pillar_feature_size) {
+    return;
+  }
+
+  // coords store (z, y, x) grid indices per pillar, so coord.x is the z index
+  // and coord.z is the x index.
+  const int3 coord = ((int3 *)coords)[pillar_i];  // zyx
+  if (coord.x < 0) {
+    // negative index marks an unused pillar slot — presumably set during
+    // voxelization; TODO confirm against the voxel generator
+    return;
+  }
+
+  const auto feature = pillar_features[pillar_feature_size * pillar_i + feature_i];
+  scattered_features[grid_size_y * grid_size_x * feature_i + grid_size_x * coord.y + coord.z] =
+    feature;
+}
+
+// cspell: ignore divup
+cudaError_t scatterFeatures_launch(
+  const float * pillar_features, const int * coords, const std::size_t num_pillars,
+  const std::size_t max_voxel_size, const std::size_t encoder_out_feature_size,
+  const std::size_t grid_size_x, const std::size_t grid_size_y, float * scattered_features,
+  cudaStream_t stream)
+{
+  dim3 blocks(
+    divup(max_voxel_size, THREADS_PER_BLOCK), divup(encoder_out_feature_size, THREADS_PER_BLOCK));
+  dim3 threads(THREADS_PER_BLOCK, THREADS_PER_BLOCK);
+  scatterFeatures_kernel<<<blocks, threads, 0, stream>>>(
+    pillar_features, coords, num_pillars, encoder_out_feature_size, grid_size_x, grid_size_y,
+    scattered_features);
+
+  return cudaGetLastError();
+}
+
+}  // namespace centerpoint

+ 161 - 0
src/detection/detection_lidar_centerpoint/lib/network/tensorrt_wrapper.cpp

@@ -0,0 +1,161 @@
+// Copyright 2021 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "lidar_centerpoint/network/tensorrt_wrapper.hpp"
+
+#include <NvOnnxParser.h>
+
+#include <fstream>
+#include <memory>
+#include <string>
+
+namespace centerpoint
+{
+// Stores the model configuration only; TensorRT objects are created in init().
+TensorRTWrapper::TensorRTWrapper(const CenterPointConfig & config) : config_(config) {}
+
+TensorRTWrapper::~TensorRTWrapper()
+{
+  // Release TensorRT objects explicitly, destroying the execution context first.
+  context_.reset();
+  runtime_.reset();
+  plan_.reset();
+  engine_.reset();
+}
+
+// Creates the TensorRT runtime, then either deserializes a cached engine from
+// engine_path (if the file exists) or builds one from the ONNX model, and
+// finally creates the execution context. Returns true only if every step succeeded.
+bool TensorRTWrapper::init(
+  const std::string & onnx_path, const std::string & engine_path, const std::string & precision)
+{
+  runtime_ =
+    tensorrt_common::TrtUniquePtr<nvinfer1::IRuntime>(nvinfer1::createInferRuntime(logger_));
+  if (!runtime_) {
+    std::cout << "Fail to create runtime" << std::endl;
+    return false;
+  }
+
+  bool success;
+  // The ifstream is only used to probe whether a cached engine file exists.
+  std::ifstream engine_file(engine_path);
+  if (engine_file.is_open()) {
+    success = loadEngine(engine_path);
+  } else {
+    success = parseONNX(onnx_path, engine_path, precision);
+  }
+  success &= createContext();
+
+  return success;
+}
+
+bool TensorRTWrapper::createContext()
+{
+  // An execution context can only be created once an engine exists.
+  if (!engine_) {
+    std::cout << "Fail to create context: Engine isn't created" << std::endl;
+    return false;
+  }
+
+  context_ =
+    tensorrt_common::TrtUniquePtr<nvinfer1::IExecutionContext>(engine_->createExecutionContext());
+  if (context_) {
+    return true;
+  }
+
+  std::cout << "Fail to create context" << std::endl;
+  return false;
+}
+
+// Builds a TensorRT engine from an ONNX model and caches the serialized plan
+// at engine_path. Returns false on any builder/parser/serialization failure.
+bool TensorRTWrapper::parseONNX(
+  const std::string & onnx_path, const std::string & engine_path, const std::string & precision,
+  const size_t workspace_size)
+{
+  auto builder =
+    tensorrt_common::TrtUniquePtr<nvinfer1::IBuilder>(nvinfer1::createInferBuilder(logger_));
+  if (!builder) {
+    std::cout << "Fail to create builder" << std::endl;
+    return false;
+  }
+
+  auto config =
+    tensorrt_common::TrtUniquePtr<nvinfer1::IBuilderConfig>(builder->createBuilderConfig());
+  if (!config) {
+    std::cout << "Fail to create config" << std::endl;
+    return false;
+  }
+  // setMaxWorkspaceSize() was deprecated in favor of setMemoryPoolLimit() in TensorRT 8.4.
+  // Fix: the guard previously used the undefined macro NV_TENSOR_PATCH (typo for
+  // NV_TENSORRT_PATCH), which the preprocessor silently evaluated as 0.
+#if (NV_TENSORRT_MAJOR * 1000) + (NV_TENSORRT_MINOR * 100) + NV_TENSORRT_PATCH >= 8400
+  config->setMemoryPoolLimit(nvinfer1::MemoryPoolType::kWORKSPACE, workspace_size);
+#else
+  config->setMaxWorkspaceSize(workspace_size);
+#endif
+  if (precision == "fp16") {
+    if (builder->platformHasFastFp16()) {
+      std::cout << "use TensorRT FP16 Inference" << std::endl;
+      config->setFlag(nvinfer1::BuilderFlag::kFP16);
+    } else {
+      std::cout << "TensorRT FP16 Inference isn't supported in this environment" << std::endl;
+    }
+  }
+
+  const auto flag =
+    1U << static_cast<uint32_t>(nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
+  auto network =
+    tensorrt_common::TrtUniquePtr<nvinfer1::INetworkDefinition>(builder->createNetworkV2(flag));
+  if (!network) {
+    std::cout << "Fail to create network" << std::endl;
+    return false;
+  }
+
+  auto parser = tensorrt_common::TrtUniquePtr<nvonnxparser::IParser>(
+    nvonnxparser::createParser(*network, logger_));
+  // Fix: the parse result was previously ignored, so a corrupt/missing ONNX file
+  // only surfaced later as an opaque build failure.
+  if (!parser->parseFromFile(
+        onnx_path.c_str(), static_cast<int>(nvinfer1::ILogger::Severity::kERROR))) {
+    std::cout << "Fail to parse onnx file: " << onnx_path << std::endl;
+    return false;
+  }
+
+  if (!setProfile(*builder, *network, *config)) {
+    std::cout << "Fail to set profile" << std::endl;
+    return false;
+  }
+
+  std::cout << "Applying optimizations and building TRT CUDA engine (" << onnx_path << ") ..."
+            << std::endl;
+  plan_ = tensorrt_common::TrtUniquePtr<nvinfer1::IHostMemory>(
+    builder->buildSerializedNetwork(*network, *config));
+  if (!plan_) {
+    std::cout << "Fail to create serialized network" << std::endl;
+    return false;
+  }
+  engine_ = tensorrt_common::TrtUniquePtr<nvinfer1::ICudaEngine>(
+    runtime_->deserializeCudaEngine(plan_->data(), plan_->size()));
+  if (!engine_) {
+    std::cout << "Fail to create engine" << std::endl;
+    return false;
+  }
+
+  return saveEngine(engine_path);
+}
+
+// Serializes the built plan to disk so later runs can skip the ONNX build.
+// Fix: the original unconditionally returned true, hiding open/write failures
+// (e.g. unwritable directory), and would dereference a null plan_.
+bool TensorRTWrapper::saveEngine(const std::string & engine_path)
+{
+  if (!plan_) {
+    std::cout << "Fail to save engine: no serialized plan" << std::endl;
+    return false;
+  }
+  std::cout << "Writing to " << engine_path << std::endl;
+  std::ofstream file(engine_path, std::ios::out | std::ios::binary);
+  if (!file) {
+    std::cout << "Fail to open " << engine_path << " for writing" << std::endl;
+    return false;
+  }
+  file.write(reinterpret_cast<const char *>(plan_->data()), plan_->size());
+  return file.good();
+}
+
+// Deserializes a cached engine from disk into engine_.
+// Fix: the original unconditionally returned true even when the file could not
+// be opened or deserialization failed, and opened the binary file in text mode.
+bool TensorRTWrapper::loadEngine(const std::string & engine_path)
+{
+  std::ifstream engine_file(engine_path, std::ios::in | std::ios::binary);
+  if (!engine_file) {
+    std::cout << "Fail to open " << engine_path << std::endl;
+    return false;
+  }
+  std::stringstream engine_buffer;
+  engine_buffer << engine_file.rdbuf();
+  std::string engine_str = engine_buffer.str();
+  engine_ = tensorrt_common::TrtUniquePtr<nvinfer1::ICudaEngine>(runtime_->deserializeCudaEngine(
+    reinterpret_cast<const void *>(engine_str.data()), engine_str.size()));
+  if (!engine_) {
+    std::cout << "Fail to deserialize engine from " << engine_path << std::endl;
+    return false;
+  }
+  std::cout << "Loaded engine from " << engine_path << std::endl;
+  return true;
+}
+
+}  // namespace centerpoint

+ 145 - 0
src/detection/detection_lidar_centerpoint/lib/postprocess/circle_nms_kernel.cu

@@ -0,0 +1,145 @@
+// Copyright 2022 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Modified from
+// https://github.com/open-mmlab/OpenPCDet/blob/master/pcdet/ops/iou3d_nms/src/iou3d_nms_kernel.cu
+
+/*
+3D IoU Calculation and Rotated NMS(modified from 2D NMS written by others)
+Written by Shaoshuai Shi
+All Rights Reserved 2019-2020.
+*/
+
+#include "lidar_centerpoint/postprocess/circle_nms_kernel.hpp"
+
+#include <lidar_centerpoint/cuda_utils.hpp>
+#include <lidar_centerpoint/utils.hpp>
+
+#include <thrust/host_vector.h>
+
+namespace
+{
+const std::size_t THREADS_PER_BLOCK_NMS = 16;
+}  // namespace
+
+namespace centerpoint
+{
+
+// Squared 2D (x, y) center distance between two boxes.
+// Fix: replaced powf(v, 2) with plain multiplication — powf is a general
+// transcendental and far slower than a multiply in this hot NMS kernel.
+__device__ inline float dist2dPow(const Box3D * a, const Box3D * b)
+{
+  const float dx = a->x - b->x;
+  const float dy = a->y - b->y;
+  return dx * dx + dy * dy;
+}
+
+// cspell: ignore divup
+__global__ void circleNMS_Kernel(
+  const Box3D * boxes, const std::size_t num_boxes3d, const std::size_t col_blocks,
+  const float dist2d_pow_threshold, std::uint64_t * mask)
+{
+  // params: boxes (N,)
+  // params: mask (N, divup(N/THREADS_PER_BLOCK_NMS))
+
+  const auto row_start = blockIdx.y;
+  const auto col_start = blockIdx.x;
+
+  if (row_start > col_start) return;
+
+  const std::size_t row_size =
+    fminf(num_boxes3d - row_start * THREADS_PER_BLOCK_NMS, THREADS_PER_BLOCK_NMS);
+  const std::size_t col_size =
+    fminf(num_boxes3d - col_start * THREADS_PER_BLOCK_NMS, THREADS_PER_BLOCK_NMS);
+
+  __shared__ Box3D block_boxes[THREADS_PER_BLOCK_NMS];
+
+  if (threadIdx.x < col_size) {
+    block_boxes[threadIdx.x] = boxes[THREADS_PER_BLOCK_NMS * col_start + threadIdx.x];
+  }
+  __syncthreads();
+
+  if (threadIdx.x < row_size) {
+    const std::size_t cur_box_idx = THREADS_PER_BLOCK_NMS * row_start + threadIdx.x;
+    const Box3D * cur_box = boxes + cur_box_idx;
+
+    std::uint64_t t = 0;
+    std::size_t start = 0;
+    if (row_start == col_start) {
+      start = threadIdx.x + 1;
+    }
+    for (std::size_t i = start; i < col_size; i++) {
+      if (dist2dPow(cur_box, block_boxes + i) < dist2d_pow_threshold) {
+        t |= 1ULL << i;
+      }
+    }
+    mask[cur_box_idx * col_blocks + col_start] = t;
+  }
+}
+
+// Host-side launcher for circleNMS_Kernel over a (col_blocks x col_blocks)
+// grid of 16-box tiles. The distance threshold is squared once here so the
+// kernel compares squared distances without a sqrt.
+cudaError_t circleNMS_launch(
+  const thrust::device_vector<Box3D> & boxes3d, const std::size_t num_boxes3d,
+  std::size_t col_blocks, const float distance_threshold,
+  thrust::device_vector<std::uint64_t> & mask, cudaStream_t stream)
+{
+  const float dist2d_pow_thres = powf(distance_threshold, 2);
+
+  dim3 blocks(col_blocks, col_blocks);
+  dim3 threads(THREADS_PER_BLOCK_NMS);
+  circleNMS_Kernel<<<blocks, threads, 0, stream>>>(
+    thrust::raw_pointer_cast(boxes3d.data()), num_boxes3d, col_blocks, dist2d_pow_thres,
+    thrust::raw_pointer_cast(mask.data()));
+
+  return cudaGetLastError();
+}
+
+// Greedy circle NMS: boxes3d is assumed sorted by descending score (see the
+// caller in postprocess_kernel.cu); a box is kept unless an earlier kept box
+// lies within distance_threshold. Fills keep_mask and returns the kept count.
+std::size_t circleNMS(
+  thrust::device_vector<Box3D> & boxes3d, const float distance_threshold,
+  thrust::device_vector<bool> & keep_mask, cudaStream_t stream)
+{
+  const auto num_boxes3d = boxes3d.size();
+  const auto col_blocks = divup(num_boxes3d, THREADS_PER_BLOCK_NMS);
+  thrust::device_vector<std::uint64_t> mask_d(num_boxes3d * col_blocks);
+
+  CHECK_CUDA_ERROR(
+    circleNMS_launch(boxes3d, num_boxes3d, col_blocks, distance_threshold, mask_d, stream));
+
+  // memcpy device to host
+  thrust::host_vector<std::uint64_t> mask_h(mask_d.size());
+  thrust::copy(mask_d.begin(), mask_d.end(), mask_h.begin());
+  CHECK_CUDA_ERROR(cudaStreamSynchronize(stream));
+
+  // generate keep_mask
+  // remv_h accumulates, per 64-box block, the bits of boxes suppressed so far
+  // (zero-initialized by the vector constructor).
+  std::vector<std::uint64_t> remv_h(col_blocks);
+  thrust::host_vector<bool> keep_mask_h(keep_mask.size());
+  std::size_t num_to_keep = 0;
+  for (std::size_t i = 0; i < num_boxes3d; i++) {
+    auto nblock = i / THREADS_PER_BLOCK_NMS;
+    auto inblock = i % THREADS_PER_BLOCK_NMS;
+
+    if (!(remv_h[nblock] & (1ULL << inblock))) {
+      // Box i survives; mark every box it suppresses.
+      keep_mask_h[i] = true;
+      num_to_keep++;
+      std::uint64_t * p = &mask_h[0] + i * col_blocks;
+      for (std::size_t j = nblock; j < col_blocks; j++) {
+        remv_h[j] |= p[j];
+      }
+    } else {
+      keep_mask_h[i] = false;
+    }
+  }
+
+  // memcpy host to device
+  keep_mask = keep_mask_h;
+
+  return num_to_keep;
+}
+
+}  // namespace centerpoint

+ 104 - 0
src/detection/detection_lidar_centerpoint/lib/postprocess/non_maximum_suppression.cpp

@@ -0,0 +1,104 @@
+// Copyright 2022 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "lidar_centerpoint/postprocess/non_maximum_suppression.hpp"
+
+#include "perception_utils/geometry.hpp"
+#include "perception_utils/perception_utils.hpp"
+//#include "tier4_autoware_utils/tier4_autoware_utils.hpp"
+
+namespace centerpoint
+{
+
+// Validates and stores the NMS parameters and precomputes the boolean
+// per-class mask of target classes.
+void NonMaximumSuppression::setParameters(const NMSParams & params)
+{
+  assert(params.search_distance_2d_ >= 0.0);
+  assert(params.iou_threshold_ >= 0.0 && params.iou_threshold_ <= 1.0);
+
+  params_ = params;
+  target_class_mask_ = classNamesToBooleanMask(params.target_class_names_);
+}
+
+// A label is an NMS target iff it is within range of the mask and flagged true;
+// out-of-range labels are never targets.
+bool NonMaximumSuppression::isTargetLabel(const uint8_t label)
+{
+  return label < target_class_mask_.size() && target_class_mask_.at(label);
+}
+
+// A pair is considered for suppression when both objects carry target labels,
+// or when the two objects are within search_distance_2d_ of each other.
+bool NonMaximumSuppression::isTargetPairObject(
+  const DetectedObject & object1, const DetectedObject & object2)
+{
+  const auto label1 = perception_utils::getHighestProbLabel(object1.classification);
+  const auto label2 = perception_utils::getHighestProbLabel(object2.classification);
+
+  if (isTargetLabel(label1) && isTargetLabel(label2)) {
+    return true;
+  }
+
+  // NOTE(review): calcSquaredDistance2d comes from tier4_autoware_utils, whose
+  // include at the top of this file is commented out — confirm it is still
+  // pulled in transitively (e.g. via perception_utils).
+  const auto search_sqr_dist_2d = params_.search_distance_2d_ * params_.search_distance_2d_;
+  const auto sqr_dist_2d = tier4_autoware_utils::calcSquaredDistance2d(
+    perception_utils::getPose(object1), perception_utils::getPose(object2));
+  return sqr_dist_2d <= search_sqr_dist_2d;
+}
+
+// Builds a lower-triangular matrix of BEV IoU values between candidate pairs.
+// Entry (target, source) is only filled for source < target, so each object is
+// compared against the objects before it in the input order.
+Eigen::MatrixXd NonMaximumSuppression::generateIoUMatrix(
+  const std::vector<DetectedObject> & input_objects)
+{
+  // NOTE: row = target objects to be suppressed, col = source objects to be compared
+  Eigen::MatrixXd triangular_matrix =
+    Eigen::MatrixXd::Zero(input_objects.size(), input_objects.size());
+  for (std::size_t target_i = 0; target_i < input_objects.size(); ++target_i) {
+    for (std::size_t source_i = 0; source_i < target_i; ++source_i) {
+      const auto & target_obj = input_objects.at(target_i);
+      const auto & source_obj = input_objects.at(source_i);
+      if (!isTargetPairObject(target_obj, source_obj)) {
+        continue;
+      }
+
+      if (params_.nms_type_ == NMS_TYPE::IoU_BEV) {
+        const double iou = perception_utils::get2dIoU(target_obj, source_obj);
+        triangular_matrix(target_i, source_i) = iou;
+        // NOTE: If the target object has any objects with iou > iou_threshold, it
+        // will be suppressed regardless of later results.
+        if (iou > params_.iou_threshold_) {
+          break;
+        }
+      }
+    }
+  }
+
+  return triangular_matrix;
+}
+
+// Suppresses objects whose maximum IoU against any earlier object exceeds the
+// threshold; all other objects pass through in their original order.
+std::vector<DetectedObject> NonMaximumSuppression::apply(
+  const std::vector<DetectedObject> & input_objects)
+{
+  Eigen::MatrixXd iou_matrix = generateIoUMatrix(input_objects);
+
+  std::vector<DetectedObject> output_objects;
+  output_objects.reserve(input_objects.size());
+  for (std::size_t i = 0; i < input_objects.size(); ++i) {
+    // Max IoU of object i against all earlier objects (row i of the triangular matrix).
+    const auto value = iou_matrix.row(i).maxCoeff();
+    if (params_.nms_type_ == NMS_TYPE::IoU_BEV) {
+      if (value <= params_.iou_threshold_) {
+        output_objects.emplace_back(input_objects.at(i));
+      }
+    }
+  }
+
+  return output_objects;
+}
+}  // namespace centerpoint

+ 166 - 0
src/detection/detection_lidar_centerpoint/lib/postprocess/postprocess_kernel.cu

@@ -0,0 +1,166 @@
+// Copyright 2022 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "lidar_centerpoint/postprocess/circle_nms_kernel.hpp"
+
+#include <lidar_centerpoint/postprocess/postprocess_kernel.hpp>
+
+#include <thrust/count.h>
+#include <thrust/device_vector.h>
+#include <thrust/sort.h>
+
+namespace
+{
+const std::size_t THREADS_PER_BLOCK = 32;
+}  // namespace
+
+namespace centerpoint
+{
+
+// Device predicate: true for boxes whose score exceeds the fixed threshold t_.
+// Used with thrust::count_if / copy_if for score filtering.
+struct is_score_greater
+{
+  is_score_greater(float t) : t_(t) {}
+
+  __device__ bool operator()(const Box3D & b) { return b.score > t_; }
+
+private:
+  float t_{0.0};
+};
+
+// Device predicate: identity on the NMS keep-mask, for thrust::copy_if stencils.
+struct is_kept
+{
+  __device__ bool operator()(const bool keep) { return keep; }
+};
+
+// Device comparator ordering boxes by descending score, for thrust::sort.
+struct score_greater
+{
+  __device__ bool operator()(const Box3D & lb, const Box3D & rb) { return lb.score > rb.score; }
+};
+
+// Logistic function, evaluated on device; maps heatmap logits to (0, 1) scores.
+__device__ inline float sigmoid(float x) { return 1.0f / (1.0f + expf(-x)); }
+
+// Decodes the head outputs into one Box3D per downsampled BEV cell.
+// One thread handles one (yi, xi) cell; out-of-grid threads exit early.
+__global__ void generateBoxes3D_kernel(
+  const float * out_heatmap, const float * out_offset, const float * out_z, const float * out_dim,
+  const float * out_rot, const float * out_vel, const float voxel_size_x, const float voxel_size_y,
+  const float range_min_x, const float range_min_y, const std::size_t down_grid_size_x,
+  const std::size_t down_grid_size_y, const std::size_t downsample_factor, const int class_size,
+  const float * yaw_norm_thresholds, Box3D * det_boxes3d)
+{
+  // generate boxes3d from the outputs of the network.
+  // shape of out_*: (N, DOWN_GRID_SIZE_Y, DOWN_GRID_SIZE_X)
+  // heatmap: N = class_size, offset: N = 2, z: N = 1, dim: N = 3, rot: N = 2, vel: N = 2
+  const auto yi = blockIdx.x * THREADS_PER_BLOCK + threadIdx.x;
+  const auto xi = blockIdx.y * THREADS_PER_BLOCK + threadIdx.y;
+  const auto idx = down_grid_size_x * yi + xi;
+  const auto down_grid_size = down_grid_size_y * down_grid_size_x;
+
+  if (yi >= down_grid_size_y || xi >= down_grid_size_x) {
+    return;
+  }
+
+  // argmax over classes on the sigmoid-activated heatmap at this cell
+  int label = -1;
+  float max_score = -1;
+  for (int ci = 0; ci < class_size; ci++) {
+    float score = sigmoid(out_heatmap[down_grid_size * ci + idx]);
+    if (score > max_score) {
+      label = ci;
+      max_score = score;
+    }
+  }
+
+  // center = (cell index + predicted sub-cell offset) scaled back to meters
+  const float offset_x = out_offset[down_grid_size * 0 + idx];
+  const float offset_y = out_offset[down_grid_size * 1 + idx];
+  const float x = voxel_size_x * downsample_factor * (xi + offset_x) + range_min_x;
+  const float y = voxel_size_y * downsample_factor * (yi + offset_y) + range_min_y;
+  const float z = out_z[idx];
+  const float w = out_dim[down_grid_size * 0 + idx];
+  const float l = out_dim[down_grid_size * 1 + idx];
+  const float h = out_dim[down_grid_size * 2 + idx];
+  // heading is predicted as (sin, cos); its norm gauges heading confidence
+  const float yaw_sin = out_rot[down_grid_size * 0 + idx];
+  const float yaw_cos = out_rot[down_grid_size * 1 + idx];
+  const float yaw_norm = sqrtf(yaw_sin * yaw_sin + yaw_cos * yaw_cos);
+  const float vel_x = out_vel[down_grid_size * 0 + idx];
+  const float vel_y = out_vel[down_grid_size * 1 + idx];
+
+  det_boxes3d[idx].label = label;
+  // boxes with an unreliable heading are suppressed by zeroing their score
+  det_boxes3d[idx].score = yaw_norm >= yaw_norm_thresholds[label] ? max_score : 0.f;
+  det_boxes3d[idx].x = x;
+  det_boxes3d[idx].y = y;
+  det_boxes3d[idx].z = z;
+  // dimensions are predicted in log space
+  det_boxes3d[idx].length = expf(l);
+  det_boxes3d[idx].width = expf(w);
+  det_boxes3d[idx].height = expf(h);
+  det_boxes3d[idx].yaw = atan2f(yaw_sin, yaw_cos);
+  det_boxes3d[idx].vel_x = vel_x;
+  det_boxes3d[idx].vel_y = vel_y;
+}
+
+// Pre-allocates device storage: one raw box per downsampled BEV cell, plus the
+// per-class yaw-norm thresholds copied once to the device.
+PostProcessCUDA::PostProcessCUDA(const CenterPointConfig & config) : config_(config)
+{
+  const auto num_raw_boxes3d = config.down_grid_size_y_ * config.down_grid_size_x_;
+  boxes3d_d_ = thrust::device_vector<Box3D>(num_raw_boxes3d);
+  yaw_norm_thresholds_d_ = thrust::device_vector<float>(
+    config_.yaw_norm_thresholds_.begin(), config_.yaw_norm_thresholds_.end());
+}
+
+// cspell: ignore divup
+// cspell: ignore divup
+// Full post-processing pipeline on device: decode boxes for every cell, drop
+// low-score boxes, sort by score, apply circle NMS, then copy survivors to host.
+cudaError_t PostProcessCUDA::generateDetectedBoxes3D_launch(
+  const float * out_heatmap, const float * out_offset, const float * out_z, const float * out_dim,
+  const float * out_rot, const float * out_vel, std::vector<Box3D> & det_boxes3d,
+  cudaStream_t stream)
+{
+  dim3 blocks(
+    divup(config_.down_grid_size_y_, THREADS_PER_BLOCK),
+    divup(config_.down_grid_size_x_, THREADS_PER_BLOCK));
+  dim3 threads(THREADS_PER_BLOCK, THREADS_PER_BLOCK);
+  generateBoxes3D_kernel<<<blocks, threads, 0, stream>>>(
+    out_heatmap, out_offset, out_z, out_dim, out_rot, out_vel, config_.voxel_size_x_,
+    config_.voxel_size_y_, config_.range_min_x_, config_.range_min_y_, config_.down_grid_size_x_,
+    config_.down_grid_size_y_, config_.downsample_factor_, config_.class_size_,
+    thrust::raw_pointer_cast(yaw_norm_thresholds_d_.data()),
+    thrust::raw_pointer_cast(boxes3d_d_.data()));
+
+  // suppress by score
+  const auto num_det_boxes3d = thrust::count_if(
+    thrust::device, boxes3d_d_.begin(), boxes3d_d_.end(),
+    is_score_greater(config_.score_threshold_));
+  if (num_det_boxes3d == 0) {
+    // nothing above threshold; det_boxes3d is left untouched by design
+    return cudaGetLastError();
+  }
+  thrust::device_vector<Box3D> det_boxes3d_d(num_det_boxes3d);
+  thrust::copy_if(
+    thrust::device, boxes3d_d_.begin(), boxes3d_d_.end(), det_boxes3d_d.begin(),
+    is_score_greater(config_.score_threshold_));
+
+  // sort by score (descending), as required by the greedy circle NMS below
+  thrust::sort(det_boxes3d_d.begin(), det_boxes3d_d.end(), score_greater());
+
+  // suppress by NMS
+  thrust::device_vector<bool> final_keep_mask_d(num_det_boxes3d);
+  const auto num_final_det_boxes3d =
+    circleNMS(det_boxes3d_d, config_.circle_nms_dist_threshold_, final_keep_mask_d, stream);
+
+  thrust::device_vector<Box3D> final_det_boxes3d_d(num_final_det_boxes3d);
+  thrust::copy_if(
+    thrust::device, det_boxes3d_d.begin(), det_boxes3d_d.end(), final_keep_mask_d.begin(),
+    final_det_boxes3d_d.begin(), is_kept());
+
+  // memcpy device to host
+  det_boxes3d.resize(num_final_det_boxes3d);
+  thrust::copy(final_det_boxes3d_d.begin(), final_det_boxes3d_d.end(), det_boxes3d.begin());
+
+  return cudaGetLastError();
+}
+
+}  // namespace centerpoint

+ 118 - 0
src/detection/detection_lidar_centerpoint/lib/preprocess/pointcloud_densification.cpp

@@ -0,0 +1,118 @@
+// Copyright 2021 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "lidar_centerpoint/preprocess/pointcloud_densification.hpp"
+
+//#include <pcl_ros/transforms.hpp>
+
+#include <boost/optional.hpp>
+
+//#include <pcl_conversions/pcl_conversions.h>
+#ifdef ROS_DISTRO_GALACTIC
+#include <tf2_eigen/tf2_eigen.h>
+#else
+//#include <tf2_eigen/tf2_eigen.hpp>
+#endif
+
+#include <string>
+#include <utility>
+
+namespace
+{
+//boost::optional<geometry_msgs::msg::Transform> getTransform(
+//  const tf2_ros::Buffer & tf_buffer, const std::string & target_frame_id,
+//  const std::string & source_frame_id, const rclcpp::Time & time)
+//{
+//  try {
+//    geometry_msgs::msg::TransformStamped transform_stamped;
+//    transform_stamped = tf_buffer.lookupTransform(
+//      target_frame_id, source_frame_id, time, rclcpp::Duration::from_seconds(0.5));
+//    return transform_stamped.transform;
+//  } catch (tf2::TransformException & ex) {
+//    RCLCPP_WARN_STREAM(rclcpp::get_logger("lidar_centerpoint"), ex.what());
+//    return boost::none;
+//  }
+//}
+
+//Eigen::Affine3f transformToEigen(const geometry_msgs::msg::Transform & t)
+//{
+//  Eigen::Affine3f a;
+//  a.matrix() = tf2::transformToEigen(t).matrix().cast<float>();
+//  return a;
+//}
+
+}  // namespace
+
+namespace centerpoint
+{
+// Stores the densification parameters (notably the pointcloud cache size).
+PointCloudDensification::PointCloudDensification(const DensificationParam & param) : param_(param)
+{
+}
+
+
+// Pushes a new point cloud into the cache and evicts the oldest entry if the
+// cache exceeds its configured size. Always returns true in this PCL-based
+// variant (the ROS/tf variant below could fail on a missing transform).
+bool PointCloudDensification::enqueuePointCloud(
+   const pcl::PointCloud<pcl::PointXYZI>::Ptr & pc_ptr)
+{
+    enqueue(pc_ptr);
+    dequeue();
+
+    return true;
+}
+
+//bool PointCloudDensification::enqueuePointCloud(
+//  const sensor_msgs::msg::PointCloud2 & pointcloud_msg, const tf2_ros::Buffer & tf_buffer)
+//{
+//  const auto header = pointcloud_msg.header;
+
+//  if (param_.pointcloud_cache_size() > 1) {
+//    auto transform_world2current =
+//      getTransform(tf_buffer, header.frame_id, param_.world_frame_id(), header.stamp);
+//    if (!transform_world2current) {
+//      return false;
+//    }
+//    auto affine_world2current = transformToEigen(transform_world2current.get());
+
+//    enqueue(pointcloud_msg, affine_world2current);
+//  } else {
+//    enqueue(pointcloud_msg, Eigen::Affine3f::Identity());
+//  }
+
+//  dequeue();
+
+//  return true;
+//}
+
+
+// Adds the cloud to the front of the cache (newest first). Unlike the ROS
+// variant below, no world->sensor transform is stored with it.
+void PointCloudDensification::enqueue(const pcl::PointCloud<pcl::PointXYZI>::Ptr & pc_ptr)
+{
+    PointCloudWithTransform pointcloud = {pc_ptr};
+    pointcloud_cache_.push_front(pointcloud);
+}
+//void PointCloudDensification::enqueue(
+//  const sensor_msgs::msg::PointCloud2 & msg, const Eigen::Affine3f & affine_world2current)
+//{
+//  affine_world2current_ = affine_world2current;
+//  current_timestamp_ = rclcpp::Time(msg.header.stamp).seconds();
+//  PointCloudWithTransform pointcloud = {msg, affine_world2current.inverse()};
+//  pointcloud_cache_.push_front(pointcloud);
+//}
+
+// Evicts the oldest cached cloud when the cache grows past its limit. Since
+// enqueue() adds one element at a time, removing a single element is enough.
+void PointCloudDensification::dequeue()
+{
+  if (pointcloud_cache_.size() > param_.pointcloud_cache_size()) {
+    pointcloud_cache_.pop_back();
+  }
+}
+
+}  // namespace centerpoint

+ 160 - 0
src/detection/detection_lidar_centerpoint/lib/preprocess/preprocess_kernel.cu

@@ -0,0 +1,160 @@
+// Copyright 2022 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "lidar_centerpoint/preprocess/preprocess_kernel.hpp"
+
+#include <lidar_centerpoint/utils.hpp>
+
+namespace
+{
+// Compile-time kernel tile parameters; `constexpr` (rather than `const`)
+// makes the compile-time requirement explicit -- these sizes are used as
+// __shared__ array extents in the kernel below and must match the config.
+constexpr std::size_t MAX_POINT_IN_VOXEL_SIZE = 32;  // the same as max_point_in_voxel_size_ in config
+constexpr std::size_t WARPS_PER_BLOCK = 4;  // pillars handled per thread block (one warp each)
+constexpr std::size_t ENCODER_IN_FEATURE_SIZE = 9;  // the same as encoder_in_feature_size_ in config
+}  // namespace
+
+namespace centerpoint
+{
+// Build the per-pillar encoder input features (PointPillars-style).
+// Thread layout: MAX_POINT_IN_VOXEL_SIZE (32) consecutive threads -- one
+// warp -- serve one pillar; each block serves WARPS_PER_BLOCK pillars.
+__global__ void generateFeatures_kernel(
+  const float * voxel_features, const float * voxel_num_points, const int * coords,
+  const std::size_t num_voxels, const float voxel_x, const float voxel_y, const float voxel_z,
+  const float range_min_x, const float range_min_y, const float range_min_z, float * features)
+{
+  // voxel_features (float): (max_voxel_size, max_point_in_voxel_size, point_feature_size)
+  // voxel_num_points (float): (max_voxel_size) -- truncated to int when cached below
+  // coords (int): (max_voxel_size, point_dim_size), ordered (z, y, x)
+  // features (float): max_voxel_size blocks of
+  //   MAX_POINT_IN_VOXEL_SIZE * ENCODER_IN_FEATURE_SIZE floats (flat copy of
+  //   pillarOutSM -- see the output loop at the bottom)
+  int pillar_idx = blockIdx.x * WARPS_PER_BLOCK + threadIdx.x / MAX_POINT_IN_VOXEL_SIZE;
+  int point_idx = threadIdx.x % MAX_POINT_IN_VOXEL_SIZE;
+  int pillar_idx_inBlock = threadIdx.x / MAX_POINT_IN_VOXEL_SIZE;  // max_point_in_voxel_size
+
+  // The grid is sized by capacity (max_voxel_size, see the launch wrapper);
+  // threads belonging to unfilled pillars exit immediately.
+  if (pillar_idx >= num_voxels) return;
+
+  // load src
+  __shared__ float4 pillarSM[WARPS_PER_BLOCK][MAX_POINT_IN_VOXEL_SIZE];
+  __shared__ float3 pillarSumSM[WARPS_PER_BLOCK];
+  __shared__ int3 cordsSM[WARPS_PER_BLOCK];
+  __shared__ int pointsNumSM[WARPS_PER_BLOCK];
+  __shared__ float pillarOutSM[WARPS_PER_BLOCK][MAX_POINT_IN_VOXEL_SIZE][ENCODER_IN_FEATURE_SIZE];
+
+  // First WARPS_PER_BLOCK threads cache per-pillar metadata. The float point
+  // count is implicitly truncated to int here.
+  // NOTE(review): in the last block these loads can index pillars >= num_voxels;
+  // the values are never consumed (those pillars' threads returned above), but
+  // this assumes the buffers are always allocated to max_voxel_size -- confirm.
+  if (threadIdx.x < WARPS_PER_BLOCK) {
+    pointsNumSM[threadIdx.x] = voxel_num_points[blockIdx.x * WARPS_PER_BLOCK + threadIdx.x];
+    cordsSM[threadIdx.x] = ((int3 *)coords)[blockIdx.x * WARPS_PER_BLOCK + threadIdx.x];
+    pillarSumSM[threadIdx.x] = {0, 0, 0};
+  }
+
+  // Each thread caches its point slot as a float4 (assumes point_feature_size == 4).
+  pillarSM[pillar_idx_inBlock][point_idx] =
+    ((float4 *)voxel_features)[pillar_idx * MAX_POINT_IN_VOXEL_SIZE + point_idx];
+  __syncthreads();
+
+  // Accumulate x/y/z over the valid points of the pillar (shared-memory atomics).
+  if (point_idx < pointsNumSM[pillar_idx_inBlock]) {
+    atomicAdd(&(pillarSumSM[pillar_idx_inBlock].x), pillarSM[pillar_idx_inBlock][point_idx].x);
+    atomicAdd(&(pillarSumSM[pillar_idx_inBlock].y), pillarSM[pillar_idx_inBlock][point_idx].y);
+    atomicAdd(&(pillarSumSM[pillar_idx_inBlock].z), pillarSM[pillar_idx_inBlock][point_idx].z);
+  }
+  __syncthreads();
+
+  // feature-mean: `mean` ends up as this point's offset from the pillar
+  // centroid. Computed by every thread; for empty slots validPoints may be 0
+  // (inf/nan), but those lanes write zeros in the else branch below.
+  float3 mean;
+  float validPoints = pointsNumSM[pillar_idx_inBlock];
+  mean.x = pillarSumSM[pillar_idx_inBlock].x / validPoints;
+  mean.y = pillarSumSM[pillar_idx_inBlock].y / validPoints;
+  mean.z = pillarSumSM[pillar_idx_inBlock].z / validPoints;
+
+  mean.x = pillarSM[pillar_idx_inBlock][point_idx].x - mean.x;
+  mean.y = pillarSM[pillar_idx_inBlock][point_idx].y - mean.y;
+  mean.z = pillarSM[pillar_idx_inBlock][point_idx].z - mean.z;
+
+  // calculate offset: world-space center of this pillar's voxel cell.
+  // cordsSM is (z, y, x), hence .z pairs with voxel_x and .x with voxel_z.
+  float x_offset = voxel_x / 2 + cordsSM[pillar_idx_inBlock].z * voxel_x + range_min_x;
+  float y_offset = voxel_y / 2 + cordsSM[pillar_idx_inBlock].y * voxel_y + range_min_y;
+  float z_offset = voxel_z / 2 + cordsSM[pillar_idx_inBlock].x * voxel_z + range_min_z;
+
+  // feature-offset: point position relative to the voxel center.
+  float3 center;
+  center.x = pillarSM[pillar_idx_inBlock][point_idx].x - x_offset;
+  center.y = pillarSM[pillar_idx_inBlock][point_idx].y - y_offset;
+  center.z = pillarSM[pillar_idx_inBlock][point_idx].z - z_offset;
+
+  // store output: 9 channels = raw (x, y, z, w) + centroid offset (3) +
+  // voxel-center offset x/y (2). Invalid point slots are zero-filled.
+  if (point_idx < pointsNumSM[pillar_idx_inBlock]) {
+    pillarOutSM[pillar_idx_inBlock][point_idx][0] = pillarSM[pillar_idx_inBlock][point_idx].x;
+    pillarOutSM[pillar_idx_inBlock][point_idx][1] = pillarSM[pillar_idx_inBlock][point_idx].y;
+    pillarOutSM[pillar_idx_inBlock][point_idx][2] = pillarSM[pillar_idx_inBlock][point_idx].z;
+    pillarOutSM[pillar_idx_inBlock][point_idx][3] = pillarSM[pillar_idx_inBlock][point_idx].w;
+
+    pillarOutSM[pillar_idx_inBlock][point_idx][4] = mean.x;
+    pillarOutSM[pillar_idx_inBlock][point_idx][5] = mean.y;
+    pillarOutSM[pillar_idx_inBlock][point_idx][6] = mean.z;
+
+    pillarOutSM[pillar_idx_inBlock][point_idx][7] = center.x;
+    pillarOutSM[pillar_idx_inBlock][point_idx][8] = center.y;
+
+  } else {
+    pillarOutSM[pillar_idx_inBlock][point_idx][0] = 0;
+    pillarOutSM[pillar_idx_inBlock][point_idx][1] = 0;
+    pillarOutSM[pillar_idx_inBlock][point_idx][2] = 0;
+    pillarOutSM[pillar_idx_inBlock][point_idx][3] = 0;
+
+    pillarOutSM[pillar_idx_inBlock][point_idx][4] = 0;
+    pillarOutSM[pillar_idx_inBlock][point_idx][5] = 0;
+    pillarOutSM[pillar_idx_inBlock][point_idx][6] = 0;
+
+    pillarOutSM[pillar_idx_inBlock][point_idx][7] = 0;
+    pillarOutSM[pillar_idx_inBlock][point_idx][8] = 0;
+  }
+
+  __syncthreads();
+
+  // Flat copy of this pillar's shared output buffer into global memory: the
+  // intra-pillar linear index (i * MAX_POINT_IN_VOXEL_SIZE + point_idx) is
+  // identical on both sides, so the global layout mirrors pillarOutSM; each
+  // thread moves ENCODER_IN_FEATURE_SIZE strided elements.
+  for (int i = 0; i < ENCODER_IN_FEATURE_SIZE; i++) {
+    int outputSMId = pillar_idx_inBlock * MAX_POINT_IN_VOXEL_SIZE * ENCODER_IN_FEATURE_SIZE +
+                     i * MAX_POINT_IN_VOXEL_SIZE + point_idx;
+    int outputId = pillar_idx * MAX_POINT_IN_VOXEL_SIZE * ENCODER_IN_FEATURE_SIZE +
+                   i * MAX_POINT_IN_VOXEL_SIZE + point_idx;
+    features[outputId] = ((float *)pillarOutSM)[outputSMId];
+  }
+}
+
+// cspell: ignore divup
+cudaError_t generateFeatures_launch(
+  const float * voxel_features, const float * voxel_num_points, const int * coords,
+  const std::size_t num_voxels, const std::size_t max_voxel_size, const float voxel_size_x,
+  const float voxel_size_y, const float voxel_size_z, const float range_min_x,
+  const float range_min_y, const float range_min_z, float * features, cudaStream_t stream)
+{
+  dim3 blocks(divup(max_voxel_size, WARPS_PER_BLOCK));
+  dim3 threads(WARPS_PER_BLOCK * MAX_POINT_IN_VOXEL_SIZE);
+  generateFeatures_kernel<<<blocks, threads, 0, stream>>>(
+    voxel_features, voxel_num_points, coords, num_voxels, voxel_size_x, voxel_size_y, voxel_size_z,
+    range_min_x, range_min_y, range_min_z, features);
+
+  return cudaGetLastError();
+}
+
+}  // namespace centerpoint

+ 224 - 0
src/detection/detection_lidar_centerpoint/lib/preprocess/voxel_generator.cpp

@@ -0,0 +1,224 @@
+// Copyright 2021 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "lidar_centerpoint/preprocess/voxel_generator.hpp"
+
+//#include <sensor_msgs/point_cloud2_iterator.hpp>
+
+#include <memory>
+
+#include <Eigen/Eigen>
+
+namespace centerpoint
+{
+// Cache geometry constants from the config for fast per-point voxelization:
+//  - range_[0..2] / range_[3..5]: min / max detection range per axis (x, y, z)
+//  - grid_size_: voxel-grid extent per axis
+//  - recip_voxel_size_: 1 / voxel edge length, so voxelization multiplies
+//    instead of dividing per point
+VoxelGeneratorTemplate::VoxelGeneratorTemplate(
+  const DensificationParam & param, const CenterPointConfig & config)
+: config_(config)
+{
+  pd_ptr_ = std::make_unique<PointCloudDensification>(param);
+  range_[0] = config.range_min_x_;
+  range_[1] = config.range_min_y_;
+  range_[2] = config.range_min_z_;
+  range_[3] = config.range_max_x_;
+  range_[4] = config.range_max_y_;
+  range_[5] = config.range_max_z_;
+  grid_size_[0] = config.grid_size_x_;
+  grid_size_[1] = config.grid_size_y_;
+  grid_size_[2] = config.grid_size_z_;
+  recip_voxel_size_[0] = 1 / config.voxel_size_x_;
+  recip_voxel_size_[1] = 1 / config.voxel_size_y_;
+  recip_voxel_size_[2] = 1 / config.voxel_size_z_;
+}
+
+// Forward the cloud to the densification cache; returns its success flag.
+bool VoxelGeneratorTemplate::enqueuePointCloud(
+  const pcl::PointCloud<pcl::PointXYZI>::Ptr pc_ptr)
+{
+    return pd_ptr_->enqueuePointCloud(pc_ptr);
+}
+
+//bool VoxelGeneratorTemplate::enqueuePointCloud(
+//  const sensor_msgs::msg::PointCloud2 & input_pointcloud_msg, const tf2_ros::Buffer & tf_buffer)
+//{
+//  return pd_ptr_->enqueuePointCloud(input_pointcloud_msg, tf_buffer);
+//}
+
+
+// Convert every cached point cloud into flattened pillar/voxel tensors.
+//
+// Output layout (caller-allocated, zero-initialized):
+//   voxels               (max_voxel_size * max_point_in_voxel_size * point_feature_size)
+//   coordinates          (max_voxel_size * point_dim_size), stored as (z, y, x)
+//   num_points_per_voxel (max_voxel_size)
+// Returns the number of voxels actually filled.
+//
+// Changes vs. the original: removed unused locals (Eigen vectors, outer-scope
+// loop variables) and switched to a range-for over points instead of indexed
+// access via .at(); the voxelization logic is unchanged.
+std::size_t VoxelGenerator::pointsToVoxels(
+  std::vector<float> & voxels, std::vector<int> & coordinates,
+  std::vector<float> & num_points_per_voxel)
+{
+  // Dense grid-cell index -> allocated voxel slot; -1 marks an empty cell.
+  const std::size_t grid_size = config_.grid_size_z_ * config_.grid_size_y_ * config_.grid_size_x_;
+  std::vector<int> coord_to_voxel_idx(grid_size, -1);
+
+  std::size_t voxel_cnt = 0;  // @return
+  std::vector<float> point(config_.point_feature_size_);
+  std::vector<float> coord_zyx(config_.point_dim_size_);
+
+  for (auto pc_cache_iter = pd_ptr_->getPointCloudCacheIter(); !pd_ptr_->isCacheEnd(pc_cache_iter);
+       pc_cache_iter++) {
+    const pcl::PointCloud<pcl::PointXYZI>::Ptr & pc_ptr = pc_cache_iter->pc_ptr;
+
+    for (const auto & src : pc_ptr->points) {
+      point[0] = src.x;
+      point[1] = src.y;
+      point[2] = src.z;
+      // Time-lag feature: always 0 here because this overload does not
+      // densify across timestamps (compare the commented-out ROS variant).
+      point[3] = 0;
+
+      // Quantize each dimension to a grid coordinate; skip the point if any
+      // dimension falls outside the configured range.
+      bool out_of_range = false;
+      for (std::size_t di = 0; di < config_.point_dim_size_; di++) {
+        const int c = static_cast<int>((point[di] - range_[di]) * recip_voxel_size_[di]);
+        if (c < 0 || c >= grid_size_[di]) {
+          out_of_range = true;
+          break;
+        }
+        coord_zyx[config_.point_dim_size_ - di - 1] = c;  // reversed to (z, y, x)
+      }
+      if (out_of_range) {
+        continue;
+      }
+
+      const int coord_idx = coord_zyx[0] * config_.grid_size_y_ * config_.grid_size_x_ +
+                            coord_zyx[1] * config_.grid_size_x_ + coord_zyx[2];
+      int voxel_idx = coord_to_voxel_idx[coord_idx];
+      if (voxel_idx == -1) {
+        // First point in this cell: allocate a new voxel slot unless the
+        // voxel budget is exhausted (then the point is dropped).
+        if (voxel_cnt >= config_.max_voxel_size_) {
+          continue;
+        }
+        voxel_idx = voxel_cnt;
+        voxel_cnt++;
+        coord_to_voxel_idx[coord_idx] = voxel_idx;
+        for (std::size_t di = 0; di < config_.point_dim_size_; di++) {
+          coordinates[voxel_idx * config_.point_dim_size_ + di] = coord_zyx[di];
+        }
+      }
+
+      // Append the point's features unless the voxel is already full.
+      const std::size_t point_cnt = num_points_per_voxel[voxel_idx];
+      if (point_cnt < config_.max_point_in_voxel_size_) {
+        for (std::size_t fi = 0; fi < config_.point_feature_size_; fi++) {
+          voxels
+            [voxel_idx * config_.max_point_in_voxel_size_ * config_.point_feature_size_ +
+             point_cnt * config_.point_feature_size_ + fi] = point[fi];
+        }
+        num_points_per_voxel[voxel_idx]++;
+      }
+    }
+  }
+
+  return voxel_cnt;
+}
+
+
+//std::size_t VoxelGenerator::pointsToVoxels(
+//  std::vector<float> & voxels, std::vector<int> & coordinates,
+//  std::vector<float> & num_points_per_voxel)
+//{
+//  // voxels (float): (max_voxel_size * max_point_in_voxel_size * point_feature_size)
+//  // coordinates (int): (max_voxel_size * point_dim_size)
+//  // num_points_per_voxel (float): (max_voxel_size)
+
+//  const std::size_t grid_size = config_.grid_size_z_ * config_.grid_size_y_ * config_.grid_size_x_;
+//  std::vector<int> coord_to_voxel_idx(grid_size, -1);
+
+//  std::size_t voxel_cnt = 0;  // @return
+//  std::vector<float> point;
+//  point.resize(config_.point_feature_size_);
+//  std::vector<float> coord_zyx;
+//  coord_zyx.resize(config_.point_dim_size_);
+//  bool out_of_range;
+//  std::size_t point_cnt;
+//  int c, coord_idx, voxel_idx;
+//  Eigen::Vector3f point_current, point_past;
+
+//  for (auto pc_cache_iter = pd_ptr_->getPointCloudCacheIter(); !pd_ptr_->isCacheEnd(pc_cache_iter);
+//       pc_cache_iter++) {
+//    auto pc_msg = pc_cache_iter->pointcloud_msg;
+//    auto affine_past2current =
+//      pd_ptr_->pointcloud_cache_size() > 1
+//        ? pd_ptr_->getAffineWorldToCurrent() * pc_cache_iter->affine_past2world
+//        : Eigen::Affine3f::Identity();
+//    float time_lag = static_cast<float>(
+//      pd_ptr_->getCurrentTimestamp() - rclcpp::Time(pc_msg.header.stamp).seconds());
+
+//    for (sensor_msgs::PointCloud2ConstIterator<float> x_iter(pc_msg, "x"), y_iter(pc_msg, "y"),
+//         z_iter(pc_msg, "z");
+//         x_iter != x_iter.end(); ++x_iter, ++y_iter, ++z_iter) {
+//      point_past << *x_iter, *y_iter, *z_iter;
+//      point_current = affine_past2current * point_past;
+
+//      point[0] = point_current.x();
+//      point[1] = point_current.y();
+//      point[2] = point_current.z();
+//      point[3] = time_lag;
+
+//      out_of_range = false;
+//      for (std::size_t di = 0; di < config_.point_dim_size_; di++) {
+//        c = static_cast<int>((point[di] - range_[di]) * recip_voxel_size_[di]);
+//        if (c < 0 || c >= grid_size_[di]) {
+//          out_of_range = true;
+//          break;
+//        }
+//        coord_zyx[config_.point_dim_size_ - di - 1] = c;
+//      }
+//      if (out_of_range) {
+//        continue;
+//      }
+
+//      coord_idx = coord_zyx[0] * config_.grid_size_y_ * config_.grid_size_x_ +
+//                  coord_zyx[1] * config_.grid_size_x_ + coord_zyx[2];
+//      voxel_idx = coord_to_voxel_idx[coord_idx];
+//      if (voxel_idx == -1) {
+//        voxel_idx = voxel_cnt;
+//        if (voxel_cnt >= config_.max_voxel_size_) {
+//          continue;
+//        }
+
+//        voxel_cnt++;
+//        coord_to_voxel_idx[coord_idx] = voxel_idx;
+//        for (std::size_t di = 0; di < config_.point_dim_size_; di++) {
+//          coordinates[voxel_idx * config_.point_dim_size_ + di] = coord_zyx[di];
+//        }
+//      }
+
+//      point_cnt = num_points_per_voxel[voxel_idx];
+//      if (point_cnt < config_.max_point_in_voxel_size_) {
+//        for (std::size_t fi = 0; fi < config_.point_feature_size_; fi++) {
+//          voxels
+//            [voxel_idx * config_.max_point_in_voxel_size_ * config_.point_feature_size_ +
+//             point_cnt * config_.point_feature_size_ + fi] = point[fi];
+//        }
+//        num_points_per_voxel[voxel_idx]++;
+//      }
+//    }
+//  }
+
+//  return voxel_cnt;
+//}
+
+}  // namespace centerpoint

+ 95 - 0
src/detection/detection_lidar_centerpoint/lib/ros_utils.cpp

@@ -0,0 +1,95 @@
+// Copyright 2022 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "lidar_centerpoint/ros_utils.hpp"
+
+#include "perception_utils/perception_utils.hpp"
+#include "tier4_autoware_utils/geometry/geometry.hpp"
+#include "tier4_autoware_utils/math/constants.hpp"
+
+namespace centerpoint
+{
+
+using Label = autoware_auto_perception_msgs::msg::ObjectClassification;
+
+// Convert one raw network detection (Box3D) into an autoware DetectedObject:
+// classification, pose, bounding-box shape and (optionally) twist.
+void box3DToDetectedObject(
+  const Box3D & box3d, const std::vector<std::string> & class_names, const bool has_twist,
+  autoware_auto_perception_msgs::msg::DetectedObject & obj)
+{
+  // TODO(yukke42): the value of classification confidence of DNN, not probability.
+  obj.existence_probability = box3d.score;
+
+  // classification: map the class index through class_names; out-of-range
+  // indices fall back to UNKNOWN with a warning.
+  autoware_auto_perception_msgs::msg::ObjectClassification classification;
+  classification.probability = 1.0f;
+  if (box3d.label >= 0 && static_cast<size_t>(box3d.label) < class_names.size()) {
+    classification.label = getSemanticType(class_names[box3d.label]);
+  } else {
+    classification.label = Label::UNKNOWN;
+    RCLCPP_WARN_STREAM(
+      rclcpp::get_logger("lidar_centerpoint"), "Unexpected label: UNKNOWN is set.");
+  }
+
+  // Car-like classes: mark the orientation sign as unknown.
+  if (perception_utils::isCarLikeVehicle(classification.label)) {
+    obj.kinematics.orientation_availability =
+      autoware_auto_perception_msgs::msg::DetectedObjectKinematics::SIGN_UNKNOWN;
+  }
+
+  obj.classification.emplace_back(classification);
+
+  // pose and shape
+  // mmdet3d yaw format to ros yaw format
+  float yaw = -box3d.yaw - tier4_autoware_utils::pi / 2;
+  obj.kinematics.pose_with_covariance.pose.position =
+    tier4_autoware_utils::createPoint(box3d.x, box3d.y, box3d.z);
+  obj.kinematics.pose_with_covariance.pose.orientation =
+    tier4_autoware_utils::createQuaternionFromYaw(yaw);
+  obj.shape.type = autoware_auto_perception_msgs::msg::Shape::BOUNDING_BOX;
+  obj.shape.dimensions =
+    tier4_autoware_utils::createTranslation(box3d.length, box3d.width, box3d.height);
+
+  // twist: linear.x is the speed magnitude.
+  // NOTE(review): angular.z = 2 * (velocity direction - yaw) -- the factor 2
+  // looks like a turn-rate convention inherited from upstream; confirm.
+  if (has_twist) {
+    float vel_x = box3d.vel_x;
+    float vel_y = box3d.vel_y;
+    geometry_msgs::msg::Twist twist;
+    twist.linear.x = std::sqrt(std::pow(vel_x, 2) + std::pow(vel_y, 2));
+    twist.angular.z = 2 * (std::atan2(vel_y, vel_x) - yaw);
+    obj.kinematics.twist_with_covariance.twist = twist;
+    obj.kinematics.has_twist = has_twist;
+  }
+}
+
+// Map a model class-name string (as listed in class_names) to the autoware
+// ObjectClassification label. Note "MOTORBIKE" maps to Label::MOTORCYCLE;
+// any unrecognized name maps to UNKNOWN.
+uint8_t getSemanticType(const std::string & class_name)
+{
+  if (class_name == "CAR") {
+    return Label::CAR;
+  } else if (class_name == "TRUCK") {
+    return Label::TRUCK;
+  } else if (class_name == "BUS") {
+    return Label::BUS;
+  } else if (class_name == "TRAILER") {
+    return Label::TRAILER;
+  } else if (class_name == "BICYCLE") {
+    return Label::BICYCLE;
+  } else if (class_name == "MOTORBIKE") {
+    return Label::MOTORCYCLE;
+  } else if (class_name == "PEDESTRIAN") {
+    return Label::PEDESTRIAN;
+  } else {
+    return Label::UNKNOWN;
+  }
+}
+
+}  // namespace centerpoint

+ 34 - 0
src/detection/detection_lidar_centerpoint/lib/utils.cpp

@@ -0,0 +1,34 @@
+// Copyright 2022 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "lidar_centerpoint/utils.hpp"
+
+#include <stdexcept>
+
+namespace centerpoint
+{
+// cspell: ignore divup
+// cspell: ignore divup
+// Ceiling division: smallest k with k * b >= a, i.e. ceil(a / b).
+// NOTE(review): a == 0 is rejected even though ceil(0 / b) == 0 is well
+// defined -- presumably a deliberate guard against empty work sizes; confirm
+// callers never pass 0.
+std::size_t divup(const std::size_t a, const std::size_t b)
+{
+  if (a == 0) {
+    throw std::runtime_error("A dividend of divup isn't positive.");
+  }
+  if (b == 0) {
+    // b == 0 would be a division by zero below.
+    throw std::runtime_error("A divisor of divup isn't positive.");
+  }
+
+  return (a + b - 1) / b;
+}
+
+}  // namespace centerpoint

+ 59 - 0
src/detection/detection_lidar_centerpoint/main.cpp

@@ -0,0 +1,59 @@
+#include <QCoreApplication>
+
+
+
+#include <lidar_centerpoint/centerpoint_config.hpp>
+#include <lidar_centerpoint/centerpoint_trt.hpp>
+#include <lidar_centerpoint/preprocess/pointcloud_densification.hpp>
+
+using namespace centerpoint;
+
+std::unique_ptr<CenterPointTRT> detector_ptr_{nullptr};
+
+// Build the global CenterPoint TensorRT detector with hard-coded deployment
+// parameters (model paths, thresholds). In the ROS version these came from
+// node parameters -- see the commented-out declare_parameter calls.
+void init()
+{
+    const float score_threshold = 0.35;
+    const float circle_nms_dist_threshold =1.5;
+  //    static_cast<float>(this->declare_parameter<double>("circle_nms_dist_threshold"));
+    std::vector<double> yaw_norm_thresholds ;
+    const std::string densification_world_frame_id = "map";
+    const int densification_num_past_frames = 1;
+    const std::string trt_precision = "fp16";
+    const std::string encoder_onnx_path = "/home/nvidia/models/pts_voxel_encoder_centerpoint.onnx";//this->declare_parameter<std::string>("encoder_onnx_path");
+    const std::string encoder_engine_path ="/home/nvidia/models/pts_voxel_encoder_centerpoint.eng";//this->declare_parameter<std::string>("encoder_engine_path");
+    const std::string head_onnx_path = "/home/nvidia/models/pts_backbone_neck_head_centerpoint.onnx";//this->declare_parameter<std::string>("head_onnx_path");
+    const std::string head_engine_path ="/home/nvidia/models/pts_backbone_neck_head_centerpoint.eng" ;//this->declare_parameter<std::string>("head_engine_path");
+    const std::size_t point_feature_size =4;
+    const std::size_t max_voxel_size =40000;
+    // NOTE(review): yaw_norm_thresholds, point_cloud_range and voxel_size are
+    // passed to CenterPointConfig empty -- presumably the config falls back to
+    // defaults; confirm, otherwise the grid/range setup will be ill-formed.
+    std::vector<double> point_cloud_range ;
+    std::vector<double>voxel_size ;
+    const std::size_t downsample_factor =2;
+    const std::size_t encoder_in_feature_size = 9;
+    // NOTE(review): the three remapper matrices below are declared but never
+    // used -- DetectionClassRemapper is not wired up in this entry point.
+    std::vector<int64_t> allow_remapping_by_area_matrix;
+    std::vector<double> min_area_matrix ;
+    std::vector<double> max_area_matrix ;
+
+
+    NetworkParam encoder_param(encoder_onnx_path, encoder_engine_path, trt_precision);
+    NetworkParam head_param(head_onnx_path, head_engine_path, trt_precision);
+    DensificationParam densification_param(
+      densification_world_frame_id, densification_num_past_frames);
+
+    // class_size = 3; builds (or loads cached) TensorRT engines on startup.
+    CenterPointConfig config(
+      3, point_feature_size, max_voxel_size, point_cloud_range, voxel_size,
+      downsample_factor, encoder_in_feature_size, score_threshold, circle_nms_dist_threshold,
+      yaw_norm_thresholds);
+    detector_ptr_ =
+      std::make_unique<CenterPointTRT>(encoder_param, head_param, densification_param, config);
+
+    std::cout<<"init  complete."<<std::endl;
+
+}
+
+// Entry point: construct the detector once, then run the Qt event loop.
+// No work is scheduled from this file; inputs are wired up elsewhere.
+int main(int argc, char *argv[])
+{
+    QCoreApplication a(argc, argv);
+    init();
+
+    return a.exec();
+}

+ 223 - 0
src/detection/detection_lidar_centerpoint/tensorrt_common/tensorrt_common.cpp

@@ -0,0 +1,223 @@
+// Copyright 2020 Tier IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <tensorrt_common/tensorrt_common.hpp>
+
+#include <NvInferPlugin.h>
+#include <dlfcn.h>
+
+#include <fstream>
+#include <functional>
+#include <memory>
+#include <string>
+#include <utility>
+
+namespace tensorrt_common
+{
+
+// Construct the TensorRT helper: remember the model path, precision, batch
+// configuration and workspace limit; eagerly dlopen any plugin libraries;
+// create the inference runtime and register the standard nvinfer plugins.
+// Engine/context creation is deferred to setup().
+TrtCommon::TrtCommon(
+  const std::string & model_path, const std::string & precision,
+  std::unique_ptr<nvinfer1::IInt8EntropyCalibrator2> calibrator,
+  const tensorrt_common::BatchConfig & batch_config, const size_t max_workspace_size,
+  const std::vector<std::string> & plugin_paths)
+: model_file_path_(model_path),
+  calibrator_(std::move(calibrator)),
+  precision_(precision),
+  batch_config_(batch_config),
+  max_workspace_size_(max_workspace_size)
+{
+  for (const auto & plugin_path : plugin_paths) {
+    int32_t flags{RTLD_LAZY};
+// cspell: ignore asan
+#if ENABLE_ASAN
+    // https://github.com/google/sanitizers/issues/89
+    // asan doesn't handle module unloading correctly and there are no plans on doing
+    // so. In order to get proper stack traces, don't delete the shared library on
+    // close so that asan can resolve the symbols correctly.
+    flags |= RTLD_NODELETE;
+#endif  // ENABLE_ASAN
+    // The handle is not stored or dlclose'd, so the plugin stays resident for
+    // the process lifetime; a failed load is only logged, not fatal.
+    void * handle = dlopen(plugin_path.c_str(), flags);
+    if (!handle) {
+      logger_.log(nvinfer1::ILogger::Severity::kERROR, "Could not load plugin library");
+    }
+  }
+  runtime_ = TrtUniquePtr<nvinfer1::IRuntime>(nvinfer1::createInferRuntime(logger_));
+  initLibNvInferPlugins(&logger_, "");
+}
+
+// Prepare the engine and execution context from model_file_path_:
+//  - ".engine": deserialize directly;
+//  - ".onnx":  reuse a cached ".engine" next to it, otherwise build and cache;
+//  - anything else (or a missing file): mark not initialized and bail out.
+// On success is_initialized_ is set; callers must check isInitialized().
+void TrtCommon::setup()
+{
+  if (!fs::exists(model_file_path_)) {
+    is_initialized_ = false;
+    return;
+  }
+  if (model_file_path_.extension() == ".engine") {
+    // NOTE(review): loadEngine's return value is ignored here; if
+    // deserialization fails, engine_ stays null and createExecutionContext
+    // below dereferences a null pointer -- confirm upstream behavior.
+    loadEngine(model_file_path_);
+  } else if (model_file_path_.extension() == ".onnx") {
+    fs::path cache_engine_path{model_file_path_};
+    cache_engine_path.replace_extension("engine");
+    if (fs::exists(cache_engine_path)) {
+      loadEngine(cache_engine_path);
+    } else {
+      logger_.log(nvinfer1::ILogger::Severity::kINFO, "Start build engine");
+      buildEngineFromOnnx(model_file_path_, cache_engine_path);
+      logger_.log(nvinfer1::ILogger::Severity::kINFO, "End build engine");
+    }
+  } else {
+    is_initialized_ = false;
+    return;
+  }
+
+  context_ = TrtUniquePtr<nvinfer1::IExecutionContext>(engine_->createExecutionContext());
+  if (!context_) {
+    logger_.log(nvinfer1::ILogger::Severity::kERROR, "Fail to create context");
+    is_initialized_ = false;
+    return;
+  }
+
+  is_initialized_ = true;
+}
+
+// Deserialize a prebuilt TensorRT engine from disk into engine_.
+//
+// Reads the whole file into memory and hands it to the runtime. Returns
+// false if the file cannot be opened or deserialization yields no engine.
+// Fixes vs. the original: the file is now opened in binary mode (text mode
+// corrupts serialized engine data on some platforms), and failures are
+// reported instead of unconditionally returning true.
+bool TrtCommon::loadEngine(const std::string & engine_file_path)
+{
+  std::ifstream engine_file(engine_file_path, std::ios::binary);
+  if (!engine_file) {
+    logger_.log(nvinfer1::ILogger::Severity::kERROR, "Fail to open engine file");
+    return false;
+  }
+  std::stringstream engine_buffer;
+  engine_buffer << engine_file.rdbuf();
+  std::string engine_str = engine_buffer.str();
+  engine_ = TrtUniquePtr<nvinfer1::ICudaEngine>(runtime_->deserializeCudaEngine(
+    reinterpret_cast<const void *>(engine_str.data()), engine_str.size()));
+  if (!engine_) {
+    logger_.log(nvinfer1::ILogger::Severity::kERROR, "Fail to deserialize engine");
+    return false;
+  }
+  return true;
+}
+
+// Build a TensorRT engine from an ONNX model and cache the serialized plan
+// at output_engine_file_path.
+//
+// Precision "fp16" or "int8" enables the FP16 flag; INT8 additionally
+// requires a calibrator. The optimization profile is derived from the first
+// network input, assumed 4-D (N, C, H, W), with N swept over batch_config_
+// (min/opt/max). Returns false on any builder, parser, serialization or
+// file-write failure.
+// Fix vs. the original: the TensorRT version test below used the undefined
+// macro NV_TENSOR_PATCH, which silently evaluates to 0 inside #if; it now
+// uses NV_TENSORRT_PATCH from NvInferVersion.h.
+bool TrtCommon::buildEngineFromOnnx(
+  const std::string & onnx_file_path, const std::string & output_engine_file_path)
+{
+  auto builder = TrtUniquePtr<nvinfer1::IBuilder>(nvinfer1::createInferBuilder(logger_));
+  if (!builder) {
+    logger_.log(nvinfer1::ILogger::Severity::kERROR, "Fail to create builder");
+    return false;
+  }
+
+  // Networks are created with an explicit batch dimension.
+  const auto explicitBatch =
+    1U << static_cast<uint32_t>(nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
+
+  auto network =
+    TrtUniquePtr<nvinfer1::INetworkDefinition>(builder->createNetworkV2(explicitBatch));
+  if (!network) {
+    logger_.log(nvinfer1::ILogger::Severity::kERROR, "Fail to create network");
+    return false;
+  }
+
+  auto config = TrtUniquePtr<nvinfer1::IBuilderConfig>(builder->createBuilderConfig());
+  if (!config) {
+    logger_.log(nvinfer1::ILogger::Severity::kERROR, "Fail to create builder config");
+    return false;
+  }
+
+  if (precision_ == "fp16" || precision_ == "int8") {
+    config->setFlag(nvinfer1::BuilderFlag::kFP16);
+  }
+  // TensorRT >= 8.4.0 deprecated setMaxWorkspaceSize in favor of memory pools.
+#if (NV_TENSORRT_MAJOR * 1000) + (NV_TENSORRT_MINOR * 100) + NV_TENSORRT_PATCH >= 8400
+  config->setMemoryPoolLimit(nvinfer1::MemoryPoolType::kWORKSPACE, max_workspace_size_);
+#else
+  config->setMaxWorkspaceSize(max_workspace_size_);
+#endif
+
+  // Parse the ONNX file; the parser logs its own diagnostics at kERROR.
+  auto parser = TrtUniquePtr<nvonnxparser::IParser>(nvonnxparser::createParser(*network, logger_));
+  if (!parser->parseFromFile(
+        onnx_file_path.c_str(), static_cast<int>(nvinfer1::ILogger::Severity::kERROR))) {
+    return false;
+  }
+
+  // NOTE(review): assumes the first input is 4-D (N, C, H, W); models with a
+  // different input rank would read garbage dims here -- confirm for the
+  // CenterPoint ONNX graphs.
+  const auto input = network->getInput(0);
+  const auto input_dims = input->getDimensions();
+  const auto input_channel = input_dims.d[1];
+  const auto input_height = input_dims.d[2];
+  const auto input_width = input_dims.d[3];
+
+  // Sweep only the batch dimension across min/opt/max.
+  auto profile = builder->createOptimizationProfile();
+  profile->setDimensions(
+    network->getInput(0)->getName(), nvinfer1::OptProfileSelector::kMIN,
+    nvinfer1::Dims4{batch_config_.at(0), input_channel, input_height, input_width});
+  profile->setDimensions(
+    network->getInput(0)->getName(), nvinfer1::OptProfileSelector::kOPT,
+    nvinfer1::Dims4{batch_config_.at(1), input_channel, input_height, input_width});
+  profile->setDimensions(
+    network->getInput(0)->getName(), nvinfer1::OptProfileSelector::kMAX,
+    nvinfer1::Dims4{batch_config_.at(2), input_channel, input_height, input_width});
+  config->addOptimizationProfile(profile);
+
+  if (precision_ == "int8" && calibrator_) {
+    config->setFlag(nvinfer1::BuilderFlag::kINT8);
+    config->setInt8Calibrator(calibrator_.get());
+  }
+
+  // TENSORRT_VERSION_MAJOR is expected to come from the build system.
+#if TENSORRT_VERSION_MAJOR >= 8
+  // TRT 8+: serialize first, then deserialize into an engine.
+  auto plan =
+    TrtUniquePtr<nvinfer1::IHostMemory>(builder->buildSerializedNetwork(*network, *config));
+  if (!plan) {
+    logger_.log(nvinfer1::ILogger::Severity::kERROR, "Fail to create host memory");
+    return false;
+  }
+  engine_ = TrtUniquePtr<nvinfer1::ICudaEngine>(
+    runtime_->deserializeCudaEngine(plan->data(), plan->size()));
+#else
+  engine_ = TrtUniquePtr<nvinfer1::ICudaEngine>(builder->buildEngineWithConfig(*network, *config));
+#endif
+
+  if (!engine_) {
+    logger_.log(nvinfer1::ILogger::Severity::kERROR, "Fail to create engine");
+    return false;
+  }
+
+  // Cache the serialized plan so the next startup can skip the build.
+#if TENSORRT_VERSION_MAJOR < 8
+  auto data = TrtUniquePtr<nvinfer1::IHostMemory>(engine_->serialize());
+#endif
+  std::ofstream file;
+  file.open(output_engine_file_path, std::ios::binary | std::ios::out);
+  if (!file.is_open()) {
+    return false;
+  }
+#if TENSORRT_VERSION_MAJOR < 8
+  file.write(reinterpret_cast<const char *>(data->data()), data->size());
+#else
+  file.write(reinterpret_cast<const char *>(plan->data()), plan->size());
+#endif
+
+  file.close();
+
+  return true;
+}
+
+// True once setup() produced a usable engine and execution context.
+bool TrtCommon::isInitialized() { return is_initialized_; }
+
+// Dimensions of the binding at `index` as resolved by the execution context
+// (reflects any dynamic dimensions set via setBindingDimensions).
+nvinfer1::Dims TrtCommon::getBindingDimensions(const int32_t index) const
+{
+  return context_->getBindingDimensions(index);
+}
+
+// Total number of input + output bindings of the loaded engine.
+int32_t TrtCommon::getNbBindings() { return engine_->getNbBindings(); }
+
+// Set concrete dimensions for a dynamic input binding; returns false if the
+// context rejects the dimensions.
+bool TrtCommon::setBindingDimensions(const int32_t index, const nvinfer1::Dims & dimensions) const
+{
+  return context_->setBindingDimensions(index, dimensions);
+}
+
+// Enqueue asynchronous inference on `stream`. `input_consumed` (optional)
+// is signalled once the input buffers may be reused.
+bool TrtCommon::enqueueV2(void ** bindings, cudaStream_t stream, cudaEvent_t * input_consumed)
+{
+  return context_->enqueueV2(bindings, stream, input_consumed);
+}
+
+}  // namespace tensorrt_common

+ 142 - 0
src/detection/detection_lidar_centerpoint/tensorrt_common/tensorrt_common.hpp

@@ -0,0 +1,142 @@
+// Copyright 2022 Tier IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef TENSORRT_COMMON__TENSORRT_COMMON_HPP_
+#define TENSORRT_COMMON__TENSORRT_COMMON_HPP_
+
+//#include <rclcpp/rclcpp.hpp>
+
+#include <NvInfer.h>
+#include <NvOnnxParser.h>
+
+// Select std::filesystem when available (MSVC, or GCC >= 7).
+// BUGFIX: the original condition tested __GNUC_MAJOR__, which is not a macro
+// GCC predefines (the major-version macro is __GNUC__).  An undefined
+// identifier evaluates to 0 inside #if, so GCC builds always fell back to
+// <experimental/filesystem> regardless of compiler version.
+#if (defined(_MSC_VER) or (defined(__GNUC__) and (7 <= __GNUC__)))
+#include <filesystem>
+namespace fs = ::std::filesystem;
+#else
+#include <experimental/filesystem>
+namespace fs = ::std::experimental::filesystem;
+#endif
+
+#include <array>  // std::array, used by the BatchConfig alias below
+#include <memory>
+#include <sstream>
+#include <string>
+#include <vector>
+
+#include <iostream>
+
+namespace tensorrt_common
+{
+// Adapter that satisfies TensorRT's nvinfer1::ILogger interface.  Messages at
+// or below `reportable_severity_` are printed to stdout; the original ROS2
+// logging calls are kept commented out for reference.
+class Logger : public nvinfer1::ILogger  // NOLINT
+{
+public:
+  // Default threshold is kINFO (the delegating ctor overrides the kWARNING
+  // default member initializer below).
+  Logger() : Logger(Severity::kINFO) {}
+
+  explicit Logger(Severity severity) : reportable_severity_(severity) {}
+
+  // Called by TensorRT for every diagnostic message.
+  void log(Severity severity, const char * msg) noexcept override
+  {
+    // suppress messages with severity enum value greater than the reportable
+    if (severity > reportable_severity_) {
+      return;
+    }
+
+    // NOTE: with RCLCPP disabled every branch currently does the same thing
+    // (print to stdout); the switch is kept so per-severity sinks can be
+    // restored easily.
+    switch (severity) {
+      case Severity::kINTERNAL_ERROR:
+ //       RCLCPP_ERROR_STREAM(logger_, msg);
+        std::cout<<msg<<std::endl;
+        break;
+      case Severity::kERROR:
+    //    RCLCPP_ERROR_STREAM(logger_, msg);
+        std::cout<<msg<<std::endl;
+        break;
+      case Severity::kWARNING:
+    //    RCLCPP_WARN_STREAM(logger_, msg);
+        std::cout<<msg<<std::endl;
+        break;
+      case Severity::kINFO:
+  //      RCLCPP_INFO_STREAM(logger_, msg);
+        std::cout<<msg<<std::endl;
+        break;
+      default:
+  //      RCLCPP_INFO_STREAM(logger_, msg);
+        std::cout<<msg<<std::endl;
+        break;
+    }
+  }
+
+  // Maximum severity that is reported (messages above this value are dropped).
+  Severity reportable_severity_{Severity::kWARNING};
+ // rclcpp::Logger logger_{rclcpp::get_logger("tensorrt_common")};
+
+};
+
+// Deleter for TensorRT objects: TensorRT 8 deprecated/removed the destroy()
+// member, so objects are released with plain `delete`; older versions require
+// calling destroy() instead.
+template <typename T>
+struct InferDeleter  // NOLINT
+{
+  void operator()(T * obj) const
+  {
+    if (obj) {
+#if TENSORRT_VERSION_MAJOR >= 8
+      delete obj;
+#else
+      obj->destroy();
+#endif
+    }
+  }
+};
+
+// unique_ptr specialization that releases TensorRT objects correctly.
+template <typename T>
+using TrtUniquePtr = std::unique_ptr<T, InferDeleter<T>>;
+
+// Three batch sizes; presumably {min, opt, max} for the optimization
+// profile — TODO confirm against the TrtCommon implementation.
+using BatchConfig = std::array<int32_t, 3>;
+
+// Thin RAII wrapper around a TensorRT runtime/engine/execution-context triple.
+// Loads a serialized engine or builds one from ONNX, then exposes the binding
+// and enqueue API needed for inference.
+class TrtCommon  // NOLINT
+{
+public:
+  // @param model_path          path to the .onnx (or engine) model file
+  // @param precision           "fp32", "fp16" or "int8"
+  // @param calibrator          INT8 entropy calibrator (only used with "int8")
+  // @param batch_config        three batch sizes (see BatchConfig)
+  // @param max_workspace_size  builder workspace budget in bytes (default 16 MiB)
+  // @param plugin_paths        shared libraries with custom TRT plugins
+  TrtCommon(
+    const std::string & model_path, const std::string & precision,
+    std::unique_ptr<nvinfer1::IInt8EntropyCalibrator2> calibrator = nullptr,
+    const BatchConfig & batch_config = {1, 1, 1}, const size_t max_workspace_size = (16 << 20),
+    const std::vector<std::string> & plugin_paths = {});
+
+  // Deserializes a prebuilt engine from disk.
+  bool loadEngine(const std::string & engine_file_path);
+  // Builds an engine from an ONNX model and serializes it to disk.
+  bool buildEngineFromOnnx(
+    const std::string & onnx_file_path, const std::string & output_engine_file_path);
+  // Prepares runtime/engine/context; sets is_initialized_ on success.
+  void setup();
+
+  bool isInitialized();
+
+  nvinfer1::Dims getBindingDimensions(const int32_t index) const;
+  int32_t getNbBindings();
+  bool setBindingDimensions(const int32_t index, const nvinfer1::Dims & dimensions) const;
+  bool enqueueV2(void ** bindings, cudaStream_t stream, cudaEvent_t * input_consumed);
+
+private:
+  Logger logger_;
+  fs::path model_file_path_;
+  TrtUniquePtr<nvinfer1::IRuntime> runtime_;
+  TrtUniquePtr<nvinfer1::ICudaEngine> engine_;
+  TrtUniquePtr<nvinfer1::IExecutionContext> context_;
+  std::unique_ptr<nvinfer1::IInt8EntropyCalibrator2> calibrator_;
+
+  nvinfer1::Dims input_dims_;
+  nvinfer1::Dims output_dims_;
+  std::string precision_;
+  BatchConfig batch_config_;
+  size_t max_workspace_size_;
+  bool is_initialized_{false};
+};
+
+}  // namespace tensorrt_common
+
+#endif  // TENSORRT_COMMON__TENSORRT_COMMON_HPP_

+ 194 - 0
src/detection/detection_lidar_cnn_segmentation/cluster2d.cpp

@@ -44,6 +44,186 @@ void Cluster2D::traverse(Node *x)
     }
 }
 
+// Raw-pointer overload of cluster(): groups BEV grid cells into obstacle
+// candidates using the network's objectness map (category_pt_data) and
+// center-offset maps (instance_pt_{x,y}_data), via union-find.  Unlike the
+// caffe::Blob overload, this one takes plain float buffers so it can run on a
+// worker thread after the caller has copied the network outputs out.
+// Results are left in obstacles_ / id_img_ / point2grid_ for filter(),
+// classify() and getObjectVector().
+void Cluster2D::cluster(const float * category_pt_data,
+             const float *instance_pt_x_data,
+             const float *instance_pt_y_data,
+             const pcl::PointCloud<pcl::PointXYZI>::Ptr &pc_ptr,
+             const pcl::PointIndices &valid_indices,
+             float objectness_thresh, bool use_all_grids_for_clustering)
+{
+    pc_ptr_ = pc_ptr;
+    std::vector<std::vector<Node>> nodes(rows_,
+                                         std::vector<Node>(cols_, Node()));
+
+    // map points into grids
+    size_t tot_point_num = pc_ptr_->size();
+    valid_indices_in_pc_ = &(valid_indices.indices);
+    CHECK_LE(valid_indices_in_pc_->size(), tot_point_num);
+    point2grid_.assign(valid_indices_in_pc_->size(), -1);
+
+    // Phase 1: bin each valid point into a grid cell and count hits.
+    int64 time1 = std::chrono::system_clock::now().time_since_epoch().count();
+    for (size_t i = 0; i < valid_indices_in_pc_->size(); ++i)
+    {
+        int point_id = valid_indices_in_pc_->at(i);
+        CHECK_GE(point_id, 0);
+        CHECK_LT(point_id, static_cast<int>(tot_point_num));
+        const auto &point = pc_ptr_->points[point_id];
+        // * the coordinates of x and y have been exchanged in feature generation
+        // step,
+        // so we swap them back here.
+        int pos_x = F2I(point.y, range_, inv_res_x_);  // col
+        int pos_y = F2I(point.x, range_, inv_res_y_);  // row
+        if (IsValidRowCol(pos_y, pos_x))
+        {
+            // get grid index and count point number for corresponding node
+            point2grid_[i] = RowCol2Grid(pos_y, pos_x);
+            nodes[pos_y][pos_x].point_num++;
+        }
+    }
+
+    // Phase 2: for every cell, decide objectness and link it toward the
+    // instance-center cell predicted by the offset maps (clamped in-bounds).
+    int64 time2 = std::chrono::system_clock::now().time_since_epoch().count();
+    // construct graph with center offset prediction and objectness
+    for (int row = 0; row < rows_; ++row)
+    {
+        for (int col = 0; col < cols_; ++col)
+        {
+            int grid = RowCol2Grid(row, col);
+            Node *node = &nodes[row][col];
+            DisjointSetMakeSet(node);
+            node->is_object =
+                    (use_all_grids_for_clustering || nodes[row][col].point_num > 0) &&
+                    (*(category_pt_data + grid) >= objectness_thresh);
+            int center_row = std::round(row + instance_pt_x_data[grid] * scale_);
+            int center_col = std::round(col + instance_pt_y_data[grid] * scale_);
+            center_row = std::min(std::max(center_row, 0), rows_ - 1);
+            center_col = std::min(std::max(center_col, 0), cols_ - 1);
+            node->center_node = &nodes[center_row][center_col];
+        }
+    }
+
+    // Phase 3: follow center links to union each object cell with its root.
+    int64 time3 = std::chrono::system_clock::now().time_since_epoch().count();
+    // traverse nodes
+    for (int row = 0; row < rows_; ++row)
+    {
+        for (int col = 0; col < cols_; ++col)
+        {
+            Node *node = &nodes[row][col];
+            if (node->is_object && node->traversed == 0)
+            {
+                traverse(node);
+            }
+        }
+    }
+
+    // Phase 4: merge center cells that are 4-neighbours of each other so that
+    // adjacent instance centers form a single obstacle.
+    int64 time4 = std::chrono::system_clock::now().time_since_epoch().count();
+    for (int row = 0; row < rows_; ++row)
+    {
+        for (int col = 0; col < cols_; ++col)
+        {
+            Node *node = &nodes[row][col];
+            if (!node->is_center)
+            {
+                continue;
+            }
+            for (int row2 = row - 1; row2 <= row + 1; ++row2)
+            {
+                for (int col2 = col - 1; col2 <= col + 1; ++col2)
+                {
+                    if ((row2 == row || col2 == col) && IsValidRowCol(row2, col2))
+                    {
+                        Node *node2 = &nodes[row2][col2];
+                        if (node2->is_center)
+                        {
+                            DisjointSetUnion(node, node2);
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    // Phase 5: assign one obstacle id per union-find root and record every
+    // member cell in id_img_ / the obstacle's grid list.
+    int64 time5 = std::chrono::system_clock::now().time_since_epoch().count();
+    int count_obstacles = 0;
+    obstacles_.clear();
+    id_img_.assign(grids_, -1);
+    for (int row = 0; row < rows_; ++row)
+    {
+        for (int col = 0; col < cols_; ++col)
+        {
+            Node *node = &nodes[row][col];
+            if (!node->is_object)
+            {
+                continue;
+            }
+            Node *root = DisjointSetFind(node);
+            if (root->obstacle_id < 0)
+            {
+                root->obstacle_id = count_obstacles++;
+                CHECK_EQ(static_cast<int>(obstacles_.size()), count_obstacles - 1);
+                obstacles_.push_back(Obstacle());
+            }
+            int grid = RowCol2Grid(row, col);
+            CHECK_GE(root->obstacle_id, 0);
+            id_img_[grid] = root->obstacle_id;
+            obstacles_[root->obstacle_id].grids.push_back(grid);
+        }
+    }
+    CHECK_EQ(static_cast<size_t>(count_obstacles), obstacles_.size());
+    int64 time6 = std::chrono::system_clock::now().time_since_epoch().count();
+
+//    std::cout<<" t1: "<<(time2 - time1)/1000<<" t2: "<<(time3 - time2)/1000<<" t3: "<<(time4 - time3)/1000
+//            <<" t4: "<<(time5 - time4)/1000<<" t5: "<<(time6 - time5)/1000<<std::endl;
+}
+
+// Raw-pointer overload of filter(): averages the per-cell confidence and
+// height maps over each obstacle's member grids, storing the means in
+// obs->score / obs->height, and resets each obstacle's point cloud buffer.
+void Cluster2D::filter(const float *confidence_pt_data,
+            const float *height_pt_data)
+{
+    for (size_t obstacle_id = 0; obstacle_id < obstacles_.size();
+         obstacle_id++)
+    {
+        Obstacle *obs = &obstacles_[obstacle_id];
+        CHECK_GT(obs->grids.size(), 0);
+        double score = 0.0;
+        double height = 0.0;
+        for (int grid : obs->grids)
+        {
+            score += static_cast<double>(confidence_pt_data[grid]);
+            height += static_cast<double>(height_pt_data[grid]);
+        }
+        // Mean confidence and mean predicted height across the obstacle.
+        obs->score = score / static_cast<double>(obs->grids.size());
+        obs->height = height / static_cast<double>(obs->grids.size());
+        obs->cloud_ptr.reset(new pcl::PointCloud<pcl::PointXYZI>);
+    }
+}
+
+// Raw-pointer overload of classify(): accumulates the per-class score maps
+// (laid out channel-major: class k at offset k * grids_) over each obstacle's
+// grids, normalizes to per-class means, and picks the argmax as meta_type.
+void Cluster2D::classify(const float *classify_pt_data,int num_classes )
+{
+    CHECK_EQ(num_classes, MAX_META_TYPE);
+    for (size_t obs_id = 0; obs_id < obstacles_.size(); obs_id++)
+    {
+        Obstacle *obs = &obstacles_[obs_id];
+        for (size_t grid_id = 0; grid_id < obs->grids.size(); grid_id++)
+        {
+            int grid = obs->grids[grid_id];
+            for (int k = 0; k < num_classes; k++)
+            {
+                obs->meta_type_probs[k] += classify_pt_data[k * grids_ + grid];
+            }
+        }
+        int meta_type_id = 0;
+        for (int k = 0; k < num_classes; k++)
+        {
+            obs->meta_type_probs[k] /= obs->grids.size();
+            if (obs->meta_type_probs[k] > obs->meta_type_probs[meta_type_id])
+            {
+                meta_type_id = k;
+            }
+        }
+        obs->meta_type = static_cast<MetaType>(meta_type_id);
+    }
+}
+
+#include <chrono>
 void Cluster2D::cluster(const caffe::Blob<float> &category_pt_blob,
                         const caffe::Blob<float> &instance_pt_blob,
                         const pcl::PointCloud<pcl::PointXYZI>::Ptr &pc_ptr,
@@ -55,6 +235,7 @@ void Cluster2D::cluster(const caffe::Blob<float> &category_pt_blob,
     const float *instance_pt_y_data =
             instance_pt_blob.cpu_data() + instance_pt_blob.offset(0, 1);
 
+
     pc_ptr_ = pc_ptr;
     std::vector<std::vector<Node>> nodes(rows_,
                                          std::vector<Node>(cols_, Node()));
@@ -65,6 +246,7 @@ void Cluster2D::cluster(const caffe::Blob<float> &category_pt_blob,
     CHECK_LE(valid_indices_in_pc_->size(), tot_point_num);
     point2grid_.assign(valid_indices_in_pc_->size(), -1);
 
+    int64 time1 = std::chrono::system_clock::now().time_since_epoch().count();
     for (size_t i = 0; i < valid_indices_in_pc_->size(); ++i)
     {
         int point_id = valid_indices_in_pc_->at(i);
@@ -84,6 +266,7 @@ void Cluster2D::cluster(const caffe::Blob<float> &category_pt_blob,
         }
     }
 
+    int64 time2 = std::chrono::system_clock::now().time_since_epoch().count();
     // construct graph with center offset prediction and objectness
     for (int row = 0; row < rows_; ++row)
     {
@@ -103,6 +286,7 @@ void Cluster2D::cluster(const caffe::Blob<float> &category_pt_blob,
         }
     }
 
+    int64 time3 = std::chrono::system_clock::now().time_since_epoch().count();
     // traverse nodes
     for (int row = 0; row < rows_; ++row)
     {
@@ -115,6 +299,8 @@ void Cluster2D::cluster(const caffe::Blob<float> &category_pt_blob,
             }
         }
     }
+
+    int64 time4 = std::chrono::system_clock::now().time_since_epoch().count();
     for (int row = 0; row < rows_; ++row)
     {
         for (int col = 0; col < cols_; ++col)
@@ -141,6 +327,7 @@ void Cluster2D::cluster(const caffe::Blob<float> &category_pt_blob,
         }
     }
 
+    int64 time5 = std::chrono::system_clock::now().time_since_epoch().count();
     int count_obstacles = 0;
     obstacles_.clear();
     id_img_.assign(grids_, -1);
@@ -167,6 +354,10 @@ void Cluster2D::cluster(const caffe::Blob<float> &category_pt_blob,
         }
     }
     CHECK_EQ(static_cast<size_t>(count_obstacles), obstacles_.size());
+    int64 time6 = std::chrono::system_clock::now().time_since_epoch().count();
+
+    std::cout<<" t1: "<<(time2 - time1)/1000<<" t2: "<<(time3 - time2)/1000<<" t3: "<<(time4 - time3)/1000
+            <<" t4: "<<(time5 - time4)/1000<<" t5: "<<(time6 - time5)/1000<<std::endl;
 }
 
 void Cluster2D::filter(const caffe::Blob<float> &confidence_pt_blob,
@@ -194,7 +385,10 @@ void Cluster2D::filter(const caffe::Blob<float> &confidence_pt_blob,
 
 void Cluster2D::classify(const caffe::Blob<float> &classify_pt_blob)
 {
+//    int64_t time1 = std::chrono::system_clock::now().time_since_epoch().count()/1000000;
     const float *classify_pt_data = classify_pt_blob.cpu_data();
+//    int64_t time2 = std::chrono::system_clock::now().time_since_epoch().count()/1000000;
+//    std::cout<<"cpu data use: "<<(time2-time1)<<std::endl;
     int num_classes = classify_pt_blob.channels();
     CHECK_EQ(num_classes, MAX_META_TYPE);
     for (size_t obs_id = 0; obs_id < obstacles_.size(); obs_id++)

+ 16 - 0
src/detection/detection_lidar_cnn_segmentation/cluster2d.h

@@ -93,6 +93,10 @@ public:
                const pcl::PointIndices &valid_indices,
                float objectness_thresh, bool use_all_grids_for_clustering);
 
+
+
+
+
   void filter(const caffe::Blob<float> &confidence_pt_blob,
               const caffe::Blob<float> &height_pt_blob);
 
@@ -102,6 +106,18 @@ public:
                       const float height_thresh,
                       const int min_pts_num,std::vector<Obstacle> & objvec);
 
+ void cluster(const float * category_pt_data,
+              const float *instance_pt_x_data,
+              const float *instance_pt_y_data,
+              const pcl::PointCloud<pcl::PointXYZI>::Ptr &pc_ptr,
+              const pcl::PointIndices &valid_indices,
+              float objectness_thresh, bool use_all_grids_for_clustering);
+
+ void filter(const float *confidence_pt_data,
+             const float *height_pt_data);
+
+ void classify(const float *classify_pt_data,int num_classes );
+
 //  void getObjects(const float confidence_thresh,
 //                  const float height_thresh,
 //                  const int min_pts_num,

+ 189 - 2
src/detection/detection_lidar_cnn_segmentation/cnn_segmentation.cpp

@@ -1,7 +1,14 @@
 #include "cnn_segmentation.h"
 
+
+
 CNNSegmentation::CNNSegmentation()
 {
+
+    // Initialize the producer/consumer handshake flags for the clustering
+    // worker threads started later in init().
+    minferbuffer.mbNew = false;
+    mbThreadRun = true;
+    mcnnres.mbNew = false;
+
 }
 
 
@@ -80,15 +87,99 @@ bool CNNSegmentation::init(std::string proto_file,std::string weight_file,double
      }
 
      feature_generator_.reset(new FeatureGenerator());
-     if (!feature_generator_->init(feature_blob_.get(), use_constant_feature_))
+     if (!feature_generator_->init(feature_blob_.get(), use_constant_feature_,range_,width_,height_))
      {
   //     ROS_ERROR("[%s] Fail to Initialize feature generator for CNNSegmentation", __APP_NAME__);
        return false;
      }
 
+
+ int i;
+ for(i=0;i<3;i++)
+ {
+     std::thread * pt = new std::thread(&CNNSegmentation::threadCluster,this);
+     mpthreadv.push_back(pt);
+ }
+
   return true;
 }
 
+// Worker-thread body: polls minferbuffer for a fresh set of network outputs
+// (published by the async segment() overload), runs cluster/filter/classify
+// on a thread-local Cluster2D, and publishes the resulting obstacle vector
+// into mcnnres for GetRes().  Several instances run concurrently (init()
+// starts three); each consumes at most one pending buffer per iteration.
+// NOTE(review): loop exits when mbThreadRun goes false, but the threads
+// created in init() are never joined or deleted — confirm shutdown handling.
+void CNNSegmentation::threadCluster()
+{
+    std::shared_ptr<Cluster2D> xcluster2d;
+
+    // Per-thread Cluster2D instance so workers don't share mutable state.
+    xcluster2d.reset(new Cluster2D());
+    if (!xcluster2d->init(width_,height_,range_))
+//    if (!cluster2d_->init(512,512,60))
+    {
+        std::cout<<" CNNSegmentation::threadCluster "<<" cluster2d init fail."<<std::endl;
+ //     ROS_ERROR("[%s] Fail to Initialize cluster2d for CNNSegmentation", __APP_NAME__);
+      return ;
+    }
+    float objectness_thresh = 0.5;
+    bool use_all_grids_for_clustering = true;
+    float confidence_thresh = score_threshold_;
+    float height_thresh = 3.5;// 0.5;//3.5;//0.5;
+    int min_pts_num = 3;
+    while(mbThreadRun)
+    {
+        // Try to claim the pending buffer under the lock; copy it out so the
+        // heavy clustering work runs without holding the mutex.
+        bool bNew = false;
+        iv::Cluster2DData xCluster2DData;
+        mmutexbuffer.lock();
+        if(minferbuffer.mbNew)
+        {
+            xCluster2DData = minferbuffer.mData;
+            minferbuffer.mbNew = false;
+            bNew = true;
+        }
+        mmutexbuffer.unlock();
+        if(bNew == false)
+        {
+            std::this_thread::sleep_for(std::chrono::milliseconds(1));
+            continue;
+        }
+
+        xcluster2d->cluster(xCluster2DData.mcategory_pt_data_ptr.get(),
+                             xCluster2DData.minstance_pt_x_data_ptr.get(),
+                             xCluster2DData.minstance_pt_y_data_ptr.get(),
+                             xCluster2DData.mpointcloud,
+                             *xCluster2DData.mvalid_idx_ptr, objectness_thresh,
+                             use_all_grids_for_clustering);
+        xcluster2d->filter(xCluster2DData.mconfidence_pt_data_ptr.get(),xCluster2DData.mheight_pt_data_ptr.get());
+        xcluster2d->classify(xCluster2DData.mclassify_pt_data_ptr.get(),xCluster2DData.mnum_classes);
+
+        int64 nnow = std::chrono::system_clock::now().time_since_epoch().count()/1000000;
+        std::cout<<"time "<<xCluster2DData.mPointCloudUpdateTime<<" infer use: "<<(xCluster2DData.mInferUpdateTime - xCluster2DData.mPointCloudUpdateTime)
+                <<" cluster use: "<<(nnow - xCluster2DData.mInferUpdateTime)<<" total use:"<<(nnow - xCluster2DData.mPointCloudUpdateTime)<<std::endl;
+//        xcluster2d_->filter(*confidence_pt_blob_, *height_pt_blob_);
+//        xcluster2d_->classify(*class_pt_blob_);
+
+        std::vector<Obstacle> objvec;
+        xcluster2d->getObjectVector(confidence_thresh, height_thresh, min_pts_num,objvec);
+
+        // Publish the result; a newer result simply overwrites an unread one.
+        mmutexcnnres.lock();
+        mcnnres.mobjvec = objvec;
+        mcnnres.mcluster2ddata = xCluster2DData;
+        mcnnres.mbNew = true;
+        mmutexcnnres.unlock();
+
+    }
+}
+
+// Fetches the latest clustering result published by threadCluster().
+// @param xres  filled with the result when one is available.
+// @return 1 if a fresh result was copied out, 0 otherwise; the "new" flag is
+//         cleared so each result is consumed at most once.
+int CNNSegmentation::GetRes(iv::cnnres & xres)
+{
+    int nrtn = 0;
+    mmutexcnnres.lock();
+    if(mcnnres.mbNew)
+    {
+        xres = mcnnres;
+        mcnnres.mbNew = false;
+        nrtn = 1;
+    }
+    mmutexcnnres.unlock();
+    return nrtn;
+}
+
 bool CNNSegmentation::segment(const pcl::PointCloud<pcl::PointXYZI>::Ptr &pc_ptr,
                               const pcl::PointIndices &valid_idx,
                               std::vector<Obstacle> & objvec)
@@ -110,8 +201,13 @@ bool CNNSegmentation::segment(const pcl::PointCloud<pcl::PointXYZI>::Ptr &pc_ptr
 
     int64_t time3 = std::chrono::system_clock::now().time_since_epoch().count();
 
+
+
     float objectness_thresh = 0.5;
     bool use_all_grids_for_clustering = true;
+
+//    boost::shared_ptr<caffe::Blob<float>> xinstance_pt_blob = boost::shared_ptr<caffe::Blob<float>>(new caffe::Blob<float>());
+//    xinstance_pt_blob->CopyFrom(*instance_pt_blob_);
     cluster2d_->cluster(*category_pt_blob_, *instance_pt_blob_, pc_ptr,
                         valid_idx, objectness_thresh,
                         use_all_grids_for_clustering);
@@ -120,10 +216,13 @@ bool CNNSegmentation::segment(const pcl::PointCloud<pcl::PointXYZI>::Ptr &pc_ptr
     float confidence_thresh = score_threshold_;
     float height_thresh = 3.5;// 0.5;//3.5;//0.5;
     int min_pts_num = 3;
+    int64_t time4 = std::chrono::system_clock::now().time_since_epoch().count();
 
     cluster2d_->getObjectVector(confidence_thresh, height_thresh, min_pts_num,objvec);
 
-    int64_t time4 = std::chrono::system_clock::now().time_since_epoch().count();
+
+
+
 
 
     std::cout<<"infer time: "<<(time3-time2)/1000000<<" generate: "<<(time2-time1)/1000000<<" cluster: "<<(time4-time3)/1000000<<std::endl;
@@ -137,6 +236,94 @@ bool CNNSegmentation::segment(const pcl::PointCloud<pcl::PointXYZI>::Ptr &pc_ptr
 
 }
 
+
+
+// Asynchronous variant of segment(): runs feature generation and the caffe
+// forward pass on the caller's thread, copies the network output blobs into
+// heap buffers, and hands them to the clustering worker threads
+// (threadCluster) via minferbuffer instead of clustering inline.  Results are
+// retrieved later through GetRes().
+// @param pc_ptr         input point cloud (forwarded to the worker as-is).
+// @param valid_idx_ptr  indices of points eligible for clustering.
+// @return always true once the outputs have been queued.
+bool CNNSegmentation::segment(const pcl::PointCloud<pcl::PointXYZI>::Ptr &pc_ptr,
+                              pcl::PointIndicesPtr & valid_idx_ptr)
+{
+
+    int64 npctime = std::chrono::system_clock::now().time_since_epoch().count()/1000000;
+    if(use_gpu_)
+    {
+        caffe::Caffe::SetDevice(gpu_device_id_);
+        caffe::Caffe::set_mode(caffe::Caffe::GPU);
+    }
+
+    int64_t time1 = std::chrono::system_clock::now().time_since_epoch().count();
+
+    feature_generator_->generate(pc_ptr);
+
+    int64_t time2 = std::chrono::system_clock::now().time_since_epoch().count();
+
+    caffe_net_->Forward();
+
+    int64_t time3 = std::chrono::system_clock::now().time_since_epoch().count();
+
+    iv::Cluster2DData xCluster2DData;
+
+    // BUGFIX: these buffers are created with new[], so the shared_ptr must be
+    // given an array deleter; std::shared_ptr<float>(new float[n]) destroys
+    // the storage with scalar delete, which is undefined behaviour.
+    int nSize = width_*height_;
+    xCluster2DData.mcategory_pt_data_ptr = std::shared_ptr<float>(new float[nSize], std::default_delete<float[]>());
+    xCluster2DData.minstance_pt_x_data_ptr = std::shared_ptr<float>(new float[nSize], std::default_delete<float[]>());
+    xCluster2DData.minstance_pt_y_data_ptr = std::shared_ptr<float>(new float[nSize], std::default_delete<float[]>());
+    xCluster2DData.mconfidence_pt_data_ptr = std::shared_ptr<float>(new float[nSize], std::default_delete<float[]>());
+    xCluster2DData.mheight_pt_data_ptr = std::shared_ptr<float>(new float[nSize], std::default_delete<float[]>());
+    xCluster2DData.mnum_classes = class_pt_blob_->channels();
+    xCluster2DData.mclassify_pt_data_ptr = std::shared_ptr<float>(new float[nSize * xCluster2DData.mnum_classes], std::default_delete<float[]>());
+
+    int64_t time4 = std::chrono::system_clock::now().time_since_epoch().count();
+
+    // Snapshot every output blob so the worker thread can read them after the
+    // next Forward() overwrites the blobs.  cpu_data() also syncs GPU->CPU.
+    memcpy(xCluster2DData.mcategory_pt_data_ptr.get(),category_pt_blob_->cpu_data(),nSize * sizeof(float) );
+    memcpy(xCluster2DData.minstance_pt_x_data_ptr.get(),instance_pt_blob_->cpu_data(),nSize* sizeof(float));
+    memcpy(xCluster2DData.minstance_pt_y_data_ptr.get(),instance_pt_blob_->cpu_data()+instance_pt_blob_->offset(0, 1),nSize* sizeof(float));
+    memcpy(xCluster2DData.mconfidence_pt_data_ptr.get(),confidence_pt_blob_->cpu_data(),nSize* sizeof(float));
+    memcpy(xCluster2DData.mheight_pt_data_ptr.get(),height_pt_blob_->cpu_data(),nSize* sizeof(float));
+    memcpy(xCluster2DData.mclassify_pt_data_ptr.get(),class_pt_blob_->cpu_data(),nSize*xCluster2DData.mnum_classes* sizeof(float));
+    xCluster2DData.mInferUpdateTime = std::chrono::system_clock::now().time_since_epoch().count()/1000000;
+    xCluster2DData.mpointcloud = pc_ptr;
+    xCluster2DData.mvalid_idx_ptr = valid_idx_ptr;
+    xCluster2DData.mPointCloudUpdateTime = npctime;
+
+    int64_t time5 = std::chrono::system_clock::now().time_since_epoch().count();
+
+    // Publish for the worker threads; an unconsumed older buffer is simply
+    // replaced by the newer one.
+    mmutexbuffer.lock();
+    minferbuffer.mData = xCluster2DData;
+    minferbuffer.mbNew = true;
+    mmutexbuffer.unlock();
+
+    std::cout<<" feature use: "<<(time2-time1)/1000000<<" infer use: "<<(time3 - time2)/1000000
+            <<" copy use:"<<(time5-time4)/1000000<<std::endl;
+
+    return true;
+
+}
+
+
+
+
 //bool CNNSegmentation::segment(const pcl::PointCloud<pcl::PointXYZI>::Ptr &pc_ptr,
 //                              const pcl::PointIndices &valid_idx,
 //                              rockauto_msgs::DetectedObjectArray &objects)

+ 62 - 0
src/detection/detection_lidar_cnn_segmentation/cnn_segmentation.h

@@ -26,6 +26,9 @@
 
 #define __APP_NAME__ "lidar_cnn_seg_detect"
 
+
+#include <thread>
+
 struct CellStat
 {
   CellStat() : point_num(0), valid_point_num(0)
@@ -36,6 +39,47 @@ struct CellStat
   int valid_point_num;
 };
 
+
+namespace iv {
+// Snapshot of one inference's network outputs plus the source cloud, handed
+// from CNNSegmentation::segment() to the clustering worker threads.  All
+// float buffers are width_*height_ grids (classify buffer: num_classes grids,
+// channel-major).  NOTE(review): the shared_ptr<float> members hold new[]
+// arrays, so they must be constructed with an array deleter.
+struct Cluster2DData
+{
+    std::shared_ptr<float>  mcategory_pt_data_ptr;
+    std::shared_ptr<float>  minstance_pt_x_data_ptr;
+    std::shared_ptr<float>  minstance_pt_y_data_ptr;
+    std::shared_ptr<float>  mconfidence_pt_data_ptr;
+    std::shared_ptr<float>  mheight_pt_data_ptr;
+    std::shared_ptr<float>  mclassify_pt_data_ptr;
+    int mnum_classes;
+    int64 mPointCloudUpdateTime; //ms since epoch
+    int64 mInferUpdateTime; //ms since epoch
+    pcl::PointCloud<pcl::PointXYZI>::Ptr mpointcloud;
+    pcl::PointIndicesPtr mvalid_idx_ptr;
+
+};
+
+}
+
+namespace iv {
+// Single-slot mailbox for an incoming point cloud (mbNew marks "unread").
+struct pcbuffer
+{
+    pcl::PointCloud<pcl::PointXYZI>::Ptr mpointcloud;
+    bool mbNew;
+    int64 mPointCloudUpdateTime; //ms since epoch
+};
+// Single-slot mailbox carrying inference outputs to the cluster workers.
+struct inferresbuffer
+{
+    Cluster2DData mData;
+    bool mbNew;
+};
+
+// Clustering result published by threadCluster() and read via GetRes().
+struct cnnres
+{
+    std::vector<Obstacle> mobjvec;
+    Cluster2DData mcluster2ddata;
+    bool mbNew;
+};
+}
+
 class CNNSegmentation
 {
 public:
@@ -78,6 +122,20 @@ private:
   // bird-view raw feature generator
   std::shared_ptr<FeatureGenerator> feature_generator_;
 
+private:
+  void threadCluster();
+
+  iv::inferresbuffer minferbuffer;
+  std::mutex mmutexbuffer;
+
+  iv::cnnres mcnnres;
+  std::mutex mmutexcnnres;
+
+  bool mbThreadRun;
+
+  std::vector<std::thread *> mpthreadv;
+
+
 
 public:
   bool init(  std::string proto_file,std::string weight_file,double rangemax,double score,
@@ -86,6 +144,10 @@ public:
   bool segment(const pcl::PointCloud<pcl::PointXYZI>::Ptr &pc_ptr,
                const pcl::PointIndices &valid_idx,
                std::vector<Obstacle> & objvec);
+  bool segment(const pcl::PointCloud<pcl::PointXYZI>::Ptr &pc_ptr,
+               pcl::PointIndicesPtr & valid_idx_ptr);
+
+  int GetRes(iv::cnnres & xres);
 
 
 //  bool segment(const pcl::PointCloud<pcl::PointXYZI>::Ptr &pc_ptr,

+ 11 - 0
src/detection/detection_lidar_cnn_segmentation/detection_lidar_cnn_segmentation.xml

@@ -0,0 +1,11 @@
+<xml>	
+	<node name="detection_ndt_matching_gpu_multi.xml">
+		<param name="prototxt" value="/home/nvidia/models/lidar/model.prototxt" />
+		<param name="caffemodel" value="/home/nvidia/models/lidar/model.caffemodel" />
+		<param name="input" value="lidarpc_center" />
+		<param name="output" value="lidar_track" />
+		<param name="range" value="30.0" />
+		<param name="width" value="512" />
+		<param name="height" value="512" />
+	</node>
+</xml>

+ 4 - 4
src/detection/detection_lidar_cnn_segmentation/feature_generator.cpp

@@ -1,14 +1,14 @@
 #include "feature_generator.h"
 
 
-bool FeatureGenerator::init(caffe::Blob<float>* out_blob, bool use_constant_feature)
+bool FeatureGenerator::init(caffe::Blob<float>* out_blob, bool use_constant_feature,double range,int width,int height)
 {
   out_blob_ = out_blob;
 
   // raw feature parameters
-  range_ = 40;
-  width_ = 512;
-  height_ = 512;
+  range_ = range;
+  width_ = width;
+  height_ = height;
   min_height_ = -5.0;
   max_height_ = 5.0;
   CHECK_EQ(width_, height_)

+ 1 - 1
src/detection/detection_lidar_cnn_segmentation/feature_generator.h

@@ -16,7 +16,7 @@ public:
   FeatureGenerator(){}
   ~FeatureGenerator(){}
 
-  bool init(caffe::Blob<float>* out_blob, bool use_constant_feature);
+  bool init(caffe::Blob<float>* out_blob, bool use_constant_feature,double range,int width,int height);
   void generate(
       const pcl::PointCloud<pcl::PointXYZI>::Ptr& pc_ptr);
 private:

+ 190 - 35
src/detection/detection_lidar_cnn_segmentation/main.cpp

@@ -19,6 +19,12 @@
 //#include "alog.h"
 
 CNNSegmentation gcnn;
+CNNSegmentation gcnnv[10];
+
+
+
+iv::pcbuffer gpcbuffer;
+std::mutex gmutexpcbuffer;
 
 void * gpa;
 void * gpdetect;
@@ -30,6 +36,8 @@ std::thread * gpthread;
 std::string gstrinput;
 std::string gstroutput;
 
+bool gbUseMultiCNN = false;
+
 static iv::lidar::PointXYZ pcltoprotopoint(pcl::PointXYZ x)
 {
     iv::lidar::PointXYZ lx;
@@ -245,48 +253,117 @@ static void GetLidarObj(std::vector<Obstacle> objvec,iv::lidar::objectarray & li
 void DectectOnePCD(const pcl::PointCloud<pcl::PointXYZI>::Ptr &pc_ptr)
 {
     std::cout<<"pcd size is "<<pc_ptr->size()<<std::endl;
-    pcl::PointIndices valid_idx;
-    auto &indices = valid_idx.indices;
-    indices.resize(pc_ptr->size());
-    std::iota(indices.begin(), indices.end(), 0);
 
-    std::vector<Obstacle> objvec;
 
-    gcnn.segment(pc_ptr, valid_idx, objvec);
-    givlog->verbose("obj size is %d", objvec.size());
-    std::cout<<"obj size is "<<objvec.size()<<std::endl;
+    bool bUseMutliCluster = true;
 
-//    std::vector<iv::lidar::lidarobject> lidarobjvec;
-    iv::lidar::objectarray lidarobjvec;
-    GetLidarObj(objvec,lidarobjvec);
+    if(bUseMutliCluster == false)
+    {
+        pcl::PointIndices valid_idx;
+        auto &indices = valid_idx.indices;
+        indices.resize(pc_ptr->size());
+        std::iota(indices.begin(), indices.end(), 0);
+        std::vector<Obstacle> objvec;
+        gcnn.segment(pc_ptr, valid_idx, objvec);
+        givlog->verbose("obj size is %d", objvec.size());
+        std::cout<<"obj size is "<<objvec.size()<<std::endl;
+
+        //    std::vector<iv::lidar::lidarobject> lidarobjvec;
+        iv::lidar::objectarray lidarobjvec;
+        GetLidarObj(objvec,lidarobjvec);
+
+        double timex = pc_ptr->header.stamp;
+        timex = timex/1000.0;
+        lidarobjvec.set_timestamp(pc_ptr->header.stamp);
+
+        int ntlen;
+        std::string out = lidarobjvec.SerializeAsString();
+        //   char * strout = lidarobjtostr(lidarobjvec,ntlen);
+        iv::modulecomm::ModuleSendMsg(gpdetect,out.data(),out.length());
+        givlog->verbose("lenth is %d",out.length());
+        //  delete strout;
+
+        //    int i;
+        //    for(i=0;i<objvec.size();i++)
+        //    {
+        //        Obstacle x;
+        //        x = objvec.at(i);
+
+        //        std::cout<<"obj "<<i<<" x is "<<x.GetTypeString()<<std::endl;
+
+        //        iv::lidar::lidarobject y;
+        //        y = lidarobjvec.at(i);
+        //        std::cout<<"     "<<" x is"<<y.position.x<<" y is "<<y.position.y<<" len x is "<<y.dimensions.x<<" len y is "<<y.dimensions.y<<std::endl;
+        //        std::cout<<"     "<<" type is "<<y.mnType<<std::endl;
+        //    }
 
-    double timex = pc_ptr->header.stamp;
-    timex = timex/1000.0;
-    lidarobjvec.set_timestamp(pc_ptr->header.stamp);
+    }
+    else
+    {
+        pcl::PointIndicesPtr valid_idx_ptr = boost::shared_ptr<pcl::PointIndices>(new pcl::PointIndices());
+        auto &indices = valid_idx_ptr->indices;
+        indices.resize(pc_ptr->size());
+        std::iota(indices.begin(), indices.end(), 0);
+        gcnn.segment(pc_ptr, valid_idx_ptr);
+    }
+}
 
-    int ntlen;
-    std::string out = lidarobjvec.SerializeAsString();
- //   char * strout = lidarobjtostr(lidarobjvec,ntlen);
-    iv::modulecomm::ModuleSendMsg(gpdetect,out.data(),out.length());
-    givlog->verbose("lenth is %d",out.length());
-  //  delete strout;
 
-//    int i;
-//    for(i=0;i<objvec.size();i++)
-//    {
-//        Obstacle x;
-//        x = objvec.at(i);
+void threadgetres()
+{
+    while(1)
+    {
+        iv::cnnres xcnnres;
+        int nrtn = gcnn.GetRes(xcnnres);
+        if(nrtn == 1)
+        {
+            iv::lidar::objectarray lidarobjvec;
+            GetLidarObj(xcnnres.mobjvec,lidarobjvec);
+            std::string out = lidarobjvec.SerializeAsString();
+            //   char * strout = lidarobjtostr(lidarobjvec,ntlen);
+            iv::modulecomm::ModuleSendMsg(gpdetect,out.data(),out.length());
+
+            int64 now =  std::chrono::system_clock::now().time_since_epoch().count()/1000000;
+            std::cout<<" pc time : "<<xcnnres.mcluster2ddata.mPointCloudUpdateTime<<" cnn use: "<<(now - xcnnres.mcluster2ddata.mPointCloudUpdateTime)
+                    <<" obj size: "<<xcnnres.mobjvec.size()<<std::endl;
+        }
+        else
+        {
+            std::this_thread::sleep_for(std::chrono::milliseconds(1));
+        }
+    }
+}
 
-//        std::cout<<"obj "<<i<<" x is "<<x.GetTypeString()<<std::endl;
+void DectectOnePCD(const pcl::PointCloud<pcl::PointXYZI>::Ptr &pc_ptr,CNNSegmentation * pcnn)
+{
+    std::cout<<"pcd size is "<<pc_ptr->size()<<std::endl;
 
-//        iv::lidar::lidarobject y;
-//        y = lidarobjvec.at(i);
-//        std::cout<<"     "<<" x is"<<y.position.x<<" y is "<<y.position.y<<" len x is "<<y.dimensions.x<<" len y is "<<y.dimensions.y<<std::endl;
-//        std::cout<<"     "<<" type is "<<y.mnType<<std::endl;
-//    }
-}
 
 
+        pcl::PointIndices valid_idx;
+        auto &indices = valid_idx.indices;
+        indices.resize(pc_ptr->size());
+        std::iota(indices.begin(), indices.end(), 0);
+        std::vector<Obstacle> objvec;
+        pcnn->segment(pc_ptr, valid_idx, objvec);
+        givlog->verbose("obj size is %d", objvec.size());
+        std::cout<<"obj size is "<<objvec.size()<<std::endl;
+
+        //    std::vector<iv::lidar::lidarobject> lidarobjvec;
+        iv::lidar::objectarray lidarobjvec;
+        GetLidarObj(objvec,lidarobjvec);
+
+        double timex = pc_ptr->header.stamp;
+        timex = timex/1000.0;
+        lidarobjvec.set_timestamp(pc_ptr->header.stamp);
+
+        int ntlen;
+        std::string out = lidarobjvec.SerializeAsString();
+        //   char * strout = lidarobjtostr(lidarobjvec,ntlen);
+        iv::modulecomm::ModuleSendMsg(gpdetect,out.data(),out.length());
+        givlog->verbose("lenth is %d",out.length());
+}
+
 int gnothavedatatime = 0;
 
 void ListenPointCloud(const char * strdata,const unsigned int nSize,const unsigned int index,const QDateTime * dt,const char * strmemname)
@@ -341,12 +418,62 @@ void ListenPointCloud(const char * strdata,const unsigned int nSize,const unsign
 
 //    std::cout<<"pcl time is "<<xTime.elapsed()<<std::endl;
 
-    DectectOnePCD(point_cloud);
-    std::cout<<"time is "<<(QDateTime::currentMSecsSinceEpoch() % 1000)<<" "<<xTime.elapsed()<<std::endl;
+    bool bUseMultiCNN = gbUseMultiCNN;
+    if(bUseMultiCNN)
+    {
+        gmutexpcbuffer.lock();
+        gpcbuffer.mbNew = true;
+        gpcbuffer.mpointcloud = point_cloud;
+        gpcbuffer.mPointCloudUpdateTime = std::chrono::system_clock::now().time_since_epoch().count()/1000000;
+        gmutexpcbuffer.unlock();
+    }
+    else
+    {
+        DectectOnePCD(point_cloud);
+    }
+
+//    std::cout<<"time is "<<(QDateTime::currentMSecsSinceEpoch() % 1000)<<" "<<xTime.elapsed()<<std::endl;
     gfault->SetFaultState(0, 0, "ok");
 
 }
 
+void threadcnn(std::string protofile,std::string weightfile,double range,int width,int height)
+{
+    CNNSegmentation cnn;
+    cnn.init(protofile,weightfile,60.0,0.6,1024,1024,false,true,0);
+
+    pcl::PointCloud<pcl::PointXYZI>::Ptr point_cloud;
+
+    while(1)
+    {
+        bool bNew = false;
+        int64 npctime;
+        gmutexpcbuffer.lock();
+        if(gpcbuffer.mbNew)
+        {
+            gpcbuffer.mbNew = false;
+            bNew = true;
+            point_cloud = gpcbuffer.mpointcloud;
+            npctime = gpcbuffer.mPointCloudUpdateTime;
+        }
+        gmutexpcbuffer.unlock();
+
+        if(bNew == false)
+        {
+            std::this_thread::sleep_for(std::chrono::milliseconds(1));
+            continue;
+        }
+
+
+        DectectOnePCD(point_cloud,&cnn);
+        std::cout<<"pc time: is "<<npctime<<" res use time: "<<(std::chrono::system_clock::now().time_since_epoch().count()/1000000 - npctime)<<std::endl;
+
+
+    }
+
+
+}
+
 bool gbstate = true;
 void statethread()
 {
@@ -449,9 +576,37 @@ int main(int argc, char *argv[])
     gstrinput = xparam.GetParam("input","lidar_pc");
     gstroutput = xparam.GetParam("output","lidar_cnn_dectect");
 
+    double range,width,height;
+    range = xparam.GetParam("range",60.0);
+    width = xparam.GetParam("width",1024);
+    height = xparam.GetParam("height",1024);
+    int ncnn = xparam.GetParam("ncnn",2);
+
 //    std::string protofile = "/home/yuchuli/qt/bq/models/lidar/model.prototxt";
 //    std::string weightfile = "/home/yuchuli/qt/bq/models/lidar/model.caffemodel";
-    gcnn.init(protofile,weightfile,60.0,0.6,512,512,false,true,0);
+    gcnn.init(protofile,weightfile,range,0.6,width,height,false,true,0);
+
+//    int i;
+//    for(i=0;i<3;i++)
+//    {
+//        gcnnv[i].init(protofile,weightfile,60.0,0.6,1024,1024,false,true,0);
+
+//    }
+
+    std::thread * pthreadv[10];
+      if(gbUseMultiCNN)
+      {
+          int i;
+          if(ncnn>10)ncnn = 10;
+          for(i=0;i<ncnn;i++)
+          {
+              pthreadv[i] = new std::thread(threadcnn,protofile,weightfile,range,width,height);
+          }
+      }
+
+
+      std::thread * pthreadsend = new std::thread(threadgetres);
+
 
 
 

+ 3 - 3
src/detection/detection_lidar_cnn_segmentation_trt/src/detector.cpp

@@ -30,9 +30,9 @@ LidarApolloInstanceSegmentation::LidarApolloInstanceSegmentation()
   std::string prototxt_file;
   std::string caffemodel_file;
   score_threshold_ = 0.4;//node_->declare_parameter("score_threshold", 0.8);
-  range = 60;//node_->declare_parameter("range", 60);
-  width = 640;// 640;//node_->declare_parameter("width", 640);
-  height = 640; //640;//node_->declare_parameter("height", 640);
+  range = 30;//node_->declare_parameter("range", 60);
+  width = 512;// 640;//node_->declare_parameter("width", 640);
+  height = 512; //640;//node_->declare_parameter("height", 640);
   engine_file =  "deploy.engine";//node_->declare_parameter("engine_file", "vls-128.engine");
   prototxt_file = "hdl-64.prototxt";//node_->declare_parameter("prototxt_file", "vls-128.prototxt");
   caffemodel_file = "deploy.caffemodel";// node_->declare_parameter("caffemodel_file", "vls-128.caffemodel");

+ 47 - 18
src/driver/driver_map_xodrload/driver_map_xodrload.xml

@@ -2451,21 +2451,42 @@ static void AddSignalMark(Road * pRoad, int nlane,std::vector<PlanPoint> & xvect
         Signal * pSig = pRoad->GetSignal(i);
         int nfromlane = -100;
         int ntolane = 100;
-        signal_laneValidity * pSig_laneValidity = pSig->GetlaneValidity();
-        if(pSig_laneValidity != 0)
-        {
-            nfromlane = pSig_laneValidity->GetfromLane();
-            ntolane = pSig_laneValidity->GettoLane();
-        }
-        if((nlane < 0)&&(nfromlane >= 0))
-        {
-            continue;
-        }
-        if((nlane > 0)&&(ntolane<=0))
+
+        if(pSig->GetlaneValidityCount()>0)
         {
-            continue;
+            bool bValid = false;
+            unsigned int j;
+            std::vector<signal_laneValidity> * pvectorlanevalid = pSig->GetlaneValidityVector();
+            unsigned int nsize = static_cast<unsigned int>(pvectorlanevalid->size());
+            for(j=0;j<nsize;j++)
+            {
+                signal_laneValidity * pSig_laneValidity = &pvectorlanevalid->at(j);
+                nfromlane = pSig_laneValidity->GetfromLane();
+                ntolane = pSig_laneValidity->GettoLane();
+                if((nlane >= nfromlane)&&(nlane<=ntolane))
+                {
+                    bValid = true;
+                    break;
+                }
+            }
+            if(bValid == false)continue;
         }
 
+//        signal_laneValidity * pSig_laneValidity = pSig->GetlaneValidity();
+//        if(pSig_laneValidity != 0)
+//        {
+//            nfromlane = pSig_laneValidity->GetfromLane();
+//            ntolane = pSig_laneValidity->GettoLane();
+//        }
+//        if((nlane < 0)&&(nfromlane >= 0))
+//        {
+//            continue;
+//        }
+//        if((nlane > 0)&&(ntolane<=0))
+//        {
+//            continue;
+//        }
+
         double s = pSig->Gets();
         double fpos = s/pRoad->GetRoadLength();
         if(nlane > 0)fpos = 1.0 -fpos;
@@ -2779,18 +2800,20 @@ void ChangeLane(std::vector<PlanPoint> & xvectorPP)
         else
         {
             int k;
-            for(k=i;k<(nsize-1);k++)
+            for(k=i;k<=(nsize-1);k++)
             {
-                if((xvectorPP[k].mfSecx == xvectorPP[k].x)&&(xvectorPP[k].mfSecy == xvectorPP[k].y))
+                if((fabs(xvectorPP[k].mfSecx - xvectorPP[k].x)<0.001)&&(fabs(xvectorPP[k].mfSecy - xvectorPP[k].y)<0.001))
                 {
                     break;
                 }
             }
+            if(k>(nsize-1))k=nsize-1;
             int nnum = k-i;
             int nchangepoint = 300;
             double froadlen = sqrt(pow(xvectorPP[k].x - xvectorPP[i].x,2)
                                    +pow(xvectorPP[k].y - xvectorPP[i].y,2));
             const double fMAXCHANGE = 50;
+
             if(froadlen<fMAXCHANGE)
             {
                 nchangepoint = nnum;
@@ -2817,23 +2840,29 @@ void ChangeLane(std::vector<PlanPoint> & xvectorPP)
                 xvectorPP[j].y = xvectorPP[j].mfSecy;
             }
             nnum = nend - nstart;
-            for(j=nstart;j<nend;j++)
+            int nlast = k-1;
+            if(k==(nsize-1))nlast = k;
+            for(j=nstart;j<=nlast;j++)
             {
                 double fdis = sqrt(pow(xvectorPP[j].x - xvectorPP[j].mfSecx,2)
                                    +pow(xvectorPP[j].y - xvectorPP[j].mfSecy,2));
-                double foff = fdis *(j-nstart)/nnum;
+                double foff = 0;
+                if(j<nend)
+                    foff = fdis *(j-nstart)/nnum;
+                else
+                    foff = fdis;
                 if(xvectorPP[j].nlrchange == 1)
                 {
 //                    std::cout<<"foff is "<<foff<<std::endl;
                     xvectorPP[j].x = xvectorPP[j].mfSecx + foff *cos(xvectorPP[j].hdg + M_PI/2.0);
                     xvectorPP[j].y = xvectorPP[j].mfSecy + foff *sin(xvectorPP[j].hdg + M_PI/2.0);
-                    xvectorPP[j].mfDisToRoadLeft = xvectorPP[j].mfDisToRoadLeft + fdis - foff;
+                    xvectorPP[j].mfDisToRoadLeft = xvectorPP[j].mfDisToRoadLeft - foff; //+ fdis
                 }
                 else
                 {
                     xvectorPP[j].x = xvectorPP[j].mfSecx + foff *cos(xvectorPP[j].hdg - M_PI/2.0);
                     xvectorPP[j].y = xvectorPP[j].mfSecy + foff *sin(xvectorPP[j].hdg - M_PI/2.0);
-                    xvectorPP[j].mfDisToRoadLeft = xvectorPP[j].mfDisToRoadLeft  - fdis +foff;
+                    xvectorPP[j].mfDisToRoadLeft = xvectorPP[j].mfDisToRoadLeft  +foff;//- fdis
                 }
             }
             i =k;

+ 51 - 5
src/driver/driver_map_xodrload/main.cpp

@@ -860,6 +860,14 @@ void SetPlan(xodrobj xo)
     xfile_1.setFileName(strpath_1);
     xfile_1.open(QIODevice::ReadWrite);
 
+    double fpointdis = 0.1;
+    if(nSize > 2)
+    {
+        fpointdis = sqrt(pow(xPlan[0].x - xPlan[1].x,2)+pow(xPlan[0].y - xPlan[1].y,2));
+    }
+
+    if(fpointdis < 0.1)fpointdis = 0.1;
+
     for(i=0;i<nSize;i++)
     {
         iv::map::mappoint * pmappoint =  xtrace.add_point();
@@ -907,15 +915,27 @@ void SetPlan(xodrobj xo)
         iv::GPSData data(new iv::GPS_INS);
         data->roadMode = 0;
 
- //       std::cout<<"i:"<<i<<" lr:"<<xPlan[i].nlrchange<<std::endl;
-        if(xPlan[i].nlrchange == 1)
+        static int nChangeCount = 0;
+        static int nlrmode = -1;
+
+        if(nChangeCount == 0)
         {
-            data->roadMode = 14;
+
         }
-        if(xPlan[i].nlrchange == 2)
+        else
         {
-            data->roadMode = 15;
+            data->roadMode = nlrmode;
+            nChangeCount--;
         }
+ //       std::cout<<"i:"<<i<<" lr:"<<xPlan[i].nlrchange<<std::endl;
+//        if(xPlan[i].nlrchange == 1)
+//        {
+//            data->roadMode = 14;
+//        }
+//        if(xPlan[i].nlrchange == 2)
+//        {
+//            data->roadMode = 15;
+//        }
 
         if(i>50)
         {
@@ -934,6 +954,32 @@ void SetPlan(xodrobj xo)
                     }
                     if(j <=0)break;
                 }
+                int nChangeCount = 0;
+                for(j=i;j<(nSize-1);j++)
+                {
+                    if((xPlan[j].nlrchange != 0)&&(xPlan[j+1].nlrchange != 0))
+                    {
+                        nChangeCount++;
+                    }
+                }
+                if(nChangeCount < static_cast<int>(10.0/fpointdis))   //Fast Change road.
+                {
+                    nChangeCount = nChangeCount + static_cast<int>(3.0/fpointdis);
+                }
+                else
+                {
+                    nChangeCount =  static_cast<int>(5.0/fpointdis);
+                }
+
+                    if(xPlan[i].nlrchange == 1)
+                    {
+                        nlrmode = 14;
+                    }
+                    if(xPlan[i].nlrchange == 2)
+                    {
+                        nlrmode = 15;
+                    }
+
             }
         }
 

+ 1 - 0
src/tool/IVSysMan/IVSysManWithoutWin.pro

@@ -52,6 +52,7 @@ contains(QMAKE_HOST.os, Windows){
 SOURCES += \
     ../../include/msgtype/switch.pb.cc \
         mainwithoutwin.cpp \
+    procmemstat.cpp \
     sysman.cpp \
     progmon.cpp \
     cpumem.cpp \