3 months ago · e148e62fca
--- a/src/detection/detection_lidar_centerpoint/lib/preprocess/pointcloud_densification.cpp
+++ b/src/detection/detection_lidar_centerpoint/lib/preprocess/pointcloud_densification.cpp
@@ -64,6 +64,7 @@ PointCloudDensification::PointCloudDensification(const DensificationParam & para
 
															 bool PointCloudDensification::enqueuePointCloud(
														
 
															    const pcl::PointCloud<pcl::PointXYZI>::Ptr & pc_ptr)
														
 
															 {
														
 
															+    affine_world2current_ =Eigen::Affine3f::Identity();
														
 
															     enqueue(pc_ptr);
														
 
															     dequeue();
														
--- a/src/detection/detection_lidar_centerpoint/main.cpp
+++ b/src/detection/detection_lidar_centerpoint/main.cpp
@@ -33,9 +33,10 @@ void init()
 
															     const float circle_nms_dist_threshold =0.5;
														
 
															   //    static_cast<float>(this->declare_parameter<double>("circle_nms_dist_threshold"));
														
 
															     std::vector<double> yaw_norm_thresholds ;
														
 
															-    yaw_norm_thresholds.push_back(0.3);yaw_norm_thresholds.push_back(0.3);yaw_norm_thresholds.push_back(0.3);yaw_norm_thresholds.push_back(0.0);
														
 
															+    yaw_norm_thresholds.push_back(0.3);yaw_norm_thresholds.push_back(0.3);yaw_norm_thresholds.push_back(0.3);
														
 
															+    yaw_norm_thresholds.push_back(0.3);yaw_norm_thresholds.push_back(0.0);
														
 
															     const std::string densification_world_frame_id = "map";
														
 
															-    const int densification_num_past_frames = 1;
														
 
															+    const int densification_num_past_frames = 0;//1;
														
 
															     const std::string trt_precision = "fp16";
														
 
															     const std::string encoder_onnx_path = "/home/nvidia/models/pts_voxel_encoder_centerpoint.onnx";//this->declare_parameter<std::string>("encoder_onnx_path");
														
 
															     const std::string encoder_engine_path ="/home/nvidia/models/pts_voxel_encoder_centerpoint.eng";//this->declare_parameter<std::string>("encoder_engine_path");
														
@@ -72,7 +73,7 @@ void init()
 
															       densification_world_frame_id, densification_num_past_frames);
														
 
															     CenterPointConfig config(
														
 
															-      3, point_feature_size, max_voxel_size, point_cloud_range, voxel_size,
														
 
															+      5, point_feature_size, max_voxel_size, point_cloud_range, voxel_size,
														
 
															       downsample_factor, encoder_in_feature_size, score_threshold, circle_nms_dist_threshold,
														
 
															       yaw_norm_thresholds);
														
 
															     detector_ptr_ =
														
@@ -189,7 +190,9 @@ void ListenPointCloud(const char * strdata,const unsigned int nSize,const unsign
 
															         iv::lidar::lidarobject obj;
														
 
															         box3DToDetectedObject(det_boxes3d[i],class_names_,false,obj);
														
 
															         iv::lidar::lidarobject * pobj = lidarobjvec.add_obj();
														
 
															+
														
 
															         pobj->CopyFrom(obj);
														
 
															+        std::cout<<" obj i: "<<i<<" class: "<<pobj->type_name()<<" x: "<<pobj->position().x()<<" y: "<<pobj->position().y()<<std::endl;
														
 
															     }
														
 
															     std::string out = lidarobjvec.SerializeAsString();
														
--- a/src/detection/detection_lidar_transfusion/detection_lidar_transfusion.pro
+++ b/src/detection/detection_lidar_transfusion/detection_lidar_transfusion.pro
@@ -11,8 +11,11 @@ SOURCES += \
 
															         main.cpp \
														
 
															         network/network_trt.cpp \
														
 
															         postprocess/non_maximum_suppression.cpp \
														
 
															+        preprocess/pointcloud_densification.cpp \
														
 
															+        preprocess/voxel_generator.cpp \
														
 
															         simple_profiler.cpp \
														
 
															-        tensorrt_common.cpp
														
 
															+        tensorrt_common.cpp \
														
 
															+        transfusion_trt.cpp
														
 
															 # Default rules for deployment.
														
 
															 qnx: target.path = /tmp/$${TARGET}/bin
														
@@ -21,7 +24,8 @@ else: unix:!android: target.path = /opt/$${TARGET}/bin
 
															 DISTFILES += \
														
 
															     postprocess/circle_nms_kernel.cu \
														
 
															-    postprocess/postprocess_kernel.cu
														
 
															+    postprocess/postprocess_kernel.cu \
														
 
															+    preprocess/preprocess_kernel.cu
														
 
															 HEADERS += \
														
 
															     cuda_utils.hpp \
														
@@ -35,8 +39,13 @@ HEADERS += \
 
															     include/tensorrt_common/logger.hpp \
														
 
															     include/tensorrt_common/simple_profiler.hpp \
														
 
															     include/tensorrt_common/tensorrt_common.hpp \
														
 
															-    utils.hpp
														
 
															+    transfusion_trt.hpp \
														
 
															+    utils.hpp \
														
 
															+    visibility_control.hpp
														
 
															+INCLUDEPATH += /usr/include/pcl-1.7
														
 
															+INCLUDEPATH += /usr/include/pcl-1.8
														
 
															+INCLUDEPATH += /usr/include/pcl-1.12
														
 
															 INCLUDEPATH += /usr/include/eigen3
														
 
															 INCLUDEPATH += $$PWD/include
														
@@ -50,7 +59,8 @@ LIBS += -L"/usr/local/lib" \
 
															 CUDA_SOURCES += \
														
 
															     postprocess/circle_nms_kernel.cu \
														
 
															-    postprocess/postprocess_kernel.cu
														
 
															+    postprocess/postprocess_kernel.cu \
														
 
															+    preprocess/preprocess_kernel.cu
														
 
															 CUDA_SDK = "/usr/local/cuda/"   # cudaSDK路径
														
--- a/src/detection/detection_lidar_transfusion/include/preprocess/pointcloud_densification.hpp
+++ b/src/detection/detection_lidar_transfusion/include/preprocess/pointcloud_densification.hpp
@@ -15,20 +15,25 @@
 
															 #ifndef AUTOWARE__LIDAR_TRANSFUSION__PREPROCESS__POINTCLOUD_DENSIFICATION_HPP_
														
 
															 #define AUTOWARE__LIDAR_TRANSFUSION__PREPROCESS__POINTCLOUD_DENSIFICATION_HPP_
														
 
															-#include "autoware/lidar_transfusion/cuda_utils.hpp"
														
 
															+#include "cuda_utils.hpp"
														
 
															-#include <tf2_ros/buffer.h>
														
 
															-#include <tf2_ros/transform_listener.h>
														
 
															+#include <Eigen/Eigen>
														
 
															+
														
 
															+//#include <tf2_ros/buffer.h>
														
 
															+//#include <tf2_ros/transform_listener.h>
														
 
															 #ifdef ROS_DISTRO_GALACTIC
														
 
															 #include <tf2_sensor_msgs/tf2_sensor_msgs.h>
														
 
															 #else
														
 
															-#include <tf2_sensor_msgs/tf2_sensor_msgs.hpp>
														
 
															+//#include <tf2_sensor_msgs/tf2_sensor_msgs.hpp>
														
 
															 #endif
														
 
															 #include <list>
														
 
															 #include <string>
														
 
															 #include <utility>
														
 
															+#include <pcl/point_cloud.h>
														
 
															+#include <pcl/point_types.h>
														
 
															+
														
 
															 namespace autoware::lidar_transfusion
														
 
															 {
														
@@ -52,7 +57,7 @@ private:
 
															 struct PointCloudWithTransform
														
 
															 {
														
 
															   cuda::unique_ptr<uint8_t[]> data_d{nullptr};
														
 
															-  std_msgs::msg::Header header;
														
 
															+//  std_msgs::msg::Header header;
														
 
															   size_t num_points{0};
														
 
															   Eigen::Affine3f affine_past2world;
														
 
															 };
														
@@ -63,7 +68,9 @@ public:
 
															   explicit PointCloudDensification(const DensificationParam & param, cudaStream_t & stream);
														
 
															   bool enqueuePointCloud(
														
 
															-    const sensor_msgs::msg::PointCloud2 & msg, const tf2_ros::Buffer & tf_buffer);
														
 
															+       const pcl::PointCloud<pcl::PointXYZI>::Ptr & pc_ptr);
														
 
															+//  bool enqueuePointCloud(
														
 
															+//    const sensor_msgs::msg::PointCloud2 & msg, const tf2_ros::Buffer & tf_buffer);
														
 
															   double getCurrentTimestamp() const { return current_timestamp_; }
														
 
															   Eigen::Affine3f getAffineWorldToCurrent() const { return affine_world2current_; }
														
@@ -86,7 +93,8 @@ public:
 
															   unsigned int pointcloud_cache_size() const { return param_.pointcloud_cache_size(); }
														
 
															 private:
														
 
															-  void enqueue(const sensor_msgs::msg::PointCloud2 & msg, const Eigen::Affine3f & affine);
														
 
															+//  void enqueue(const sensor_msgs::msg::PointCloud2 & msg, const Eigen::Affine3f & affine);
														
 
															+  void enqueue(const pcl::PointCloud<pcl::PointXYZI>::Ptr & pc_ptr);
														
 
															   void dequeue();
														
 
															   DensificationParam param_;
														
--- a/src/detection/detection_lidar_transfusion/include/preprocess/preprocess_kernel.hpp
+++ b/src/detection/detection_lidar_transfusion/include/preprocess/preprocess_kernel.hpp
@@ -31,9 +31,9 @@
 
															 #ifndef AUTOWARE__LIDAR_TRANSFUSION__PREPROCESS__PREPROCESS_KERNEL_HPP_
														
 
															 #define AUTOWARE__LIDAR_TRANSFUSION__PREPROCESS__PREPROCESS_KERNEL_HPP_
														
 
															-#include "autoware/lidar_transfusion/cuda_utils.hpp"
														
 
															-#include "autoware/lidar_transfusion/transfusion_config.hpp"
														
 
															-#include "autoware/lidar_transfusion/utils.hpp"
														
 
															+#include "cuda_utils.hpp"
														
 
															+#include "transfusion_config.hpp"
														
 
															+#include "utils.hpp"
														
 
															 #include <cuda_runtime_api.h>
														
--- a/src/detection/detection_lidar_transfusion/include/preprocess/voxel_generator.hpp
+++ b/src/detection/detection_lidar_transfusion/include/preprocess/voxel_generator.hpp
@@ -15,29 +15,52 @@
 
															 #ifndef AUTOWARE__LIDAR_TRANSFUSION__PREPROCESS__VOXEL_GENERATOR_HPP_
														
 
															 #define AUTOWARE__LIDAR_TRANSFUSION__PREPROCESS__VOXEL_GENERATOR_HPP_
														
 
															-#include "autoware/lidar_transfusion/cuda_utils.hpp"
														
 
															-#include "autoware/lidar_transfusion/preprocess/pointcloud_densification.hpp"
														
 
															-#include "autoware/lidar_transfusion/preprocess/preprocess_kernel.hpp"
														
 
															-#include "autoware/lidar_transfusion/ros_utils.hpp"
														
 
															-#include "autoware/lidar_transfusion/transfusion_config.hpp"
														
 
															+#include "cuda_utils.hpp"
														
 
															+#include "preprocess/pointcloud_densification.hpp"
														
 
															+#include "preprocess/preprocess_kernel.hpp"
														
 
															+//#include "autoware/lidar_transfusion/ros_utils.hpp"
														
 
															+#include "transfusion_config.hpp"
														
 
															 #ifdef ROS_DISTRO_GALACTIC
														
 
															 #include <tf2_eigen/tf2_eigen.h>
														
 
															 #else
														
 
															-#include <tf2_eigen/tf2_eigen.hpp>
														
 
															+//#include <tf2_eigen/tf2_eigen.hpp>
														
 
															 #endif
														
 
															-#include <autoware_point_types/types.hpp>
														
 
															+//#include <autoware_point_types/types.hpp>
														
 
															-#include <sensor_msgs/msg/point_cloud2.hpp>
														
 
															+//#include <sensor_msgs/msg/point_cloud2.hpp>
														
 
															+
														
 
															+#include <pcl/point_cloud.h>
														
 
															+#include <pcl/point_types.h>
														
 
															 #include <memory>
														
 
															 #include <string>
														
 
															 #include <tuple>
														
 
															 #include <vector>
														
 
															+
														
 
															+namespace autoware_point_types
														
 
															+{
														
 
															+struct PointXYZIRCAEDT
														
 
															+{
														
 
															+  float x{0.0F};
														
 
															+  float y{0.0F};
														
 
															+  float z{0.0F};
														
 
															+  std::uint8_t intensity{0U};
														
 
															+  std::uint8_t return_type{0U};
														
 
															+  std::uint16_t channel{0U};
														
 
															+  float azimuth{0.0F};
														
 
															+  float elevation{0.0F};
														
 
															+  float distance{0.0F};
														
 
															+  std::uint32_t time_stamp{0U};
														
 
															+};
														
 
															+}
														
 
															 namespace autoware::lidar_transfusion
														
 
															 {
														
 
															+
														
 
															+
														
 
															+
														
 
															 constexpr size_t AFF_MAT_SIZE = 16;  // 4x4 matrix
														
 
															 constexpr size_t MAX_CLOUD_STEP_SIZE = sizeof(autoware_point_types::PointXYZIRCAEDT);
														
@@ -47,13 +70,21 @@ public:
 
															   explicit VoxelGenerator(
														
 
															     const DensificationParam & densification_param, const TransfusionConfig & config,
														
 
															     cudaStream_t & stream);
														
 
															+//  std::size_t generateSweepPoints(
														
 
															+//    const sensor_msgs::msg::PointCloud2 & msg, cuda::unique_ptr<float[]> & points_d);
														
 
															+//  bool enqueuePointCloud(
														
 
															+//    const sensor_msgs::msg::PointCloud2 & msg, const tf2_ros::Buffer & tf_buffer);
														
 
															+
														
 
															+//  void initCloudInfo(const sensor_msgs::msg::PointCloud2 & msg);
														
 
															+//  std::tuple<const uint32_t, const uint8_t, const uint8_t> getFieldInfo(
														
 
															+//    const sensor_msgs::msg::PointCloud2 & msg, const std::string & field_name);
														
 
															+
														
 
															+
														
 
															   std::size_t generateSweepPoints(
														
 
															-    const sensor_msgs::msg::PointCloud2 & msg, cuda::unique_ptr<float[]> & points_d);
														
 
															+    const pcl::PointCloud<pcl::PointXYZI>::Ptr & pc_ptr, cuda::unique_ptr<float[]> & points_d);
														
 
															   bool enqueuePointCloud(
														
 
															-    const sensor_msgs::msg::PointCloud2 & msg, const tf2_ros::Buffer & tf_buffer);
														
 
															-  void initCloudInfo(const sensor_msgs::msg::PointCloud2 & msg);
														
 
															-  std::tuple<const uint32_t, const uint8_t, const uint8_t> getFieldInfo(
														
 
															-    const sensor_msgs::msg::PointCloud2 & msg, const std::string & field_name);
														
 
															+    const pcl::PointCloud<pcl::PointXYZI>::Ptr & pc_ptr);
														
 
															+
														
 
															 private:
														
 
															   std::unique_ptr<PointCloudDensification> pd_ptr_{nullptr};
														
@@ -63,7 +94,7 @@ private:
 
															   cuda::unique_ptr<float[]> affine_past2current_d_{nullptr};
														
 
															   std::vector<float> points_;
														
 
															   cudaStream_t stream_;
														
 
															-  CloudInfo cloud_info_;
														
 
															+//  CloudInfo cloud_info_;
														
 
															   bool is_initialized_{false};
														
 
															 };
														
--- a/src/detection/detection_lidar_transfusion/preprocess/pointcloud_densification.cpp
+++ b/src/detection/detection_lidar_transfusion/preprocess/pointcloud_densification.cpp
@@ -12,15 +12,15 @@
 
															 // See the License for the specific language governing permissions and
														
 
															 // limitations under the License.
														
 
															-#include "autoware/lidar_transfusion/preprocess/pointcloud_densification.hpp"
														
 
															+#include "preprocess/pointcloud_densification.hpp"
														
 
															-#include <pcl_ros/transforms.hpp>
														
 
															+//#include <pcl_ros/transforms.hpp>
														
 
															-#include <pcl_conversions/pcl_conversions.h>
														
 
															+//#include <pcl_conversions/pcl_conversions.h>
														
 
															 #ifdef ROS_DISTRO_GALACTIC
														
 
															 #include <tf2_eigen/tf2_eigen.h>
														
 
															 #else
														
 
															-#include <tf2_eigen/tf2_eigen.hpp>
														
 
															+//#include <tf2_eigen/tf2_eigen.hpp>
														
 
															 #endif
														
 
															 #include <boost/optional.hpp>
														
@@ -31,27 +31,27 @@
 
															 namespace
														
 
															 {
														
 
															-boost::optional<geometry_msgs::msg::Transform> getTransform(
														
 
															-  const tf2_ros::Buffer & tf_buffer, const std::string & target_frame_id,
														
 
															-  const std::string & source_frame_id, const rclcpp::Time & time)
														
 
															-{
														
 
															-  try {
														
 
															-    geometry_msgs::msg::TransformStamped transform_stamped;
														
 
															-    transform_stamped = tf_buffer.lookupTransform(
														
 
															-      target_frame_id, source_frame_id, time, rclcpp::Duration::from_seconds(0.5));
														
 
															-    return transform_stamped.transform;
														
 
															-  } catch (tf2::TransformException & ex) {
														
 
															-    RCLCPP_WARN_STREAM(rclcpp::get_logger("lidar_transfusion"), ex.what());
														
 
															-    return boost::none;
														
 
															-  }
														
 
															-}
														
 
															-
														
 
															-Eigen::Affine3f transformToEigen(const geometry_msgs::msg::Transform & t)
														
 
															-{
														
 
															-  Eigen::Affine3f a;
														
 
															-  a.matrix() = tf2::transformToEigen(t).matrix().cast<float>();
														
 
															-  return a;
														
 
															-}
														
 
															+//boost::optional<geometry_msgs::msg::Transform> getTransform(
														
 
															+//  const tf2_ros::Buffer & tf_buffer, const std::string & target_frame_id,
														
 
															+//  const std::string & source_frame_id, const rclcpp::Time & time)
														
 
															+//{
														
 
															+//  try {
														
 
															+//    geometry_msgs::msg::TransformStamped transform_stamped;
														
 
															+//    transform_stamped = tf_buffer.lookupTransform(
														
 
															+//      target_frame_id, source_frame_id, time, rclcpp::Duration::from_seconds(0.5));
														
 
															+//    return transform_stamped.transform;
														
 
															+//  } catch (tf2::TransformException & ex) {
														
 
															+//    RCLCPP_WARN_STREAM(rclcpp::get_logger("lidar_transfusion"), ex.what());
														
 
															+//    return boost::none;
														
 
															+//  }
														
 
															+//}
														
 
															+
														
 
															+//Eigen::Affine3f transformToEigen(const geometry_msgs::msg::Transform & t)
														
 
															+//{
														
 
															+//  Eigen::Affine3f a;
														
 
															+//  a.matrix() = tf2::transformToEigen(t).matrix().cast<float>();
														
 
															+//  return a;
														
 
															+//}
														
 
															 }  // namespace
														
@@ -62,50 +62,76 @@ PointCloudDensification::PointCloudDensification(
 
															   const DensificationParam & param, cudaStream_t & stream)
														
 
															 : param_(param), stream_(stream)
														
 
															 {
														
 
															+    affine_world2current_ = Eigen::Affine3f::Identity();
														
 
															 }
														
 
															+
														
 
															 bool PointCloudDensification::enqueuePointCloud(
														
 
															-  const sensor_msgs::msg::PointCloud2 & pointcloud_msg, const tf2_ros::Buffer & tf_buffer)
														
 
															+     const pcl::PointCloud<pcl::PointXYZI>::Ptr & pc_ptr)
														
 
															 {
														
 
															-  const auto header = pointcloud_msg.header;
														
 
															-
														
 
															-  if (param_.pointcloud_cache_size() > 1) {
														
 
															-    auto transform_world2current =
														
 
															-      getTransform(tf_buffer, header.frame_id, param_.world_frame_id(), header.stamp);
														
 
															-    if (!transform_world2current) {
														
 
															-      return false;
														
 
															-    }
														
 
															-    auto affine_world2current = transformToEigen(transform_world2current.get());
														
 
															-
														
 
															-    enqueue(pointcloud_msg, affine_world2current);
														
 
															-  } else {
														
 
															-    enqueue(pointcloud_msg, Eigen::Affine3f::Identity());
														
 
															-  }
														
 
															+    enqueue(pc_ptr);
														
 
															+    dequeue();
														
 
															+    return true;
														
 
															+}
														
 
															-  dequeue();
														
 
															+//bool PointCloudDensification::enqueuePointCloud(
														
 
															+//  const sensor_msgs::msg::PointCloud2 & pointcloud_msg, const tf2_ros::Buffer & tf_buffer)
														
 
															+//{
														
 
															+//  const auto header = pointcloud_msg.header;
														
 
															-  return true;
														
 
															-}
														
 
															+//  if (param_.pointcloud_cache_size() > 1) {
														
 
															+//    auto transform_world2current =
														
 
															+//      getTransform(tf_buffer, header.frame_id, param_.world_frame_id(), header.stamp);
														
 
															+//    if (!transform_world2current) {
														
 
															+//      return false;
														
 
															+//    }
														
 
															+//    auto affine_world2current = transformToEigen(transform_world2current.get());
														
 
															-void PointCloudDensification::enqueue(
														
 
															-  const sensor_msgs::msg::PointCloud2 & msg, const Eigen::Affine3f & affine_world2current)
														
 
															-{
														
 
															-  affine_world2current_ = affine_world2current;
														
 
															-  current_timestamp_ = rclcpp::Time(msg.header.stamp).seconds();
														
 
															+//    enqueue(pointcloud_msg, affine_world2current);
														
 
															+//  } else {
														
 
															+//    enqueue(pointcloud_msg, Eigen::Affine3f::Identity());
														
 
															+//  }
														
 
															+
														
 
															+//  dequeue();
														
 
															+
														
 
															+//  return true;
														
 
															+//}
														
 
															-  auto data_d = cuda::make_unique<uint8_t[]>(
														
 
															-    sizeof(uint8_t) * msg.width * msg.height * msg.point_step / sizeof(uint8_t));
														
 
															-  CHECK_CUDA_ERROR(cudaMemcpyAsync(
														
 
															-    data_d.get(), msg.data.data(), sizeof(uint8_t) * msg.width * msg.height * msg.point_step,
														
 
															-    cudaMemcpyHostToDevice, stream_));
														
 
															+void PointCloudDensification::enqueue(const pcl::PointCloud<pcl::PointXYZI>::Ptr & pc_ptr)
														
 
															+{
														
 
															+    auto data_d = cuda::make_unique<uint8_t[]>(
														
 
															+      sizeof(uint8_t) * pc_ptr->width * pc_ptr->height * sizeof(pcl::PointXYZI) / sizeof(uint8_t));
														
 
															+
														
 
															+    CHECK_CUDA_ERROR(cudaMemcpyAsync(
														
 
															+      data_d.get(), pc_ptr->data(), sizeof(uint8_t) * pc_ptr->width * pc_ptr->height * sizeof(pcl::PointXYZI),
														
 
															+      cudaMemcpyHostToDevice, stream_));
														
 
															-  PointCloudWithTransform pointcloud = {
														
 
															-    std::move(data_d), msg.header, msg.width * msg.height, affine_world2current.inverse()};
														
 
															+    PointCloudWithTransform pointcloud = {
														
 
															+      std::move(data_d), pc_ptr->width * pc_ptr->height, Eigen::Affine3f::Identity()};
														
 
															-  pointcloud_cache_.push_front(std::move(pointcloud));
														
 
															+    pointcloud_cache_.push_front(std::move(pointcloud));
														
 
															 }
														
 
															+//void PointCloudDensification::enqueue(
														
 
															+//  const sensor_msgs::msg::PointCloud2 & msg, const Eigen::Affine3f & affine_world2current)
														
 
															+//{
														
 
															+//  affine_world2current_ = affine_world2current;
														
 
															+//  current_timestamp_ = rclcpp::Time(msg.header.stamp).seconds();
														
 
															+
														
 
															+//  auto data_d = cuda::make_unique<uint8_t[]>(
														
 
															+//    sizeof(uint8_t) * msg.width * msg.height * msg.point_step / sizeof(uint8_t));
														
 
															+
														
 
															+//  CHECK_CUDA_ERROR(cudaMemcpyAsync(
														
 
															+//    data_d.get(), msg.data.data(), sizeof(uint8_t) * msg.width * msg.height * msg.point_step,
														
 
															+//    cudaMemcpyHostToDevice, stream_));
														
 
															+
														
 
															+//  PointCloudWithTransform pointcloud = {
														
 
															+//    std::move(data_d), msg.header, msg.width * msg.height, affine_world2current.inverse()};
														
 
															+
														
 
															+//  pointcloud_cache_.push_front(std::move(pointcloud));
														
 
															+//}
														
 
															+
														
 
															 void PointCloudDensification::dequeue()
														
 
															 {
														
 
															   if (pointcloud_cache_.size() > param_.pointcloud_cache_size()) {
														
--- a/src/detection/detection_lidar_transfusion/preprocess/preprocess_kernel.cu
+++ b/src/detection/detection_lidar_transfusion/preprocess/preprocess_kernel.cu
@@ -28,8 +28,8 @@
 
															  * limitations under the License.
														
 
															  */
														
 
															-#include "autoware/lidar_transfusion/cuda_utils.hpp"
														
 
															-#include "autoware/lidar_transfusion/preprocess/preprocess_kernel.hpp"
														
 
															+#include "cuda_utils.hpp"
														
 
															+#include "preprocess/preprocess_kernel.hpp"
														
 
															 #include <cstdint>
														
--- a/src/detection/detection_lidar_transfusion/preprocess/voxel_generator.cpp
+++ b/src/detection/detection_lidar_transfusion/preprocess/voxel_generator.cpp
@@ -12,11 +12,11 @@
 
															 // See the License for the specific language governing permissions and
														
 
															 // limitations under the License.
														
 
															-#include "autoware/lidar_transfusion/preprocess/voxel_generator.hpp"
														
 
															+#include "preprocess/voxel_generator.hpp"
														
 
															-#include "autoware/lidar_transfusion/preprocess/preprocess_kernel.hpp"
														
 
															+#include "preprocess/preprocess_kernel.hpp"
														
 
															-#include <sensor_msgs/point_cloud2_iterator.hpp>
														
 
															+//#include <sensor_msgs/point_cloud2_iterator.hpp>
														
 
															 #include <type_traits>
														
@@ -34,90 +34,145 @@ VoxelGenerator::VoxelGenerator(
 
															   affine_past2current_d_ = cuda::make_unique<float[]>(AFF_MAT_SIZE);
														
 
															 }
														
 
															+//bool VoxelGenerator::enqueuePointCloud(
														
 
															+//  const sensor_msgs::msg::PointCloud2 & msg, const tf2_ros::Buffer & tf_buffer)
														
 
															+//{
														
 
															+//  return pd_ptr_->enqueuePointCloud(msg, tf_buffer);
														
 
															+//}
														
 
															+
														
 
															 bool VoxelGenerator::enqueuePointCloud(
														
 
															-  const sensor_msgs::msg::PointCloud2 & msg, const tf2_ros::Buffer & tf_buffer)
														
 
															+  const pcl::PointCloud<pcl::PointXYZI>::Ptr & pc_ptr)
														
 
															 {
														
 
															-  return pd_ptr_->enqueuePointCloud(msg, tf_buffer);
														
 
															+    return pd_ptr_->enqueuePointCloud(pc_ptr);
														
 
															 }
														
 
															 std::size_t VoxelGenerator::generateSweepPoints(
														
 
															-  const sensor_msgs::msg::PointCloud2 & msg, cuda::unique_ptr<float[]> & points_d)
														
 
															+  const pcl::PointCloud<pcl::PointXYZI>::Ptr & pc_ptr, cuda::unique_ptr<float[]> & points_d)
														
 
															 {
														
 
															-  if (!is_initialized_) {
														
 
															-    initCloudInfo(msg);
														
 
															-    std::stringstream ss;
														
 
															-    ss << "Input point cloud information: " << std::endl << cloud_info_;
														
 
															-    RCLCPP_DEBUG_STREAM(rclcpp::get_logger("lidar_transfusion"), ss.str());
														
 
															-
														
 
															-    CloudInfo default_cloud_info;
														
 
															-    if (cloud_info_ != default_cloud_info) {
														
 
															-      ss << "Expected point cloud information: " << std::endl << default_cloud_info;
														
 
															-      RCLCPP_ERROR_STREAM(rclcpp::get_logger("lidar_transfusion"), ss.str());
														
 
															-      throw std::runtime_error("Input point cloud has unsupported format");
														
 
															-    }
														
 
															-    is_initialized_ = true;
														
 
															-  }
														
 
															-
														
 
															-  size_t point_counter{0};
														
 
															-  CHECK_CUDA_ERROR(cudaStreamSynchronize(stream_));
														
 
															-
														
 
															-  for (auto pc_cache_iter = pd_ptr_->getPointCloudCacheIter(); !pd_ptr_->isCacheEnd(pc_cache_iter);
														
 
															-       pc_cache_iter++) {
														
 
															-    auto sweep_num_points = pc_cache_iter->num_points;
														
 
															-    if (point_counter + sweep_num_points >= config_.cloud_capacity_) {
														
 
															-      RCLCPP_WARN_STREAM(
														
 
															-        rclcpp::get_logger("lidar_transfusion"), "Exceeding cloud capacity. Used "
														
 
															-                                                   << pd_ptr_->getIdx(pc_cache_iter) << " out of "
														
 
															-                                                   << pd_ptr_->getCacheSize() << " sweep(s)");
														
 
															-      break;
														
 
															+    if (!is_initialized_) {
														
 
															+      is_initialized_ = true;
														
 
															     }
														
 
															-    auto shift = point_counter * config_.num_point_feature_size_;
														
 
															-
														
 
															-    auto affine_past2current =
														
 
															-      pd_ptr_->getAffineWorldToCurrent() * pc_cache_iter->affine_past2world;
														
 
															-    static_assert(std::is_same<decltype(affine_past2current.matrix()), Eigen::Matrix4f &>::value);
														
 
															-    static_assert(!Eigen::Matrix4f::IsRowMajor, "matrices should be col-major.");
														
 
															-    float time_lag = static_cast<float>(
														
 
															-      pd_ptr_->getCurrentTimestamp() - rclcpp::Time(pc_cache_iter->header.stamp).seconds());
														
 
															-
														
 
															-    CHECK_CUDA_ERROR(cudaMemcpyAsync(
														
 
															-      affine_past2current_d_.get(), affine_past2current.data(), AFF_MAT_SIZE * sizeof(float),
														
 
															-      cudaMemcpyHostToDevice, stream_));
														
 
															+    size_t point_counter{0};
														
 
															     CHECK_CUDA_ERROR(cudaStreamSynchronize(stream_));
														
 
															-    pre_ptr_->generateSweepPoints_launch(
														
 
															-      pc_cache_iter->data_d.get(), sweep_num_points, cloud_info_.point_step, time_lag,
														
 
															-      affine_past2current_d_.get(), points_d.get() + shift);
														
 
															-    point_counter += sweep_num_points;
														
 
															-  }
														
 
															+    for (auto pc_cache_iter = pd_ptr_->getPointCloudCacheIter(); !pd_ptr_->isCacheEnd(pc_cache_iter);
														
 
															+         pc_cache_iter++) {
														
 
															+      auto sweep_num_points = pc_cache_iter->num_points;
														
 
															+      if (point_counter + sweep_num_points >= config_.cloud_capacity_) {
														
 
															+          std::cout<<"ldiar_transfusion "<<" Exceeding cloud capacity. Used"
														
 
															+                                                                << pd_ptr_->getIdx(pc_cache_iter) << " out of "
														
 
															+                                                                << pd_ptr_->getCacheSize() << " sweep(s)";
														
 
															+        break;
														
 
															+      }
														
 
															+      auto shift = point_counter * config_.num_point_feature_size_;
														
 
															+
														
 
															+      auto affine_past2current =
														
 
															+        pd_ptr_->getAffineWorldToCurrent() * pc_cache_iter->affine_past2world;
														
 
															+      static_assert(std::is_same<decltype(affine_past2current.matrix()), Eigen::Matrix4f &>::value);
														
 
															+      static_assert(!Eigen::Matrix4f::IsRowMajor, "matrices should be col-major.");
														
 
															+
														
 
															+      float time_lag = 0;
														
 
															+//      float time_lag = static_cast<float>(
														
 
															+//        pd_ptr_->getCurrentTimestamp() - rclcpp::Time(pc_cache_iter->header.stamp).seconds());
														
 
															+
														
 
															+      CHECK_CUDA_ERROR(cudaMemcpyAsync(
														
 
															+        affine_past2current_d_.get(), affine_past2current.data(), AFF_MAT_SIZE * sizeof(float),
														
 
															+        cudaMemcpyHostToDevice, stream_));
														
 
															+      CHECK_CUDA_ERROR(cudaStreamSynchronize(stream_));
														
 
															+
														
 
															+      pre_ptr_->generateSweepPoints_launch(
														
 
															+        pc_cache_iter->data_d.get(), sweep_num_points, sizeof(pcl::PointXYZI), time_lag,
														
 
															+        affine_past2current_d_.get(), points_d.get() + shift);
														
 
															+//      pre_ptr_->generateSweepPoints_launch(
														
 
															+//        pc_cache_iter->data_d.get(), sweep_num_points, cloud_info_.point_step, time_lag,
														
 
															+//        affine_past2current_d_.get(), points_d.get() + shift);
														
 
															+      point_counter += sweep_num_points;
														
 
															+    }
														
 
															-  return point_counter;
														
 
															+    return point_counter;
														
 
															 }
														
 
															-void VoxelGenerator::initCloudInfo(const sensor_msgs::msg::PointCloud2 & msg)
														
 
															-{
														
 
															-  std::tie(cloud_info_.x_offset, cloud_info_.x_datatype, cloud_info_.x_count) =
														
 
															-    getFieldInfo(msg, "x");
														
 
															-  std::tie(cloud_info_.y_offset, cloud_info_.y_datatype, cloud_info_.y_count) =
														
 
															-    getFieldInfo(msg, "y");
														
 
															-  std::tie(cloud_info_.z_offset, cloud_info_.z_datatype, cloud_info_.z_count) =
														
 
															-    getFieldInfo(msg, "z");
														
 
															-  std::tie(
														
 
															-    cloud_info_.intensity_offset, cloud_info_.intensity_datatype, cloud_info_.intensity_count) =
														
 
															-    getFieldInfo(msg, "intensity");
														
 
															-  cloud_info_.point_step = msg.point_step;
														
 
															-  cloud_info_.is_bigendian = msg.is_bigendian;
														
 
															-}
														
 
															+//std::size_t VoxelGenerator::generateSweepPoints(
														
 
															+//  const sensor_msgs::msg::PointCloud2 & msg, cuda::unique_ptr<float[]> & points_d)
														
 
															+//{
														
 
															+//  if (!is_initialized_) {
														
 
															+//    initCloudInfo(msg);
														
 
															+//    std::stringstream ss;
														
 
															+//    ss << "Input point cloud information: " << std::endl << cloud_info_;
														
 
															+//    RCLCPP_DEBUG_STREAM(rclcpp::get_logger("lidar_transfusion"), ss.str());
														
 
															+
														
 
															+//    CloudInfo default_cloud_info;
														
 
															+//    if (cloud_info_ != default_cloud_info) {
														
 
															+//      ss << "Expected point cloud information: " << std::endl << default_cloud_info;
														
 
															+//      RCLCPP_ERROR_STREAM(rclcpp::get_logger("lidar_transfusion"), ss.str());
														
 
															+//      throw std::runtime_error("Input point cloud has unsupported format");
														
 
															+//    }
														
 
															+//    is_initialized_ = true;
														
 
															+//  }
														
 
															+
														
 
															+//  size_t point_counter{0};
														
 
															+//  CHECK_CUDA_ERROR(cudaStreamSynchronize(stream_));
														
 
															+
														
 
															+//  for (auto pc_cache_iter = pd_ptr_->getPointCloudCacheIter(); !pd_ptr_->isCacheEnd(pc_cache_iter);
														
 
															+//       pc_cache_iter++) {
														
 
															+//    auto sweep_num_points = pc_cache_iter->num_points;
														
 
															+//    if (point_counter + sweep_num_points >= config_.cloud_capacity_) {
														
 
															+//      RCLCPP_WARN_STREAM(
														
 
															+//        rclcpp::get_logger("lidar_transfusion"), "Exceeding cloud capacity. Used "
														
 
															+//                                                   << pd_ptr_->getIdx(pc_cache_iter) << " out of "
														
 
															+//                                                   << pd_ptr_->getCacheSize() << " sweep(s)");
														
 
															+//      break;
														
 
															+//    }
														
 
															+//    auto shift = point_counter * config_.num_point_feature_size_;
														
 
															+
														
 
															+//    auto affine_past2current =
														
 
															+//      pd_ptr_->getAffineWorldToCurrent() * pc_cache_iter->affine_past2world;
														
 
															+//    static_assert(std::is_same<decltype(affine_past2current.matrix()), Eigen::Matrix4f &>::value);
														
 
															+//    static_assert(!Eigen::Matrix4f::IsRowMajor, "matrices should be col-major.");
														
 
															+
														
 
															+//    float time_lag = static_cast<float>(
														
 
															+//      pd_ptr_->getCurrentTimestamp() - rclcpp::Time(pc_cache_iter->header.stamp).seconds());
														
 
															+
														
 
															+//    CHECK_CUDA_ERROR(cudaMemcpyAsync(
														
 
															+//      affine_past2current_d_.get(), affine_past2current.data(), AFF_MAT_SIZE * sizeof(float),
														
 
															+//      cudaMemcpyHostToDevice, stream_));
														
 
															+//    CHECK_CUDA_ERROR(cudaStreamSynchronize(stream_));
														
 
															+
														
 
															+//    pre_ptr_->generateSweepPoints_launch(
														
 
															+//      pc_cache_iter->data_d.get(), sweep_num_points, cloud_info_.point_step, time_lag,
														
 
															+//      affine_past2current_d_.get(), points_d.get() + shift);
														
 
															+//    point_counter += sweep_num_points;
														
 
															+//  }
														
 
															+
														
 
															+//  return point_counter;
														
 
															+//}
														
 
															+
														
 
															+//void VoxelGenerator::initCloudInfo(const sensor_msgs::msg::PointCloud2 & msg)
														
 
															+//{
														
 
															+//  std::tie(cloud_info_.x_offset, cloud_info_.x_datatype, cloud_info_.x_count) =
														
 
															+//    getFieldInfo(msg, "x");
														
 
															+//  std::tie(cloud_info_.y_offset, cloud_info_.y_datatype, cloud_info_.y_count) =
														
 
															+//    getFieldInfo(msg, "y");
														
 
															+//  std::tie(cloud_info_.z_offset, cloud_info_.z_datatype, cloud_info_.z_count) =
														
 
															+//    getFieldInfo(msg, "z");
														
 
															+//  std::tie(
														
 
															+//    cloud_info_.intensity_offset, cloud_info_.intensity_datatype, cloud_info_.intensity_count) =
														
 
															+//    getFieldInfo(msg, "intensity");
														
 
															+//  cloud_info_.point_step = msg.point_step;
														
 
															+//  cloud_info_.is_bigendian = msg.is_bigendian;
														
 
															+//}
														
 
															+
														
 
															+//std::tuple<const uint32_t, const uint8_t, const uint8_t> VoxelGenerator::getFieldInfo(
														
 
															+//  const sensor_msgs::msg::PointCloud2 & msg, const std::string & field_name)
														
 
															+//{
														
 
															+//  for (const auto & field : msg.fields) {
														
 
															+//    if (field.name == field_name) {
														
 
															+//      return std::make_tuple(field.offset, field.datatype, field.count);
														
 
															+//    }
														
 
															+//  }
														
 
															+//  throw std::runtime_error("Missing field: " + field_name);
														
 
															+//}
														
 
															+
														
 
															-std::tuple<const uint32_t, const uint8_t, const uint8_t> VoxelGenerator::getFieldInfo(
														
 
															-  const sensor_msgs::msg::PointCloud2 & msg, const std::string & field_name)
														
 
															-{
														
 
															-  for (const auto & field : msg.fields) {
														
 
															-    if (field.name == field_name) {
														
 
															-      return std::make_tuple(field.offset, field.datatype, field.count);
														
 
															-    }
														
 
															-  }
														
 
															-  throw std::runtime_error("Missing field: " + field_name);
														
 
															-}
														
 
															 }  // namespace autoware::lidar_transfusion
														
--- a/src/detection/detection_lidar_transfusion/transfusion_trt.cpp
+++ b/src/detection/detection_lidar_transfusion/transfusion_trt.cpp
@@ -0,0 +1,310 @@
 
															+// Copyright 2024 TIER IV, Inc.
														
 
															+//
														
 
															+// Licensed under the Apache License, Version 2.0 (the "License");
														
 
															+// you may not use this file except in compliance with the License.
														
 
															+// You may obtain a copy of the License at
														
 
															+//
														
 
															+//     http://www.apache.org/licenses/LICENSE-2.0
														
 
															+//
														
 
															+// Unless required by applicable law or agreed to in writing, software
														
 
															+// distributed under the License is distributed on an "AS IS" BASIS,
														
 
															+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
														
 
															+// See the License for the specific language governing permissions and
														
 
															+// limitations under the License.
														
 
															+
														
 
															+#include "transfusion_trt.hpp"
														
 
															+
														
 
															+#include "preprocess/preprocess_kernel.hpp"
														
 
															+#include "transfusion_config.hpp"
														
 
															+
														
 
															+//#include <autoware/universe_utils/math/constants.hpp>
														
 
															+
														
 
															+#include <iostream>
														
 
															+#include <memory>
														
 
															+#include <string>
														
 
															+#include <vector>
														
 
															+
														
 
															+namespace autoware::lidar_transfusion
														
 
															+{
														
 
															+
														
 
															+TransfusionTRT::TransfusionTRT(
														
 
															+  const NetworkParam & network_param, const DensificationParam & densification_param,
														
 
															+  const TransfusionConfig & config)
														
 
															+: config_(config)
														
 
															+{
														
 
															+  network_trt_ptr_ = std::make_unique<NetworkTRT>(config_);
														
 
															+
														
 
															+  network_trt_ptr_->init(
														
 
															+    network_param.onnx_path(), network_param.engine_path(), network_param.trt_precision());
														
 
															+  vg_ptr_ = std::make_unique<VoxelGenerator>(densification_param, config_, stream_);
														
 
															+//  stop_watch_ptr_ =
														
 
															+//    std::make_unique<autoware::universe_utils::StopWatch<std::chrono::milliseconds>>();
														
 
															+//  stop_watch_ptr_->tic("processing/inner");
														
 
															+  initPtr();
														
 
															+
														
 
															+  CHECK_CUDA_ERROR(cudaStreamCreate(&stream_));
														
 
															+}
														
 
															+
														
 
															+TransfusionTRT::~TransfusionTRT()
														
 
															+{
														
 
															+  if (stream_) {
														
 
															+    cudaStreamSynchronize(stream_);
														
 
															+    cudaStreamDestroy(stream_);
														
 
															+  }
														
 
															+}
														
 
															+
														
 
															+void TransfusionTRT::initPtr()
														
 
															+{
														
 
															+  // point cloud to voxels
														
 
															+  voxel_features_size_ =
														
 
															+    config_.max_voxels_ * config_.max_num_points_per_pillar_ * config_.num_point_feature_size_;
														
 
															+  voxel_num_size_ = config_.max_voxels_;
														
 
															+  voxel_idxs_size_ = config_.max_voxels_ * config_.num_point_values_;
														
 
															+
														
 
															+  // output of TRT -- input of post-process
														
 
															+  cls_size_ = config_.num_proposals_ * config_.num_classes_;
														
 
															+  box_size_ = config_.num_proposals_ * config_.num_box_values_;
														
 
															+  dir_cls_size_ = config_.num_proposals_ * 2;  // x, y
														
 
															+  cls_output_d_ = cuda::make_unique<float[]>(cls_size_);
														
 
															+  box_output_d_ = cuda::make_unique<float[]>(box_size_);
														
 
															+  dir_cls_output_d_ = cuda::make_unique<float[]>(dir_cls_size_);
														
 
															+
														
 
															+  params_input_d_ = cuda::make_unique<unsigned int>();
														
 
															+  voxel_features_d_ = cuda::make_unique<float[]>(voxel_features_size_);
														
 
															+  voxel_num_d_ = cuda::make_unique<unsigned int[]>(voxel_num_size_);
														
 
															+  voxel_idxs_d_ = cuda::make_unique<unsigned int[]>(voxel_idxs_size_);
														
 
															+  points_d_ = cuda::make_unique<float[]>(config_.cloud_capacity_ * config_.num_point_feature_size_);
														
 
															+  pre_ptr_ = std::make_unique<PreprocessCuda>(config_, stream_);
														
 
															+  post_ptr_ = std::make_unique<PostprocessCuda>(config_, stream_);
														
 
															+}
														
 
															+
														
 
															+
														
 
															+bool TransfusionTRT::detect(
														
 
															+  const pcl::PointCloud<pcl::PointXYZI>::Ptr & pc_ptr,
														
 
															+  std::vector<Box3D> & det_boxes3d, std::unordered_map<std::string, double> & proc_timing)
														
 
															+{
														
 
															+    if (!preprocess(pc_ptr)) {
														
 
															+        std::cout<<"lidar_transfusion"<<"Fail to preprocess and skip to detect."<<std::endl;
														
 
															+      return false;
														
 
															+    }
														
 
															+
														
 
															+    if (!inference()) {
														
 
															+        std::cout<<"lidar_transfusion"<<"Fail to inference and skip to detect."<<std::endl;
														
 
															+      return false;
														
 
															+    }
														
 
															+
														
 
															+
														
 
															+    if (!postprocess(det_boxes3d)) {
														
 
															+        std::cout<<"lidar_transfusion"<<"Fail to postprocess and skip to detect."<<std::endl;
														
 
															+      return false;
														
 
															+    }
														
 
															+
														
 
															+
														
 
															+    return true;
														
 
															+
														
 
															+}
														
 
															+//bool TransfusionTRT::detect(
														
 
															+//  const sensor_msgs::msg::PointCloud2 & msg, const tf2_ros::Buffer & tf_buffer,
														
 
															+//  std::vector<Box3D> & det_boxes3d, std::unordered_map<std::string, double> & proc_timing)
														
 
															+//{
														
 
															+//  stop_watch_ptr_->toc("processing/inner", true);
														
 
															+//  if (!preprocess(msg, tf_buffer)) {
														
 
															+//    RCLCPP_WARN_STREAM(
														
 
															+//      rclcpp::get_logger("lidar_transfusion"), "Fail to preprocess and skip to detect.");
														
 
															+//    return false;
														
 
															+//  }
														
 
															+//  proc_timing.emplace(
														
 
															+//    "debug/processing_time/preprocess_ms", stop_watch_ptr_->toc("processing/inner", true));
														
 
															+
														
 
															+//  if (!inference()) {
														
 
															+//    RCLCPP_WARN_STREAM(
														
 
															+//      rclcpp::get_logger("lidar_transfusion"), "Fail to inference and skip to detect.");
														
 
															+//    return false;
														
 
															+//  }
														
 
															+//  proc_timing.emplace(
														
 
															+//    "debug/processing_time/inference_ms", stop_watch_ptr_->toc("processing/inner", true));
														
 
															+
														
 
															+//  if (!postprocess(det_boxes3d)) {
														
 
															+//    RCLCPP_WARN_STREAM(
														
 
															+//      rclcpp::get_logger("lidar_transfusion"), "Fail to postprocess and skip to detect.");
														
 
															+//    return false;
														
 
															+//  }
														
 
															+//  proc_timing.emplace(
														
 
															+//    "debug/processing_time/postprocess_ms", stop_watch_ptr_->toc("processing/inner", true));
														
 
															+
														
 
															+//  return true;
														
 
															+//}
														
 
															+
														
 
															+
														
 
															+bool TransfusionTRT::preprocess(const pcl::PointCloud<pcl::PointXYZI>::Ptr & pc_ptr)
														
 
															+{
														
 
															+    if (!vg_ptr_->enqueuePointCloud(pc_ptr)) {
														
 
															+      return false;
														
 
															+    }
														
 
															+
														
 
															+    cuda::clear_async(cls_output_d_.get(), cls_size_, stream_);
														
 
															+    cuda::clear_async(box_output_d_.get(), box_size_, stream_);
														
 
															+    cuda::clear_async(dir_cls_output_d_.get(), dir_cls_size_, stream_);
														
 
															+    cuda::clear_async(voxel_features_d_.get(), voxel_features_size_, stream_);
														
 
															+    cuda::clear_async(voxel_num_d_.get(), voxel_num_size_, stream_);
														
 
															+    cuda::clear_async(voxel_idxs_d_.get(), voxel_idxs_size_, stream_);
														
 
															+    cuda::clear_async(params_input_d_.get(), 1, stream_);
														
 
															+    cuda::clear_async(
														
 
															+      points_d_.get(), config_.cloud_capacity_ * config_.num_point_feature_size_, stream_);
														
 
															+    CHECK_CUDA_ERROR(cudaStreamSynchronize(stream_));
														
 
															+
														
 
															+    const auto count = vg_ptr_->generateSweepPoints(pc_ptr, points_d_);
														
 
															+
														
 
															+    std::cout<<" lidar_transfusion: "<<"Generated sweep points: " << count<<std::endl;
														
 
															+//    RCLCPP_DEBUG_STREAM(rclcpp::get_logger("lidar_transfusion"), "Generated sweep points: " << count);
														
 
															+
														
 
															+    pre_ptr_->generateVoxels(
														
 
															+      points_d_.get(), count, params_input_d_.get(), voxel_features_d_.get(), voxel_num_d_.get(),
														
 
															+      voxel_idxs_d_.get());
														
 
															+    unsigned int params_input;
														
 
															+    CHECK_CUDA_ERROR(cudaStreamSynchronize(stream_));
														
 
															+
														
 
															+    CHECK_CUDA_ERROR(cudaMemcpyAsync(
														
 
															+      &params_input, params_input_d_.get(), sizeof(unsigned int), cudaMemcpyDeviceToHost, stream_));
														
 
															+    CHECK_CUDA_ERROR(cudaStreamSynchronize(stream_));
														
 
															+
														
 
															+    if (params_input < config_.min_voxel_size_) {
														
 
															+        std::cout<<"lidar_transfusion: "
														
 
															+                  <<"Too few voxels (" << params_input << ") for actual optimization profile ("
														
 
															+                                      << config_.min_voxel_size_ << ")"<<std::endl;
														
 
															+//      RCLCPP_WARN_STREAM(
														
 
															+//        rclcpp::get_logger("lidar_transfusion"),
														
 
															+//        "Too few voxels (" << params_input << ") for actual optimization profile ("
														
 
															+//                           << config_.min_voxel_size_ << ")");
														
 
															+      return false;
														
 
															+    }
														
 
															+
														
 
															+    if (params_input > config_.max_voxels_) {
														
 
															+      params_input = config_.max_voxels_;
														
 
															+    }
														
 
															+    std::cout<<"lidar_transfusion: "<<"Generated input voxels: " << params_input<<std::endl;
														
 
															+//    RCLCPP_DEBUG_STREAM(
														
 
															+//      rclcpp::get_logger("lidar_transfusion"), "Generated input voxels: " << params_input);
														
 
															+
														
 
															+    network_trt_ptr_->context->setTensorAddress(
														
 
															+      network_trt_ptr_->getTensorName(NetworkIO::voxels), voxel_features_d_.get());
														
 
															+    network_trt_ptr_->context->setInputShape(
														
 
															+      network_trt_ptr_->getTensorName(NetworkIO::voxels),
														
 
															+      nvinfer1::Dims3{
														
 
															+        static_cast<int32_t>(params_input), static_cast<int32_t>(config_.max_num_points_per_pillar_),
														
 
															+        static_cast<int32_t>(config_.num_point_feature_size_)});
														
 
															+    network_trt_ptr_->context->setTensorAddress(
														
 
															+      network_trt_ptr_->getTensorName(NetworkIO::num_points), voxel_num_d_.get());
														
 
															+    network_trt_ptr_->context->setInputShape(
														
 
															+      network_trt_ptr_->getTensorName(NetworkIO::num_points),
														
 
															+      nvinfer1::Dims{1, {static_cast<int32_t>(params_input)}});
														
 
															+    network_trt_ptr_->context->setTensorAddress(
														
 
															+      network_trt_ptr_->getTensorName(NetworkIO::coors), voxel_idxs_d_.get());
														
 
															+    network_trt_ptr_->context->setInputShape(
														
 
															+      network_trt_ptr_->getTensorName(NetworkIO::coors),
														
 
															+      nvinfer1::Dims2{
														
 
															+        static_cast<int32_t>(params_input), static_cast<int32_t>(config_.num_point_values_)});
														
 
															+    network_trt_ptr_->context->setTensorAddress(
														
 
															+      network_trt_ptr_->getTensorName(NetworkIO::cls_score), cls_output_d_.get());
														
 
															+    network_trt_ptr_->context->setTensorAddress(
														
 
															+      network_trt_ptr_->getTensorName(NetworkIO::bbox_pred), box_output_d_.get());
														
 
															+    network_trt_ptr_->context->setTensorAddress(
														
 
															+      network_trt_ptr_->getTensorName(NetworkIO::dir_pred), dir_cls_output_d_.get());
														
 
															+    return true;
														
 
															+
														
 
															+}
														
 
															+
														
 
															+
														
 
															+//bool TransfusionTRT::preprocess(
														
 
															+//  const sensor_msgs::msg::PointCloud2 & msg, const tf2_ros::Buffer & tf_buffer)
														
 
															+//{
														
 
															+//  if (!vg_ptr_->enqueuePointCloud(msg, tf_buffer)) {
														
 
															+//    return false;
														
 
															+//  }
														
 
															+
														
 
															+//  cuda::clear_async(cls_output_d_.get(), cls_size_, stream_);
														
 
															+//  cuda::clear_async(box_output_d_.get(), box_size_, stream_);
														
 
															+//  cuda::clear_async(dir_cls_output_d_.get(), dir_cls_size_, stream_);
														
 
															+//  cuda::clear_async(voxel_features_d_.get(), voxel_features_size_, stream_);
														
 
															+//  cuda::clear_async(voxel_num_d_.get(), voxel_num_size_, stream_);
														
 
															+//  cuda::clear_async(voxel_idxs_d_.get(), voxel_idxs_size_, stream_);
														
 
															+//  cuda::clear_async(params_input_d_.get(), 1, stream_);
														
 
															+//  cuda::clear_async(
														
 
															+//    points_d_.get(), config_.cloud_capacity_ * config_.num_point_feature_size_, stream_);
														
 
															+//  CHECK_CUDA_ERROR(cudaStreamSynchronize(stream_));
														
 
															+
														
 
															+//  const auto count = vg_ptr_->generateSweepPoints(msg, points_d_);
														
 
															+//  RCLCPP_DEBUG_STREAM(rclcpp::get_logger("lidar_transfusion"), "Generated sweep points: " << count);
														
 
															+
														
 
															+//  pre_ptr_->generateVoxels(
														
 
															+//    points_d_.get(), count, params_input_d_.get(), voxel_features_d_.get(), voxel_num_d_.get(),
														
 
															+//    voxel_idxs_d_.get());
														
 
															+//  unsigned int params_input;
														
 
															+//  CHECK_CUDA_ERROR(cudaStreamSynchronize(stream_));
														
 
															+
														
 
															+//  CHECK_CUDA_ERROR(cudaMemcpyAsync(
														
 
															+//    &params_input, params_input_d_.get(), sizeof(unsigned int), cudaMemcpyDeviceToHost, stream_));
														
 
															+//  CHECK_CUDA_ERROR(cudaStreamSynchronize(stream_));
														
 
															+
														
 
															+//  if (params_input < config_.min_voxel_size_) {
														
 
															+//    RCLCPP_WARN_STREAM(
														
 
															+//      rclcpp::get_logger("lidar_transfusion"),
														
 
															+//      "Too few voxels (" << params_input << ") for actual optimization profile ("
														
 
															+//                         << config_.min_voxel_size_ << ")");
														
 
															+//    return false;
														
 
															+//  }
														
 
															+
														
 
															+//  if (params_input > config_.max_voxels_) {
														
 
															+//    params_input = config_.max_voxels_;
														
 
															+//  }
														
 
															+//  RCLCPP_DEBUG_STREAM(
														
 
															+//    rclcpp::get_logger("lidar_transfusion"), "Generated input voxels: " << params_input);
														
 
															+
														
 
															+//  network_trt_ptr_->context->setTensorAddress(
														
 
															+//    network_trt_ptr_->getTensorName(NetworkIO::voxels), voxel_features_d_.get());
														
 
															+//  network_trt_ptr_->context->setInputShape(
														
 
															+//    network_trt_ptr_->getTensorName(NetworkIO::voxels),
														
 
															+//    nvinfer1::Dims3{
														
 
															+//      static_cast<int32_t>(params_input), static_cast<int32_t>(config_.max_num_points_per_pillar_),
														
 
															+//      static_cast<int32_t>(config_.num_point_feature_size_)});
														
 
															+//  network_trt_ptr_->context->setTensorAddress(
														
 
															+//    network_trt_ptr_->getTensorName(NetworkIO::num_points), voxel_num_d_.get());
														
 
															+//  network_trt_ptr_->context->setInputShape(
														
 
															+//    network_trt_ptr_->getTensorName(NetworkIO::num_points),
														
 
															+//    nvinfer1::Dims{1, {static_cast<int32_t>(params_input)}});
														
 
															+//  network_trt_ptr_->context->setTensorAddress(
														
 
															+//    network_trt_ptr_->getTensorName(NetworkIO::coors), voxel_idxs_d_.get());
														
 
															+//  network_trt_ptr_->context->setInputShape(
														
 
															+//    network_trt_ptr_->getTensorName(NetworkIO::coors),
														
 
															+//    nvinfer1::Dims2{
														
 
															+//      static_cast<int32_t>(params_input), static_cast<int32_t>(config_.num_point_values_)});
														
 
															+//  network_trt_ptr_->context->setTensorAddress(
														
 
															+//    network_trt_ptr_->getTensorName(NetworkIO::cls_score), cls_output_d_.get());
														
 
															+//  network_trt_ptr_->context->setTensorAddress(
														
 
															+//    network_trt_ptr_->getTensorName(NetworkIO::bbox_pred), box_output_d_.get());
														
 
															+//  network_trt_ptr_->context->setTensorAddress(
														
 
															+//    network_trt_ptr_->getTensorName(NetworkIO::dir_pred), dir_cls_output_d_.get());
														
 
															+//  return true;
														
 
															+//}
														
 
															+
														
 
															+bool TransfusionTRT::inference()
														
 
															+{
														
 
															+  auto status = network_trt_ptr_->context->enqueueV3(stream_);
														
 
															+  CHECK_CUDA_ERROR(cudaStreamSynchronize(stream_));
														
 
															+
														
 
															+  if (!status) {
														
 
															+      std::cout<<"lidar_transfusion"<<"Fail to enqueue and skip to detect."<<std::endl;
														
 
															+    return false;
														
 
															+  }
														
 
															+  return true;
														
 
															+}
														
 
															+
														
 
															+bool TransfusionTRT::postprocess(std::vector<Box3D> & det_boxes3d)
														
 
															+{
														
 
															+  CHECK_CUDA_ERROR(post_ptr_->generateDetectedBoxes3D_launch(
														
 
															+    cls_output_d_.get(), box_output_d_.get(), dir_cls_output_d_.get(), det_boxes3d, stream_));
														
 
															+  CHECK_CUDA_ERROR(cudaStreamSynchronize(stream_));
														
 
															+  return true;
														
 
															+}
														
 
															+
														
 
															+}  //  namespace autoware::lidar_transfusion
														
--- a/src/detection/detection_lidar_transfusion/transfusion_trt.hpp
+++ b/src/detection/detection_lidar_transfusion/transfusion_trt.hpp
@@ -0,0 +1,121 @@
 
															+// Copyright 2024 TIER IV, Inc.
														
 
															+//
														
 
															+// Licensed under the Apache License, Version 2.0 (the "License");
														
 
															+// you may not use this file except in compliance with the License.
														
 
															+// You may obtain a copy of the License at
														
 
															+//
														
 
															+//     http://www.apache.org/licenses/LICENSE-2.0
														
 
															+//
														
 
															+// Unless required by applicable law or agreed to in writing, software
														
 
															+// distributed under the License is distributed on an "AS IS" BASIS,
														
 
															+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
														
 
															+// See the License for the specific language governing permissions and
														
 
															+// limitations under the License.
														
 
															+
														
 
															+#ifndef AUTOWARE__LIDAR_TRANSFUSION__TRANSFUSION_TRT_HPP_
														
 
															+#define AUTOWARE__LIDAR_TRANSFUSION__TRANSFUSION_TRT_HPP_
														
 
															+
														
 
															+#include "cuda_utils.hpp"
														
 
															+#include "network/network_trt.hpp"
														
 
															+#include "postprocess/postprocess_kernel.hpp"
														
 
															+#include "preprocess/pointcloud_densification.hpp"
														
 
															+#include "preprocess/preprocess_kernel.hpp"
														
 
															+#include "preprocess/voxel_generator.hpp"
														
 
															+#include "utils.hpp"
														
 
															+#include "visibility_control.hpp"
														
 
															+
														
 
															+//#include <autoware/universe_utils/system/stop_watch.hpp>
														
 
															+
														
 
															+//#include <sensor_msgs/msg/point_cloud2.hpp>
														
 
															+
														
 
															+
														
 
															+#include <pcl/point_cloud.h>
														
 
															+#include <pcl/point_types.h>
														
 
															+
														
 
															+#include <memory>
														
 
															+#include <string>
														
 
															+#include <unordered_map>
														
 
															+#include <utility>
														
 
															+#include <vector>
														
 
															+
														
 
															+namespace autoware::lidar_transfusion
														
 
															+{
														
 
															+
														
 
															+class NetworkParam
														
 
															+{
														
 
															+public:
														
 
															+  NetworkParam(std::string onnx_path, std::string engine_path, std::string trt_precision)
														
 
															+  : onnx_path_(std::move(onnx_path)),
														
 
															+    engine_path_(std::move(engine_path)),
														
 
															+    trt_precision_(std::move(trt_precision))
														
 
															+  {
														
 
															+  }
														
 
															+
														
 
															+  std::string onnx_path() const { return onnx_path_; }
														
 
															+  std::string engine_path() const { return engine_path_; }
														
 
															+  std::string trt_precision() const { return trt_precision_; }
														
 
															+
														
 
															+private:
														
 
															+  std::string onnx_path_;
														
 
															+  std::string engine_path_;
														
 
															+  std::string trt_precision_;
														
 
															+};
														
 
															+
														
 
															+class LIDAR_TRANSFUSION_PUBLIC TransfusionTRT
														
 
															+{
														
 
															+public:
														
 
															+  explicit TransfusionTRT(
														
 
															+    const NetworkParam & network_param, const DensificationParam & densification_param,
														
 
															+    const TransfusionConfig & config);
														
 
															+  virtual ~TransfusionTRT();
														
 
															+
														
 
															+//  bool detect(
														
 
															+//    const sensor_msgs::msg::PointCloud2 & msg, const tf2_ros::Buffer & tf_buffer,
														
 
															+//    std::vector<Box3D> & det_boxes3d, std::unordered_map<std::string, double> & proc_timing);
														
 
															+
														
 
															+  bool detect(
														
 
															+    const pcl::PointCloud<pcl::PointXYZI>::Ptr & pc_ptr,
														
 
															+    std::vector<Box3D> & det_boxes3d, std::unordered_map<std::string, double> & proc_timing);
														
 
															+
														
 
															+protected:
														
 
															+  void initPtr();
														
 
															+
														
 
															+//  bool preprocess(const sensor_msgs::msg::PointCloud2 & msg, const tf2_ros::Buffer & tf_buffer);
														
 
															+
														
 
															+  bool preprocess(const pcl::PointCloud<pcl::PointXYZI>::Ptr & pc_ptr);
														
 
															+
														
 
															+  bool inference();
														
 
															+
														
 
															+  bool postprocess(std::vector<Box3D> & det_boxes3d);
														
 
															+
														
 
															+  std::unique_ptr<NetworkTRT> network_trt_ptr_{nullptr};
														
 
															+  std::unique_ptr<VoxelGenerator> vg_ptr_{nullptr};
														
 
															+//  std::unique_ptr<autoware::universe_utils::StopWatch<std::chrono::milliseconds>> stop_watch_ptr_{
														
 
															+//    nullptr};
														
 
															+  std::unique_ptr<PreprocessCuda> pre_ptr_{nullptr};
														
 
															+  std::unique_ptr<PostprocessCuda> post_ptr_{nullptr};
														
 
															+  cudaStream_t stream_{nullptr};
														
 
															+
														
 
															+  TransfusionConfig config_;
														
 
															+
														
 
															+  // input of pre-process
														
 
															+
														
 
															+  unsigned int voxel_features_size_{0};
														
 
															+  unsigned int voxel_num_size_{0};
														
 
															+  unsigned int voxel_idxs_size_{0};
														
 
															+  unsigned int cls_size_{0};
														
 
															+  unsigned int box_size_{0};
														
 
															+  unsigned int dir_cls_size_{0};
														
 
															+  cuda::unique_ptr<float[]> points_d_{nullptr};
														
 
															+  cuda::unique_ptr<unsigned int> params_input_d_{nullptr};
														
 
															+  cuda::unique_ptr<float[]> voxel_features_d_{nullptr};
														
 
															+  cuda::unique_ptr<unsigned int[]> voxel_num_d_{nullptr};
														
 
															+  cuda::unique_ptr<unsigned int[]> voxel_idxs_d_{nullptr};
														
 
															+  cuda::unique_ptr<float[]> cls_output_d_{nullptr};
														
 
															+  cuda::unique_ptr<float[]> box_output_d_{nullptr};
														
 
															+  cuda::unique_ptr<float[]> dir_cls_output_d_{nullptr};
														
 
															+};
														
 
															+
														
 
															+}  // namespace autoware::lidar_transfusion
														
 
															+
														
 
															+#endif  // AUTOWARE__LIDAR_TRANSFUSION__TRANSFUSION_TRT_HPP_
														
--- a/src/detection/detection_lidar_transfusion/visibility_control.hpp
+++ b/src/detection/detection_lidar_transfusion/visibility_control.hpp
@@ -0,0 +1,37 @@
 
															+// Copyright 2024 TIER IV, Inc.
														
 
															+//
														
 
															+// Licensed under the Apache License, Version 2.0 (the "License");
														
 
															+// you may not use this file except in compliance with the License.
														
 
															+// You may obtain a copy of the License at
														
 
															+//
														
 
															+//     http://www.apache.org/licenses/LICENSE-2.0
														
 
															+//
														
 
															+// Unless required by applicable law or agreed to in writing, software
														
 
															+// distributed under the License is distributed on an "AS IS" BASIS,
														
 
															+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
														
 
															+// See the License for the specific language governing permissions and
														
 
															+// limitations under the License.
														
 
															+
														
 
															+#ifndef AUTOWARE__LIDAR_TRANSFUSION__VISIBILITY_CONTROL_HPP_
														
 
															+#define AUTOWARE__LIDAR_TRANSFUSION__VISIBILITY_CONTROL_HPP_
														
 
															+
														
 
															+////////////////////////////////////////////////////////////////////////////////
														
 
															+#if defined(__WIN32)
														
 
															+#if defined(LIDAR_TRANSFUSION_BUILDING_DLL) || defined(LIDAR_TRANSFUSION_EXPORTS)
														
 
															+#define LIDAR_TRANSFUSION_PUBLIC __declspec(dllexport)
														
 
															+#define LIDAR_TRANSFUSION_LOCAL
														
 
															+#else  // defined(LIDAR_TRANSFUSION_BUILDING_DLL) || defined(LIDAR_TRANSFUSION_EXPORTS)
														
 
															+#define LIDAR_TRANSFUSION_PUBLIC __declspec(dllimport)
														
 
															+#define LIDAR_TRANSFUSION_LOCAL
														
 
															+#endif  // defined(LIDAR_TRANSFUSION_BUILDING_DLL) || defined(LIDAR_TRANSFUSION_EXPORTS)
														
 
															+#elif defined(__linux__)
														
 
															+#define LIDAR_TRANSFUSION_PUBLIC __attribute__((visibility("default")))
														
 
															+#define LIDAR_TRANSFUSION_LOCAL __attribute__((visibility("hidden")))
														
 
															+#elif defined(__APPLE__)
														
 
															+#define LIDAR_TRANSFUSION_PUBLIC __attribute__((visibility("default")))
														
 
															+#define LIDAR_TRANSFUSION_LOCAL __attribute__((visibility("hidden")))
														
 
															+#else
														
 
															+#error "Unsupported Build Configuration"
														
 
															+#endif
														
 
															+
														
 
															+#endif  // AUTOWARE__LIDAR_TRANSFUSION__VISIBILITY_CONTROL_HPP_