Selaa lähdekoodia

change detection_lidar_transfusion. compille ok, but not complete.

yuchuli 1 kuukausi sitten
vanhempi
commit
e148e62fca

+ 1 - 0
src/detection/detection_lidar_centerpoint/lib/preprocess/pointcloud_densification.cpp

@@ -64,6 +64,7 @@ PointCloudDensification::PointCloudDensification(const DensificationParam & para
 bool PointCloudDensification::enqueuePointCloud(
    const pcl::PointCloud<pcl::PointXYZI>::Ptr & pc_ptr)
 {
+    affine_world2current_ =Eigen::Affine3f::Identity();
     enqueue(pc_ptr);
     dequeue();
 

+ 6 - 3
src/detection/detection_lidar_centerpoint/main.cpp

@@ -33,9 +33,10 @@ void init()
     const float circle_nms_dist_threshold =0.5;
   //    static_cast<float>(this->declare_parameter<double>("circle_nms_dist_threshold"));
     std::vector<double> yaw_norm_thresholds ;
-    yaw_norm_thresholds.push_back(0.3);yaw_norm_thresholds.push_back(0.3);yaw_norm_thresholds.push_back(0.3);yaw_norm_thresholds.push_back(0.0);
+    yaw_norm_thresholds.push_back(0.3);yaw_norm_thresholds.push_back(0.3);yaw_norm_thresholds.push_back(0.3);
+    yaw_norm_thresholds.push_back(0.3);yaw_norm_thresholds.push_back(0.0);
     const std::string densification_world_frame_id = "map";
-    const int densification_num_past_frames = 1;
+    const int densification_num_past_frames = 0;//1;
     const std::string trt_precision = "fp16";
     const std::string encoder_onnx_path = "/home/nvidia/models/pts_voxel_encoder_centerpoint.onnx";//this->declare_parameter<std::string>("encoder_onnx_path");
     const std::string encoder_engine_path ="/home/nvidia/models/pts_voxel_encoder_centerpoint.eng";//this->declare_parameter<std::string>("encoder_engine_path");
@@ -72,7 +73,7 @@ void init()
       densification_world_frame_id, densification_num_past_frames);
 
     CenterPointConfig config(
-      3, point_feature_size, max_voxel_size, point_cloud_range, voxel_size,
+      5, point_feature_size, max_voxel_size, point_cloud_range, voxel_size,
       downsample_factor, encoder_in_feature_size, score_threshold, circle_nms_dist_threshold,
       yaw_norm_thresholds);
     detector_ptr_ =
@@ -189,7 +190,9 @@ void ListenPointCloud(const char * strdata,const unsigned int nSize,const unsign
         iv::lidar::lidarobject obj;
         box3DToDetectedObject(det_boxes3d[i],class_names_,false,obj);
         iv::lidar::lidarobject * pobj = lidarobjvec.add_obj();
+
         pobj->CopyFrom(obj);
+        std::cout<<" obj i: "<<i<<" class: "<<pobj->type_name()<<" x: "<<pobj->position().x()<<" y: "<<pobj->position().y()<<std::endl;
     }
 
     std::string out = lidarobjvec.SerializeAsString();

+ 14 - 4
src/detection/detection_lidar_transfusion/detection_lidar_transfusion.pro

@@ -11,8 +11,11 @@ SOURCES += \
         main.cpp \
         network/network_trt.cpp \
         postprocess/non_maximum_suppression.cpp \
+        preprocess/pointcloud_densification.cpp \
+        preprocess/voxel_generator.cpp \
         simple_profiler.cpp \
-        tensorrt_common.cpp
+        tensorrt_common.cpp \
+        transfusion_trt.cpp
 
 # Default rules for deployment.
 qnx: target.path = /tmp/$${TARGET}/bin
@@ -21,7 +24,8 @@ else: unix:!android: target.path = /opt/$${TARGET}/bin
 
 DISTFILES += \
     postprocess/circle_nms_kernel.cu \
-    postprocess/postprocess_kernel.cu
+    postprocess/postprocess_kernel.cu \
+    preprocess/preprocess_kernel.cu
 
 HEADERS += \
     cuda_utils.hpp \
@@ -35,8 +39,13 @@ HEADERS += \
     include/tensorrt_common/logger.hpp \
     include/tensorrt_common/simple_profiler.hpp \
     include/tensorrt_common/tensorrt_common.hpp \
-    utils.hpp
+    transfusion_trt.hpp \
+    utils.hpp \
+    visibility_control.hpp
 
+INCLUDEPATH += /usr/include/pcl-1.7
+INCLUDEPATH += /usr/include/pcl-1.8
+INCLUDEPATH += /usr/include/pcl-1.12
 INCLUDEPATH += /usr/include/eigen3
 
 INCLUDEPATH += $$PWD/include
@@ -50,7 +59,8 @@ LIBS += -L"/usr/local/lib" \
 
 CUDA_SOURCES += \
     postprocess/circle_nms_kernel.cu \
-    postprocess/postprocess_kernel.cu
+    postprocess/postprocess_kernel.cu \
+    preprocess/preprocess_kernel.cu
 
 CUDA_SDK = "/usr/local/cuda/"   # cudaSDK路径
 

+ 15 - 7
src/detection/detection_lidar_transfusion/include/preprocess/pointcloud_densification.hpp

@@ -15,20 +15,25 @@
 #ifndef AUTOWARE__LIDAR_TRANSFUSION__PREPROCESS__POINTCLOUD_DENSIFICATION_HPP_
 #define AUTOWARE__LIDAR_TRANSFUSION__PREPROCESS__POINTCLOUD_DENSIFICATION_HPP_
 
-#include "autoware/lidar_transfusion/cuda_utils.hpp"
+#include "cuda_utils.hpp"
 
-#include <tf2_ros/buffer.h>
-#include <tf2_ros/transform_listener.h>
+#include <Eigen/Eigen>
+
+//#include <tf2_ros/buffer.h>
+//#include <tf2_ros/transform_listener.h>
 #ifdef ROS_DISTRO_GALACTIC
 #include <tf2_sensor_msgs/tf2_sensor_msgs.h>
 #else
-#include <tf2_sensor_msgs/tf2_sensor_msgs.hpp>
+//#include <tf2_sensor_msgs/tf2_sensor_msgs.hpp>
 #endif
 
 #include <list>
 #include <string>
 #include <utility>
 
+#include <pcl/point_cloud.h>
+#include <pcl/point_types.h>
+
 namespace autoware::lidar_transfusion
 {
 
@@ -52,7 +57,7 @@ private:
 struct PointCloudWithTransform
 {
   cuda::unique_ptr<uint8_t[]> data_d{nullptr};
-  std_msgs::msg::Header header;
+//  std_msgs::msg::Header header;
   size_t num_points{0};
   Eigen::Affine3f affine_past2world;
 };
@@ -63,7 +68,9 @@ public:
   explicit PointCloudDensification(const DensificationParam & param, cudaStream_t & stream);
 
   bool enqueuePointCloud(
-    const sensor_msgs::msg::PointCloud2 & msg, const tf2_ros::Buffer & tf_buffer);
+       const pcl::PointCloud<pcl::PointXYZI>::Ptr & pc_ptr);
+//  bool enqueuePointCloud(
+//    const sensor_msgs::msg::PointCloud2 & msg, const tf2_ros::Buffer & tf_buffer);
 
   double getCurrentTimestamp() const { return current_timestamp_; }
   Eigen::Affine3f getAffineWorldToCurrent() const { return affine_world2current_; }
@@ -86,7 +93,8 @@ public:
   unsigned int pointcloud_cache_size() const { return param_.pointcloud_cache_size(); }
 
 private:
-  void enqueue(const sensor_msgs::msg::PointCloud2 & msg, const Eigen::Affine3f & affine);
+//  void enqueue(const sensor_msgs::msg::PointCloud2 & msg, const Eigen::Affine3f & affine);
+  void enqueue(const pcl::PointCloud<pcl::PointXYZI>::Ptr & pc_ptr);
   void dequeue();
 
   DensificationParam param_;

+ 3 - 3
src/detection/detection_lidar_transfusion/include/preprocess/preprocess_kernel.hpp

@@ -31,9 +31,9 @@
 #ifndef AUTOWARE__LIDAR_TRANSFUSION__PREPROCESS__PREPROCESS_KERNEL_HPP_
 #define AUTOWARE__LIDAR_TRANSFUSION__PREPROCESS__PREPROCESS_KERNEL_HPP_
 
-#include "autoware/lidar_transfusion/cuda_utils.hpp"
-#include "autoware/lidar_transfusion/transfusion_config.hpp"
-#include "autoware/lidar_transfusion/utils.hpp"
+#include "cuda_utils.hpp"
+#include "transfusion_config.hpp"
+#include "utils.hpp"
 
 #include <cuda_runtime_api.h>
 

+ 45 - 14
src/detection/detection_lidar_transfusion/include/preprocess/voxel_generator.hpp

@@ -15,29 +15,52 @@
 #ifndef AUTOWARE__LIDAR_TRANSFUSION__PREPROCESS__VOXEL_GENERATOR_HPP_
 #define AUTOWARE__LIDAR_TRANSFUSION__PREPROCESS__VOXEL_GENERATOR_HPP_
 
-#include "autoware/lidar_transfusion/cuda_utils.hpp"
-#include "autoware/lidar_transfusion/preprocess/pointcloud_densification.hpp"
-#include "autoware/lidar_transfusion/preprocess/preprocess_kernel.hpp"
-#include "autoware/lidar_transfusion/ros_utils.hpp"
-#include "autoware/lidar_transfusion/transfusion_config.hpp"
+#include "cuda_utils.hpp"
+#include "preprocess/pointcloud_densification.hpp"
+#include "preprocess/preprocess_kernel.hpp"
+//#include "autoware/lidar_transfusion/ros_utils.hpp"
+#include "transfusion_config.hpp"
 
 #ifdef ROS_DISTRO_GALACTIC
 #include <tf2_eigen/tf2_eigen.h>
 #else
-#include <tf2_eigen/tf2_eigen.hpp>
+//#include <tf2_eigen/tf2_eigen.hpp>
 #endif
 
-#include <autoware_point_types/types.hpp>
+//#include <autoware_point_types/types.hpp>
 
-#include <sensor_msgs/msg/point_cloud2.hpp>
+//#include <sensor_msgs/msg/point_cloud2.hpp>
+
+#include <pcl/point_cloud.h>
+#include <pcl/point_types.h>
 
 #include <memory>
 #include <string>
 #include <tuple>
 #include <vector>
 
+
+namespace autoware_point_types
+{
+struct PointXYZIRCAEDT
+{
+  float x{0.0F};
+  float y{0.0F};
+  float z{0.0F};
+  std::uint8_t intensity{0U};
+  std::uint8_t return_type{0U};
+  std::uint16_t channel{0U};
+  float azimuth{0.0F};
+  float elevation{0.0F};
+  float distance{0.0F};
+  std::uint32_t time_stamp{0U};
+};
+}
 namespace autoware::lidar_transfusion
 {
+
+
+
 constexpr size_t AFF_MAT_SIZE = 16;  // 4x4 matrix
 constexpr size_t MAX_CLOUD_STEP_SIZE = sizeof(autoware_point_types::PointXYZIRCAEDT);
 
@@ -47,13 +70,21 @@ public:
   explicit VoxelGenerator(
     const DensificationParam & densification_param, const TransfusionConfig & config,
     cudaStream_t & stream);
+//  std::size_t generateSweepPoints(
+//    const sensor_msgs::msg::PointCloud2 & msg, cuda::unique_ptr<float[]> & points_d);
+//  bool enqueuePointCloud(
+//    const sensor_msgs::msg::PointCloud2 & msg, const tf2_ros::Buffer & tf_buffer);
+
+//  void initCloudInfo(const sensor_msgs::msg::PointCloud2 & msg);
+//  std::tuple<const uint32_t, const uint8_t, const uint8_t> getFieldInfo(
+//    const sensor_msgs::msg::PointCloud2 & msg, const std::string & field_name);
+
+
   std::size_t generateSweepPoints(
-    const sensor_msgs::msg::PointCloud2 & msg, cuda::unique_ptr<float[]> & points_d);
+    const pcl::PointCloud<pcl::PointXYZI>::Ptr & pc_ptr, cuda::unique_ptr<float[]> & points_d);
   bool enqueuePointCloud(
-    const sensor_msgs::msg::PointCloud2 & msg, const tf2_ros::Buffer & tf_buffer);
-  void initCloudInfo(const sensor_msgs::msg::PointCloud2 & msg);
-  std::tuple<const uint32_t, const uint8_t, const uint8_t> getFieldInfo(
-    const sensor_msgs::msg::PointCloud2 & msg, const std::string & field_name);
+    const pcl::PointCloud<pcl::PointXYZI>::Ptr & pc_ptr);
+
 
 private:
   std::unique_ptr<PointCloudDensification> pd_ptr_{nullptr};
@@ -63,7 +94,7 @@ private:
   cuda::unique_ptr<float[]> affine_past2current_d_{nullptr};
   std::vector<float> points_;
   cudaStream_t stream_;
-  CloudInfo cloud_info_;
+//  CloudInfo cloud_info_;
   bool is_initialized_{false};
 };
 

+ 82 - 56
src/detection/detection_lidar_transfusion/preprocess/pointcloud_densification.cpp

@@ -12,15 +12,15 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "autoware/lidar_transfusion/preprocess/pointcloud_densification.hpp"
+#include "preprocess/pointcloud_densification.hpp"
 
-#include <pcl_ros/transforms.hpp>
+//#include <pcl_ros/transforms.hpp>
 
-#include <pcl_conversions/pcl_conversions.h>
+//#include <pcl_conversions/pcl_conversions.h>
 #ifdef ROS_DISTRO_GALACTIC
 #include <tf2_eigen/tf2_eigen.h>
 #else
-#include <tf2_eigen/tf2_eigen.hpp>
+//#include <tf2_eigen/tf2_eigen.hpp>
 #endif
 
 #include <boost/optional.hpp>
@@ -31,27 +31,27 @@
 namespace
 {
 
-boost::optional<geometry_msgs::msg::Transform> getTransform(
-  const tf2_ros::Buffer & tf_buffer, const std::string & target_frame_id,
-  const std::string & source_frame_id, const rclcpp::Time & time)
-{
-  try {
-    geometry_msgs::msg::TransformStamped transform_stamped;
-    transform_stamped = tf_buffer.lookupTransform(
-      target_frame_id, source_frame_id, time, rclcpp::Duration::from_seconds(0.5));
-    return transform_stamped.transform;
-  } catch (tf2::TransformException & ex) {
-    RCLCPP_WARN_STREAM(rclcpp::get_logger("lidar_transfusion"), ex.what());
-    return boost::none;
-  }
-}
-
-Eigen::Affine3f transformToEigen(const geometry_msgs::msg::Transform & t)
-{
-  Eigen::Affine3f a;
-  a.matrix() = tf2::transformToEigen(t).matrix().cast<float>();
-  return a;
-}
+//boost::optional<geometry_msgs::msg::Transform> getTransform(
+//  const tf2_ros::Buffer & tf_buffer, const std::string & target_frame_id,
+//  const std::string & source_frame_id, const rclcpp::Time & time)
+//{
+//  try {
+//    geometry_msgs::msg::TransformStamped transform_stamped;
+//    transform_stamped = tf_buffer.lookupTransform(
+//      target_frame_id, source_frame_id, time, rclcpp::Duration::from_seconds(0.5));
+//    return transform_stamped.transform;
+//  } catch (tf2::TransformException & ex) {
+//    RCLCPP_WARN_STREAM(rclcpp::get_logger("lidar_transfusion"), ex.what());
+//    return boost::none;
+//  }
+//}
+
+//Eigen::Affine3f transformToEigen(const geometry_msgs::msg::Transform & t)
+//{
+//  Eigen::Affine3f a;
+//  a.matrix() = tf2::transformToEigen(t).matrix().cast<float>();
+//  return a;
+//}
 
 }  // namespace
 
@@ -62,50 +62,76 @@ PointCloudDensification::PointCloudDensification(
   const DensificationParam & param, cudaStream_t & stream)
 : param_(param), stream_(stream)
 {
+    affine_world2current_ = Eigen::Affine3f::Identity();
 }
 
+
 bool PointCloudDensification::enqueuePointCloud(
-  const sensor_msgs::msg::PointCloud2 & pointcloud_msg, const tf2_ros::Buffer & tf_buffer)
+     const pcl::PointCloud<pcl::PointXYZI>::Ptr & pc_ptr)
 {
-  const auto header = pointcloud_msg.header;
-
-  if (param_.pointcloud_cache_size() > 1) {
-    auto transform_world2current =
-      getTransform(tf_buffer, header.frame_id, param_.world_frame_id(), header.stamp);
-    if (!transform_world2current) {
-      return false;
-    }
-    auto affine_world2current = transformToEigen(transform_world2current.get());
-
-    enqueue(pointcloud_msg, affine_world2current);
-  } else {
-    enqueue(pointcloud_msg, Eigen::Affine3f::Identity());
-  }
+    enqueue(pc_ptr);
+    dequeue();
+    return true;
+}
 
-  dequeue();
+//bool PointCloudDensification::enqueuePointCloud(
+//  const sensor_msgs::msg::PointCloud2 & pointcloud_msg, const tf2_ros::Buffer & tf_buffer)
+//{
+//  const auto header = pointcloud_msg.header;
 
-  return true;
-}
+//  if (param_.pointcloud_cache_size() > 1) {
+//    auto transform_world2current =
+//      getTransform(tf_buffer, header.frame_id, param_.world_frame_id(), header.stamp);
+//    if (!transform_world2current) {
+//      return false;
+//    }
+//    auto affine_world2current = transformToEigen(transform_world2current.get());
 
-void PointCloudDensification::enqueue(
-  const sensor_msgs::msg::PointCloud2 & msg, const Eigen::Affine3f & affine_world2current)
-{
-  affine_world2current_ = affine_world2current;
-  current_timestamp_ = rclcpp::Time(msg.header.stamp).seconds();
+//    enqueue(pointcloud_msg, affine_world2current);
+//  } else {
+//    enqueue(pointcloud_msg, Eigen::Affine3f::Identity());
+//  }
+
+//  dequeue();
+
+//  return true;
+//}
 
-  auto data_d = cuda::make_unique<uint8_t[]>(
-    sizeof(uint8_t) * msg.width * msg.height * msg.point_step / sizeof(uint8_t));
 
-  CHECK_CUDA_ERROR(cudaMemcpyAsync(
-    data_d.get(), msg.data.data(), sizeof(uint8_t) * msg.width * msg.height * msg.point_step,
-    cudaMemcpyHostToDevice, stream_));
+void PointCloudDensification::enqueue(const pcl::PointCloud<pcl::PointXYZI>::Ptr & pc_ptr)
+{
+    auto data_d = cuda::make_unique<uint8_t[]>(
+      sizeof(uint8_t) * pc_ptr->width * pc_ptr->height * sizeof(pcl::PointXYZI) / sizeof(uint8_t));
+
+    CHECK_CUDA_ERROR(cudaMemcpyAsync(
+      data_d.get(), pc_ptr->data(), sizeof(uint8_t) * pc_ptr->width * pc_ptr->height * sizeof(pcl::PointXYZI),
+      cudaMemcpyHostToDevice, stream_));
 
-  PointCloudWithTransform pointcloud = {
-    std::move(data_d), msg.header, msg.width * msg.height, affine_world2current.inverse()};
+    PointCloudWithTransform pointcloud = {
+      std::move(data_d), pc_ptr->width * pc_ptr->height, Eigen::Affine3f::Identity()};
 
-  pointcloud_cache_.push_front(std::move(pointcloud));
+    pointcloud_cache_.push_front(std::move(pointcloud));
 }
 
+//void PointCloudDensification::enqueue(
+//  const sensor_msgs::msg::PointCloud2 & msg, const Eigen::Affine3f & affine_world2current)
+//{
+//  affine_world2current_ = affine_world2current;
+//  current_timestamp_ = rclcpp::Time(msg.header.stamp).seconds();
+
+//  auto data_d = cuda::make_unique<uint8_t[]>(
+//    sizeof(uint8_t) * msg.width * msg.height * msg.point_step / sizeof(uint8_t));
+
+//  CHECK_CUDA_ERROR(cudaMemcpyAsync(
+//    data_d.get(), msg.data.data(), sizeof(uint8_t) * msg.width * msg.height * msg.point_step,
+//    cudaMemcpyHostToDevice, stream_));
+
+//  PointCloudWithTransform pointcloud = {
+//    std::move(data_d), msg.header, msg.width * msg.height, affine_world2current.inverse()};
+
+//  pointcloud_cache_.push_front(std::move(pointcloud));
+//}
+
 void PointCloudDensification::dequeue()
 {
   if (pointcloud_cache_.size() > param_.pointcloud_cache_size()) {

+ 2 - 2
src/detection/detection_lidar_transfusion/preprocess/preprocess_kernel.cu

@@ -28,8 +28,8 @@
  * limitations under the License.
  */
 
-#include "autoware/lidar_transfusion/cuda_utils.hpp"
-#include "autoware/lidar_transfusion/preprocess/preprocess_kernel.hpp"
+#include "cuda_utils.hpp"
+#include "preprocess/preprocess_kernel.hpp"
 
 #include <cstdint>
 

+ 130 - 75
src/detection/detection_lidar_transfusion/preprocess/voxel_generator.cpp

@@ -12,11 +12,11 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "autoware/lidar_transfusion/preprocess/voxel_generator.hpp"
+#include "preprocess/voxel_generator.hpp"
 
-#include "autoware/lidar_transfusion/preprocess/preprocess_kernel.hpp"
+#include "preprocess/preprocess_kernel.hpp"
 
-#include <sensor_msgs/point_cloud2_iterator.hpp>
+//#include <sensor_msgs/point_cloud2_iterator.hpp>
 
 #include <type_traits>
 
@@ -34,90 +34,145 @@ VoxelGenerator::VoxelGenerator(
   affine_past2current_d_ = cuda::make_unique<float[]>(AFF_MAT_SIZE);
 }
 
+//bool VoxelGenerator::enqueuePointCloud(
+//  const sensor_msgs::msg::PointCloud2 & msg, const tf2_ros::Buffer & tf_buffer)
+//{
+//  return pd_ptr_->enqueuePointCloud(msg, tf_buffer);
+//}
+
 bool VoxelGenerator::enqueuePointCloud(
-  const sensor_msgs::msg::PointCloud2 & msg, const tf2_ros::Buffer & tf_buffer)
+  const pcl::PointCloud<pcl::PointXYZI>::Ptr & pc_ptr)
 {
-  return pd_ptr_->enqueuePointCloud(msg, tf_buffer);
+    return pd_ptr_->enqueuePointCloud(pc_ptr);
 }
 
 std::size_t VoxelGenerator::generateSweepPoints(
-  const sensor_msgs::msg::PointCloud2 & msg, cuda::unique_ptr<float[]> & points_d)
+  const pcl::PointCloud<pcl::PointXYZI>::Ptr & pc_ptr, cuda::unique_ptr<float[]> & points_d)
 {
-  if (!is_initialized_) {
-    initCloudInfo(msg);
-    std::stringstream ss;
-    ss << "Input point cloud information: " << std::endl << cloud_info_;
-    RCLCPP_DEBUG_STREAM(rclcpp::get_logger("lidar_transfusion"), ss.str());
-
-    CloudInfo default_cloud_info;
-    if (cloud_info_ != default_cloud_info) {
-      ss << "Expected point cloud information: " << std::endl << default_cloud_info;
-      RCLCPP_ERROR_STREAM(rclcpp::get_logger("lidar_transfusion"), ss.str());
-      throw std::runtime_error("Input point cloud has unsupported format");
-    }
-    is_initialized_ = true;
-  }
-
-  size_t point_counter{0};
-  CHECK_CUDA_ERROR(cudaStreamSynchronize(stream_));
-
-  for (auto pc_cache_iter = pd_ptr_->getPointCloudCacheIter(); !pd_ptr_->isCacheEnd(pc_cache_iter);
-       pc_cache_iter++) {
-    auto sweep_num_points = pc_cache_iter->num_points;
-    if (point_counter + sweep_num_points >= config_.cloud_capacity_) {
-      RCLCPP_WARN_STREAM(
-        rclcpp::get_logger("lidar_transfusion"), "Exceeding cloud capacity. Used "
-                                                   << pd_ptr_->getIdx(pc_cache_iter) << " out of "
-                                                   << pd_ptr_->getCacheSize() << " sweep(s)");
-      break;
+    if (!is_initialized_) {
+      is_initialized_ = true;
     }
-    auto shift = point_counter * config_.num_point_feature_size_;
-
-    auto affine_past2current =
-      pd_ptr_->getAffineWorldToCurrent() * pc_cache_iter->affine_past2world;
-    static_assert(std::is_same<decltype(affine_past2current.matrix()), Eigen::Matrix4f &>::value);
-    static_assert(!Eigen::Matrix4f::IsRowMajor, "matrices should be col-major.");
 
-    float time_lag = static_cast<float>(
-      pd_ptr_->getCurrentTimestamp() - rclcpp::Time(pc_cache_iter->header.stamp).seconds());
-
-    CHECK_CUDA_ERROR(cudaMemcpyAsync(
-      affine_past2current_d_.get(), affine_past2current.data(), AFF_MAT_SIZE * sizeof(float),
-      cudaMemcpyHostToDevice, stream_));
+    size_t point_counter{0};
     CHECK_CUDA_ERROR(cudaStreamSynchronize(stream_));
 
-    pre_ptr_->generateSweepPoints_launch(
-      pc_cache_iter->data_d.get(), sweep_num_points, cloud_info_.point_step, time_lag,
-      affine_past2current_d_.get(), points_d.get() + shift);
-    point_counter += sweep_num_points;
-  }
+    for (auto pc_cache_iter = pd_ptr_->getPointCloudCacheIter(); !pd_ptr_->isCacheEnd(pc_cache_iter);
+         pc_cache_iter++) {
+      auto sweep_num_points = pc_cache_iter->num_points;
+      if (point_counter + sweep_num_points >= config_.cloud_capacity_) {
+          std::cout<<"ldiar_transfusion "<<" Exceeding cloud capacity. Used"
+                                                                << pd_ptr_->getIdx(pc_cache_iter) << " out of "
+                                                                << pd_ptr_->getCacheSize() << " sweep(s)";
+        break;
+      }
+      auto shift = point_counter * config_.num_point_feature_size_;
+
+      auto affine_past2current =
+        pd_ptr_->getAffineWorldToCurrent() * pc_cache_iter->affine_past2world;
+      static_assert(std::is_same<decltype(affine_past2current.matrix()), Eigen::Matrix4f &>::value);
+      static_assert(!Eigen::Matrix4f::IsRowMajor, "matrices should be col-major.");
+
+      float time_lag = 0;
+//      float time_lag = static_cast<float>(
+//        pd_ptr_->getCurrentTimestamp() - rclcpp::Time(pc_cache_iter->header.stamp).seconds());
+
+      CHECK_CUDA_ERROR(cudaMemcpyAsync(
+        affine_past2current_d_.get(), affine_past2current.data(), AFF_MAT_SIZE * sizeof(float),
+        cudaMemcpyHostToDevice, stream_));
+      CHECK_CUDA_ERROR(cudaStreamSynchronize(stream_));
+
+      pre_ptr_->generateSweepPoints_launch(
+        pc_cache_iter->data_d.get(), sweep_num_points, sizeof(pcl::PointXYZI), time_lag,
+        affine_past2current_d_.get(), points_d.get() + shift);
+//      pre_ptr_->generateSweepPoints_launch(
+//        pc_cache_iter->data_d.get(), sweep_num_points, cloud_info_.point_step, time_lag,
+//        affine_past2current_d_.get(), points_d.get() + shift);
+      point_counter += sweep_num_points;
+    }
 
-  return point_counter;
+    return point_counter;
 }
 
-void VoxelGenerator::initCloudInfo(const sensor_msgs::msg::PointCloud2 & msg)
-{
-  std::tie(cloud_info_.x_offset, cloud_info_.x_datatype, cloud_info_.x_count) =
-    getFieldInfo(msg, "x");
-  std::tie(cloud_info_.y_offset, cloud_info_.y_datatype, cloud_info_.y_count) =
-    getFieldInfo(msg, "y");
-  std::tie(cloud_info_.z_offset, cloud_info_.z_datatype, cloud_info_.z_count) =
-    getFieldInfo(msg, "z");
-  std::tie(
-    cloud_info_.intensity_offset, cloud_info_.intensity_datatype, cloud_info_.intensity_count) =
-    getFieldInfo(msg, "intensity");
-  cloud_info_.point_step = msg.point_step;
-  cloud_info_.is_bigendian = msg.is_bigendian;
-}
+//std::size_t VoxelGenerator::generateSweepPoints(
+//  const sensor_msgs::msg::PointCloud2 & msg, cuda::unique_ptr<float[]> & points_d)
+//{
+//  if (!is_initialized_) {
+//    initCloudInfo(msg);
+//    std::stringstream ss;
+//    ss << "Input point cloud information: " << std::endl << cloud_info_;
+//    RCLCPP_DEBUG_STREAM(rclcpp::get_logger("lidar_transfusion"), ss.str());
+
+//    CloudInfo default_cloud_info;
+//    if (cloud_info_ != default_cloud_info) {
+//      ss << "Expected point cloud information: " << std::endl << default_cloud_info;
+//      RCLCPP_ERROR_STREAM(rclcpp::get_logger("lidar_transfusion"), ss.str());
+//      throw std::runtime_error("Input point cloud has unsupported format");
+//    }
+//    is_initialized_ = true;
+//  }
+
+//  size_t point_counter{0};
+//  CHECK_CUDA_ERROR(cudaStreamSynchronize(stream_));
+
+//  for (auto pc_cache_iter = pd_ptr_->getPointCloudCacheIter(); !pd_ptr_->isCacheEnd(pc_cache_iter);
+//       pc_cache_iter++) {
+//    auto sweep_num_points = pc_cache_iter->num_points;
+//    if (point_counter + sweep_num_points >= config_.cloud_capacity_) {
+//      RCLCPP_WARN_STREAM(
+//        rclcpp::get_logger("lidar_transfusion"), "Exceeding cloud capacity. Used "
+//                                                   << pd_ptr_->getIdx(pc_cache_iter) << " out of "
+//                                                   << pd_ptr_->getCacheSize() << " sweep(s)");
+//      break;
+//    }
+//    auto shift = point_counter * config_.num_point_feature_size_;
+
+//    auto affine_past2current =
+//      pd_ptr_->getAffineWorldToCurrent() * pc_cache_iter->affine_past2world;
+//    static_assert(std::is_same<decltype(affine_past2current.matrix()), Eigen::Matrix4f &>::value);
+//    static_assert(!Eigen::Matrix4f::IsRowMajor, "matrices should be col-major.");
+
+//    float time_lag = static_cast<float>(
+//      pd_ptr_->getCurrentTimestamp() - rclcpp::Time(pc_cache_iter->header.stamp).seconds());
+
+//    CHECK_CUDA_ERROR(cudaMemcpyAsync(
+//      affine_past2current_d_.get(), affine_past2current.data(), AFF_MAT_SIZE * sizeof(float),
+//      cudaMemcpyHostToDevice, stream_));
+//    CHECK_CUDA_ERROR(cudaStreamSynchronize(stream_));
+
+//    pre_ptr_->generateSweepPoints_launch(
+//      pc_cache_iter->data_d.get(), sweep_num_points, cloud_info_.point_step, time_lag,
+//      affine_past2current_d_.get(), points_d.get() + shift);
+//    point_counter += sweep_num_points;
+//  }
+
+//  return point_counter;
+//}
+
+//void VoxelGenerator::initCloudInfo(const sensor_msgs::msg::PointCloud2 & msg)
+//{
+//  std::tie(cloud_info_.x_offset, cloud_info_.x_datatype, cloud_info_.x_count) =
+//    getFieldInfo(msg, "x");
+//  std::tie(cloud_info_.y_offset, cloud_info_.y_datatype, cloud_info_.y_count) =
+//    getFieldInfo(msg, "y");
+//  std::tie(cloud_info_.z_offset, cloud_info_.z_datatype, cloud_info_.z_count) =
+//    getFieldInfo(msg, "z");
+//  std::tie(
+//    cloud_info_.intensity_offset, cloud_info_.intensity_datatype, cloud_info_.intensity_count) =
+//    getFieldInfo(msg, "intensity");
+//  cloud_info_.point_step = msg.point_step;
+//  cloud_info_.is_bigendian = msg.is_bigendian;
+//}
+
+//std::tuple<const uint32_t, const uint8_t, const uint8_t> VoxelGenerator::getFieldInfo(
+//  const sensor_msgs::msg::PointCloud2 & msg, const std::string & field_name)
+//{
+//  for (const auto & field : msg.fields) {
+//    if (field.name == field_name) {
+//      return std::make_tuple(field.offset, field.datatype, field.count);
+//    }
+//  }
+//  throw std::runtime_error("Missing field: " + field_name);
+//}
+
 
-std::tuple<const uint32_t, const uint8_t, const uint8_t> VoxelGenerator::getFieldInfo(
-  const sensor_msgs::msg::PointCloud2 & msg, const std::string & field_name)
-{
-  for (const auto & field : msg.fields) {
-    if (field.name == field_name) {
-      return std::make_tuple(field.offset, field.datatype, field.count);
-    }
-  }
-  throw std::runtime_error("Missing field: " + field_name);
-}
 }  // namespace autoware::lidar_transfusion

+ 310 - 0
src/detection/detection_lidar_transfusion/transfusion_trt.cpp

@@ -0,0 +1,310 @@
+// Copyright 2024 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "transfusion_trt.hpp"
+
+#include "preprocess/preprocess_kernel.hpp"
+#include "transfusion_config.hpp"
+
+//#include <autoware/universe_utils/math/constants.hpp>
+
+#include <iostream>
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace autoware::lidar_transfusion
+{
+
+TransfusionTRT::TransfusionTRT(
+  const NetworkParam & network_param, const DensificationParam & densification_param,
+  const TransfusionConfig & config)
+: config_(config)
+{
+  network_trt_ptr_ = std::make_unique<NetworkTRT>(config_);
+
+  network_trt_ptr_->init(
+    network_param.onnx_path(), network_param.engine_path(), network_param.trt_precision());
+  vg_ptr_ = std::make_unique<VoxelGenerator>(densification_param, config_, stream_);
+//  stop_watch_ptr_ =
+//    std::make_unique<autoware::universe_utils::StopWatch<std::chrono::milliseconds>>();
+//  stop_watch_ptr_->tic("processing/inner");
+  initPtr();
+
+  CHECK_CUDA_ERROR(cudaStreamCreate(&stream_));
+}
+
+TransfusionTRT::~TransfusionTRT()
+{
+  if (stream_) {
+    cudaStreamSynchronize(stream_);
+    cudaStreamDestroy(stream_);
+  }
+}
+
+void TransfusionTRT::initPtr()
+{
+  // point cloud to voxels
+  voxel_features_size_ =
+    config_.max_voxels_ * config_.max_num_points_per_pillar_ * config_.num_point_feature_size_;
+  voxel_num_size_ = config_.max_voxels_;
+  voxel_idxs_size_ = config_.max_voxels_ * config_.num_point_values_;
+
+  // output of TRT -- input of post-process
+  cls_size_ = config_.num_proposals_ * config_.num_classes_;
+  box_size_ = config_.num_proposals_ * config_.num_box_values_;
+  dir_cls_size_ = config_.num_proposals_ * 2;  // x, y
+  cls_output_d_ = cuda::make_unique<float[]>(cls_size_);
+  box_output_d_ = cuda::make_unique<float[]>(box_size_);
+  dir_cls_output_d_ = cuda::make_unique<float[]>(dir_cls_size_);
+
+  params_input_d_ = cuda::make_unique<unsigned int>();
+  voxel_features_d_ = cuda::make_unique<float[]>(voxel_features_size_);
+  voxel_num_d_ = cuda::make_unique<unsigned int[]>(voxel_num_size_);
+  voxel_idxs_d_ = cuda::make_unique<unsigned int[]>(voxel_idxs_size_);
+  points_d_ = cuda::make_unique<float[]>(config_.cloud_capacity_ * config_.num_point_feature_size_);
+  pre_ptr_ = std::make_unique<PreprocessCuda>(config_, stream_);
+  post_ptr_ = std::make_unique<PostprocessCuda>(config_, stream_);
+}
+
+
+bool TransfusionTRT::detect(
+  const pcl::PointCloud<pcl::PointXYZI>::Ptr & pc_ptr,
+  std::vector<Box3D> & det_boxes3d, std::unordered_map<std::string, double> & proc_timing)
+{
+    if (!preprocess(pc_ptr)) {
+        std::cout<<"lidar_transfusion"<<"Fail to preprocess and skip to detect."<<std::endl;
+      return false;
+    }
+
+    if (!inference()) {
+        std::cout<<"lidar_transfusion"<<"Fail to inference and skip to detect."<<std::endl;
+      return false;
+    }
+
+
+    if (!postprocess(det_boxes3d)) {
+        std::cout<<"lidar_transfusion"<<"Fail to postprocess and skip to detect."<<std::endl;
+      return false;
+    }
+
+
+    return true;
+
+}
+//bool TransfusionTRT::detect(
+//  const sensor_msgs::msg::PointCloud2 & msg, const tf2_ros::Buffer & tf_buffer,
+//  std::vector<Box3D> & det_boxes3d, std::unordered_map<std::string, double> & proc_timing)
+//{
+//  stop_watch_ptr_->toc("processing/inner", true);
+//  if (!preprocess(msg, tf_buffer)) {
+//    RCLCPP_WARN_STREAM(
+//      rclcpp::get_logger("lidar_transfusion"), "Fail to preprocess and skip to detect.");
+//    return false;
+//  }
+//  proc_timing.emplace(
+//    "debug/processing_time/preprocess_ms", stop_watch_ptr_->toc("processing/inner", true));
+
+//  if (!inference()) {
+//    RCLCPP_WARN_STREAM(
+//      rclcpp::get_logger("lidar_transfusion"), "Fail to inference and skip to detect.");
+//    return false;
+//  }
+//  proc_timing.emplace(
+//    "debug/processing_time/inference_ms", stop_watch_ptr_->toc("processing/inner", true));
+
+//  if (!postprocess(det_boxes3d)) {
+//    RCLCPP_WARN_STREAM(
+//      rclcpp::get_logger("lidar_transfusion"), "Fail to postprocess and skip to detect.");
+//    return false;
+//  }
+//  proc_timing.emplace(
+//    "debug/processing_time/postprocess_ms", stop_watch_ptr_->toc("processing/inner", true));
+
+//  return true;
+//}
+
+
+bool TransfusionTRT::preprocess(const pcl::PointCloud<pcl::PointXYZI>::Ptr & pc_ptr)
+{
+    if (!vg_ptr_->enqueuePointCloud(pc_ptr)) {
+      return false;
+    }
+
+    cuda::clear_async(cls_output_d_.get(), cls_size_, stream_);
+    cuda::clear_async(box_output_d_.get(), box_size_, stream_);
+    cuda::clear_async(dir_cls_output_d_.get(), dir_cls_size_, stream_);
+    cuda::clear_async(voxel_features_d_.get(), voxel_features_size_, stream_);
+    cuda::clear_async(voxel_num_d_.get(), voxel_num_size_, stream_);
+    cuda::clear_async(voxel_idxs_d_.get(), voxel_idxs_size_, stream_);
+    cuda::clear_async(params_input_d_.get(), 1, stream_);
+    cuda::clear_async(
+      points_d_.get(), config_.cloud_capacity_ * config_.num_point_feature_size_, stream_);
+    CHECK_CUDA_ERROR(cudaStreamSynchronize(stream_));
+
+    const auto count = vg_ptr_->generateSweepPoints(pc_ptr, points_d_);
+
+    std::cout<<" lidar_transfusion: "<<"Generated sweep points: " << count<<std::endl;
+//    RCLCPP_DEBUG_STREAM(rclcpp::get_logger("lidar_transfusion"), "Generated sweep points: " << count);
+
+    pre_ptr_->generateVoxels(
+      points_d_.get(), count, params_input_d_.get(), voxel_features_d_.get(), voxel_num_d_.get(),
+      voxel_idxs_d_.get());
+    unsigned int params_input;
+    CHECK_CUDA_ERROR(cudaStreamSynchronize(stream_));
+
+    CHECK_CUDA_ERROR(cudaMemcpyAsync(
+      &params_input, params_input_d_.get(), sizeof(unsigned int), cudaMemcpyDeviceToHost, stream_));
+    CHECK_CUDA_ERROR(cudaStreamSynchronize(stream_));
+
+    if (params_input < config_.min_voxel_size_) {
+        std::cout<<"lidar_transfusion: "
+                  <<"Too few voxels (" << params_input << ") for actual optimization profile ("
+                                      << config_.min_voxel_size_ << ")"<<std::endl;
+//      RCLCPP_WARN_STREAM(
+//        rclcpp::get_logger("lidar_transfusion"),
+//        "Too few voxels (" << params_input << ") for actual optimization profile ("
+//                           << config_.min_voxel_size_ << ")");
+      return false;
+    }
+
+    if (params_input > config_.max_voxels_) {
+      params_input = config_.max_voxels_;
+    }
+    std::cout<<"lidar_transfusion: "<<"Generated input voxels: " << params_input<<std::endl;
+//    RCLCPP_DEBUG_STREAM(
+//      rclcpp::get_logger("lidar_transfusion"), "Generated input voxels: " << params_input);
+
+    network_trt_ptr_->context->setTensorAddress(
+      network_trt_ptr_->getTensorName(NetworkIO::voxels), voxel_features_d_.get());
+    network_trt_ptr_->context->setInputShape(
+      network_trt_ptr_->getTensorName(NetworkIO::voxels),
+      nvinfer1::Dims3{
+        static_cast<int32_t>(params_input), static_cast<int32_t>(config_.max_num_points_per_pillar_),
+        static_cast<int32_t>(config_.num_point_feature_size_)});
+    network_trt_ptr_->context->setTensorAddress(
+      network_trt_ptr_->getTensorName(NetworkIO::num_points), voxel_num_d_.get());
+    network_trt_ptr_->context->setInputShape(
+      network_trt_ptr_->getTensorName(NetworkIO::num_points),
+      nvinfer1::Dims{1, {static_cast<int32_t>(params_input)}});
+    network_trt_ptr_->context->setTensorAddress(
+      network_trt_ptr_->getTensorName(NetworkIO::coors), voxel_idxs_d_.get());
+    network_trt_ptr_->context->setInputShape(
+      network_trt_ptr_->getTensorName(NetworkIO::coors),
+      nvinfer1::Dims2{
+        static_cast<int32_t>(params_input), static_cast<int32_t>(config_.num_point_values_)});
+    network_trt_ptr_->context->setTensorAddress(
+      network_trt_ptr_->getTensorName(NetworkIO::cls_score), cls_output_d_.get());
+    network_trt_ptr_->context->setTensorAddress(
+      network_trt_ptr_->getTensorName(NetworkIO::bbox_pred), box_output_d_.get());
+    network_trt_ptr_->context->setTensorAddress(
+      network_trt_ptr_->getTensorName(NetworkIO::dir_pred), dir_cls_output_d_.get());
+    return true;
+
+}
+
+
+//bool TransfusionTRT::preprocess(
+//  const sensor_msgs::msg::PointCloud2 & msg, const tf2_ros::Buffer & tf_buffer)
+//{
+//  if (!vg_ptr_->enqueuePointCloud(msg, tf_buffer)) {
+//    return false;
+//  }
+
+//  cuda::clear_async(cls_output_d_.get(), cls_size_, stream_);
+//  cuda::clear_async(box_output_d_.get(), box_size_, stream_);
+//  cuda::clear_async(dir_cls_output_d_.get(), dir_cls_size_, stream_);
+//  cuda::clear_async(voxel_features_d_.get(), voxel_features_size_, stream_);
+//  cuda::clear_async(voxel_num_d_.get(), voxel_num_size_, stream_);
+//  cuda::clear_async(voxel_idxs_d_.get(), voxel_idxs_size_, stream_);
+//  cuda::clear_async(params_input_d_.get(), 1, stream_);
+//  cuda::clear_async(
+//    points_d_.get(), config_.cloud_capacity_ * config_.num_point_feature_size_, stream_);
+//  CHECK_CUDA_ERROR(cudaStreamSynchronize(stream_));
+
+//  const auto count = vg_ptr_->generateSweepPoints(msg, points_d_);
+//  RCLCPP_DEBUG_STREAM(rclcpp::get_logger("lidar_transfusion"), "Generated sweep points: " << count);
+
+//  pre_ptr_->generateVoxels(
+//    points_d_.get(), count, params_input_d_.get(), voxel_features_d_.get(), voxel_num_d_.get(),
+//    voxel_idxs_d_.get());
+//  unsigned int params_input;
+//  CHECK_CUDA_ERROR(cudaStreamSynchronize(stream_));
+
+//  CHECK_CUDA_ERROR(cudaMemcpyAsync(
+//    &params_input, params_input_d_.get(), sizeof(unsigned int), cudaMemcpyDeviceToHost, stream_));
+//  CHECK_CUDA_ERROR(cudaStreamSynchronize(stream_));
+
+//  if (params_input < config_.min_voxel_size_) {
+//    RCLCPP_WARN_STREAM(
+//      rclcpp::get_logger("lidar_transfusion"),
+//      "Too few voxels (" << params_input << ") for actual optimization profile ("
+//                         << config_.min_voxel_size_ << ")");
+//    return false;
+//  }
+
+//  if (params_input > config_.max_voxels_) {
+//    params_input = config_.max_voxels_;
+//  }
+//  RCLCPP_DEBUG_STREAM(
+//    rclcpp::get_logger("lidar_transfusion"), "Generated input voxels: " << params_input);
+
+//  network_trt_ptr_->context->setTensorAddress(
+//    network_trt_ptr_->getTensorName(NetworkIO::voxels), voxel_features_d_.get());
+//  network_trt_ptr_->context->setInputShape(
+//    network_trt_ptr_->getTensorName(NetworkIO::voxels),
+//    nvinfer1::Dims3{
+//      static_cast<int32_t>(params_input), static_cast<int32_t>(config_.max_num_points_per_pillar_),
+//      static_cast<int32_t>(config_.num_point_feature_size_)});
+//  network_trt_ptr_->context->setTensorAddress(
+//    network_trt_ptr_->getTensorName(NetworkIO::num_points), voxel_num_d_.get());
+//  network_trt_ptr_->context->setInputShape(
+//    network_trt_ptr_->getTensorName(NetworkIO::num_points),
+//    nvinfer1::Dims{1, {static_cast<int32_t>(params_input)}});
+//  network_trt_ptr_->context->setTensorAddress(
+//    network_trt_ptr_->getTensorName(NetworkIO::coors), voxel_idxs_d_.get());
+//  network_trt_ptr_->context->setInputShape(
+//    network_trt_ptr_->getTensorName(NetworkIO::coors),
+//    nvinfer1::Dims2{
+//      static_cast<int32_t>(params_input), static_cast<int32_t>(config_.num_point_values_)});
+//  network_trt_ptr_->context->setTensorAddress(
+//    network_trt_ptr_->getTensorName(NetworkIO::cls_score), cls_output_d_.get());
+//  network_trt_ptr_->context->setTensorAddress(
+//    network_trt_ptr_->getTensorName(NetworkIO::bbox_pred), box_output_d_.get());
+//  network_trt_ptr_->context->setTensorAddress(
+//    network_trt_ptr_->getTensorName(NetworkIO::dir_pred), dir_cls_output_d_.get());
+//  return true;
+//}
+
+bool TransfusionTRT::inference()
+{
+  auto status = network_trt_ptr_->context->enqueueV3(stream_);
+  CHECK_CUDA_ERROR(cudaStreamSynchronize(stream_));
+
+  if (!status) {
+      std::cout<<"lidar_transfusion"<<"Fail to enqueue and skip to detect."<<std::endl;
+    return false;
+  }
+  return true;
+}
+
+bool TransfusionTRT::postprocess(std::vector<Box3D> & det_boxes3d)
+{
+  CHECK_CUDA_ERROR(post_ptr_->generateDetectedBoxes3D_launch(
+    cls_output_d_.get(), box_output_d_.get(), dir_cls_output_d_.get(), det_boxes3d, stream_));
+  CHECK_CUDA_ERROR(cudaStreamSynchronize(stream_));
+  return true;
+}
+
+}  //  namespace autoware::lidar_transfusion

+ 121 - 0
src/detection/detection_lidar_transfusion/transfusion_trt.hpp

@@ -0,0 +1,121 @@
+// Copyright 2024 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef AUTOWARE__LIDAR_TRANSFUSION__TRANSFUSION_TRT_HPP_
+#define AUTOWARE__LIDAR_TRANSFUSION__TRANSFUSION_TRT_HPP_
+
+#include "cuda_utils.hpp"
+#include "network/network_trt.hpp"
+#include "postprocess/postprocess_kernel.hpp"
+#include "preprocess/pointcloud_densification.hpp"
+#include "preprocess/preprocess_kernel.hpp"
+#include "preprocess/voxel_generator.hpp"
+#include "utils.hpp"
+#include "visibility_control.hpp"
+
+//#include <autoware/universe_utils/system/stop_watch.hpp>
+
+//#include <sensor_msgs/msg/point_cloud2.hpp>
+
+
+#include <pcl/point_cloud.h>
+#include <pcl/point_types.h>
+
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+namespace autoware::lidar_transfusion
+{
+
+class NetworkParam
+{
+public:
+  NetworkParam(std::string onnx_path, std::string engine_path, std::string trt_precision)
+  : onnx_path_(std::move(onnx_path)),
+    engine_path_(std::move(engine_path)),
+    trt_precision_(std::move(trt_precision))
+  {
+  }
+
+  std::string onnx_path() const { return onnx_path_; }
+  std::string engine_path() const { return engine_path_; }
+  std::string trt_precision() const { return trt_precision_; }
+
+private:
+  std::string onnx_path_;
+  std::string engine_path_;
+  std::string trt_precision_;
+};
+
+class LIDAR_TRANSFUSION_PUBLIC TransfusionTRT
+{
+public:
+  explicit TransfusionTRT(
+    const NetworkParam & network_param, const DensificationParam & densification_param,
+    const TransfusionConfig & config);
+  virtual ~TransfusionTRT();
+
+//  bool detect(
+//    const sensor_msgs::msg::PointCloud2 & msg, const tf2_ros::Buffer & tf_buffer,
+//    std::vector<Box3D> & det_boxes3d, std::unordered_map<std::string, double> & proc_timing);
+
+  bool detect(
+    const pcl::PointCloud<pcl::PointXYZI>::Ptr & pc_ptr,
+    std::vector<Box3D> & det_boxes3d, std::unordered_map<std::string, double> & proc_timing);
+
+protected:
+  void initPtr();
+
+//  bool preprocess(const sensor_msgs::msg::PointCloud2 & msg, const tf2_ros::Buffer & tf_buffer);
+
+  bool preprocess(const pcl::PointCloud<pcl::PointXYZI>::Ptr & pc_ptr);
+
+  bool inference();
+
+  bool postprocess(std::vector<Box3D> & det_boxes3d);
+
+  std::unique_ptr<NetworkTRT> network_trt_ptr_{nullptr};
+  std::unique_ptr<VoxelGenerator> vg_ptr_{nullptr};
+//  std::unique_ptr<autoware::universe_utils::StopWatch<std::chrono::milliseconds>> stop_watch_ptr_{
+//    nullptr};
+  std::unique_ptr<PreprocessCuda> pre_ptr_{nullptr};
+  std::unique_ptr<PostprocessCuda> post_ptr_{nullptr};
+  cudaStream_t stream_{nullptr};
+
+  TransfusionConfig config_;
+
+  // input of pre-process
+
+  unsigned int voxel_features_size_{0};
+  unsigned int voxel_num_size_{0};
+  unsigned int voxel_idxs_size_{0};
+  unsigned int cls_size_{0};
+  unsigned int box_size_{0};
+  unsigned int dir_cls_size_{0};
+  cuda::unique_ptr<float[]> points_d_{nullptr};
+  cuda::unique_ptr<unsigned int> params_input_d_{nullptr};
+  cuda::unique_ptr<float[]> voxel_features_d_{nullptr};
+  cuda::unique_ptr<unsigned int[]> voxel_num_d_{nullptr};
+  cuda::unique_ptr<unsigned int[]> voxel_idxs_d_{nullptr};
+  cuda::unique_ptr<float[]> cls_output_d_{nullptr};
+  cuda::unique_ptr<float[]> box_output_d_{nullptr};
+  cuda::unique_ptr<float[]> dir_cls_output_d_{nullptr};
+};
+
+}  // namespace autoware::lidar_transfusion
+
+#endif  // AUTOWARE__LIDAR_TRANSFUSION__TRANSFUSION_TRT_HPP_

+ 37 - 0
src/detection/detection_lidar_transfusion/visibility_control.hpp

@@ -0,0 +1,37 @@
+// Copyright 2024 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef AUTOWARE__LIDAR_TRANSFUSION__VISIBILITY_CONTROL_HPP_
+#define AUTOWARE__LIDAR_TRANSFUSION__VISIBILITY_CONTROL_HPP_
+
+////////////////////////////////////////////////////////////////////////////////
+#if defined(__WIN32)
+#if defined(LIDAR_TRANSFUSION_BUILDING_DLL) || defined(LIDAR_TRANSFUSION_EXPORTS)
+#define LIDAR_TRANSFUSION_PUBLIC __declspec(dllexport)
+#define LIDAR_TRANSFUSION_LOCAL
+#else  // defined(LIDAR_TRANSFUSION_BUILDING_DLL) || defined(LIDAR_TRANSFUSION_EXPORTS)
+#define LIDAR_TRANSFUSION_PUBLIC __declspec(dllimport)
+#define LIDAR_TRANSFUSION_LOCAL
+#endif  // defined(LIDAR_TRANSFUSION_BUILDING_DLL) || defined(LIDAR_TRANSFUSION_EXPORTS)
+#elif defined(__linux__)
+#define LIDAR_TRANSFUSION_PUBLIC __attribute__((visibility("default")))
+#define LIDAR_TRANSFUSION_LOCAL __attribute__((visibility("hidden")))
+#elif defined(__APPLE__)
+#define LIDAR_TRANSFUSION_PUBLIC __attribute__((visibility("default")))
+#define LIDAR_TRANSFUSION_LOCAL __attribute__((visibility("hidden")))
+#else
+#error "Unsupported Build Configuration"
+#endif
+
+#endif  // AUTOWARE__LIDAR_TRANSFUSION__VISIBILITY_CONTROL_HPP_